/*
* #!
* %
* Copyright (C) 2014 - 2016 Humboldt-Universität zu Berlin
* %
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #_
*/
package de.hub.cs.dbis.aeolus.utils;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import backtype.storm.generated.GlobalStreamId;
import backtype.storm.generated.Grouping;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;
/**
* {@link TimestampMerger} merges all incoming streams (all physical substreams from all tasks) over all logical
* producers in ascending timestamp order. Input tuples must be in ascending timestamp order within each incoming
* substream. The timestamp attribute is expected to be of type {@link Number}.
* <p>
* The internal buffer can be flushed by sending an <strong>ID less</strong> zero-attribute tuple via stream
* {@link #FLUSH_STREAM_ID}.
*
* @author mjsax
*/
public class TimestampMerger implements IRichBolt {
    private final static long serialVersionUID = -6930627449574381467L;

    private final static Logger logger = LoggerFactory.getLogger(TimestampMerger.class);

    /** The name of the flush stream. */
    public final static String FLUSH_STREAM_ID = "flush";

    /** The original bolt that consumes a stream of input tuples that are ordered by their timestamp attribute. */
    private final IRichBolt wrappedBolt;
    /** The index of the timestamp attribute ({@code -1} if attribute name or timestamp extractor is used). */
    private final int tsIndex;
    /** The name of the timestamp attribute ({@code null} if attribute index or timestamp extractor is used). */
    private final String tsAttributeName;
    /** The extractor for the timestamp ({@code null} if attribute index or name is used). */
    private final TimeStampExtractor<Tuple> tsExtractor;
    /** Input tuple buffer for merging; created in {@link #prepare(Map, TopologyContext, OutputCollector)}. */
    private StreamMerger<Tuple> merger;

    /**
     * Instantiates a new {@link TimestampMerger} that wraps the given bolt.
     *
     * @param wrappedBolt
     *            The bolt to be wrapped.
     * @param tsIndex
     *            The index of the timestamp attribute.
     */
    public TimestampMerger(IRichBolt wrappedBolt, int tsIndex) {
        assert (wrappedBolt != null);
        assert (tsIndex >= 0);

        logger.debug("Initialize with timestamp index {}", Integer.valueOf(tsIndex));

        this.wrappedBolt = wrappedBolt;
        this.tsIndex = tsIndex;
        this.tsAttributeName = null;
        this.tsExtractor = null;
    }

    /**
     * Instantiates a new {@link TimestampMerger} that wraps the given bolt.
     *
     * @param wrappedBolt
     *            The bolt to be wrapped.
     * @param tsAttributeName
     *            The name of the timestamp attribute.
     */
    public TimestampMerger(IRichBolt wrappedBolt, String tsAttributeName) {
        assert (wrappedBolt != null);
        assert (tsAttributeName != null);

        logger.debug("Initialize with timestamp attribute {}", tsAttributeName);

        this.wrappedBolt = wrappedBolt;
        this.tsIndex = -1;
        this.tsAttributeName = tsAttributeName;
        this.tsExtractor = null;
    }

    /**
     * Instantiates a new {@link TimestampMerger} that wraps the given bolt.
     *
     * @param wrappedBolt
     *            The bolt to be wrapped.
     * @param tsExtractor
     *            The extractor for the timestamp.
     */
    public TimestampMerger(IRichBolt wrappedBolt, TimeStampExtractor<Tuple> tsExtractor) {
        assert (wrappedBolt != null);
        assert (tsExtractor != null);

        logger.debug("Initialize with timestamp extractor");

        this.wrappedBolt = wrappedBolt;
        this.tsIndex = -1;
        this.tsAttributeName = null;
        this.tsExtractor = tsExtractor;
    }

    /**
     * Collects the task IDs of all producer components, creates the internal {@link StreamMerger} configured with
     * the timestamp source chosen at construction time (index, attribute name, or extractor), and then prepares the
     * wrapped bolt with the unmodified arguments.
     *
     * @param arg0
     *            The configuration; forwarded to the wrapped bolt.
     * @param arg1
     *            The topology context; used to look up the producer tasks and forwarded to the wrapped bolt.
     * @param arg2
     *            The output collector; forwarded to the wrapped bolt.
     */
    @Override
    public void prepare(@SuppressWarnings("rawtypes") Map arg0, TopologyContext arg1, OutputCollector arg2) {
        // for each logical input stream (ie, each producer bolt), we get an input partition for each of its tasks
        // NOTE(review): a producer subscribed via several streams (eg, data stream plus flush stream) contributes
        // its task IDs once per stream; presumably StreamMerger tolerates duplicate IDs -- confirm
        LinkedList<Integer> taskIds = new LinkedList<Integer>();
        for(Entry<GlobalStreamId, Grouping> inputStream : arg1.getThisSources().entrySet()) {
            taskIds.addAll(arg1.getComponentTasks(inputStream.getKey().get_componentId()));
        }
        logger.debug("Detected producer tasks: {}", taskIds);

        // exactly one of the three timestamp sources is set by the constructors
        if(this.tsIndex != -1) {
            assert (this.tsAttributeName == null && this.tsExtractor == null);
            this.merger = new StreamMerger<Tuple>(taskIds, this.tsIndex);
        } else if(this.tsAttributeName != null) {
            assert (this.tsExtractor == null);
            this.merger = new StreamMerger<Tuple>(taskIds, this.tsAttributeName);
        } else {
            assert (this.tsExtractor != null);
            this.merger = new StreamMerger<Tuple>(taskIds, this.tsExtractor);
        }

        this.wrappedBolt.prepare(arg0, arg1, arg2);
    }

    /**
     * Handles a single input tuple. A flush tuple disables the input partition of its source task; any other tuple
     * is added to the internal buffer under its source task. Afterwards, every tuple the merger currently releases
     * is handed to the wrapped bolt. If no open partition remains, the received tuple itself is forwarded to the
     * wrapped bolt as well.
     *
     * @param tuple
     *            The input tuple to be processed.
     */
    @Override
    public void execute(Tuple tuple) {
        // box the partition key once; it is needed by whichever branch is taken
        final Integer sourceTask = Integer.valueOf(tuple.getSourceTask());

        if(isFlushTuple(tuple)) {
            this.merger.disablePartition(sourceTask);
        } else {
            logger.trace("Adding tuple to internal buffer tuple: {}", tuple);
            this.merger.addTuple(sourceTask, tuple);
        }

        Tuple t;
        while((t = this.merger.getNextTuple()) != null) {
            // log the extracted tuple, not the tuple that triggered this call
            logger.trace("Extrated tuple from internal buffer for processing: {}", t);
            this.wrappedBolt.execute(t);
        }

        if(this.merger.getNumberOpenPartitions() == 0) {
            assert (TimestampMerger.FLUSH_STREAM_ID.equals(tuple.getSourceStreamId()));
            this.wrappedBolt.execute(tuple);
        }
    }

    /**
     * Checks whether the given tuple is a flush request, ie, whether it was received via {@link #FLUSH_STREAM_ID}
     * and carries no payload.
     *
     * @param tuple
     *            The tuple to be checked.
     *
     * @return {@code true} if the tuple was sent via the flush stream and either has no attributes or has
     *         {@code null} as its first attribute; {@code false} otherwise
     */
    private static boolean isFlushTuple(Tuple tuple) {
        if(!TimestampMerger.FLUSH_STREAM_ID.equals(tuple.getSourceStreamId())) {
            return false;
        }
        // the size check must come first: a zero-attribute tuple has no attribute at index 0 to read;
        // the null check is kept so that flush tuples carrying a single null attribute are still recognized
        return tuple.size() == 0 || tuple.getValue(0) == null;
    }

    /**
     * Delegates the declaration of output fields to the wrapped bolt.
     *
     * @param arg0
     *            The declarer that is handed to the wrapped bolt.
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer arg0) {
        this.wrappedBolt.declareOutputFields(arg0);
    }

    /**
     * Returns the component configuration of the wrapped bolt.
     *
     * @return whatever the wrapped bolt returns from {@code getComponentConfiguration()}
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return this.wrappedBolt.getComponentConfiguration();
    }

    /**
     * Delegates cleanup to the wrapped bolt.
     */
    @Override
    public void cleanup() {
        this.wrappedBolt.cleanup();
    }
}