package storm.applications.bolt;

import backtype.storm.Config;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import storm.applications.constants.BaseConstants.BaseConf;
import storm.applications.constants.TrendingTopicsConstants.Field;
import storm.applications.tools.NthLastModifiedTimeTracker;
import storm.applications.tools.SlidingWindowCounter;
import storm.applications.util.stream.TupleUtils;

/**
 * This bolt performs rolling counts of incoming objects, i.e. sliding window based counting.
 * <p>
 * The bolt is configured by two parameters, the length of the sliding window in seconds (which influences the output
 * data of the bolt, i.e. how it will count objects) and the emit frequency in seconds (which influences how often the
 * bolt will output the latest window counts). For instance, if the window length is set to an equivalent of five
 * minutes and the emit frequency to one minute, then the bolt will output the latest five-minute sliding window every
 * minute.
 * <p>
 * The number of window slots is the window length divided by the emit frequency using integer division, so a window
 * length that is not a multiple of the emit frequency is effectively rounded down to the nearest multiple. The emit
 * frequency must be positive and must not exceed the window length; otherwise {@link #initialize()} fails with an
 * {@link IllegalArgumentException}.
 * <p>
 * The bolt emits a rolling count tuple per object, consisting of the object itself, its latest rolling count, and the
 * actual duration of the sliding window. The latter is included in case the expected sliding window length (as
 * configured by the user) is different from the actual length, e.g. due to high system load. Note that the actual
 * window length is tracked and calculated for the window, and not individually for each object within a window.
 * <p>
 * Note: During the startup phase you will usually observe that the bolt warns you about the actual sliding window
 * length being smaller than the expected length. This behavior is expected and is caused by the way the sliding window
 * counts are initially "loaded up". You can safely ignore this warning during startup (e.g. you will see this warning
 * during the first ~ five minutes of startup time if the window length is set to five minutes).
 */
public class RollingCountBolt extends AbstractBolt {
    private static final Logger LOG = LoggerFactory.getLogger(RollingCountBolt.class);

    private static final String WINDOW_LENGTH_WARNING_TEMPLATE =
            "Actual window length is %d seconds when it should be %d seconds"
                    + " (you can safely ignore this warning during the startup phase)";

    /** Emit frequency used by the no-argument constructor. */
    private static final int DEFAULT_EMIT_FREQUENCY_IN_SECONDS = 60;

    /** Window length used when the configuration does not provide one. */
    private static final int DEFAULT_WINDOW_LENGTH_IN_SECONDS = 300;

    private SlidingWindowCounter<Object> counter;
    private int windowLengthInSeconds;
    private int emitFrequencyInSeconds;
    private NthLastModifiedTimeTracker lastModifiedTracker;

    /**
     * Creates a bolt that emits its window counts every 60 seconds.
     */
    public RollingCountBolt() {
        this(DEFAULT_EMIT_FREQUENCY_IN_SECONDS);
    }

    /**
     * Creates a bolt with the given emit frequency.
     *
     * @param emitFrequencyInSeconds how often, in seconds, the current window counts are emitted; the value is
     *                               validated in {@link #initialize()} and must be positive
     */
    public RollingCountBolt(int emitFrequencyInSeconds) {
        this.emitFrequencyInSeconds = emitFrequencyInSeconds;
    }

    /**
     * Reads the window length from the configuration (default 300 seconds) and creates the sliding window counter
     * and the modification-time tracker, both sized to {@code windowLength / emitFrequency} slots.
     *
     * @throws IllegalArgumentException if the emit frequency is not positive, or if the configured window length is
     *                                  shorter than the emit frequency (which would yield no window slots)
     */
    @Override
    public void initialize() {
        // Reject a zero or negative frequency up front: zero would otherwise surface as a bare
        // "/ by zero" ArithmeticException from the division below.
        if (emitFrequencyInSeconds <= 0) {
            throw new IllegalArgumentException(
                    "Emit frequency in seconds must be positive (you requested " + emitFrequencyInSeconds + ")");
        }

        windowLengthInSeconds = config.getInt(
                String.format(BaseConf.ROLLING_COUNT_WINDOW_LENGTH, configPrefix),
                DEFAULT_WINDOW_LENGTH_IN_SECONDS);

        // Integer division: any remainder of the window length is dropped.
        int numChunks = windowLengthInSeconds / emitFrequencyInSeconds;
        if (numChunks < 1) {
            throw new IllegalArgumentException(
                    "Window length in seconds (" + windowLengthInSeconds
                            + ") must be at least the emit frequency in seconds ("
                            + emitFrequencyInSeconds + ")");
        }

        counter = new SlidingWindowCounter<>(numChunks);
        lastModifiedTracker = new NthLastModifiedTimeTracker(numChunks);
    }

    /**
     * Handles one incoming tuple: a tick tuple triggers emission of the current window counts, any other tuple is
     * counted and acked. Tick tuples are not acked here.
     *
     * @param tuple the incoming tuple
     */
    @Override
    public void execute(Tuple tuple) {
        if (TupleUtils.isTickTuple(tuple)) {
            LOG.info("Received tick tuple, triggering emit of current window counts");
            emitCurrentWindowCounts();
        } else {
            countObjAndAck(tuple);
        }
    }

    /**
     * Fetches the current counts (advancing the window), records this emission in the modification tracker, warns
     * if the measured window length differs from the configured one, and emits one tuple per counted object.
     */
    private void emitCurrentWindowCounts() {
        Map<Object, Long> counts = counter.getCountsThenAdvanceWindow();
        // Read the elapsed time before marking the tracker, so the value describes the window just closed.
        int actualWindowLengthInSeconds = lastModifiedTracker.secondsSinceOldestModification();
        lastModifiedTracker.markAsModified();
        if (actualWindowLengthInSeconds != windowLengthInSeconds) {
            LOG.warn(String.format(WINDOW_LENGTH_WARNING_TEMPLATE, actualWindowLengthInSeconds,
                    windowLengthInSeconds));
        }
        emit(counts, actualWindowLengthInSeconds);
    }

    /**
     * Emits one {@code (object, count, actualWindowLengthInSeconds)} tuple per entry of {@code counts}. The tuples
     * are emitted without an anchor tuple.
     *
     * @param counts                      the per-object counts for the current window
     * @param actualWindowLengthInSeconds the measured window length attached to every emitted tuple
     */
    private void emit(Map<Object, Long> counts, int actualWindowLengthInSeconds) {
        for (Entry<Object, Long> entry : counts.entrySet()) {
            Object obj = entry.getKey();
            Long count = entry.getValue();
            collector.emit(new Values(obj, count, actualWindowLengthInSeconds));
        }
    }

    /**
     * Increments the count of the object held in the tuple's first field and acks the tuple.
     *
     * @param tuple the data tuple whose first value is the object to count
     */
    private void countObjAndAck(Tuple tuple) {
        Object obj = tuple.getValue(0);
        counter.incrementCount(obj);
        collector.ack(tuple);
    }

    /**
     * Returns the fields of the tuples this bolt emits: object, count and window length.
     *
     * @return the output fields, in emission order
     */
    @Override
    public Fields getDefaultFields() {
        return new Fields(Field.OBJ, Field.COUNT, Field.WINDOW_LENGTH);
    }

    /**
     * Sets the tick tuple frequency for this component to the emit frequency.
     *
     * @return a component configuration containing only {@link Config#TOPOLOGY_TICK_TUPLE_FREQ_SECS}
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        Map<String, Object> conf = new HashMap<>();
        conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, emitFrequencyInSeconds);
        return conf;
    }
}