package org.calrissian.flowbox.bolt; import backtype.storm.Config; import backtype.storm.task.OutputCollector; import backtype.storm.task.TopologyContext; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseRichBolt; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Tuple; import backtype.storm.tuple.Values; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import org.calrissian.flowbox.model.*; import org.calrissian.flowbox.support.Window; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; import static org.calrissian.flowbox.Constants.FLOW_OP_IDX; import static org.calrissian.flowbox.Constants.LAST_STREAM; import static org.calrissian.flowbox.Constants.STREAM_NAME; /** * A basic windowing bolt that uses partitioned in-memory deques. A trigger algorithm is applied to the windows * based on the the trigger policy. The window is kept to a specific size based on the eviction policy. * * This class is an attempt at porting over the Sliding Window from IBM's InfoSphere Streams: * {@see http://pic.dhe.ibm.com/infocenter/streams/v3r2/index.jsp?topic=%2Fcom.ibm.swg.im.infosphere.streams.spl-language-specification.doc%2Fdoc%2Fslidingwindows.html} * * This class can also be used to implement a tumbling window, whereby COUNT policies are used both for eviction and triggering * with the same threshold for each. */ public class WindowBolt extends BaseRichBolt { String ruleStream; Map<String, Flow> rulesMap; Map<String, Cache<String, Window>> buffers; OutputCollector collector; public WindowBolt(String ruleStream) { this.ruleStream = ruleStream; } @Override public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) { this.collector = outputCollector; rulesMap = new HashMap<String, Flow>(); buffers = new HashMap<String, Cache<String, Window>>(); } @Override public void execute(Tuple tuple) { /** * Update rules if necessary */ if(ruleStream.equals(tuple.getSourceStreamId())) { Set<Flow> rules = (Set<Flow>) tuple.getValue(0); Set<String> rulesToRemove = new HashSet<String>(); // find deleted rules and remove them for(Flow rule : rulesMap.values()) { if(!rules.contains(rule)) rulesToRemove.add(rule.getId()); } /** * Remove any deleted rules */ for(String ruleId : rulesToRemove) { rulesMap.remove(ruleId); buffers.remove(ruleId); } for(Flow rule : rules) { /** * If a rule has been updated, let's drop the window windows and start out fresh. */ if(rulesMap.get(rule.getId()) != null && !rulesMap.get(rule.getId()).equals(rule) || !rulesMap.containsKey(rule.getId())) { rulesMap.put(rule.getId(), rule); buffers.remove(rule.getId()); } } } else if("__system".equals(tuple.getSourceComponent()) && "__tick".equals(tuple.getSourceStreamId())) { /** * Don't bother evaluating if we don't even have any rules */ if(rulesMap.size() > 0) { for(Flow rule : rulesMap.values()) { AggregateOp op = (AggregateOp) rule.getStreams().iterator().next().getFlowOps().get(0); /** * If we need to trigger any time-based policies, let's do that here. */ if(op.getTriggerPolicy() == Policy.TIME) { Cache<String, Window> buffersForRule = buffers.get(rule.getId()); if(buffersForRule != null) { for (Window buffer : buffersForRule.asMap().values()) { /** * If we need to evict any buffered items, let's do it here */ if(op.getEvictionPolicy() == Policy.TIME) buffer.timeEvict(op.getEvictionThreshold()); if (buffer.getTriggerTicks() == op.getTriggerThreshold()) { collector.emit(new Values(rule.getId(), buffer)); System.out.println("Just emitted buffer: " + buffer); buffer.resetTriggerTicks(); } else { buffer.incrTriggerTicks(); } } } } } } } else { /** * Short circuit if we don't have any rules. */ if (rulesMap.size() > 0) { /** * If we've received an event for an flowbox rule, we need to act on it here. Purposefully, the groupBy * fields have been hashed so that we know the buffer exists on this current bolt for the given rule. * * The hashKey was added to the "fieldsGrouping" in an attempt to share pointers where possible. Different * rules with like fields groupings can store the items in their windows on the same node. */ String ruleId = tuple.getString(0); String hash = tuple.getString(1); Event event = (Event) tuple.getValue(2); int idx = tuple.getIntegerByField(FLOW_OP_IDX); idx++; String streamName = tuple.getStringByField(STREAM_NAME); String lastStream = tuple.getStringByField(LAST_STREAM); Flow rule = rulesMap.get(ruleId); AggregateOp op = (AggregateOp) rule.getStream(streamName).getFlowOps().get(idx); Cache<String, Window> buffersForRule = buffers.get(rule.getId()); Window buffer; if (buffersForRule != null) { buffer = buffersForRule.getIfPresent(hash); if (buffer != null) { // if we have a buffer already, process it /** * If we need to evict any buffered items, let's do it here */ if(op.getEvictionPolicy() == Policy.TIME) buffer.timeEvict(op.getEvictionThreshold()); /** * Perform count-based eviction if necessary */ else if (op.getEvictionPolicy() == Policy.COUNT) { if (buffer.size() == op.getEvictionThreshold()) buffer.expire(); } } } else { buffersForRule = CacheBuilder.newBuilder().expireAfterAccess(60, TimeUnit.MINUTES).build(); // just in case we get some rogue data, we don't wan ti to sit for too long. buffer = op.getEvictionPolicy() == Policy.TIME ? new Window(hash) : new Window(hash, op.getEvictionThreshold()); buffersForRule.put(hash, buffer); buffers.put(rule.getId(), buffersForRule); } buffer.add(event, lastStream); /** * Perform count-based trigger if necessary */ if (op.getTriggerPolicy() == Policy.COUNT) { buffer.incrTriggerTicks(); if(buffer.getTriggerTicks() == op.getTriggerThreshold()) { collector.emit(new Values(ruleId, buffer)); System.out.println("Just emitted buffer: " + buffer); buffer.resetTriggerTicks(); } } } } } @Override public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) { outputFieldsDeclarer.declare(new Fields("ruleId", "events")); } @Override public Map<String, Object> getComponentConfiguration() { Map<String,Object> config = new HashMap<String, Object>(); config.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 1); return config; } }