/*
 * Copyright (C) 2014 The Calrissian Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.calrissian.flowmix.core.storm.bolt;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import org.calrissian.flowmix.api.Aggregator;
import org.calrissian.flowmix.api.Flow;
import org.calrissian.flowmix.api.Policy;
import org.calrissian.flowmix.core.model.FlowInfo;
import org.calrissian.flowmix.core.model.StreamDef;
import org.calrissian.flowmix.core.model.event.AggregatedEvent;
import org.calrissian.flowmix.core.model.op.AggregateOp;
import org.calrissian.flowmix.core.model.op.FlowOp;
import org.calrissian.flowmix.core.model.op.PartitionOp;
import org.calrissian.flowmix.core.support.window.AggregatorWindow;

import static org.apache.commons.lang.StringUtils.join;
import static org.calrissian.flowmix.api.Aggregator.GROUP_BY;
import static org.calrissian.flowmix.api.Aggregator.GROUP_BY_DELIM;
import static org.calrissian.flowmix.api.builder.FlowmixBuilder.declareOutputStreams;
import static org.calrissian.flowmix.api.builder.FlowmixBuilder.fields;
import static org.calrissian.flowmix.core.Constants.FLOW_LOADER_STREAM;
import static org.calrissian.flowmix.core.support.Utils.exportsToOtherStreams;
import static org.calrissian.flowmix.core.support.Utils.getFlowOpFromStream;
import static org.calrissian.flowmix.core.support.Utils.hasNextOutput;

/**
 * Applies {@link AggregateOp} operators to the events flowing through a stream. Events are
 * accumulated in per-partition {@link AggregatorWindow}s and the aggregate is emitted whenever a
 * count-based or time-based trigger policy fires. Time-based triggers and evictions are driven by
 * the "tick" stream, and windows are held in Guava caches so that idle partitions expire on their
 * own.
 */
public class AggregatorBolt extends BaseRichBolt {

  Map<String, Flow> flows;                              // active flow definitions, keyed by flow id
  Map<String, Cache<String, AggregatorWindow>> windows; // keyed by flowId\0stream\0idx; each cache maps a partition hash to its window
  OutputCollector collector;

  @Override
  public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
    this.collector = outputCollector;
    flows = new HashMap<String, Flow>();
    windows = new HashMap<String, Cache<String, AggregatorWindow>>();
  }

  @Override
  public void execute(Tuple tuple) {

    if(FLOW_LOADER_STREAM.equals(tuple.getSourceStreamId())) {

      for(Flow flow : (Collection<Flow>) tuple.getValue(0))
        flows.put(flow.getId(), flow);

    } else if("tick".equals(tuple.getSourceStreamId())) {

      /**
       * Don't bother evaluating if we don't even have any flows
       */
      if(flows.size() > 0) {
        for(Flow flow : flows.values()) {
          for(StreamDef curStream : flow.getStreams()) {
            int idx = 0;
            for(FlowOp curFlowOp : curStream.getFlowOps()) {
              if(curFlowOp instanceof AggregateOp) {
                AggregateOp op = (AggregateOp) curFlowOp;

                /**
                 * If we need to trigger any time-based policies, let's do that here
                 */
                if(op.getTriggerPolicy() == Policy.TIME || op.getEvictionPolicy() == Policy.TIME) {

                  Cache<String, AggregatorWindow> windowCache =
                          windows.get(flow.getId() + "\0" + curStream.getName() + "\0" + idx);

                  if(windowCache != null) {
                    for(AggregatorWindow window : windowCache.asMap().values()) {
                      if(op.getEvictionPolicy() == Policy.TIME)
                        window.timeEvict(op.getEvictionThreshold());

                      if(op.getTriggerPolicy() == Policy.TIME)
                        window.incrTriggerTicks();

                      if(window.getTriggerTicks() == op.getTriggerThreshold())
                        emitAggregate(flow, op, curStream.getName(), idx, window);
                    }
                  }
                }
              }
              idx++;
            }
          }
        }
      }
    } else if(!"tick".equals(tuple.getSourceStreamId())) {

      FlowInfo flowInfo = new FlowInfo(tuple);

      Flow flow = flows.get(flowInfo.getFlowId());

      if(flow != null) {
        AggregateOp op = getFlowOpFromStream(flow, flowInfo.getStreamName(), flowInfo.getIdx());

        Cache<String, AggregatorWindow> windowCache =
                windows.get(flowInfo.getFlowId() + "\0" + flowInfo.getStreamName() + "\0" + flowInfo.getIdx());

        AggregatorWindow window = null;
        if(windowCache != null) {
          window = windowCache.getIfPresent(flowInfo.getPartition());
          if(window != null) {    // if we have a window already constructed, process it

            /**
             * If we need to evict any buffered items, let's do that here
             */
            if(op.getEvictionPolicy() == Policy.TIME)
              window.timeEvict(op.getEvictionThreshold());
          } else {
            window = buildWindow(op, flowInfo.getStreamName(), flowInfo.getIdx(), flowInfo.getPartition(),
                    flowInfo.getFlowId(), windowCache);
          }
        } else {
          windowCache = CacheBuilder.newBuilder()
                  .expireAfterWrite(op.getWindowEvictMillis(), TimeUnit.MILLISECONDS)
                  .build();
          window = buildWindow(op, flowInfo.getStreamName(), flowInfo.getIdx(), flowInfo.getPartition(),
                  flowInfo.getFlowId(), windowCache);
        }

        window.add(flowInfo.getEvent(), flowInfo.getPreviousStream());
        windowCache.put(flowInfo.getPartition(), window); // window eviction is on writes, so we need to write to the window to reset our expiration.

        /**
         * Perform count-based trigger if necessary
         */
        if(op.getTriggerPolicy() == Policy.COUNT) {
          window.incrTriggerTicks();

          if(window.getTriggerTicks() == op.getTriggerThreshold())
            emitAggregate(flow, op, flowInfo.getStreamName(), flowInfo.getIdx(), window);
        }
      }
    }

    collector.ack(tuple);
  }

  private AggregatorWindow buildWindow(AggregateOp op, String stream, int idx, String hash, String flowId,
                                       Cache<String, AggregatorWindow> windowCache) {
    try {
      Aggregator agg = op.getAggregatorClass().newInstance();
      AggregatorWindow window = op.getEvictionPolicy() == Policy.TIME || op.getEvictionPolicy() == Policy.TIME_DELTA_LT ?
              new AggregatorWindow(agg, hash) :
              new AggregatorWindow(agg, hash, op.getEvictionThreshold());

      Map<String, String> aggConfig = op.getConfig();

      // if the previous op was a partition, pass its fields along so the aggregator can group by them
      if(flows.get(flowId).getStream(stream).getFlowOps().get(idx - 1) instanceof PartitionOp) {
        PartitionOp partitionOp = (PartitionOp) flows.get(flowId).getStream(stream).getFlowOps().get(idx - 1);
        aggConfig.put(GROUP_BY, join(partitionOp.getFields(), GROUP_BY_DELIM));
      }

      agg.configure(aggConfig);

      windowCache.put(hash, window);
      windows.put(flowId + "\0" + stream + "\0" + idx, windowCache);
      return window;
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  private void emitAggregate(Flow flow, AggregateOp op, String stream, int idx, AggregatorWindow window) {
    Collection<AggregatedEvent> eventsToEmit = window.getAggregate();

    String nextStream = idx + 1 < flow.getStream(stream).getFlowOps().size() ?
            flow.getStream(stream).getFlowOps().get(idx + 1).getComponentName() : "output";

    if(hasNextOutput(flow, stream, nextStream)) {
      for(AggregatedEvent event : eventsToEmit) {
        String previousStream = event.getPreviousStream() != null ? event.getPreviousStream() : stream;

        // Note: if the aggregated event isn't keeping the previous stream, it's possible it could be lost
        collector.emit(nextStream, new Values(flow.getId(), event.getEvent(), idx, stream, previousStream));
      }
    }

    // send to any other streams that are configured (aside from output)
    if(exportsToOtherStreams(flow, stream, nextStream)) {
      for(String output : flow.getStream(stream).getOutputs()) {
        for(AggregatedEvent event : eventsToEmit) {
          String outputComponent = flow.getStream(output).getFlowOps().get(0).getComponentName();
          collector.emit(outputComponent, new Values(flow.getId(), event.getEvent(), -1, output, stream));
        }
      }
    }

    if(op.isClearOnTrigger())
      window.clear();

    window.resetTriggerTicks();
  }

  @Override
  public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
    declareOutputStreams(outputFieldsDeclarer, fields);
  }
}
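
/*
 * Usage note (illustrative only, not part of the original source): this bolt is not normally wired
 * into a topology by hand. FlowmixBuilder assembles it alongside the other Flowmix bolts and binds
 * the flow-loader and tick streams it listens on. A minimal sketch of that wiring follows, assuming
 * the fluent builder API and the example components shown in the Flowmix README
 * (SimpleFlowLoaderSpout, MockEventGeneratorSpout, PrinterBolt); substitute your own spouts/bolts
 * and verify the method names against the Flowmix version in use:
 *
 *   StormTopology topology = new FlowmixBuilder()
 *       .setFlowLoader(new SimpleFlowLoaderSpout(flows, 60000))
 *       .setEventsLoader(new MockEventGeneratorSpout(events, 10))
 *       .setOutputBolt(new PrinterBolt())
 *       .setParallelismHint(6)
 *       .create()
 *       .createTopology();
 */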