/*
* Copyright (C) 2014 The Calrissian Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.calrissian.flowmix.core.storm.bolt;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import org.calrissian.flowmix.api.Flow;
import org.calrissian.flowmix.core.model.FlowInfo;
import org.calrissian.flowmix.api.Policy;
import org.calrissian.flowmix.core.model.StreamDef;
import org.calrissian.flowmix.core.model.op.FlowOp;
import org.calrissian.flowmix.core.model.op.SortOp;
import org.calrissian.flowmix.core.support.EventSortByComparator;
import org.calrissian.flowmix.core.support.window.SortedWindow;
import org.calrissian.flowmix.core.support.window.Window;
import org.calrissian.flowmix.core.support.window.WindowItem;
import static java.util.Collections.singleton;
import static org.calrissian.flowmix.api.builder.FlowmixBuilder.declareOutputStreams;
import static org.calrissian.flowmix.api.builder.FlowmixBuilder.fields;
import static org.calrissian.flowmix.core.Constants.FLOW_LOADER_STREAM;
import static org.calrissian.flowmix.core.support.Utils.exportsToOtherStreams;
import static org.calrissian.flowmix.core.support.Utils.getNextStreamFromFlowInfo;
import static org.calrissian.flowmix.core.support.Utils.hasNextOutput;
/**
* Sorts a window. This is similar to the Sort operator in InfoSphere Streams.
*
* As in InfoSphere Streams:
*
* - Sliding windows only allow count-based triggers and count-based expiration
* - Tumbling windows allow count-, delta-, and time-based triggers
*
* TODO: Need to begin enforcing the different accepted trigger vs. eviction policies
*/
public class SortBolt extends BaseRichBolt {

    /** Source stream id on which tick tuples arrive. */
    private static final String TICK_STREAM = "tick";

    /** Separator between flow id, stream name and op index in a {@link #windows} key. */
    private static final String KEY_SEPARATOR = "\0";

    /** Currently loaded flows, keyed by flow id. */
    Map<String, Flow> flowMap;

    /**
     * Window caches, one per sort operator. Keys are produced by
     * {@link #windowKey(String, String, int)}; each cache maps a partition to its sorted window.
     */
    Map<String, Cache<String, SortedWindow>> windows;

    /** Collector used to emit sorted items and to ack incoming tuples. */
    OutputCollector collector;

    /**
     * Stores the collector and starts with no flows and no windows.
     */
    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.collector = outputCollector;
        flowMap = new HashMap<String, Flow>();
        windows = new HashMap<String, Cache<String, SortedWindow>>();
    }

    /**
     * Dispatches the tuple on its source stream id: flow-loader tuples update the set of flows,
     * tick tuples drive time-based eviction/triggering, and anything else is treated as an event
     * to be added to a window. The tuple is acked afterwards.
     *
     * @param tuple the incoming tuple
     */
    @Override
    public void execute(Tuple tuple) {
        String sourceStream = tuple.getSourceStreamId();
        if (FLOW_LOADER_STREAM.equals(sourceStream)) {
            updateFlows(tuple);
        } else if (TICK_STREAM.equals(sourceStream)) {
            processTick();
        } else if (!flowMap.isEmpty()) {
            // Short circuit above: events are ignored while no flows are loaded.
            processEvent(tuple);
        }
        collector.ack(tuple);
    }

    /**
     * Replaces the known flows with the collection carried in field 0 of the tuple. Flows that
     * disappeared are forgotten; flows that are new or changed are (re)registered. In both cases
     * every window cache belonging to the flow is dropped so it starts out fresh.
     */
    private void updateFlows(Tuple tuple) {
        @SuppressWarnings("unchecked") // the flow loader stream carries a Collection<Flow> in field 0
        Collection<Flow> flows = (Collection<Flow>) tuple.getValue(0);

        // Collect first, remove afterwards: flowMap must not be modified while iterating it.
        Set<String> removedFlowIds = new HashSet<String>();
        for (Flow flow : flowMap.values()) {
            if (!flows.contains(flow)) {
                removedFlowIds.add(flow.getId());
            }
        }
        for (String flowId : removedFlowIds) {
            flowMap.remove(flowId);
            dropWindowsForFlow(flowId);
        }

        for (Flow flow : flows) {
            Flow existing = flowMap.get(flow.getId());
            if (existing == null || !existing.equals(flow)) {
                flowMap.put(flow.getId(), flow);
                dropWindowsForFlow(flow.getId());
            }
        }
    }

    /**
     * Removes every window cache whose key belongs to the given flow. Window keys are
     * {@code flowId \0 streamName \0 idx}, so removing by the bare flow id would match nothing.
     */
    private void dropWindowsForFlow(String flowId) {
        String prefix = flowId + KEY_SEPARATOR;
        Set<String> staleKeys = new HashSet<String>();
        for (String key : windows.keySet()) {
            if (key.startsWith(prefix)) {
                staleKeys.add(key);
            }
        }
        for (String key : staleKeys) {
            windows.remove(key);
        }
    }

    /**
     * Handles a tick: for every sort operator that uses a time-based trigger or eviction policy,
     * evicts and/or advances the trigger ticks of each of its windows and emits the windows
     * whose tick count has reached the trigger threshold.
     */
    private void processTick() {
        for (Flow flow : flowMap.values()) {
            for (StreamDef curStream : flow.getStreams()) {
                int idx = 0;
                for (FlowOp curFlowOp : curStream.getFlowOps()) {
                    if (curFlowOp instanceof SortOp) {
                        SortOp op = (SortOp) curFlowOp;
                        if (op.getTriggerPolicy() == Policy.TIME || op.getEvictionPolicy() == Policy.TIME) {
                            Cache<String, SortedWindow> windowCache =
                                    windows.get(windowKey(flow.getId(), curStream.getName(), idx));
                            if (windowCache != null) {
                                for (SortedWindow window : windowCache.asMap().values()) {
                                    if (op.getEvictionPolicy() == Policy.TIME) {
                                        window.timeEvict(op.getEvictionThreshold());
                                    }
                                    if (op.getTriggerPolicy() == Policy.TIME) {
                                        window.incrTriggerTicks();
                                    }
                                    if (window.getTriggerTicks() == op.getTriggerThreshold()) {
                                        FlowInfo flowInfo = new FlowInfo(flow.getId(), curStream.getName(), idx);
                                        emitWindow(flowInfo, flow, op, window);
                                    }
                                }
                            }
                        }
                    }
                    // idx counts every op in the stream, not just the sort ops
                    idx++;
                }
            }
        }
    }

    /**
     * Adds the event carried by the tuple to the window of its flow/stream/op/partition, creating
     * the window (and the operator's cache) on first use, then applies count-based eviction and
     * the count / time-delta trigger policies.
     */
    private void processEvent(Tuple tuple) {
        FlowInfo flowInfo = new FlowInfo(tuple);
        Flow flow = flowMap.get(flowInfo.getFlowId());
        if (flow == null) {
            // The flow is not (or no longer) loaded here, e.g. an event still in flight after the
            // flow was removed. There is nothing to sort it into; the caller still acks the tuple.
            return;
        }

        SortOp op = (SortOp) flow.getStream(flowInfo.getStreamName()).getFlowOps().get(flowInfo.getIdx());
        String key = windowKey(flow.getId(), flowInfo.getStreamName(), flowInfo.getIdx());

        Cache<String, SortedWindow> buffersForRule = windows.get(key);
        if (buffersForRule == null) {
            // Expire idle windows: just in case we get some rogue data, we don't want it to sit for too long.
            buffersForRule = CacheBuilder.newBuilder().expireAfterAccess(60, TimeUnit.MINUTES).build();
            windows.put(key, buffersForRule);
        }

        SortedWindow buffer = buffersForRule.getIfPresent(flowInfo.getPartition());
        if (buffer == null) {
            // New partition: add its window to the existing cache so the windows of the other
            // partitions of this operator are kept.
            buffer = buildWindow(flowInfo.getPartition(), op);
            buffersForRule.put(flowInfo.getPartition(), buffer);
        } else if (op.getEvictionPolicy() == Policy.TIME) {
            buffer.timeEvict(op.getEvictionThreshold());
        }

        // Make room before adding when a count-evicted window is already full.
        if (op.getEvictionPolicy() == Policy.COUNT && op.getEvictionThreshold() == buffer.size()) {
            buffer.expire();
        }

        buffer.add(flowInfo.getEvent(), flowInfo.getPreviousStream());

        if (op.getTriggerPolicy() == Policy.COUNT) {
            buffer.incrTriggerTicks();
            if (buffer.getTriggerTicks() == op.getTriggerThreshold()) {
                emitWindow(flowInfo, flow, op, buffer);
            }
        } else if (op.getTriggerPolicy() == Policy.TIME_DELTA_LT
                && buffer.timeRange() > -1
                && buffer.timeRange() <= op.getTriggerThreshold() * 1000) {
            emitWindow(flowInfo, flow, op, buffer);
        }
    }

    /**
     * Emits items from the window to the next component of the stream and to any other streams
     * the current stream outputs to, then resets the window's trigger ticks.
     *
     * If the op clears on trigger, every item is emitted and the window is cleared. Otherwise a
     * progressive window that has reached its eviction threshold emits only the item it expires,
     * and any other window emits all of its items.
     */
    private void emitWindow(FlowInfo flowInfo, Flow flow, SortOp op, Window window) {
        Iterable<WindowItem> items;
        if (!op.isClearOnTrigger() && op.isProgressive() && window.size() == op.getEvictionThreshold()) {
            // we know if it's a progressive window, the eviction policy is count.
            items = singleton(window.expire());
        } else {
            items = window.getEvents();
        }

        for (WindowItem item : items) {
            String nextStream = getNextStreamFromFlowInfo(flow, flowInfo.getStreamName(), flowInfo.getIdx());
            if (hasNextOutput(flow, flowInfo.getStreamName(), nextStream)) {
                collector.emit(nextStream, new Values(flow.getId(), item.getEvent(), flowInfo.getIdx(),
                        flowInfo.getStreamName(), item.getPreviousStream()));
            }

            // send directly to any non std output streams
            if (exportsToOtherStreams(flow, flowInfo.getStreamName(), nextStream)) {
                for (String output : flow.getStream(flowInfo.getStreamName()).getOutputs()) {
                    String outputStream = flow.getStream(output).getFlowOps().get(0).getComponentName();
                    collector.emit(outputStream, new Values(flow.getId(), item.getEvent(), -1, output,
                            flowInfo.getStreamName()));
                }
            }
        }

        if (op.isClearOnTrigger()) {
            window.clear();
        }
        window.resetTriggerTicks();
    }

    /**
     * Creates the sorted window for a partition, ordered by the op's sort-by fields. The eviction
     * threshold is only passed to the window when the eviction policy is count-based.
     */
    private SortedWindow buildWindow(String hash, SortOp op) {
        Comparator<WindowItem> comparator = new EventSortByComparator(op.getSortBy());
        if (op.getEvictionPolicy() == Policy.COUNT) {
            return new SortedWindow(hash, comparator, op.getEvictionThreshold(), op.isClearOnTrigger());
        }
        return new SortedWindow(hash, comparator, op.isClearOnTrigger());
    }

    /** Builds the {@link #windows} key for one operator of one stream of one flow. */
    private static String windowKey(String flowId, String streamName, int idx) {
        return flowId + KEY_SEPARATOR + streamName + KEY_SEPARATOR + idx;
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        declareOutputStreams(outputFieldsDeclarer, fields);
    }
}