package resa.evaluation.topology.fp;
import backtype.storm.serialization.SerializableSerializer;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import com.esotericsoftware.kryo.DefaultSerializer;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.KryoSerializable;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import com.esotericsoftware.kryo.serializers.DefaultSerializers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import resa.util.ConfigUtil;
import java.io.Serializable;
import java.util.*;
/**
* Created by ding on 14-6-5.
*/
public class Detector extends BaseRichBolt implements Constant {
private static final Logger LOG = LoggerFactory.getLogger(Detector.class);
/**
 * Mutable bookkeeping record kept for a single pattern.
 * <p>
 * It carries two independent counters ({@code count} and {@code refCount}),
 * two boolean flags and the time the entry was last stamped. The class is
 * {@link Serializable} and registered with Kryo through
 * {@link SerializableSerializer}.
 */
@DefaultSerializer(SerializableSerializer.class)
public static class Entry implements Serializable {

    /** Last time stamp assigned to this entry; compared against the keep interval by the pattern store. */
    long timestamp;
    /** Counter moved by {@link #incCountAndGet()} / {@link #decCountAndGet()}. */
    int count;
    /** Counter moved by {@link #incRefCountAndGet()} / {@link #decRefCountAndGet()}. */
    int refCount;
    /** Plain flag exposed through its getter/setter only. */
    boolean detectedBySelf;
    /** Flag labelled "isMaxFreqPattern" in the state diagram of the enclosing class. */
    boolean flagMFPattern;

    /**
     * Creates an entry with both counters at zero and both flags cleared.
     *
     * @param timestamp initial time stamp
     */
    public Entry(long timestamp) {
        this.timestamp = timestamp;
    }

    /**
     * Replaces the time stamp.
     *
     * @param timestamp the new time stamp
     * @return the time stamp that was stored before this call
     */
    public long setTimestamp(long timestamp) {
        final long previous = this.timestamp;
        this.timestamp = timestamp;
        return previous;
    }

    public void setDetectedBySelf(boolean detectedBySelf) {
        this.detectedBySelf = detectedBySelf;
    }

    public boolean isDetectedBySelf() {
        return this.detectedBySelf;
    }

    public void setMFPattern(boolean flag) {
        flagMFPattern = flag;
    }

    public boolean isMFPattern() {
        return flagMFPattern;
    }

    int getCount() {
        return this.count;
    }

    /** @return the count after adding one */
    int incCountAndGet() {
        count += 1;
        return count;
    }

    /** @return the count after subtracting one */
    int decCountAndGet() {
        count -= 1;
        return count;
    }

    int getRefCount() {
        return this.refCount;
    }

    /** @return the reference count after adding one */
    int incRefCountAndGet() {
        refCount += 1;
        return refCount;
    }

    /** @return the reference count after subtracting one */
    int decRefCountAndGet() {
        refCount -= 1;
        return refCount;
    }

    /** @return {@code true} while the reference count is strictly positive */
    boolean hasReference() {
        return 0 < refCount;
    }

    /** @return {@code true} when neither counter is positive, i.e. the entry can be dropped */
    boolean unused() {
        return !(count > 0 || refCount > 0);
    }

    /** @return both counters formatted for log output */
    String reportCnt() {
        return String.format(" cnt: %d, refCnt: %d", count, refCount);
    }
}
/** Pattern table of this task; fetched from (or created and stored in) the context's task data in {@code prepare}. */
private PatternDB patterns;
/** Count compared against each entry's count in {@code execute}; read from {@code THRESHOLD_PROP} (default 20). */
private int threshold;
/** Collector handed over in {@code prepare}; used to emit, emit directly and ack. */
private OutputCollector collector;
/** Sorted task ids of this bolt's own component; targets of the direct feedback emits. */
private List<Integer> targetTasks;
/**
 * Access-ordered map from pattern to its {@link Entry} with age-based eviction.
 * <p>
 * An entry is considered expired once the distance between "now" and its
 * time stamp exceeds {@code maxKeep} (milliseconds, as it is compared with
 * {@link System#currentTimeMillis()} differences). Expiry is applied to the
 * eldest entry on every insertion and in bulk through
 * {@link #removeExpired(long)}.
 * <p>
 * The map serializes itself through Kryo ({@link KryoSerializable}); the
 * no-argument constructor exists so an instance can be created before
 * {@link #read(Kryo, Input)} fills it.
 */
@DefaultSerializer(DefaultSerializers.KryoSerializableSerializer.class)
public static class PatternDB extends LinkedHashMap<WordList, Entry> implements KryoSerializable {

    /** Extra slack, in milliseconds, added to every time stamp when a serialized map is read back. */
    private static final long RESTORE_GRACE_MS = 10000L;

    private long maxKeep;

    /**
     * @param maxKeep maximum age of an entry before it may be evicted
     */
    public PatternDB(long maxKeep) {
        // third argument: access order, so touched entries move to the young end
        super(65536, 0.75f, true);
        this.maxKeep = maxKeep;
    }

    /** Creates a map that never expires entries until {@link #read(Kryo, Input)} sets the real limit. */
    public PatternDB() {
        this(Long.MAX_VALUE);
    }

    /**
     * Evicts the eldest entry on insertion when it is older than {@code maxKeep}.
     */
    @Override
    protected boolean removeEldestEntry(Map.Entry<WordList, Entry> eldest) {
        return System.currentTimeMillis() - eldest.getValue().timestamp > maxKeep;
    }

    /**
     * Removes expired entries starting from the eldest one and stops at the
     * first entry that is still young enough.
     *
     * @param now reference time used to compute each entry's age
     */
    public void removeExpired(long now) {
        for (Iterator<Map.Entry<WordList, Entry>> iter = entrySet().iterator(); iter.hasNext(); ) {
            Map.Entry<WordList, Entry> e = iter.next();
            if (now - e.getValue().timestamp > maxKeep) {
                iter.remove();
            } else {
                return;
            }
        }
    }

    /**
     * Writes {@code maxKeep}, the entry count, the current wall-clock time and
     * then every key/value pair in iteration order.
     */
    @Override
    public void write(Kryo kryo, Output output) {
        output.writeLong(maxKeep);
        output.writeInt(size());
        output.writeLong(System.currentTimeMillis());
        forEach((k, v) -> {
            kryo.writeClassAndObject(output, k);
            kryo.writeClassAndObject(output, v);
        });
        LOG.info("write out {} patterns", size());
    }

    /**
     * Restores the content written by {@link #write(Kryo, Output)}.
     * <p>
     * Every time stamp is shifted forward by the time elapsed since the map
     * was written plus {@link #RESTORE_GRACE_MS}, so entries do not expire
     * merely because of the time spent serialized.
     */
    @Override
    public void read(Kryo kryo, Input input) {
        final long storedMaxKeep = input.readLong();
        final int size = input.readInt();
        final long writtenAt = input.readLong();
        // Switch expiry off while re-inserting: the stored time stamps are
        // stale until they are shifted below.
        maxKeep = Long.MAX_VALUE;
        try {
            for (int i = 0; i < size; i++) {
                WordList p = (WordList) kryo.readClassAndObject(input);
                Entry entry = (Entry) kryo.readClassAndObject(input);
                put(p, entry);
            }
            // reset time stamps relative to the current clock
            final long toAdd = System.currentTimeMillis() - writtenAt + RESTORE_GRACE_MS;
            values().forEach(v -> v.setTimestamp(v.timestamp + toAdd));
        } finally {
            // Always leave the map with its real limit, even if reading failed midway.
            maxKeep = storedMaxKeep;
        }
        LOG.info("read in {} patterns", size);
    }
}
/**
 * Initialises the bolt: obtains the pattern table, remembers the collector,
 * reads the threshold and resolves the tasks of this component.
 * <p>
 * The pattern table is looked up in the context's task data under the key
 * {@code "pattern"}; when none is stored a new one is created with the keep
 * interval from {@code MAX_KEEP_PROP} (default 60000) and registered there.
 *
 * @param stormConf topology configuration
 * @param context   context of the task running this bolt
 * @param collector collector used for all emits and acks
 */
@Override
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
    final String taskDataKey = "pattern";
    this.patterns = (PatternDB) context.getTaskData(taskDataKey);
    if (this.patterns == null) {
        final long keepInterval = ConfigUtil.getInt(stormConf, MAX_KEEP_PROP, 60000);
        this.patterns = new PatternDB(keepInterval);
        context.setTaskData(taskDataKey, this.patterns);
    }

    this.collector = collector;
    this.threshold = ConfigUtil.getInt(stormConf, THRESHOLD_PROP, 20);

    // Tasks of this very component, in ascending order, so an index can be
    // mapped to a task id when emitting directly.
    final String ownComponent = context.getThisComponentId();
    this.targetTasks = context.getComponentTasks(ownComponent);
    Collections.sort(this.targetTasks);

    LOG.info("In Detector, threshold: " + threshold);
}
/////////////////// State Transition Graph, Implementation III/////////////////////////////
/// States: ( Cnt > Threshold ? , hasReference? | isMaxFreqPattern? ) Stable state ///
/// [ , | ] Temp State --> trigger update ///
/// Event and Output: Cnt (+/-), pattern count inc/dec (DefaultStream) ///
/// RefCnt (+/-), reference count update from (Feedback Stream) ///
/// Output to next bolt: *T*/*F* ///
/// ///
/// +--(Cnt-)---(F, F | F)<--(direct update, *F*)--[F, F | T] ///
/// | | ^ ^ ///
/// | (RefCnt-)| | (RefCnt+) (Cnt-)| ///
/// | V | | ///
/// | (F, T | F ) (T, F | T)<--------------+ ///
/// | | ^ | | ///
/// | (Cnt-)| | (Cnt+) |(RefCnt+) | ///
/// | V | V | ///
/// | (T, T | F)<--(direct update, *F*)--[T, T | T] | ///
/// | | ^ | ///
/// | (RefCnt-)| | (RefCnt+) | ///
/// | V | | ///
/// +---------->[T, F | F ]----------->(direct update, *T*)-----------------+ ///
/// ///
///////////////////////////////////////////////////////////////////////////////////////////
/**
 * Applies one tuple, which carries a list of patterns and an add/remove
 * flag, to the local pattern table.
 * <p>
 * Tuples that do not arrive on {@code FEEDBACK_STREAM} adjust the pattern
 * count; tuples on {@code FEEDBACK_STREAM} adjust the reference count. After
 * either adjustment the entry's MFP flag is re-evaluated and every flip is
 * reported on {@code REPORT_STREAM}. Entries whose counters are both
 * non-positive are removed; all others are re-stamped with the current time.
 * Finally expired entries are purged, the artificial load is applied and the
 * tuple is acked.
 *
 * @param input tuple with {@code PATTERN_FIELD} (list of patterns) and
 *              {@code IS_ADD_FIELD} (increment when true, decrement otherwise)
 */
@Override
public void execute(Tuple input) {
    final long now = System.currentTimeMillis();
    @SuppressWarnings("unchecked")
    final List<WordList> batch = (List<WordList>) input.getValueByField(PATTERN_FIELD);
    // Stream id and add/remove flag are properties of the tuple, not of the
    // individual patterns, so they are read once instead of once per pattern.
    final boolean fromFeedback = input.getSourceStreamId().equals(FEEDBACK_STREAM);
    final boolean isAdd = input.getBooleanByField(IS_ADD_FIELD);

    for (WordList pattern : batch) {
        final Entry entry = patterns.computeIfAbsent(pattern, k -> new Entry(now));
        if (fromFeedback) {
            applyRefCountEvent(pattern, entry, isAdd, input);
        } else {
            applyCountEvent(pattern, entry, isAdd, input);
        }
        if (entry.unused()) {
            patterns.remove(pattern);
        } else {
            entry.setTimestamp(now);
        }
    }

    patterns.removeExpired(now);
    sleep(batch.size());
    collector.ack(input);
}

/**
 * Handles a pattern-count event: moves the count, notifies sub-patterns when
 * the count crosses the threshold, then re-evaluates the MFP flag.
 */
private void applyCountEvent(WordList pattern, Entry entry, boolean isAdd, Tuple input) {
    // Reference updates are sent at exactly two events:
    // [++count == threshold] and [--count == threshold - 1].
    if (isAdd) {
        entry.incCountAndGet();
        debugWithCounts("In DetectorNew(default), cntInc: {},{}", pattern, entry);
        if (entry.getCount() == threshold) {
            incRefToSubPatternExcludeSelf(pattern.getWords(), collector, input);
            LOG.debug("In DetectorNew(default), cntInc: {},satisfy thresh and incRef", pattern);
        }
    } else {
        entry.decCountAndGet();
        debugWithCounts("In DetectorNew(default), cntDec: {},{}", pattern, entry);
        if (entry.getCount() == threshold - 1) {
            decRefToSubPatternExcludeSelf(pattern.getWords(), collector, input);
            LOG.debug("In DetectorNew(default), cntDec: {},dissatisfy thresh and decRef", pattern);
        }
    }

    // State update is kept separate from the counter update above.
    if (!entry.isMFPattern()) {
        if (entry.getCount() >= threshold && !entry.hasReference()) {
            // (F, F | F) -> (T, F | T) and (T, F | F) -> (T, F | T): report T
            switchMfpAndReport(pattern, entry, true, input);
            debugWithCounts("In DetectorNew(default), set isMFP{},{}", pattern, entry);
        }
    } else if (entry.hasReference() || entry.getCount() < threshold) {
        // (T, T | T) -> (F, T | F), (T, T | T) -> (T, T | F),
        // (T, F | T) -> (F, F | F): report F
        switchMfpAndReport(pattern, entry, false, input);
        debugWithCounts("In DetectorNew(default), cancel isMFP{},{}", pattern, entry);
    }
}

/**
 * Handles a feedback event: moves the reference count, then re-evaluates the
 * MFP flag.
 */
private void applyRefCountEvent(WordList pattern, Entry entry, boolean isAdd, Tuple input) {
    if (isAdd) {
        entry.incRefCountAndGet();
        debugWithCounts("In DetectorNew(FB), incReferenceCnt: {}, {}", pattern, entry);
    } else {
        entry.decRefCountAndGet();
        debugWithCounts("In DetectorNew(FB), DecReferenceCnt: {}, {}", pattern, entry);
    }

    ///TODO: please double check if we need to anchor tuple here?
    if (entry.hasReference() && entry.isMFPattern()) {
        // [*, T | T] -> (*, T | F): report F
        switchMfpAndReport(pattern, entry, false, input);
        debugWithCounts("In DetectorNew(FB), cancel isMFP: {}, {}", pattern, entry);
    } else if (!entry.hasReference() && !entry.isMFPattern() && entry.getCount() >= threshold) {
        // [T, F | F] -> (T, F | T): report T
        switchMfpAndReport(pattern, entry, true, input);
        debugWithCounts("In DetectorNew(FB), set isMFP{},{}", pattern, entry);
    }
}

/** Stores the new MFP flag on the entry and emits it, anchored to the input, on the report stream. */
private void switchMfpAndReport(WordList pattern, Entry entry, boolean isMfp, Tuple input) {
    entry.setMFPattern(isMfp);
    collector.emit(REPORT_STREAM, input, Arrays.<Object>asList(pattern, isMfp));
}

/** Logs a debug line with the pattern and its counters; formats nothing unless debug is enabled. */
private static void debugWithCounts(String format, WordList pattern, Entry entry) {
    if (LOG.isDebugEnabled()) {
        LOG.debug(format, pattern, entry.reportCnt());
    }
}
/**
 * Keeps the calling thread busy with floating-point work until at least
 * {@code t} milliseconds of wall-clock time have elapsed.
 * <p>
 * Despite its name this does not park the thread; it spins. One round of
 * ten computations is always performed, even for {@code t <= 0}.
 *
 * @param t minimum duration in milliseconds
 */
private static void sleep(long t) {
    final long startedAt = System.currentTimeMillis();
    while (true) {
        int round = 0;
        while (round < 10) {
            Math.atan(Math.sqrt(Math.random() * Integer.MAX_VALUE));
            round++;
        }
        final long elapsed = System.currentTimeMillis() - startedAt;
        if (elapsed >= t) {
            return;
        }
    }
}
/**
 * Sends feedback with the flag set to {@code true} for the sub-patterns of
 * {@code wordIds}; delegates to {@code adjRefToSubPatternExcludeSelf}.
 *
 * @param wordIds   words of the pattern whose sub-patterns are notified
 * @param collector collector passed on to the delegate
 * @param input     tuple passed on to the delegate
 */
private void incRefToSubPatternExcludeSelf(int[] wordIds, OutputCollector collector, Tuple input) {
adjRefToSubPatternExcludeSelf(wordIds, collector, input, true);
}
/**
 * Sends feedback with the flag set to {@code false} for the sub-patterns of
 * {@code wordIds}; delegates to {@code adjRefToSubPatternExcludeSelf}.
 *
 * @param wordIds   words of the pattern whose sub-patterns are notified
 * @param collector collector passed on to the delegate
 * @param input     tuple passed on to the delegate
 */
private void decRefToSubPatternExcludeSelf(int[] wordIds, OutputCollector collector, Tuple input) {
adjRefToSubPatternExcludeSelf(wordIds, collector, input, false);
}
/**
 * Enumerates every non-empty proper subset of {@code wordIds} (the full set
 * itself is excluded), groups the resulting sub-patterns by the task index
 * returned from {@code WordList.getPartition} and emits one direct feedback
 * tuple per task that received at least one sub-pattern.
 * <p>
 * The number of subsets grows as 2^n, and subsets are enumerated with an
 * {@code int} bit mask, so patterns with 32 or more words cannot be handled.
 * They are rejected explicitly rather than silently producing no or wrong
 * feedback through shift wrap-around.
 *
 * @param wordIds   words of the pattern; order is preserved inside each subset
 * @param collector collector used for the direct emits
 * @param input     tuple the emitted feedback is anchored to
 * @param adj       flag sent along with the sub-patterns ({@code true} to
 *                  increment, {@code false} to decrement the reference count)
 * @throws IllegalArgumentException if {@code wordIds} has 32 or more elements
 */
private void adjRefToSubPatternExcludeSelf(int[] wordIds, OutputCollector collector, Tuple input, boolean adj) {
    final int n = wordIds.length;
    if (n >= Integer.SIZE) {
        throw new IllegalArgumentException(
                "cannot enumerate sub-patterns of a pattern with " + n + " words (limit is " + (Integer.SIZE - 1) + ")");
    }
    // Mask with the n low bits set, i.e. the pattern itself; computed in long
    // so n == 31 does not depend on int overflow.
    final int fullMask = (int) ((1L << n) - 1);

    final int taskCount = targetTasks.size();
    // One lazily created bucket per target task index.
    final List<List<WordList>> perTask = new ArrayList<>(Collections.nCopies(taskCount, (List<WordList>) null));
    final int[] buffer = new int[n];

    // Start at 1 to skip the empty set, stop before fullMask to exclude the pattern itself.
    for (int mask = 1; mask < fullMask; mask++) {
        int k = 0;
        for (int j = 0; j < n; j++) {
            if ((mask & (1 << j)) != 0) {
                buffer[k++] = wordIds[j];
            }
        }
        final WordList subPattern = new WordList(Arrays.copyOf(buffer, k));
        final int targetIndex = WordList.getPartition(taskCount, subPattern);
        List<WordList> bucket = perTask.get(targetIndex);
        if (bucket == null) {
            bucket = new ArrayList<>();
            perTask.set(targetIndex, bucket);
        }
        bucket.add(subPattern);
    }

    for (int i = 0; i < taskCount; i++) {
        final List<WordList> bucket = perTask.get(i);
        // A bucket only exists once something was added to it, so non-null implies non-empty.
        if (bucket != null) {
            collector.emitDirect(
                    targetTasks.get(i),
                    FEEDBACK_STREAM,
                    input, Arrays.<Object>asList(bucket, adj));
        }
    }
}
/**
 * Declares the two output streams of this bolt: the report stream with
 * fields ({@code PATTERN_FIELD}, {@code IS_ADD_MFP}) and the feedback
 * stream with fields ({@code PATTERN_FIELD}, {@code IS_ADD_FIELD}). The
 * feedback stream is declared as a direct stream.
 *
 * @param declarer declarer the streams are registered with
 */
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
    final Fields reportFields = new Fields(PATTERN_FIELD, IS_ADD_MFP);
    declarer.declareStream(REPORT_STREAM, reportFields);

    // The 'true' argument marks the stream as direct, matching the
    // emitDirect calls that feed it.
    final boolean direct = true;
    final Fields feedbackFields = new Fields(PATTERN_FIELD, IS_ADD_FIELD);
    declarer.declareStream(FEEDBACK_STREAM, direct, feedbackFields);
}
}