package storm.applications.bolt;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import static storm.applications.constants.MachineOutlierConstants.*;
import storm.applications.model.metadata.MachineMetadata;
import storm.applications.util.math.Entropy;
import storm.applications.util.math.MaximumLikelihoodNormalDistribution;
public class DataInstancesScoreBolt extends AbstractBolt {
private long previousTimestamp;
private Map<Double, List<String>> histogram; // histogram for a batch of data
// instances.
private int totalCountInBatch; // total count for a batch of data instances.
@Override
public void initialize() {
previousTimestamp = 0;
histogram = new HashMap<>();
totalCountInBatch = 0;
}
@Override
public void execute(Tuple input) {
long curTimestamp = input.getLongByField(Field.TIMESTAMP);
String machineIp = input.getStringByField(Field.ID);
if (curTimestamp != previousTimestamp && totalCountInBatch != 0) {
// score data instances of previous batch
MaximumLikelihoodNormalDistribution mlnd = new MaximumLikelihoodNormalDistribution(
totalCountInBatch, histogram);
double minIdle = Double.MAX_VALUE;
for (Double v : histogram.keySet()) {
if (v < minIdle) {
minIdle = v;
}
}
double entropy = Entropy.calculateEntropyNormalDistribution(mlnd.getSigma());
StringBuilder blankLines = new StringBuilder();
for (int i = 0; i < 20; ++i) {
blankLines.append("\n");
}
// emit to stream score bolt
Set<Double> keySet = histogram.keySet();
for (double key : keySet) {
List<String> entityList = histogram.get(key);
String firstEntity = entityList.remove(0);
// estimate parameters for leave-one-out histogram
MaximumLikelihoodNormalDistribution ml = new MaximumLikelihoodNormalDistribution(
totalCountInBatch - 1, histogram);
double leaveOneOutEntropy = Entropy.calculateEntropyNormalDistribution(ml.getSigma());
double entropyReduce = entropy - leaveOneOutEntropy;
entropyReduce = entropyReduce > 0 ? entropyReduce : 0;
double score = entropyReduce * totalCountInBatch;
// put the removed one back to histogram
entityList.add(firstEntity);
for (String entityId : entityList) {
collector.emit(new Values(entityId, curTimestamp, score));
}
}
histogram.clear();
totalCountInBatch = 0;
previousTimestamp = curTimestamp;
}
MachineMetadata machineMetaData = (MachineMetadata) input.getValue(2);
double idleTime = machineMetaData.getCpuIdleTime();
idleTime = idleTime > 0 ? idleTime / 100000 * 100000 : 0;
List<String> instancesList = histogram.get(idleTime);
if (instancesList == null) {
instancesList = new ArrayList<>();
}
instancesList.add(machineIp);
histogram.put(idleTime, instancesList);
++totalCountInBatch;
}
@Override
public Fields getDefaultFields() {
return new Fields(Field.ENTITY_ID, Field.TIMESTAMP, Field.DATAINST_SCORE);
}
}