package nl.us2.cloudpelican.stormprocessor;
/**
* Created by robin on 07/06/15.
*/
import backtype.storm.Config;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import nl.us2.timeseriesoutlierdetection.*;
import org.apache.commons.codec.binary.Base64;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import storm.starter.util.TupleHelpers;
import java.util.*;
/**
* Bolt that tracks live filters and periodically runs timeserie outlier detection
* on per-filter stats fetched from the supervisor, emitting detected outliers on
* the "outliers" stream.
*
* @author robin
*/
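// Usage sketch: one way this bolt could be wired into a topology. Illustrative
// only; the component names, parallelism, and the upstream "match" bolt are
// assumptions, not taken from this project's actual topology code. Grouping by
// "filter_id" keeps each filter's tuples on a single bolt instance, which the
// per-filter state below relies on.
//
// TopologyBuilder builder = new TopologyBuilder();
// builder.setBolt("outlier_detection", new OutlierDetectionBolt(settings), 1)
// .fieldsGrouping("match", new Fields("filter_id"));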
public class OutlierDetectionBolt extends BaseRichBolt {
OutputCollector _collector;
private Settings settings;
private HashMap<String, Long> liveFilters;
private HashMap<String, Long> filterMaxTsAnalyzed;
private JsonParser jsonParser;
private List<ITimeserieAnalyzer> analyzers;
private long startTime;
private static final int MIN_UPTIME = 60; // Seconds of uptime required before outlier detection starts
private static final int NUM_CORES = 4; // Parallelism passed to dl.analyze()
private static final Logger LOG = LoggerFactory.getLogger(OutlierDetectionBolt.class);
public OutlierDetectionBolt(Settings settings) {
super();
this.settings = settings;
}
@Override
public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
_collector = collector;
liveFilters = new HashMap<String, Long>();
jsonParser = new JsonParser();
filterMaxTsAnalyzed = new HashMap<String, Long>();
// Active analyzers
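// Each analyzer scores the timeserie independently; dl.validate() in _checkOutlier
// then combines their results into validated outliers (an assumption about the
// ensemble semantics of nl.us2.timeseriesoutlierdetection, inferred from its API)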
analyzers = new ArrayList<ITimeserieAnalyzer>();
analyzers.add(new NoopTimeserieAnalyzer());
analyzers.add(new NormalDistributionTimeserieAnalyzer());
analyzers.add(new LogNormalDistributionTimeserieAnalyzer());
analyzers.add(new SimpleRegressionTimeserieAnalyzer());
analyzers.add(new MovingAverageTimeserieAnalyzer());
analyzers.add(new PolynomialRegressionTimeserieAnalyzer());
analyzers.add(new IntervalInterceptorTimeserieAnalyzer());
analyzers.add(new RandomWalkRegressionTimeserieAnalyzer());
analyzers.add(new OneClassSVMTimeserieAnalyzer());
analyzers.add(new TimeBucketSimpleRegressionTimeserieAnalyzer());
analyzers.add(new MultipleLinearRegressionTimeserieAnalyzer());
analyzers.add(new SimpleExponentialSmoothingTimeserieAnalyzer());
// Start time
startTime = now();
}
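// Current unix timestamp in seconds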
protected long now() {
return new Date().getTime() / 1000L;
}
@Override
public void execute(Tuple tuple) {
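// Tick tuples (requested in getComponentConfiguration) drive the periodic
// outlier scan; any other tuple marks its filter as live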
if (TupleHelpers.isTickTuple(tuple)) {
executeTick();
} else {
executeTuple(tuple);
}
_collector.ack(tuple);
}
public void executeTick() {
// Only process after a while to prevent issues after a cold start (classifiers still learning, bumps in traffic, etc)
long uptime = now() - startTime;
if (uptime < MIN_UPTIME) {
LOG.info("Not running, uptime is " + uptime + " did not reach threshold of " + MIN_UPTIME + " second(s)");
return;
}
// Clean up list
ArrayList<String> toRemove = new ArrayList<String>();
long staleCutoff = new Date().getTime() - (1000 * 10 * 60); // Considered stale after 10 minutes
for (Map.Entry<String, Long> kv : liveFilters.entrySet()) {
if (kv.getValue() < staleCutoff) {
toRemove.add(kv.getKey());
}
}
for (String k : toRemove) {
liveFilters.remove(k);
LOG.info("Removed stale filter " + k);
}
// Run outlier checks
for (String filterId : liveFilters.keySet()) {
try {
_checkOutlier(filterId);
} catch (Exception e) {
LOG.error("Failed to check outliers of filter " + filterId, e);
}
}
}
protected void _checkOutlier(String filterId) throws Exception {
// Fetch per-timestamp stats for this filter from the supervisor (HTTP basic auth)
HttpGet getStats = new HttpGet(settings.get("supervisor_host") + "filter/" + filterId + "/stats");
String token = new String(Base64.encodeBase64((settings.get("supervisor_username") + ":" + settings.get("supervisor_password")).getBytes()));
getStats.addHeader("Authorization", "Basic " + token);
CloseableHttpClient client = HttpClientBuilder.create().build();
String body;
try {
HttpResponse resp = client.execute(getStats);
body = EntityUtils.toString(resp.getEntity());
} finally {
client.close(); // Close the client so connections are not leaked on every tick
}
JsonObject stats = jsonParser.parse(body).getAsJsonObject().get("stats").getAsJsonObject();
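// Assumed response shape, inferred from the parsing above and below (key "1"
// holds counts for regular lines, any other key holds error lines):
// {"stats": {"1": {"<unixTs>": "<count>", ...}, "<other>": {"<unixTs>": "<count>", ...}}}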
// Detect outliers
MutableDataLoader dl = new MutableDataLoader("fkh-" + filterId);
long now = new Date().getTime();
long unixTs = now / 1000L;
int timeResolution = 300; // Bucket size in seconds (5 minutes)
long unixTsBucket = unixTs - (unixTs % timeResolution);
long minTs = unixTsBucket - 24 * 3600; // Window starts 24 hours in the past
int skipLastSeconds = 1 * timeResolution;
long maxTs = unixTsBucket - skipLastSeconds; // Skip the most recent bucket, which may still be filling
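// Example: unixTs = 1433699000 gives unixTsBucket = 1433698800, so the window
// runs from minTs = 1433612400 (24h back) to maxTs = 1433698500 (last bucket skipped)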
long dataMaxTs = Long.MIN_VALUE;
int dataPointCount = 0;
for (Map.Entry<String, JsonElement> kv : stats.entrySet()) {
String serieName = kv.getKey().equals("1") ? "regular" : "error";
for (Map.Entry<String, JsonElement> tskv : kv.getValue().getAsJsonObject().entrySet()) {
long ts = Long.parseLong(tskv.getKey());
if (ts < minTs || ts >= maxTs) {
continue;
}
if (ts > dataMaxTs) {
dataMaxTs = ts;
}
dl.addData(serieName, tskv.getKey(), tskv.getValue().getAsString());
dataPointCount++;
}
}
// Do we have at least 10 datapoints?
if (dataPointCount < 10) {
return;
}
// Skip the analysis if no new data has arrived since the last run, to reduce overhead
long lastAnalyzed = filterMaxTsAnalyzed.getOrDefault(filterId, 0L);
if (dataMaxTs <= lastAnalyzed) {
// No new data to analyze
return;
}
filterMaxTsAnalyzed.put(filterId, dataMaxTs);
// Analyze
dl.setDesiredTimeResolution(timeResolution);
dl.setForecastPeriods(1);
dl.load();
dl.analyze(analyzers, NUM_CORES);
List<ValidatedTimeserieOutlier> outliers = dl.validate();
for (ValidatedTimeserieOutlier outlier : outliers) {
LOG.info("Filter " + filterId + " outlier at " + outlier.getTs() + " (" + new Date(outlier.getTs() * 1000L).toString() + ") score " + outlier.getScore());
_collector.emit("outliers", new Values(filterId, outlier.getTs(), outlier.getScore(), outlier.getDetails().toString()));
}
}
@Override
public Map<String, Object> getComponentConfiguration() {
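// Ask Storm to deliver a tick tuple to this bolt every tickFrequencyInSeconds;
// each tick triggers executeTick() and thus the periodic outlier scan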
Config conf = new Config();
int tickFrequencyInSeconds = 60;
conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, tickFrequencyInSeconds);
return conf;
}
public void executeTuple(Tuple tuple) {
String filterId = tuple.getStringByField("filter_id");
liveFilters.put(filterId, new Date().getTime());
// No ack here; acking is handled by the caller in execute()
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declareStream("outliers", new Fields("filter_id", "timestamp", "score", "json_details"));
}
}