package nl.us2.cloudpelican.stormprocessor;
/**
* Created by robin on 07/06/15.
*/
import backtype.storm.Config;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.commons.codec.binary.Base64;
import org.apache.storm.http.HttpResponse;
import org.apache.storm.http.client.HttpClient;
import org.apache.storm.http.client.methods.HttpGet;
import org.apache.storm.http.impl.client.HttpClientBuilder;
import org.apache.storm.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import storm.starter.util.TupleHelpers;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import static backtype.storm.utils.Utils.DEFAULT_STREAM_ID;
/**
 * Storm bolt that matches raw log messages against a set of regex {@link Filter}s.
 *
 * <p>Filters come from one of two sources:
 * <ul>
 *   <li><b>Local mode</b>: when the {@code match_regex} setting is non-blank, a single
 *       filter with a random UUID as id is built from that regex in {@link #prepare}
 *       and the supervisor is never contacted.</li>
 *   <li><b>Remote mode</b>: otherwise the filter set is fetched over HTTP (Basic auth)
 *       from {@code supervisor_host + "filter"} on every tick tuple.</li>
 * </ul>
 *
 * <p>Output streams:
 * <ul>
 *   <li>default stream: {@code (filter_id, ts, msg)} for every matching message</li>
 *   <li>{@code match_stats}: {@code (filter_id, ts, metric, increment)} match counters</li>
 *   <li>{@code dispatch_outlier_checks}: {@code (filter_id)}, emitted once a minute per
 *       filter, only when {@code outlier_detection_enabled} is true (the default)</li>
 * </ul>
 *
 * @author robin
 */
public class MatchBolt extends BaseRichBolt {
    private static final Logger LOG = LoggerFactory.getLogger(MatchBolt.class);

    /** Stream on which per-filter counters are emitted. */
    private static final String MATCH_STATS_STREAM_ID = "match_stats";
    /** Stream on which the once-a-minute outlier check requests are emitted. */
    private static final String OUTLIER_STREAM_ID = "dispatch_outlier_checks";
    private static final long MILLIS_PER_MINUTE = 60000L;
    /** Charset used for the Basic auth token and as fallback for the JSON response body. */
    private static final String UTF8 = "UTF-8";

    OutputCollector _collector;
    /** Active filters keyed by filter id; {@code null} until first loaded. */
    HashMap<String, Filter> filters;
    JsonParser jsonParser;
    /** True when a single locally configured regex is used instead of the supervisor. */
    private boolean localMode = false;
    private String regex;
    private Settings settings;
    /**
     * Epoch minute in which outlier checks were last dispatched (or in which the bolt was
     * prepared). Used to dispatch exactly once per wall-clock minute.
     */
    private long lastOutlierDispatchMinute = -1L;

    /**
     * @param settings topology settings; {@code match_regex} is read here, the
     *                 {@code supervisor_*} and {@code outlier_detection_enabled} keys later
     */
    public MatchBolt(Settings settings) {
        super();
        filters = null;
        this.settings = settings;
        this.regex = this.settings.get("match_regex");
    }

    /**
     * Stores the collector, creates the JSON parser and, when a non-blank
     * {@code match_regex} was configured, switches to local mode with one synthetic filter.
     */
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        _collector = collector;
        jsonParser = new JsonParser();
        // Start the minute bookkeeping now so the first dispatch happens at the next
        // minute boundary rather than on the very first tick.
        lastOutlierDispatchMinute = System.currentTimeMillis() / MILLIS_PER_MINUTE;

        // Local mode
        if (regex != null && !regex.trim().isEmpty()) {
            LOG.info("Setting up local regex " + regex);
            localMode = true;
            JsonObject obj = new JsonObject();
            String fakeId = UUID.randomUUID().toString();
            obj.addProperty("id", fakeId);
            obj.addProperty("regex", regex);
            filters = new HashMap<String, Filter>();
            // NOTE(review): unlike the remote path, compileRegex() is not called here;
            // presumably Filter compiles lazily - confirm against Filter.
            filters.put(fakeId, new Filter(obj));
            LOG.info("Setup up local regex " + regex);
        }
    }

    /**
     * Routes tick tuples to {@link #executeTick()} and everything else to
     * {@link #executeTuple(Tuple)}. Any exception is logged and the tuple is acked
     * regardless, so a failing message is dropped rather than replayed.
     */
    public void execute(Tuple tuple) {
        try {
            if (TupleHelpers.isTickTuple(tuple)) {
                executeTick();
            } else {
                executeTuple(tuple);
            }
        } catch (Exception e) {
            LOG.error("Unexpected error in execute", e);
        }
        _collector.ack(tuple);
    }

    /**
     * Periodic housekeeping, driven by the tick tuple configured in
     * {@link #getComponentConfiguration()}: refreshes the filters and, once per
     * wall-clock minute, dispatches the outlier checks.
     */
    public void executeTick() {
        loadFilters();

        // Once a minute. Comparing against the last dispatched minute (instead of testing
        // "current second == 0") keeps this reliable when a tick is delayed past the
        // minute boundary or when two ticks land in the same second.
        long currentMinute = System.currentTimeMillis() / MILLIS_PER_MINUTE;
        if (currentMinute != lastOutlierDispatchMinute) {
            lastOutlierDispatchMinute = currentMinute;
            dispatchOutlierChecks();
        }
    }

    /**
     * Emits one {@code (filter_id)} tuple per active filter on the
     * {@code dispatch_outlier_checks} stream, unless outlier detection is disabled.
     */
    protected void dispatchOutlierChecks() {
        if (!isOutlierDetectionEnabled()) {
            return;
        }
        for (Filter filter : getFilters().values()) {
            _collector.emit(OUTLIER_STREAM_ID, new Values(filter.Id()));
        }
    }

    /**
     * Refreshes the filter set from the supervisor (no-op in local mode).
     *
     * <p>The active set is replaced, and the new filters compiled, only when a previously
     * unknown filter id shows up or the number of filters changed. Invalid filters and
     * filters that fail to parse are skipped. Any failure while fetching or parsing is
     * logged and leaves the current set untouched.
     */
    protected void loadFilters() {
        // Do not execute in local mode
        if (localMode) {
            return;
        }

        // Init
        if (filters == null) {
            filters = new HashMap<String, Filter>();
        }

        // Load
        try {
            String body = fetchFilterDefinitions();
            LOG.debug(body);

            JsonObject outer = jsonParser.parse(body).getAsJsonObject();
            JsonArray arr = outer.get("filters").getAsJsonArray();

            HashMap<String, Filter> tmp = new HashMap<String, Filter>();
            boolean swapFilters = false;
            for (JsonElement elm : arr) {
                try {
                    JsonObject filter = elm.getAsJsonObject();
                    Filter f = new Filter(filter);
                    if (!f.isValid()) {
                        continue;
                    }
                    if (!filters.containsKey(f.Id())) {
                        LOG.info("Loaded filter " + filter.toString());
                        swapFilters = true;
                    }
                    tmp.put(f.Id(), f);
                } catch (Exception fe) {
                    // One broken definition must not prevent the others from loading.
                    LOG.error("Failed to load filter", fe);
                }
            }

            // Swap
            // NOTE(review): a filter whose definition changed while keeping its id (and
            // with the total count unchanged) is not detected by this check.
            if (swapFilters || filters.size() != tmp.size()) {
                LOG.info("Swapping filter sets");
                filters = tmp;
                for (Filter filter : filters.values()) {
                    filter.compileRegex();
                }
                LOG.info("Compiled filters");
            }
        } catch (Exception e) {
            LOG.error("Failed to load filters", e);
        }
    }

    /**
     * Performs the authenticated GET against the supervisor and returns the response body.
     * The HTTP client is always closed afterwards so repeated ticks do not leak connections.
     */
    private String fetchFilterDefinitions() throws java.io.IOException {
        String url = settings.get("supervisor_host") + "filter";
        LOG.debug(url);

        // The token is deliberately not logged: Base64 is trivially reversible.
        String credentials = settings.get("supervisor_username") + ":" + settings.get("supervisor_password");
        String token = new String(Base64.encodeBase64(credentials.getBytes(UTF8)), UTF8);

        HttpGet get = new HttpGet(url);
        get.setHeader("Authorization", "Basic " + token);

        HttpClient client = HttpClientBuilder.create().build();
        try {
            HttpResponse resp = client.execute(get);
            // UTF-8 is only the fallback for responses that do not declare a charset.
            return EntityUtils.toString(resp.getEntity(), UTF8);
        } finally {
            if (client instanceof java.io.Closeable) {
                try {
                    ((java.io.Closeable) client).close();
                } catch (java.io.IOException closeError) {
                    LOG.warn("Failed to close HTTP client", closeError);
                }
            }
        }
    }

    /** Reads the {@code outlier_detection_enabled} setting, which defaults to true. */
    private boolean isOutlierDetectionEnabled() {
        return Boolean.parseBoolean(settings.getOrDefault("outlier_detection_enabled", "true"));
    }

    /**
     * Returns the active filters, triggering a load first when none have been loaded yet.
     */
    protected HashMap<String, Filter> getFilters() {
        if (filters == null) {
            loadFilters();
        }
        return filters;
    }

    /** Requests a tick tuple every second; see {@link #executeTick()}. */
    public Map<String, Object> getComponentConfiguration() {
        Config conf = new Config();
        int tickFrequencyInSeconds = 1;
        conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, tickFrequencyInSeconds);
        return conf;
    }

    /**
     * Matches the tuple's {@code _raw} message against every active filter and, per match,
     * emits the message on the default stream and a counter increment on
     * {@code match_stats}. Acking is left to {@link #execute(Tuple)}.
     */
    public void executeTuple(Tuple tuple) {
        String msg = tuple.getStringByField("_raw");

        // Match filters
        for (Filter filter : getFilters().values()) {
            // @todo A lot of filters match case-insensitive, so executing 1 toLowerCase() for those who need it will improve efficiency as well
            if (filter.matches(msg)) {
                String filterId = filter.Id();
                // Read lazily so tuples that match nothing never touch the "ts" field.
                Long ts = tuple.getLongByField("ts");
                _collector.emit(DEFAULT_STREAM_ID, new Values(filterId, ts, msg)); // Message
                _collector.emit(MATCH_STATS_STREAM_ID, new Values(filterId, ts, MetricsEnum.MATCH.getMask(), 1L)); // Counters
            }
        }
        // No ack, is handled in outer
    }

    /**
     * Declares the default and {@code match_stats} streams, plus
     * {@code dispatch_outlier_checks} when outlier detection is enabled.
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("filter_id", "ts", "msg"));
        declarer.declareStream(MATCH_STATS_STREAM_ID, new Fields("filter_id", "ts", "metric", "increment"));
        if (isOutlierDetectionEnabled()) {
            declarer.declareStream(OUTLIER_STREAM_ID, new Fields("filter_id"));
        }
    }
}