/*
* Copyright 2012 Nodeable Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamreduce.storm.bolts;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;
import com.streamreduce.Constants;
import com.streamreduce.core.event.EventId;
import com.streamreduce.queue.CamelFacade;
import org.apache.log4j.Logger;
/*
* The aggregation algorithm in JuggaloaderMessageGeneratorBolt groups things
* together by "targetConnectionId". When these show up as "insight" messages
* in the stream, different types of metrics can appear in the same table; the
* "friendly name" will be the first column.
*
* Anomaly tuples don't get buffered; they are passed on to become messages
* right away.
*
* Other tuples are checked to ensure they have a targetConnectionId. Those
* without one are dropped (passing them on as well caused SOBA-1962).
*
* The remaining tuples are bucketed by account, type (whether it's a status or
* a summary) and targetConnectionId. checkBuckets() flushes a bucket once the
* bucket reports that it is ready: it aggregates the tuples in the bucket and
* passes the result on to be "message'ed".
*/
/**
 * Bolt that turns metric tuples into "insight" messages.
 *
 * <p>Anomaly tuples are sent immediately. Other accepted tuples that carry a
 * {@code targetConnectionId} are collected in {@link MessageAggregationBucket}s
 * keyed by account, message type and target connection id; every call to
 * {@link #realExecute(Tuple)} then flushes the buckets that report ready as a
 * single aggregated message.
 */
public class JuggaloaderMessageGeneratorBolt extends NodeableUnreliableBolt {

    private static final Logger logger = Logger.getLogger(JuggaloaderMessageGeneratorBolt.class);
    private static final long serialVersionUID = 476003176010598454L;

    /** Number of tuples handed to {@link #realExecute(Tuple)}; logged by {@link #cleanup()}. */
    private int count = 0;

    /** Open aggregation buckets, keyed by account + message type + targetConnectionId. */
    private Map<String, MessageAggregationBucket> buckets = new ConcurrentHashMap<>();

    /** Wall-clock time (ms) at which each account was first seen by this bolt instance. */
    private Map<String, Long> accountState = new ConcurrentHashMap<>();

    /**
     * Folds every item of a bucket into a single message.
     *
     * <p>One row is built per bucket item (a subset of the item's fields) and the rows are
     * kept ordered by descending {@code stddev}. The rows, the bucket creation time and the
     * sums of {@code value} and {@code diff} over all items are then written into
     * {@code metric}, which is modified in place.
     *
     * @param metric the map that receives the aggregated fields ("created", "items", "diff", "total")
     * @param bucket the bucket whose items are aggregated; each item is expected to be a
     *               {@code Map<String, Object>} holding Float "value", "diff" and "stddev" entries
     * @return the same {@code metric} instance that was passed in
     */
    @SuppressWarnings("unchecked")
    private Map<String, Object> aggregate(Map<String, Object> metric, MessageAggregationBucket bucket) {
        ArrayList<Map<String, Object>> items = new ArrayList<>();
        float total = 0.0f;
        float diff = 0.0f;

        for (Object object : bucket) {
            Map<String, Object> item = (Map<String, Object>) object;
            Map<String, Object> row = new HashMap<>();

            row.put("metricCriteria", item.get("metricCriteria"));
            row.put("name", item.get("name")); // ie, CONNECTION_ACTIVITY
            if (item.containsKey("targetConnectionAlias")) {
                row.put("targetConnectionAlias", item.get("targetConnectionAlias")); // ie, Nodeable Cloud
            }
            if (item.containsKey("targetAlias")) {
                row.put("targetAlias", item.get("targetAlias")); // ie, i-a35034c7
            }
            row.put("value", item.get("value"));
            row.put("mean", item.get("mean"));
            row.put("stddev", item.get("stddev"));
            row.put("diff", item.get("diff"));
            row.put("min", item.get("min"));
            row.put("max", item.get("max"));

            total += (Float) item.get("value");
            diff += (Float) item.get("diff");

            // Insertion sort by stddev, largest first: skip past every row whose
            // stddev is strictly greater than the new row's.
            int idx = 0;
            while (idx < items.size() && ((Float) (items.get(idx)).get("stddev") > (Float) row.get("stddev"))) {
                idx += 1;
            }
            items.add(idx, row);
        }

        metric.put("created", bucket.getCreated());
        metric.put("items", items);
        metric.put("diff", diff); // across all the items
        metric.put("total", total); // across all the items
        return metric;
    }

    /**
     * Flushes every bucket that reports ready: aggregates it, sends the resulting
     * message and removes the bucket.
     *
     * <p>Failures are isolated per bucket. A bucket that fails to aggregate or send is
     * logged and left in place (so it is attempted again on the next call), and the
     * remaining buckets are still processed. Without this isolation a single failing
     * bucket would abort the loop on every call and keep later buckets from flushing.
     */
    @SuppressWarnings("unchecked")
    private void checkBuckets() {
        for (Map.Entry<String, MessageAggregationBucket> entry : buckets.entrySet()) {
            String key = entry.getKey();
            MessageAggregationBucket bucket = entry.getValue();
            try {
                if (bucket.isReady()) {
                    /*
                     * The first item in the bucket will be the metric used as the
                     * aggregated one. This has to be done here so no large ugly
                     * blob of redundant metadata is persisted to mongodb.
                     */
                    Map<String, Object> first = (Map<String, Object>) bucket.get(0);
                    produce(aggregate(first, bucket));
                    buckets.remove(key);
                }
            } catch (Exception e) {
                logger.error("JuggaloaderMessageGeneratorBolt failed to flush bucket " + key + ": " + e.getMessage(), e);
            }
        }
    }

    /**
     * Adds an item to the bucket for its account, type and target connection,
     * creating the bucket on first use. Items without a {@code targetConnectionId}
     * are ignored.
     *
     * @param account the account the item belongs to
     * @param item    the message metadata to buffer
     */
    private void bucketItem(String account, Map<String, Object> item) {
        if (item.get("targetConnectionId") != null) {
            String key = account + item.get("type") + item.get("targetConnectionId");
            MessageAggregationBucket bucket = buckets.get(key);
            if (bucket == null) {
                bucket = new MessageAggregationBucket(account);
                buckets.put(key, bucket);
            }
            bucket.add(item);
        }
    }

    /**
     * {@inheritDoc}
     *
     * <p>A tuple is accepted when it is an anomaly, or when it belongs to a non-"global"
     * account, passes the metrics whitelist, has a non-negligible diff and has an accepted
     * granularity (hourly, daily, or minutely during the first 15 minutes after the account
     * was first seen). Accepted anomalies are produced immediately; other accepted tuples are
     * bucketed. Ready buckets are flushed on every call. Any exception is logged and swallowed.
     */
    @Override
    @SuppressWarnings("unchecked")
    public void realExecute(Tuple tuple) {
        try {
            this.count += 1;
            long granularity = tuple.getLongByField("granularity");
            boolean isAnomaly = tuple.getBooleanByField("anomaly");
            String account = tuple.getStringByField("metricAccount");
            String metricName = tuple.getStringByField("metricName");
            float diff = tuple.getFloatByField("diff");

            // Remember when each account was first seen so minute-granularity
            // tuples can be accepted for a short while after start-up.
            long now = System.currentTimeMillis();
            Long accountBirth = accountState.get(account);
            if (accountBirth == null) {
                accountBirth = Long.valueOf(now);
                accountState.put(account, accountBirth);
            }
            boolean acceptMinutes = (now - accountBirth.longValue()) < (Constants.PERIOD_MINUTE * 15);

            if (
                isAnomaly
                || (
                    !account.equals("global")
                    && MetricsWhitelist.whitelist(metricName, (HashMap<String, String>) tuple.getValueByField("metricCriteria"))
                    && Math.abs(diff) > 0.001f
                    && (
                        (granularity == Constants.PERIOD_MINUTE && acceptMinutes) // when starting up, we dont want to wait for hourlies
                        || granularity == Constants.PERIOD_HOUR // hourly status
                        || granularity == Constants.PERIOD_DAY // daily summary
                    )
                )
            ) {
                // NOTE(review): this writes into the map carried by the tuple itself;
                // confirm no other consumer of the tuple relies on it being unmodified.
                Map<String, Object> metadata = (Map<String, Object>) tuple.getValueByField("metaData");
                Map<String, String> criteria = (Map<String, String>) tuple.getValueByField("metricCriteria");
                metadata.put("granularity", granularity);
                metadata.put("account", account);
                metadata.put("name", metricName);
                metadata.put("timestamp", tuple.getLongByField("metricTimestamp"));
                metadata.put("metricCriteria", criteria);
                metadata.put("value", tuple.getFloatByField("metricValue"));
                metadata.put("mean", tuple.getFloatByField("avgy"));
                metadata.put("stddev", tuple.getFloatByField("stddev"));
                metadata.put("diff", diff);
                metadata.put("min", tuple.getFloatByField("min"));
                metadata.put("max", tuple.getFloatByField("max"));

                EventId mType = EventId.NODEBELLY_STATUS;
                if (isAnomaly) {
                    mType = EventId.NODEBELLY_ANOMALY;
                } else if (granularity == Constants.PERIOD_DAY) {
                    mType = EventId.NODEBELLY_SUMMARY;
                }
                metadata.put("type", mType.toString());

                if (isAnomaly) {
                    produce(metadata);
                } else if (metadata.get("targetConnectionId") != null) {
                    bucketItem(account, metadata);
                }
                // Non-anomaly tuples without a targetConnectionId are deliberately
                // dropped: producing them directly caused SOBA-1962.
            }

            // checked each time execute() is called, even if
            // no new tuple was bucketed
            checkBuckets();
        } catch (Exception e) {
            logger.error("Unknown exception type in JuggaloaderMessageGeneratorBolt " + e.getMessage(), e);
        }
    }

    /**
     * Sends a message map out as an insight message.
     *
     * <p>Non-anomaly messages that still carry a minute granularity are relabelled as
     * hourly before sending (see SOBA-1663).
     *
     * @param map the message to send; its "granularity" entry may be rewritten in place
     */
    protected void produce(Map<String, Object> map) {
        // SOBA-1663 - we used a minute-granularity above for a fake status
        // nodebelly, change it back to an hourly
        Long granularity = (Long) map.get("granularity");
        String mType = (String) map.get("type");
        if (!"NODEBELLY_ANOMALY".equals(mType) && granularity != null && granularity.longValue() == Constants.PERIOD_MINUTE) {
            map.put("granularity", Constants.PERIOD_HOUR);
        }
        logger.info("JuggaloaderMessageGeneratorBolt produce insight: " + mType);
        CamelFacade.sendInsightMessage(map);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // none
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void cleanup() {
        logger.info("JuggaloaderMessageGeneratorBolt saw: " + this.count);
    }
}