package com.xavient.dip.storm.bolt;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.xavient.dip.storm.utils.MapOrdering;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
 * Bolt that ranks locations by tweet count and emits the leading entries of each batch.
 *
 * <p>On every {@link #finishBatch()} the tuples buffered in the inherited {@code queue} are
 * grouped by their {@code "location"} field, their {@code "count"} values are summed per
 * location, the totals are ordered with {@link MapOrdering}, and at most
 * {@code rankMaxThreshold} entries are emitted as
 * {@code ("tweets_location", location, count)}.
 */
public class TopNLocationByTweets extends DataIngestionBolt {

    private static final long serialVersionUID = 588968842105801987L;

    /** Constant emitted in the {@code tableName} output field. */
    private static final String TABLE_NAME = "tweets_location";

    /** Input field holding the location a tuple refers to. */
    private static final String LOCATION_FIELD = "location";

    /** Input field holding the number of tweets a tuple accounts for. */
    private static final String COUNT_FIELD = "count";

    /**
     * Delegates to the parent bolt and then sizes a batch at three times the rank threshold.
     *
     * @param stormConf topology configuration, forwarded to the parent
     * @param context   topology context, forwarded to the parent
     * @param collector output collector, forwarded to the parent
     */
    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        super.prepare(stormConf, context, collector);
        // NOTE(review): rankMaxThreshold is presumably initialised by super.prepare(); confirm
        // in DataIngestionBolt, since it is read immediately afterwards.
        this.batchSize = rankMaxThreshold * 3;
    }

    /**
     * Declares the output stream as {@code (tableName, location, count)}.
     *
     * @param declarer declarer supplied by Storm
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("tableName", LOCATION_FIELD, COUNT_FIELD));
    }

    /**
     * Drains the pending tuples, aggregates them per location and emits the first
     * {@code rankMaxThreshold} entries in {@link MapOrdering} order.
     */
    @Override
    protected void finishBatch() {
        List<Tuple> tuples = new ArrayList<Tuple>();
        queue.drainTo(tuples);

        Map<String, Integer> counts = groupByField(tuples, LOCATION_FIELD);
        List<Map.Entry<String, Integer>> ranked = new ArrayList<>(counts.entrySet());
        // NOTE(review): MapOrdering presumably sorts highest count first; confirm in that class.
        Collections.sort(ranked, new MapOrdering());

        // Keep everything when fewer than rankMaxThreshold locations were seen.
        int limit = Math.min(ranked.size(), rankMaxThreshold);
        for (Map.Entry<String, Integer> entry : ranked.subList(0, limit)) {
            collector.emit(new Values(TABLE_NAME, entry.getKey(), entry.getValue()));
        }
    }

    /**
     * Sums the {@code "count"} field of the given tuples per distinct value of {@code field}
     * and acks every tuple once it has been folded into the totals.
     *
     * <p>Every tuple contributes its own {@code "count"} value. Previously only the first
     * tuple of a key did so and each later one added a constant 1, which under-reported
     * keys whose counts arrived spread over several tuples.
     *
     * @param tuples tuples of the current batch
     * @param field  name of the string field to group by
     * @return map from field value to the summed count
     */
    private Map<String, Integer> groupByField(List<Tuple> tuples, String field) {
        Map<String, Integer> counts = new HashMap<>();
        for (Tuple tuple : tuples) {
            // Read each field once instead of repeating the by-name lookup.
            String key = tuple.getStringByField(field);
            Integer increment = tuple.getIntegerByField(COUNT_FIELD);

            Integer current = counts.get(key);
            if (current == null) {
                counts.put(key, increment);
            } else {
                // NOTE(review): assumes "count" is never null on incoming tuples; confirm upstream.
                counts.put(key, current + increment);
            }
            collector.ack(tuple);
        }
        return counts;
    }
}