package resa.examples.wc;
import backtype.storm.Config;
import backtype.storm.StormSubmitter;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
import resa.util.ConfigUtil;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;
/**
 * A word count topology that demonstrates Storm's stream groupings: sentences
 * from a random (or Redis-backed) spout are split into words, which are then
 * counted by a bolt subscribed with a fields grouping on the word.
 */
public class WordCountTopology {
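/**
 * Bolt that strips punctuation and symbol characters from each incoming
 * sentence, tokenizes it on whitespace, and emits one lower-cased word per
 * output tuple.
 */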
public static class SplitSentence extends BaseBasicBolt {
private static final long serialVersionUID = 9182719848878455933L;
public SplitSentence() {
}
@Override
public void execute(Tuple input, BasicOutputCollector collector) {
String sentence = input.getStringByField("sentence");
// strip punctuation and symbol characters, then tokenize on whitespace
StringTokenizer tokenizer = new StringTokenizer(sentence.replaceAll("\\p{P}|\\p{S}", " "));
while (tokenizer.hasMoreTokens()) {
String word = tokenizer.nextToken().trim();
if (!word.isEmpty()) {
collector.emit(new Values(word.toLowerCase()));
}
}
// Utils.sleep(1000);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word"));
}
@Override
public Map<String, Object> getComponentConfiguration() {
return null;
}
@Override
public void cleanup() {
System.out.println("Split cleanup");
}
}
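/**
 * Bolt that maintains an in-memory count per word in this task's data map and
 * reports the number of distinct words seen through a registered metric.
 */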
public static class WordCount extends BaseBasicBolt {
private static final long serialVersionUID = 4905347466083499207L;
private int numBuckets = 6;
private Map<String, Integer> counters;
@Override
public void prepare(Map stormConf, TopologyContext context) {
super.prepare(stormConf, context);
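// reuse the counter map kept in this task's data, creating it on first use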
counters = (Map<String, Integer>) context.getTaskData("words");
if (counters == null) {
counters = new HashMap<>();
context.setTaskData("words", counters);
}
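// expose the number of distinct words seen by this task, reported at the
// built-in metrics bucket interval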
int interval = Utils.getInt(stormConf.get(Config.TOPOLOGY_BUILTIN_METRICS_BUCKET_SIZE_SECS));
context.registerMetric("number-words", this::getNumWords, interval);
}
private long getNumWords() {
//counters.rotate();
return counters.size();
}
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
String word = tuple.getStringByField("word");
Integer count = counters.get(word);
if (count == null) {
count = 0;
}
count++;
counters.put(word, count);
// emitting per-word counts downstream is disabled in this example; the counts
// are only kept in the in-memory map maintained by this task
//collector.emit(new Values(word, count));
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word", "count"));
}
@Override
public void cleanup() {
System.out.println("Word Counter cleanup");
}
}
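/*
 * Usage: WordCountTopology <topology-name> <config-file>
 *
 * The config file is read via ConfigUtil.readConfig and is expected to provide
 * keys such as spout.parallelism, split.parallelism, counter.parallelism,
 * spout.redis, and (when spout.redis is true) redis.host, redis.port and
 * redis.queue. The exact file format is whatever ConfigUtil supports
 * (typically a Storm-style YAML file).
 */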
public static void main(String[] args) throws Exception {
Config conf = ConfigUtil.readConfig(new File(args[1]));
if (conf == null) {
throw new RuntimeException("cannot find conf file " + args[1]);
}
TopologyBuilder builder = new TopologyBuilder();
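// choose the sentence source: a Redis-backed spout when "spout.redis" is true,
// otherwise a locally generated random sentence spout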
if (!ConfigUtil.getBoolean(conf, "spout.redis", false)) {
builder.setSpout("say", new RandomSentenceSpout(), ConfigUtil.getInt(conf, "spout.parallelism", 1));
} else {
String host = (String) conf.get("redis.host");
int port = ((Number) conf.get("redis.port")).intValue();
String queue = (String) conf.get("redis.queue");
builder.setSpout("say", new RedisSentenceSpout(host, port, queue),
ConfigUtil.getInt(conf, "spout.parallelism", 1));
}
builder.setBolt("split", new SplitSentence(), ConfigUtil.getInt(conf, "split.parallelism", 1))
.shuffleGrouping("say");
builder.setBolt("counter", new WordCount(), ConfigUtil.getInt(conf, "counter.parallelism", 1))
.fieldsGrouping("split", new Fields("word"));
StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
}
}