package udacity.storm;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import backtype.storm.utils.Utils;
/**
 * Entry point that wires the tweet-processing Storm topology and runs it.
 *
 * <p>Pipeline: {@code tweet-spout} -> {@code parse-tweet-bolt} -> {@code infoBolt}
 * -> {@code top-words} -> {@code report-bolt}.
 *
 * <p>With at least one command-line argument the topology is submitted to a live
 * cluster under the name given by {@code args[0]}; with no arguments it is run in
 * an in-process {@link LocalCluster}.
 */
class TopNTweetTopology
{
    /**
     * Number of top entries to keep. Only referenced by the disabled
     * rolling-count/ranking pipeline documented in {@link #wireTopology}.
     */
    private static final int TOP_N = 5;

    /** Name under which the topology is registered on the local cluster. */
    private static final String LOCAL_TOPOLOGY_NAME = "tweet-word-count";

    /**
     * How long the local run is kept alive, in milliseconds.
     * 300,000,000 ms is roughly 83 hours (about 3.5 days).
     * NOTE(review): an earlier comment described this as "300 seconds", which
     * would be 300000; the value is kept unchanged to preserve behavior.
     */
    private static final long LOCAL_RUN_MILLIS = 300000000L;

    /**
     * Builds the topology and either submits it to a live cluster or runs it locally.
     *
     * @param args optional; when non-empty, {@code args[0]} is the topology name used
     *             for submission to a live cluster
     * @throws Exception if topology submission or the local run fails
     */
    public static void main(String[] args) throws Exception
    {
        // create the topology
        TopologyBuilder builder = new TopologyBuilder();
        wireTopology(builder);

        // create the default config object
        Config conf = new Config();

        // set the config in debugging mode
        conf.setDebug(true);

        if (args != null && args.length > 0) {
            // run it in a live cluster

            // set the number of workers for running all spout and bolt tasks
            conf.setNumWorkers(3);

            // create the topology and submit with config
            StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
        } else {
            runLocally(conf, builder);
        }
    }

    /**
     * Registers the spout and all bolts on the given builder.
     *
     * @param builder the topology builder to populate
     */
    private static void wireTopology(TopologyBuilder builder)
    {
        /*
         * In order to create the spout, you need to get twitter credentials
         * If you need to use Twitter firehose/Tweet stream for your idea,
         * create a set of credentials by following the instructions at
         *
         * https://dev.twitter.com/discussions/631
         *
         */

        // now create the tweet spout with the credentials.
        // NOTE: the four credential strings are intentionally blank here and must be
        // filled in before the spout can be expected to authenticate.
        TweetSpout tweetSpout = new TweetSpout(
            "",
            "",
            "",
            ""
        );

        // attach the tweet spout to the topology - parallelism of 1
        builder.setSpout("tweet-spout", tweetSpout, 1);

        // attach the parse tweet bolt using shuffle grouping - parallelism of 10
        builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout");

        // infoBolt and top-words are both fields-grouped on "county_id" - parallelism of 10 each
        builder.setBolt("infoBolt", new InfoBolt(), 10).fieldsGrouping("parse-tweet-bolt", new Fields("county_id"));
        builder.setBolt("top-words", new TopWords(), 10).fieldsGrouping("infoBolt", new Fields("county_id"));

        // attach the report bolt using global grouping - parallelism of 1
        builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("top-words");

        /*
         * Disabled alternative pipeline (rolling counts + rankings), kept for reference.
         * Ranking bolts are from incubator-storm/.../storm/starter/RollingTopWords.java
         *
         * // attach rolling count bolt using fields grouping - parallelism of 1
         * builder.setBolt("rolling-count-bolt", new RollingCountBolt(1000, 10), 1).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word"));
         * builder.setBolt("intermediate-ranker", new IntermediateRankingsBolt(TOP_N, 10), 2).fieldsGrouping("rolling-count-bolt", new Fields("obj"));
         * builder.setBolt("total-ranker", new TotalRankingsBolt(TOP_N, 2)).globalGrouping("intermediate-ranker");
         *
         * total-ranker bolt output is broadcast (allGrouping) to all the top-tweets bolt
         * instances so that every one of them has access to the top hashtags;
         * tweet-spout tweet stream will be distributed randomly to the top-tweets bolt instances
         *
         * builder.setBolt("top-tweets", new TweetsWithTopHashtagsBolt(), 4)
         *     .allGrouping("total-ranker")
         *     .shuffleGrouping("tweet-spout");
         *
         * // attach the report bolt using global grouping - parallelism of 1
         * builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("top-tweets");
         */
    }

    /**
     * Runs the topology in a simulated local cluster for {@link #LOCAL_RUN_MILLIS},
     * then kills it. The local cluster is always shut down, even if submission,
     * the wait, or the kill fails.
     *
     * @param conf    topology configuration; its max task parallelism is set to 4 here
     * @param builder the populated topology builder
     * @throws Exception if submitting or killing the topology fails
     */
    private static void runLocally(Config conf, TopologyBuilder builder) throws Exception
    {
        // cap the parallelism used in local mode - similar in spirit to setting the
        // number of workers in a live cluster
        conf.setMaxTaskParallelism(4);

        // create the local cluster instance
        LocalCluster cluster = new LocalCluster();
        try {
            // submit the topology to the local cluster
            cluster.submitTopology(LOCAL_TOPOLOGY_NAME, conf, builder.createTopology());

            // keep the topology running; note topologies never terminate on their own
            Utils.sleep(LOCAL_RUN_MILLIS);

            // now kill the topology
            cluster.killTopology(LOCAL_TOPOLOGY_NAME);
        } finally {
            // we are done (or failed), so shut down the local cluster either way
            cluster.shutdown();
        }
    }
}