package udacity.storm;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.testing.TestWordSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.Map;
import java.util.regex.Pattern;

/**
 * A bolt that parses the tweet into words.
 *
 * <p>For every incoming tuple, the string in column 0 is split on runs of
 * spaces and the punctuation characters {@code . , ? !}, and each non-empty
 * piece is emitted as a one-column tuple named {@code tweet-word}.
 */
public class ParseTweetBolt extends BaseRichBolt {

  /**
   * One or more consecutive delimiter characters (space, '.', ',', '?', '!').
   * Compiled once instead of on every call to {@link #execute(Tuple)}.
   */
  private static final Pattern DELIMITERS = Pattern.compile("[ .,?!]+");

  // To output tuples from this bolt to the next bolt in the topology
  OutputCollector collector;

  /**
   * Stores the collector handed over by Storm so that {@link #execute(Tuple)}
   * can emit through it.
   *
   * @param map             topology configuration (not used by this bolt)
   * @param topologyContext task context (not used by this bolt)
   * @param outputCollector collector used to emit and ack tuples
   */
  @Override
  public void prepare(
      Map map,
      TopologyContext topologyContext,
      OutputCollector outputCollector) {
    // save the output collector for emitting tuples
    collector = outputCollector;
  }

  /**
   * Splits the tweet carried in column 0 of {@code tuple} into words and emits
   * one tuple per word, then acks the input.
   *
   * @param tuple incoming tuple whose first column is the tweet text
   */
  @Override
  public void execute(Tuple tuple) {
    // get the 1st column 'tweet' from tuple
    String tweet = tuple.getString(0);

    // a null tweet carries no words; skip splitting rather than throw
    if (tweet != null) {
      for (String token : DELIMITERS.split(tweet)) {
        // split() yields a leading "" when the tweet starts with a delimiter
        // (and a lone "" for an empty tweet); those are not words
        if (!token.isEmpty()) {
          // emitted unanchored, exactly as before
          collector.emit(new Values(token));
        }
      }
    }

    // BaseRichBolt does not ack automatically; without this a reliable spout
    // would see every tuple time out
    collector.ack(tuple);
  }

  /**
   * Declares the schema of the tuples emitted by this bolt.
   *
   * @param declarer receives the output field declaration
   */
  @Override
  public void declareOutputFields(OutputFieldsDeclarer declarer) {
    // tell storm the schema of the output tuple for this bolt
    // tuple consists of a single column called 'tweet-word'
    declarer.declare(new Fields("tweet-word"));
  }
}