package org.p7h.storm.sentimentanalysis.bolts; import backtype.storm.task.OutputCollector; import backtype.storm.task.TopologyContext; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseRichBolt; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Tuple; import backtype.storm.tuple.Values; import com.google.common.base.Charsets; import com.google.common.base.Splitter; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.io.Resources; import org.p7h.storm.sentimentanalysis.utils.Constants; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import twitter4j.Status; import twitter4j.URLEntity; import java.io.IOException; import java.net.URL; import java.util.List; import java.util.Map; import java.util.SortedMap; /** * Breaks each tweet into words and calculates the sentiment of each tweet and assocaites the sentiment value to the State * and logs the same to the console and also logs to the file. * * @author - Prashanth Babu */ public final class SentimentCalculatorBolt extends BaseRichBolt { private static final Logger LOGGER = LoggerFactory.getLogger(SentimentCalculatorBolt.class); private static final long serialVersionUID = 1942195527233725767L; private OutputCollector _outputCollector; private SortedMap<String,Integer> afinnSentimentMap = null; private SortedMap<String,Integer> stateSentimentMap = null; public SentimentCalculatorBolt(){ } @Override public final void prepare(final Map map, final TopologyContext topologyContext, final OutputCollector outputCollector) { afinnSentimentMap = Maps.newTreeMap(); stateSentimentMap = Maps.newTreeMap(); this._outputCollector = outputCollector; //Bolt will read the AFINN Sentiment file [which is in the classpath] and stores the key, value pairs to a Map. try { final URL url = Resources.getResource(Constants.AFINN_SENTIMENT_FILE_NAME); final String text = Resources.toString(url, Charsets.UTF_8); final Iterable<String> lineSplit = Splitter.on("\n").trimResults().omitEmptyStrings().split(text); List<String> tabSplit; for (final String str: lineSplit) { tabSplit = Lists.newArrayList(Splitter.on("\t").trimResults().omitEmptyStrings().split(str)); afinnSentimentMap.put(tabSplit.get(0), Integer.parseInt(tabSplit.get(1))); } } catch (final IOException ioException) { LOGGER.error(ioException.getMessage(), ioException); ioException.printStackTrace(); //Should not occur. If it occurs, we cant continue. So, exiting at this point itself. System.exit(1); } } @Override public final void declareOutputFields(final OutputFieldsDeclarer outputFieldsDeclarer) { outputFieldsDeclarer.declare(new Fields("stateCode", "sentiment")); } @Override public final void execute(final Tuple input) { final String state = (String) input.getValueByField("state"); final Status status = (Status) input.getValueByField("tweet"); final int sentimentOfCurrentTweet = getSentimentOfTweet(status); Integer previousSentiment = stateSentimentMap.get(state); previousSentiment = (null == previousSentiment) ? sentimentOfCurrentTweet : previousSentiment + sentimentOfCurrentTweet; stateSentimentMap.put(state, previousSentiment); //int stateId = Constants.MAP_STATE_CODE_ID.get(state); _outputCollector.emit(new Values(state, previousSentiment)); LOGGER.info("{}:{}", state, previousSentiment); } /** * Gets the sentiment of the current tweet. * * @param status -- Status Object. * @return sentiment of the current tweet. */ private final int getSentimentOfTweet(final Status status) { //Remove all punctuation and new line chars in the tweet. final String tweet = status.getText().replaceAll("\\p{Punct}|\\n", " ").toLowerCase(); //Splitting the tweet on empty space. final Iterable<String> words = Splitter.on(' ') .trimResults() .omitEmptyStrings() .split(tweet); int sentimentOfCurrentTweet = 0; //Loop thru all the wordsd and find the sentiment of this tweet. for (final String word : words) { if(afinnSentimentMap.containsKey(word)){ sentimentOfCurrentTweet += afinnSentimentMap.get(word); } } //LOGGER.debug("Tweet : Sentiment {} ==> {}", tweet, sentimentOfCurrentTweet); return sentimentOfCurrentTweet; } //Ideally we should be knocking off the URLs from the tweet since they don't need to parsed. private String filterOutURLFromTweet(final Status status) { final String tweet = status.getText(); final URLEntity[] urlEntities = status.getURLEntities(); int startOfURL; int endOfURL; String truncatedTweet = ""; for(final URLEntity urlEntity: urlEntities){ startOfURL = urlEntity.getStart(); endOfURL = urlEntity.getEnd(); truncatedTweet += tweet.substring(0, startOfURL) + tweet.substring(endOfURL); } return truncatedTweet; } }