package udacity.storm;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.testing.TestWordSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
import twitter4j.conf.ConfigurationBuilder;
import twitter4j.FilterQuery;
import twitter4j.TwitterStream;
import twitter4j.TwitterStreamFactory;
import twitter4j.Status;
import twitter4j.StatusDeletionNotice;
import twitter4j.StatusListener;
import twitter4j.StallWarning;
import twitter4j.URLEntity;
import udacity.storm.tools.SentimentAnalyzer;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.regex.Pattern;
/**
 * A Storm spout that reads tweets from the Twitter streaming API (via twitter4j)
 * and emits each geo-tagged tweet together with a sentiment score.
 *
 * <p>Tweets are received by a {@link StatusListener} callback and placed into a
 * bounded queue; {@link #nextTuple()} drains that queue one entry at a time.
 * Each queue entry is a single string of the form
 * {@code <text>DELIMITER<lat>,<lon>DELIMITER<url>}.
 */
public class TweetSpout extends BaseRichSpout {
  // Separator between the text, geo and URL fields of a queued tweet payload.
  // The whole payload (delimiters included) is emitted as the "tweet" field.
  private static final String FIELD_DELIMITER = "DELIMITER";

  // Placeholder used when a field has no value.
  private static final String NOT_AVAILABLE = "n/a";

  // Maximum number of tweets buffered between the listener and nextTuple().
  private static final int QUEUE_CAPACITY = 1000;

  // How long nextTuple() sleeps when the buffer is empty, in milliseconds.
  private static final int EMPTY_QUEUE_SLEEP_MS = 50;

  // Twitter API authentication credentials
  String custkey, custsecret;
  String accesstoken, accesssecret;

  // To output tuples from spout to the next stage bolt
  SpoutOutputCollector collector;

  // Twitter4j - twitter stream to get tweets
  TwitterStream twitterStream;

  // Shared queue buffering the tweets received by the listener; created in open()
  LinkedBlockingQueue<String> queue = null;

  // NOTE(review): these two patterns are not referenced anywhere in this class;
  // they are kept because the fields are package-visible and may be used elsewhere.
  Pattern moodPattern = Pattern.compile("love|hate|happy|angry|sad");
  Pattern properPattern = Pattern.compile("^[a-zA-Z0-9 ]+$");

  /**
   * twitter4j listener that converts each incoming status into a delimited
   * payload string and places it on the shared queue.
   */
  private class TweetListener implements StatusListener {
    /**
     * Called when a tweet arrives. Tweets without a geolocation are ignored.
     *
     * @param status the received tweet
     */
    @Override
    public void onStatus(Status status) {
      // Only geo-tagged tweets are forwarded.
      if (status.getGeoLocation() == null) {
        return;
      }

      String geoInfo = status.getGeoLocation().getLatitude()
          + "," + status.getGeoLocation().getLongitude();

      // When the tweet carries several URLs, the last one is used.
      String urlInfo = NOT_AVAILABLE;
      URLEntity[] urls = status.getURLEntities();
      if (urls != null && urls.length > 0) {
        urlInfo = urls[urls.length - 1].getURL();
      }

      // offer() does not block: when the queue is full the tweet is dropped.
      queue.offer(status.getText() + FIELD_DELIMITER + geoInfo + FIELD_DELIMITER + urlInfo);
    }

    @Override
    public void onDeletionNotice(StatusDeletionNotice sdn) {
      // deletion notices are not used by this spout
    }

    @Override
    public void onTrackLimitationNotice(int i) {
      // track limitation notices are not used by this spout
    }

    @Override
    public void onScrubGeo(long l, long l1) {
      // scrub-geo notices are not used by this spout
    }

    @Override
    public void onStallWarning(StallWarning warning) {
      // stall warnings are not used by this spout
    }

    @Override
    public void onException(Exception e) {
      e.printStackTrace();
    }
  }

  /**
   * Constructor for tweet spout that accepts the credentials.
   *
   * @param key         OAuth consumer key
   * @param secret      OAuth consumer secret
   * @param token       OAuth access token
   * @param tokensecret OAuth access token secret
   */
  public TweetSpout(
      String key,
      String secret,
      String token,
      String tokensecret) {
    custkey = key;
    custsecret = secret;
    accesstoken = token;
    accesssecret = tokensecret;
  }

  /**
   * Creates the tweet buffer, initializes the sentiment analyzer and starts a
   * filtered Twitter stream whose statuses are delivered to {@link TweetListener}.
   *
   * @param map                  topology configuration (unused here)
   * @param topologyContext      topology context (unused here)
   * @param spoutOutputCollector collector used later by {@link #nextTuple()}
   */
  @Override
  public void open(
      Map map,
      TopologyContext topologyContext,
      SpoutOutputCollector spoutOutputCollector) {
    // create the bounded buffer that holds tweets until nextTuple() picks them up
    queue = new LinkedBlockingQueue<String>(QUEUE_CAPACITY);

    SentimentAnalyzer.init();

    // save the output collector for emitting tuples
    collector = spoutOutputCollector;

    // build the config with credentials for twitter 4j
    ConfigurationBuilder config =
        new ConfigurationBuilder()
            .setOAuthConsumerKey(custkey)
            .setOAuthConsumerSecret(custsecret)
            .setOAuthAccessToken(accesstoken)
            .setOAuthAccessTokenSecret(accesssecret);

    // create the twitter stream factory with the config
    TwitterStreamFactory fact = new TwitterStreamFactory(config.build());

    // get an instance of twitter stream
    twitterStream = fact.getInstance();

    // Restrict the stream to one bounding box and to English tweets.
    // Each corner is {longitude, latitude}; the box looks like it covers
    // roughly the contiguous United States.
    FilterQuery tweetFilterQuery = new FilterQuery();
    tweetFilterQuery.locations(new double[][]{
        new double[]{-124.848974, 24.396308},
        new double[]{-66.885444, 49.384358}
    });
    tweetFilterQuery.language(new String[]{"en"});

    // provide the handler for twitter stream
    twitterStream.addListener(new TweetListener());

    // Start the filtered stream. sample() is deliberately NOT called afterwards:
    // starting a second stream on the same TwitterStream instance replaces the
    // filtered one, which would silently discard the location/language filter.
    twitterStream.filter(tweetFilterQuery);
  }

  /**
   * Emits at most one tuple {@code (tweet, sentiment)} per call. When the buffer
   * is empty the method sleeps briefly and returns without emitting.
   */
  @Override
  public void nextTuple() {
    // try to pick a tweet from the buffer
    String ret = queue.poll();

    // if no tweet is available, wait for a short while and return
    if (ret == null) {
      Utils.sleep(EMPTY_QUEUE_SLEEP_MS);
      return;
    }

    // Split once; skip malformed payloads instead of throwing out of nextTuple().
    String[] parts = ret.split(FIELD_DELIMITER);
    if (parts.length < 2) {
      return;
    }
    String originalTweet = parts[0];
    String geoInfo = parts[1];

    if (NOT_AVAILABLE.equals(geoInfo)) {
      return;
    }

    System.out.print("\t DEBUG SPOUT: BEFORE SENTIMENT \n");
    // NOTE(review): the "- 2" presumably re-centres the analyzer's score around
    // zero; confirm against SentimentAnalyzer.findSentiment.
    int sentiment = SentimentAnalyzer.findSentiment(originalTweet) - 2;
    System.out.print("\t DEBUG SPOUT: AFTER SENTIMENT (" + String.valueOf(sentiment) + ") for \t" + originalTweet + "\n");

    // The full delimited payload (text, geo, url) is emitted as the "tweet" field.
    collector.emit(new Values(ret, sentiment));
  }

  /**
   * Shuts the Twitter stream down when the spout is closed. Safe to call even
   * if {@link #open} never created the stream.
   */
  @Override
  public void close() {
    if (twitterStream != null) {
      twitterStream.shutdown();
    }
  }

  /**
   * Component specific configuration.
   *
   * @return a configuration limiting this spout's max task parallelism to 1
   */
  @Override
  public Map<String, Object> getComponentConfiguration() {
    // create the component config
    Config ret = new Config();
    // set the parallelism for this spout to be 1
    ret.setMaxTaskParallelism(1);
    return ret;
  }

  /**
   * Declares the output schema of this spout.
   *
   * @param outputFieldsDeclarer declarer provided by Storm
   */
  @Override
  public void declareOutputFields(
      OutputFieldsDeclarer outputFieldsDeclarer) {
    // tuple consists of two columns: 'tweet' (the delimited payload) and
    // 'sentiment' (the integer score computed in nextTuple)
    outputFieldsDeclarer.declare(new Fields("tweet", "sentiment"));
  }
}