package com.realtimecep.pilots.analytics.sns.spouts.twitter.httpclient;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import org.apache.http.HttpResponse;
import org.apache.http.StatusLine;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.DefaultHttpClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue;
/**
 * Storm spout that reads the Twitter streaming "filter" API over HTTP and emits
 * every received line as a tuple {@code (criteria, tweet)}.
 * <p/>
 * The comma separated {@code track} configuration entry is partitioned
 * round-robin between the tasks of this component, so every task follows its
 * own subset of keywords. A background thread keeps the HTTP stream open and
 * pushes raw lines into {@link #tweets}; {@link #nextTuple()} drains that queue
 * on the Storm executor thread.
 * <p/>
 * Required configuration keys: {@code track}, {@code user}, {@code password}.
 *
 * @author <a href="iamtedwon@gmail.com">Ted Won</a>
 * @version 0.1.0
 */
public class TwitterApiStreamingSpout extends BaseRichSpout implements Runnable {

    // Static so the logger is not dragged into the serialized form of the spout.
    private static final Logger logger = LoggerFactory.getLogger(TwitterApiStreamingSpout.class);

    static String STREAMING_API_URL = "https://stream.twitter.com/1/statuses/filter.json?track=";
    //    static String STREAMING_API_URL = "https://stream.twitter.com/1.1/statuses/sample.json";

    /** Pause after a failed or rejected connection attempt, in milliseconds. */
    private static final long ERROR_BACKOFF_MILLIS = 10000L;

    /** Pause between two consecutive connection attempts, in milliseconds. */
    private static final long RECONNECT_DELAY_MILLIS = 1000L;

    private String track;
    private String user;
    private String password;

    // Runtime-only state, created in open() or by the reader thread.
    private transient volatile DefaultHttpClient client;
    private transient SpoutOutputCollector collector;
    private transient UsernamePasswordCredentials credentials;
    private transient BasicCredentialsProvider credentialProvider;

    /** Request currently streaming, kept so that {@link #close()} can abort it. */
    private transient volatile HttpGet activeRequest;

    /** Background thread running {@link #run()}. */
    private transient volatile Thread readerThread;

    /** Cleared by {@link #close()} to make the reader thread terminate. */
    private transient volatile boolean running;

    /** Raw lines received from the stream and not yet emitted. */
    LinkedBlockingQueue<String> tweets = new LinkedBlockingQueue<String>();

    /**
     * Emits every line currently buffered by the reader thread, paired with the
     * track criteria of this task. Does nothing when the buffer is empty.
     */
    @Override
    public void nextTuple() {
        if (tweets.isEmpty()) {
            return;
        }
        Collection<String> tweetsToEmit = new ArrayList<String>();
        tweets.drainTo(tweetsToEmit);
        for (String tweet : tweetsToEmit) {
            logger.trace("# nextTuple: {}", tweet);
            collector.emit(new Values(track, tweet));
        }
    }

    /**
     * Selects the tracks handled by this task, prepares the credentials and
     * starts the background reader thread.
     *
     * @param conf      topology configuration; must contain {@code track}
     *                  (comma separated keywords), {@code user} and {@code password}
     * @param context   used to find this task's index among the component's tasks
     * @param collector collector used by {@link #nextTuple()} to emit tuples
     * @throws IllegalArgumentException if the {@code track} entry is missing
     * @throws RuntimeException         if no track is left for this task because
     *                                  there are more tasks than tracks
     */
    @Override
    public void open(Map conf, TopologyContext context,
                     SpoutOutputCollector collector) {
        int spoutsSize = context.getComponentTasks(context.getThisComponentId()).size();
        int myIdx = context.getThisTaskIndex();

        String configuredTracks = (String) conf.get("track");
        if (configuredTracks == null) {
            throw new IllegalArgumentException("Missing required configuration entry 'track'");
        }
        String[] tracks = configuredTracks.split(",");

        // Round-robin assignment: track i goes to task (i % spoutsSize).
        StringBuilder tracksBuffer = new StringBuilder();
        int assigned = 0;
        for (int i = 0; i < tracks.length; i++) {
            if (i % spoutsSize == myIdx) {
                if (assigned > 0) {
                    tracksBuffer.append(",");
                }
                tracksBuffer.append(tracks[i]);
                assigned++;
            }
        }

        if (assigned == 0)
            throw new RuntimeException("No track found for spout" +
                    " [spoutsSize:" + spoutsSize + ", tracks:" + tracks.length + "] the amount" +
                    " of tracks must be more then the spout paralellism");

        // NOTE(review): the value is appended to the URL as-is; keywords that
        // need URL escaping must already be escaped in the configuration.
        this.track = tracksBuffer.toString();

        user = (String) conf.get("user");
        password = (String) conf.get("password");

        credentials = new UsernamePasswordCredentials(user, password);
        credentialProvider = new BasicCredentialsProvider();
        credentialProvider.setCredentials(AuthScope.ANY, credentials);
        this.collector = collector;

        running = true;
        Thread worker = new Thread(this, "twitter-stream-reader-" + myIdx);
        // Daemon so that a forgotten close() cannot keep the JVM alive.
        worker.setDaemon(true);
        readerThread = worker;
        worker.start();
    }

    /**
     * Stops the background reader: clears the running flag, aborts the request
     * that is currently streaming (a blocked read does not react to interrupts)
     * and interrupts the thread in case it is sleeping between attempts.
     */
    @Override
    public void close() {
        running = false;
        HttpGet request = activeRequest;
        if (request != null) {
            request.abort();
        }
        Thread worker = readerThread;
        if (worker != null) {
            worker.interrupt();
        }
    }

    /** Declares the two output fields {@code criteria} and {@code tweet}. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("criteria", "tweet"));
    }

    /**
     * Reader loop executed on the background thread: connects to the streaming
     * endpoint, buffers incoming lines and reconnects after every failure or
     * end of stream until {@link #close()} is called or the thread is interrupted.
     */
    @Override
    public void run() {
        while (running && !Thread.currentThread().isInterrupted()) {
            streamOnce();
            if (!running) {
                break;
            }
            logger.error("An error ocurs, all runnable thread will be restarted");
            if (!pause(RECONNECT_DELAY_MILLIS)) {
                break;
            }
        }
    }

    /**
     * Performs one connection attempt with a fresh HTTP client and always
     * releases that client afterwards, whatever the outcome.
     */
    private void streamOnce() {
        DefaultHttpClient httpClient = null;
        try {
            httpClient = new DefaultHttpClient();
            client = httpClient;
            httpClient.setCredentialsProvider(credentialProvider);

            HttpGet get = new HttpGet(STREAMING_API_URL + track);
            activeRequest = get;
            // Re-check after publishing the request: close() may have run just
            // before activeRequest was set and therefore found nothing to abort.
            if (!running) {
                return;
            }
            readStream(httpClient, get);
        } catch (Throwable e) {
            logger.error("Exception Occurred: ", e);
        } finally {
            activeRequest = null;
            client = null;
            if (httpClient != null) {
                // Releases the sockets held by this attempt's client.
                httpClient.getConnectionManager().shutdown();
            }
        }
    }

    /**
     * Executes the request and queues every non-empty line of the response.
     * Communication errors and non-200 answers are logged and followed by a
     * back-off pause.
     */
    private void readStream(DefaultHttpClient httpClient, HttpGet get) {
        BufferedReader reader = null;
        try {
            HttpResponse response = httpClient.execute(get);
            StatusLine status = response.getStatusLine();
            if (status.getStatusCode() != 200 || response.getEntity() == null) {
                logger.error("Unexpected response from twitter api [{}]: {}", get.getURI(), status);
                pause(ERROR_BACKOFF_MILLIS);
                return;
            }

            InputStream inputStream = response.getEntity().getContent();
            // Decode explicitly as UTF-8 instead of the platform default charset.
            reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
            String in;
            while (running && (in = reader.readLine()) != null) {
                // Blank lines (presumably keep-alive newlines of the streaming
                // endpoint - TODO confirm) carry no tweet and are skipped.
                if (in.length() > 0) {
                    tweets.add(in);
                }
            }
        } catch (IOException e) {
            if (running) {
                logger.error("Error in communication with twitter api [" + get.getURI().toString() + "]", e);
                pause(ERROR_BACKOFF_MILLIS);
            }
        } finally {
            // Abort before closing: closing the content stream of a response
            // that is still open may try to drain it, which would not finish
            // on an endless stream.
            get.abort();
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // The connection has just been aborted; nothing left to release.
                }
            }
        }
    }

    /**
     * Sleeps for the given time.
     *
     * @return {@code false} if the sleep was interrupted; the interrupt flag is
     *         restored so that the reader loop terminates
     */
    private boolean pause(long millis) {
        try {
            Thread.sleep(millis);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }
}