package eu.socialsensor.twcollect;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import eu.socialsensor.twcollect.util.FileUtil;
import twitter4j.FilterQuery;
import twitter4j.StallWarning;
import twitter4j.Status;
import twitter4j.StatusDeletionNotice;
import twitter4j.StatusListener;
import twitter4j.TwitterException;
import twitter4j.TwitterStream;
import twitter4j.TwitterStreamFactory;
import twitter4j.json.DataObjectFactory;
/**
 * Collects statuses from the Twitter streaming API and dumps their raw JSON,
 * one status per line, into one or more output files.
 *
 * <p>When a maximum file size is configured through
 * {@link #setMaxJsonFileSize(long)}, the output is split into numbered files
 * ({@code <name>.0}, {@code <name>.1}, ...) of approximately that size.
 *
 * <p>Access to the writer and to the rotation counters is synchronized on the
 * collector instance, because the listener callback and the JVM shutdown hook
 * run on different threads.
 */
public class StreamCollector {

    /**
     * Starts a collection that writes to {@code tweets.json.N} files, using
     * the ids read from {@code seeds.txt} and the keywords read from
     * {@code keywords.txt} to build the stream filter.
     *
     * @param args ignored
     * @throws TwitterException declared for Twitter API failures
     * @throws IOException declared for failures while reading the input files
     */
    public static void main(String[] args) throws TwitterException, IOException {
        StreamCollector collector = new StreamCollector();
        collector.setMaxJsonFileSize(100 * 1024); // value is in KB, i.e. 100MB batches
        collector.open("tweets.json");

        long[] seeds = FileUtil.convertStringToLongs(
                FileUtil.readTokensFromFile("seeds.txt"));
        String[] keywords = FileUtil.readTokensFromFile("keywords.txt");

        FilterQuery filter = new FilterQuery(seeds);
        filter.track(keywords);
        collector.startFilter(filter);
    }

    /** Writer of the currently open output file; {@code null} when no file is open. */
    protected BufferedWriter writer = null;

    /** Listener that receives the statuses; created by {@link #open(String)}. */
    protected StatusListener listener = null;

    /**
     * Maximum output file size in KB. If &gt; 0, the collector tries to create
     * output JSON files of approximately that size (i.e. by creating multiple
     * files).
     */
    protected long maxJsonFileSize = 0;

    /**
     * Approximate size of the currently opened file. It is the number of
     * characters of the JSON lines written so far (line separators are not
     * counted), so it can differ from the byte size on disk.
     */
    protected long currentFileSize = 0;

    /** Index of the output file currently being written (the first file is 0). */
    protected int fileCounter = 0;

    /**
     * Sets the maximum JSON file size.
     *
     * @param fileSize maximum size in KB; a value &lt;= 0 disables file rotation
     */
    public void setMaxJsonFileSize(long fileSize) {
        this.maxJsonFileSize = fileSize;
    }

    /**
     * Opens the first output file ({@code tweetDump + ".0"}), creates the
     * status listener and registers a shutdown hook that closes the output
     * when the JVM terminates.
     *
     * @param tweetDump base name of the output files; a numeric suffix is appended
     */
    protected void open(final String tweetDump) {
        openWriter(tweetDump + ".0");

        listener = new StatusListener() {
            @Override
            public void onStatus(Status status) {
                String line = DataObjectFactory.getRawJSON(status);
                if (line == null) {
                    // No raw JSON is available for this status. Skip it instead
                    // of writing the text "null" and failing on line.length().
                    return;
                }
                writeLine(tweetDump, line);
            }

            @Override
            public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            }

            @Override
            public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            }

            @Override
            public void onException(Exception ex) {
                ex.printStackTrace();
            }

            @Override
            public void onScrubGeo(long arg0, long arg1) {
            }

            @Override
            public void onStallWarning(StallWarning arg0) {
            }
        };

        Runtime.getRuntime().addShutdownHook(new Shutdown(this));
    }

    /**
     * Appends one JSON line to the current output file and switches to the
     * next numbered file once the configured maximum size has been reached.
     *
     * <p>If no output file is open (opening failed, a previous write failed,
     * or the collector has been closed) the line is dropped.
     *
     * @param tweetDump base name of the output files
     * @param line raw JSON of a single status, never {@code null}
     */
    private synchronized void writeLine(String tweetDump, String line) {
        if (writer == null) {
            return;
        }
        try {
            writer.append(line);
            writer.newLine();
            currentFileSize += line.length();

            // maxJsonFileSize is expressed in KB, currentFileSize in characters.
            if (maxJsonFileSize > 0 && currentFileSize >= maxJsonFileSize * 1024) {
                closeWriter();
                fileCounter++;
                currentFileSize = 0;
                openWriter(tweetDump + "." + fileCounter);
            }
        } catch (IOException e) {
            e.printStackTrace();
            // closeWriter() also clears the reference, so later statuses do not
            // keep writing to (and failing on) the closed stream.
            closeWriter();
        }
    }

    /**
     * Creates a Twitter stream, attaches the listener created by
     * {@link #open(String)} and starts filtering with the given query.
     *
     * @param filter the query describing which statuses to receive
     */
    protected void startFilter(FilterQuery filter) {
        TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
        twitterStream.addListener(listener);
        twitterStream.filter(filter);
    }

    /** Closes the currently open output file, if any. */
    protected void close() {
        closeWriter();
    }

    /**
     * Opens {@code file} for writing with the {@code FileUtil.UTF8} encoding
     * and makes it the current output. On failure the stack trace is printed
     * and the current writer is left unchanged.
     *
     * @param file path of the file to create
     */
    protected synchronized void openWriter(String file) {
        FileOutputStream out = null;
        try {
            out = new FileOutputStream(file);
            writer = new BufferedWriter(new OutputStreamWriter(out, FileUtil.UTF8));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            // The file stream was already opened at this point; release it.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException closeFailure) {
                    closeFailure.printStackTrace();
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the current writer, if any, and clears the reference so that a
     * closed writer is never reused. Calling this method repeatedly is harmless.
     */
    protected synchronized void closeWriter() {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            writer = null;
        }
    }

    /**
     * Class in case system is shutdown: Responsible to close all services
     * that are running at the time being.
     */
    protected class Shutdown extends Thread {
        StreamCollector process = null;

        /**
         * @param process the collector to close on shutdown; may be {@code null}
         */
        public Shutdown(StreamCollector process) {
            this.process = process;
        }

        /** Closes the collector's output and reports progress on standard output. */
        @Override
        public void run() {
            System.out.println("Shutting down collector...");
            if (process != null) {
                process.close();
            }
            System.out.println("Done...");
        }
    }
}