package eu.socialsensor.twcollect;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
* Container class to encapsulate the TweetFields and information about
* the response of twitter.com
* @author kleinmind
*
*/
public class TweetFieldsResponse {

    /** Log file analysed by {@link #main(String[])} when no path is passed on the command line. */
    private static final String DEFAULT_RESPONSE_FILE =
            "D:/socialsensor/code/twitter-dataset-collector/snow14_devset_tweets_v2.txt";

    /** Column delimiter of the single-line serialized form (see {@link #toString()}). */
    private static final String SEPARATOR = "\t";

    /**
     * Prints summary statistics for a response log.
     *
     * @param args optional; {@code args[0]} is the path of the response log to analyse.
     *             When absent, the built-in default path is used.
     */
    public static void main(String[] args) {
        String responseFile = (args != null && args.length > 0) ? args[0] : DEFAULT_RESPONSE_FILE;
        TweetFieldsResponse.reportResults(responseFile);
    }

    private final TweetFields tweet;
    private final int status;
    private final int msecSpent;
    private final boolean parsingError;
    private final boolean otherError;
    private final boolean suspended; // the twitter account is suspended

    /**
     * Creates a response with all error flags (suspended, parse error, other error) cleared.
     *
     * @param tweet     the tweet fields this response refers to
     * @param status    the status code of the response
     * @param msecSpent the time spent on the request, in milliseconds
     */
    public TweetFieldsResponse(TweetFields tweet, int status, int msecSpent) {
        this(tweet, status, msecSpent, false, false, false);
    }

    /**
     * Creates a response with explicit error flags, for the cases where something went wrong.
     *
     * @param tweet        the tweet fields this response refers to
     * @param status       the status code of the response
     * @param msecSpent    the time spent on the request, in milliseconds
     * @param suspended    whether the twitter account is suspended
     * @param parsingError whether the response could not be parsed
     * @param otherError   whether some other error occurred
     */
    public TweetFieldsResponse(TweetFields tweet, int status,
            int msecSpent, boolean suspended, boolean parsingError, boolean otherError) {
        this.tweet = tweet;
        this.status = status;
        this.msecSpent = msecSpent;
        this.suspended = suspended;
        this.parsingError = parsingError;
        this.otherError = otherError;
    }

    // Getters

    public TweetFields getTweet() {
        return tweet;
    }

    public int getStatus() {
        return status;
    }

    public int getMsecSpent() {
        return msecSpent;
    }

    public boolean isSuspended() {
        return suspended;
    }

    public boolean isParseError() {
        return parsingError;
    }

    public boolean isOtherError() {
        return otherError;
    }

    /**
     * Serializes this response in a single line. The columns, separated by tabs, are:
     * status, suspended, parsingError, otherError, msecSpent, followed by the string
     * form of the tweet.
     */
    @Override
    public String toString() {
        return status + SEPARATOR + suspended + SEPARATOR + parsingError
                + SEPARATOR + otherError + SEPARATOR + msecSpent + SEPARATOR + tweet;
    }

    /**
     * De-serializes a response from a line in the format produced by {@link #toString()}.
     *
     * @param tweetFieldsResponseInLine a tab-separated line; columns 0-4 hold the response
     *        fields and columns 5-12 the serialized tweet
     * @return the reconstructed response
     * @throws ArrayIndexOutOfBoundsException if the line has fewer columns than required
     * @throws NumberFormatException if the status or the elapsed time is not an integer
     */
    public static TweetFieldsResponse fromString(String tweetFieldsResponseInLine) {
        String[] parts = tweetFieldsResponseInLine.split(SEPARATOR);

        TweetFields tweetFields;
        if (parts[7].equals("null")) {
            // Tweet is not available: keep only the value of column 6
            // (presumably the tweet id -- confirm against TweetFields).
            tweetFields = new TweetFields(parts[6], null, null, null);
        } else {
            // Re-assemble the eight tweet columns (5..12) and let TweetFields parse them.
            StringBuilder tweetLine = new StringBuilder(parts[5]);
            for (int i = 6; i <= 12; i++) {
                tweetLine.append(SEPARATOR).append(parts[i]);
            }
            tweetFields = TweetFields.fromString(tweetLine.toString());
        }

        // Column order on disk is status, suspended, parsingError, otherError, msecSpent.
        return new TweetFieldsResponse(tweetFields, Integer.parseInt(parts[0]),
                Integer.parseInt(parts[4]), Boolean.parseBoolean(parts[1]),
                Boolean.parseBoolean(parts[2]), Boolean.parseBoolean(parts[3]));
    }

    /**
     * Returns a short human-readable summary: the status, the tweet id (or {@code "Fail"}
     * when no tweet is attached) and the time spent.
     */
    public String responseSummary() {
        // The id can only be read from a non-null tweet; a missing tweet is reported as a failure.
        if (tweet != null) {
            return status + " " + tweet.getId() + " " + msecSpent + "msecs";
        } else {
            return status + " " + "Fail" + " " + msecSpent + "msecs";
        }
    }

    // utility methods

    /**
     * Reads the ids of the tweets that have already been checked, i.e. those with status
     * 200 or 404 or whose account is suspended. Equivalent to
     * {@code readIds(responseLogFile, 1000)}.
     *
     * @param responseLogFile path of a log with one serialized response per line
     * @return the matching ids; empty if the file cannot be opened
     */
    public static Set<String> readIds(String responseLogFile) {
        return readIds(responseLogFile, 1000);
    }

    /**
     * Reads the ids of the tweets whose logged response satisfies the condition selected
     * by {@code code}:
     * <ul>
     * <li>200 - the response status is 200</li>
     * <li>404 - the response status is 404</li>
     * <li>0 - the user is suspended</li>
     * <li>-1 - a parse error occurred</li>
     * <li>-2 - another error occurred</li>
     * <li>1000 - the id has been checked: status 200 or 404, or user suspended</li>
     * <li>any other value - every id in the log</li>
     * </ul>
     * I/O problems are reported on standard error; the ids collected up to that point
     * (none if the file cannot be opened) are returned.
     *
     * @param responseLogFile path of a log with one serialized response per line
     * @param code            selector described above
     * @return the set of matching tweet ids
     */
    public static Set<String> readIds(String responseLogFile, final int code) {
        Set<String> ids = new HashSet<String>();
        BufferedReader reader = openReader(responseLogFile);
        if (reader == null) {
            // The failure has already been reported; there is nothing to read.
            return ids;
        }
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                TweetFieldsResponse response = TweetFieldsResponse.fromString(line);
                if (matchesCode(response, code)) {
                    ids.add(response.getTweet().getId());
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Also reached when a malformed line makes fromString throw.
            closeQuietly(reader);
        }
        return ids;
    }

    /**
     * Prints aggregate statistics of a response log to standard output: success rate,
     * average response time, counts per category and counts per status code.
     * If the file cannot be opened the error is reported and nothing is printed.
     * For a log without any lines the two averages are printed as {@code NaN}.
     *
     * @param responseLogFile path of a log with one serialized response per line
     */
    public static void reportResults(String responseLogFile) {
        BufferedReader reader = openReader(responseLogFile);
        if (reader == null) {
            return;
        }

        int countSuccess = 0;
        int countOriginal = 0;
        int countRetweets = 0;
        int countResponses = 0;
        int countSuspended = 0;
        int countParseErrors = 0;
        int countOtherErrors = 0;
        double totalMsec = 0.0;
        Map<Integer, Integer> statusMap = new HashMap<Integer, Integer>();
        int nrResponses = 0;

        try {
            String line;
            while ((line = reader.readLine()) != null) {
                TweetFieldsResponse response = TweetFieldsResponse.fromString(line);
                TweetFields tweet = response.getTweet();
                nrResponses++;

                // A response counts as a success when none of the error flags is set.
                if (!response.isSuspended() && !response.isParseError() && !response.isOtherError()) {
                    countSuccess++;
                }
                if (response.isSuspended()) {
                    countSuspended++;
                }
                if (response.isParseError()) {
                    countParseErrors++;
                }
                if (response.isOtherError()) {
                    countOtherErrors++;
                }

                boolean retweet = tweet.isRetweeet();
                if (retweet) {
                    countRetweets++;
                }
                if (tweet.isReply()) {
                    countResponses++;
                }
                // "Original" = downloaded (200), not a retweet, and with text present.
                if (response.getStatus() == 200 && !retweet && tweet.getText() != null) {
                    countOriginal++;
                }

                Integer seen = statusMap.get(response.getStatus());
                statusMap.put(response.getStatus(), seen == null ? 1 : seen + 1);

                totalMsec += response.getMsecSpent();
            }
        } catch (IOException e) {
            // Report the problem but still print the statistics gathered so far.
            e.printStackTrace();
        } finally {
            closeQuietly(reader);
        }

        System.out.println("Success(%): " + (100.0 * countSuccess) / nrResponses);
        System.out.println("Avg. response time: " + totalMsec / nrResponses + "msecs");
        System.out.println("Total: " + nrResponses);
        System.out.println("Original: " + countOriginal);
        System.out.println("Retweets: " + countRetweets);
        System.out.println("Responses: " + countResponses);
        System.out.println("Suspended: " + countSuspended);
        System.out.println("Parse errors: " + countParseErrors);
        System.out.println("Other errors: " + countOtherErrors);
        for (Entry<Integer, Integer> entry : statusMap.entrySet()) {
            System.out.println("Status " + entry.getKey() + ": " + entry.getValue());
        }
    }

    /** Tells whether a response satisfies the selector documented on {@code readIds(String, int)}. */
    private static boolean matchesCode(TweetFieldsResponse response, int code) {
        switch (code) {
            case -2:
                return response.isOtherError();
            case -1:
                return response.isParseError();
            case 0:
                return response.isSuspended();
            case 200:
            case 404:
                return response.getStatus() == code;
            case 1000:
                return response.getStatus() == 200
                        || response.getStatus() == 404
                        || response.isSuspended();
            default:
                // any other selector accepts every response
                return true;
        }
    }

    /** Opens the log for reading; prints the stack trace and returns null if it does not exist. */
    private static BufferedReader openReader(String responseLogFile) {
        try {
            return new BufferedReader(new FileReader(responseLogFile));
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Closes the reader, printing (rather than propagating) a failure to close. */
    private static void closeQuietly(BufferedReader reader) {
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}