import java.net.*;
import java.io.*;
import java.util.*;
import qa.qcri.aidr.common.code.Configurator;
import qa.qcri.aidr.predict.classification.nominal.NominalLabelBC;
import qa.qcri.aidr.predict.common.TaggerConfigurationProperty;
import qa.qcri.aidr.predict.common.TaggerConfigurator;
import qa.qcri.aidr.predict.data.DocumentJSONConverter;
import au.com.bytecode.opencsv.CSVReader;
/**
* Test class that reads labeled training data from a file and sends each item
* to the AIDR pipeline.
*
* To use with the Sandy dataset, configure attributes and labels in a
* deployment and modify the labels and attribute IDs in processSandyData() to
* match the configuration.
*
* @author jrogstadius
*/
public class HttpBatchImportSandy {

    /** Host the client socket connects to. */
    static String host = "localhost";

    /** Port the client socket connects to; read from the tagger configuration. */
    static int port;

    static {
        Configurator testConfigurator = TaggerConfigurator.getInstance();
        testConfigurator.initProperties(TaggerConfigurator.configLoadFileName,
                TaggerConfigurationProperty.values());
        port = Integer.parseInt(testConfigurator
                .getProperty(TaggerConfigurationProperty.HTTP_INPUT_PORT));
    }

    /** Connection opened by {@link #sendMessages()}. */
    static Socket socket;

    /** Reader over the socket's input stream. */
    static BufferedReader serverIn;

    /** Auto-flushing writer over the socket's output stream. */
    static PrintWriter serverOut;

    /**
     * Number of messages written by {@link #sendTweetToServer}; used by
     * {@link #processSandyData()} to decide when to take a long pause.
     */
    static int messageID = 0;

    /**
     * Entry point: prints a banner and runs {@link #sendMessages()}.
     *
     * @param args ignored
     * @throws Exception if connecting, reading the data file or sleeping fails
     */
    public static void main(String[] args) throws Exception {
        System.out.println("ClientTest");
        sendMessages();
    }

    /**
     * Opens the socket to {@code host:port}, publishes the connection through
     * the static {@code socket}/{@code serverOut}/{@code serverIn} fields,
     * sends the Sandy data set and closes the socket again.
     *
     * @throws Exception if the connection cannot be opened or the import fails
     */
    public static void sendMessages() throws Exception {
        // try-with-resources so the socket is released even when the import
        // fails half-way; closing the socket also closes both of its streams.
        try (Socket connection = new Socket(host, port)) {
            socket = connection;
            serverOut = new PrintWriter(connection.getOutputStream(), true);
            serverIn = new BufferedReader(new InputStreamReader(
                    connection.getInputStream()));
            // processTweetsFromFile(5, "..\\1k_random_tweets.txt");
            processSandyData();
        }
    }

    /**
     * Forwards every line typed on standard input to the server until the
     * line {@code exit} is entered or standard input ends.
     */
    public static void getUserInput() {
        Scanner scanner = new Scanner(System.in);
        // hasNextLine() guards against NoSuchElementException when stdin is
        // closed (e.g. piped input) before "exit" is seen.
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            if (line.equals("exit")) {
                break;
            }
            serverOut.println(line);
        }
        scanner.close();
    }

    /**
     * Reads tweets from a file in which each line is one json-formatted tweet
     * and sends them one at a time, waiting for a line on standard input
     * after each tweet. Blank lines in the file are skipped.
     *
     * @param crisisCode crisis code written into each message
     * @param crisisID   crisis ID passed on to {@link #sendTweetToServer}
     * @param filename   path of the file to read
     * @throws Exception if the file cannot be opened or read
     */
    public static void processJsonTweetsFromFile(String crisisCode,
            int crisisID, String filename) throws Exception {
        try (Scanner scanner = new Scanner(System.in);
                BufferedReader br = new BufferedReader(new FileReader(filename))) {
            String tweet;
            while ((tweet = br.readLine()) != null) {
                if (tweet.trim().isEmpty()) {
                    // Nothing to send, and sendTweetToServer needs a JSON object.
                    continue;
                }
                sendTweetToServer(crisisCode, crisisID, tweet, null);
                // Wait for the operator to press Enter before the next tweet.
                scanner.nextLine();
            }
        }
    }

    /**
     * Reads the labeled Sandy CSV file, wraps the tweet text of every row in
     * a minimal JSON document and sends it to the server. The first row is
     * treated as a header and skipped, as are rows too short to contain the
     * tweet column. After every 150th message the method sleeps 20 seconds;
     * otherwise it sleeps 10 milliseconds between messages.
     *
     * @throws Exception if the CSV file cannot be read or the thread is
     *                   interrupted while sleeping
     */
    public static void processSandyData() throws Exception {
        /*
         * SANDY DATASET COLUMNS
         *   0  _unit_id
         *   1  _golden
         *   2  _unit_state
         *   3  _trusted_judgments
         *   4  _last_judgment_at
         *   5  the_author_of_the_tweet_seems_to_be_an_eye_witness_of_the_event
         *   6  the_author_of_the_tweet_seems_to_be_an_eye_witness_of_the_event:confidence
         *   7  type_of_message
         *   8  type_of_message:confidence
         *   9  nil
         *   10 the_author_of_the_tweet_seems_to_be_an_eye_witness_of_the_event_gold
         *   11 tweet
         *   12 tweet_no
         *   13 tweet_no_rt
         *   14 type_of_message_gold
         *   15 user
         *
         * SANDY DATASET EXAMPLE TUPLE
         *   0  238841781
         *   1  false
         *   2  finalized
         *   3  4
         *   4  1/2/2013 13:37:11
         *   5
         *   6
         *   7  "Informative: offers/gives donations of money, goods, or free services"
         *   8  0.2689
         *   9
         *   10
         *   11 important --> @JebBush suggests federal gov't not crucial to
         *      storm recovery http://t.co/pVsV6qoS #haction #2012 #sandy
         *   12 11899
         *   13 important --> @JebBush suggests federal gov't not crucial to
         *      storm recovery http://t.co/pVsV6qoS #haction #2012 #sandy
         *   14
         *   15 danholler
         *
         * ATTRIBUTE DEFINITIONS (id, name, labels)
         *   20 Eyewitness         null, eyewitness
         *   23 Informative        null, informative
         *   24 Casualties         null, casualties
         *   25 Caution or advice  null, advice
         *   27 Reaction           null, reaction
         *   28 Damage             null, damage
         *   29 Media source       null, mediasource
         *   30 Donation           null, offer, request
         *   31 Photo or video     null, photo
         *   32 People missing     null, missing
         *
         * SANDY DATASET LABELS
         *   Can not judge (not in English, too short, etc.)
         *   Informative: casualties (people injured or dead)
         *   Informative: caution or advice
         *   Informative: celebrities or authorities react to the event or visit the area
         *   Informative: damage (building, road, lines, etc.)
         *   Informative: information source with extensive coverage (radio, tv, website, etc.)
         *   Informative: offers/gives donations of money, goods, or free services
         *   Informative: other
         *   Informative: other type of photos/videos (not in the above classes)
         *   Informative: people missing, or lost people found
         *   Informative: requests donations of money, goods, or free services
         *   Not informative: personal only
         *   Not informative: unrelated to the disaster
         */
        String crisisCode = "sandy_hurricane_test";
        int crisisID = 5;
        String filename = "C:\\projects\\aidr\\develop\\predict\\Sandy-Labeled.csv";

        // Index of the "tweet" column (see the column list above).
        final int tweetColumn = 11;

        // Attribute IDs and label strings below are only referenced by the
        // commented-out label-matching template inside the loop; they are kept
        // so the template can be re-enabled after adapting it to a deployment.

        // Attribute IDs
        int eyewitnessID = 20;
        int informativeID = 23;
        int casualtiesID = 24;
        int adviceID = 25;
        int reactionID = 27;
        int damageID = 28;
        int mediaID = 29;
        int donationID = 30;
        int photoID = 31;
        int missingID = 32;

        // Labels used in the Sandy dataset
        String casualtiesStr = "casualties (people injured or dead)";
        String adviceStr = "caution or advice";
        String reactionStr = "celebrities or authorities react to the event or visit the area";
        String damageStr = "damage (building, road, lines, etc.)";
        String mediaStr = "information source with extensive coverage (radio, tv, website, etc.)";
        String donationStr1 = "offers/gives donations of money, goods, or free services";
        String donationStr2 = "requests donations of money, goods, or free services";
        String photoStr = "other type of photos/videos (not in the above classes)";
        String missingStr = "people missing, or lost people found";

        CSVReader reader = new CSVReader(new FileReader(filename));
        try {
            String[] line;
            reader.readNext(); // skip the header row
            while ((line = reader.readNext()) != null) {
                if (line.length <= tweetColumn) {
                    // Blank or truncated row: there is no tweet text to send.
                    continue;
                }
                String tweetText = line[tweetColumn];
                long userID = 0;
                String tweet = "{text:\"" + escapeForJsonString(tweetText)
                        + "\", user: {id:" + userID + "}}";
                ArrayList<NominalLabelBC> labels = new ArrayList<>();

                // Match labels in the Sandy dataset to attribute IDs/labels
                // labels.add(new NominalLabelBC(0, eyewitnessID,
                // line[5].equals("true") ? "eyewitness" : "null", 1));
                // String typeStr = line[14].equals("")
                // && Double.parseDouble(line[8]) > 0.5 ? line[7] : line[14];
                // labels.add(new NominalLabelBC(0, informativeID, typeStr
                // .contains("Informative") ? "informative" : "null", 1));
                // labels.add(new NominalLabelBC(0, casualtiesID, typeStr
                // .contains(casualtiesStr) ? "casualties" : "null", 1));
                // labels.add(new NominalLabelBC(0, adviceID, typeStr
                // .contains(adviceStr) ? "advice" : "null", 1));
                // labels.add(new NominalLabelBC(0, reactionID, typeStr
                // .contains(reactionStr) ? "reaction" : "null", 1));
                // labels.add(new NominalLabelBC(0, damageID, typeStr
                // .contains(damageStr) ? "damage" : "null", 1));
                // labels.add(new NominalLabelBC(0, mediaID,
                // typeStr.contains(mediaStr) ? "mediasource" : "null", 1));
                // labels.add(new NominalLabelBC(0, donationID, typeStr
                // .contains(donationStr1) ? "offer" : (typeStr
                // .contains(donationStr2) ? "request" : "null"), 1));
                // labels.add(new NominalLabelBC(0, photoID,
                // typeStr.contains(photoStr) ? "photo" : "null", 1));
                // labels.add(new NominalLabelBC(0, missingID, typeStr
                // .contains(missingStr) ? "missing" : "null", 1));

                for (NominalLabelBC l : labels) {
                    l.setHumanLabel(true);
                }
                sendTweetToServer(crisisCode, crisisID, tweet, labels);

                // Long pause after every 150th message, short pause otherwise.
                // messageID is advanced by sendTweetToServer.
                if (messageID % 150 == 149) {
                    Thread.sleep(20000);
                } else {
                    Thread.sleep(10);
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Makes raw text safe to embed between double quotes in the one-line JSON
     * messages this class writes: double quotes become apostrophes,
     * backslashes are escaped, and line breaks, tabs and other control
     * characters are replaced by JSON escape sequences so a message never
     * spans more than one line.
     */
    private static String escapeForJsonString(String text) {
        StringBuilder out = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
            case '"':
                out.append('\'');
                break;
            case '\\':
                out.append("\\\\");
                break;
            case '\n':
                out.append("\\n");
                break;
            case '\r':
                out.append("\\r");
                break;
            case '\t':
                out.append("\\t");
                break;
            default:
                if (c < 0x20) {
                    out.append(String.format("\\u%04x", (int) c));
                } else {
                    out.append(c);
                }
            }
        }
        return out.toString();
    }

    /**
     * Appends an {@code "aidr"} member (crisis code, document type and,
     * when present, the nominal labels) to a JSON tweet, echoes the resulting
     * message on standard output, writes it to the server as a single line
     * and increments {@code messageID}.
     *
     * @param crisisCode crisis code written into the {@code "aidr"} member
     * @param crisisID   crisis ID handed to
     *                   {@code DocumentJSONConverter.getLabelJson}
     * @param jsonTweet  the tweet as a JSON object; surrounding whitespace is
     *                   ignored
     * @param labels     labels to attach; may be {@code null} or empty
     * @throws IllegalArgumentException if {@code jsonTweet} is {@code null}
     *                   or does not end with a closing brace
     */
    static void sendTweetToServer(String crisisCode, int crisisID,
            String jsonTweet, List<NominalLabelBC> labels) {
        if (jsonTweet == null) {
            throw new IllegalArgumentException("jsonTweet must not be null");
        }
        String tweetJson = jsonTweet.trim();
        if (!tweetJson.endsWith("}")) {
            throw new IllegalArgumentException(
                    "jsonTweet is not a JSON object: " + jsonTweet);
        }

        StringBuilder info = new StringBuilder();
        info.append("{ \"crisis_code\": \"").append(crisisCode)
                .append("\", doctype:\"twitter\"");
        // Only emit the separator when labels follow, so the object never
        // ends in a dangling comma.
        if (labels != null && !labels.isEmpty()) {
            info.append(", nominal_labels: [");
            for (int i = 0; i < labels.size(); i++) {
                if (i > 0) {
                    info.append(",");
                }
                info.append(DocumentJSONConverter.getLabelJson(crisisID,
                        labels.get(i)));
            }
            info.append("]");
        }
        info.append(" }");

        // Drop the tweet's closing brace and splice the "aidr" member in.
        String message = tweetJson.substring(0, tweetJson.length() - 1)
                + ", \"aidr\": " + info + "}";
        System.out.println(message);
        serverOut.println(message);
        messageID++;
    }

    /**
     * Prints every line received from the server to standard output until
     * the stream ends; an I/O error is reported via its stack trace.
     */
    public static void getServerOutput() {
        String line;
        try {
            while ((line = serverIn.readLine()) != null) {
                System.out.println(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}