/**
* POJO for (de)serialization of tweets coming on the REDIS aidr_predict.* channels
*
* @author Koushik
*/
package qa.qcri.aidr.utils;
import java.io.Serializable;
import java.io.StringReader;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.TimeZone;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.log4j.Logger;
import qa.qcri.aidr.common.code.DateFormatConfig;
import qa.qcri.aidr.common.filter.ClassifiedFilteredTweet;
import qa.qcri.aidr.common.filter.NominalLabel;
import qa.qcri.aidr.dbmanager.dto.DocumentNominalLabelDTO;
import qa.qcri.aidr.dbmanager.dto.HumanLabeledDocumentDTO;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.stream.JsonReader;
/**
 * Flat, serializable representation of a classified tweet.
 *
 * <p>An instance is normally populated from the raw tweet JSON through
 * {@link #toClassifiedTweet(String, String)}. Besides the per-label
 * {@link NominalLabel} entries kept in {@code nominal_labels} and in the
 * embedded {@link AidrObject}, the class also keeps one ';'-joined string per
 * label property (the {@code *_1} fields).
 */
public class ClassifiedTweet extends ClassifiedFilteredTweet implements Document, Serializable {

    private static final long serialVersionUID = 3780215910578547404L;

    private static final Logger logger = Logger.getLogger(ClassifiedTweet.class);

    /** Text stored in a label property when the JSON does not provide a value. */
    private static final String NULL_LITERAL = "null";

    /** Separator placed between the values of successive labels in the {@code *_1} fields. */
    private static final String LABEL_SEPARATOR = ";";

    /** Shared serializers; Gson instances are documented as thread-safe and reusable. */
    private static final Gson COMPACT_GSON = new GsonBuilder().serializeNulls().disableHtmlEscaping()
            .serializeSpecialFloatingPointValues()
            .create();

    private static final Gson PRETTY_GSON = new GsonBuilder().serializeNulls().disableHtmlEscaping()
            .serializeSpecialFloatingPointValues().setPrettyPrinting()
            .create();

    private String tweetID;
    private String message;
    private String createdAt;
    private long timestamp;
    private String userID;
    private String userName;
    private String userURL;
    private String tweetURL;
    private String crisisName;
    private String crisisCode;
    private String attributeName_1;
    private String attributeCode_1;
    private String labelName_1;
    private String labelCode_1;
    private String labelDescription_1;
    private String confidence_1;
    private String humanLabeled_1;
    private AidrObject aidr;

    /**
     * Creates an empty tweet with an empty label list and an empty AIDR object.
     */
    public ClassifiedTweet() {
        nominal_labels = new ArrayList<NominalLabel>();
        aidr = new AidrObject();
    }

    /**
     * @return the userURL
     */
    public String getUserURL() {
        return userURL;
    }

    /**
     * @param userURL the userURL to set
     */
    public void setUserURL(String userURL) {
        this.userURL = userURL;
    }

    /**
     * @return the tweetID
     */
    public String getTweetID() {
        return tweetID;
    }

    /**
     * @param tweetID the tweetID to set
     */
    public void setTweetID(String tweetID) {
        this.tweetID = tweetID;
    }

    /**
     * @return the message
     */
    public String getMessage() {
        return message;
    }

    /**
     * @param message the message to set
     */
    public void setMessage(String message) {
        this.message = message;
    }

    /**
     * @return the userID
     */
    public String getUserID() {
        return userID;
    }

    /**
     * @param userID the userID to set
     */
    public void setUserID(String userID) {
        this.userID = userID;
    }

    /**
     * @return the userName
     */
    public String getUserName() {
        return userName;
    }

    /**
     * @param userName the userName to set
     */
    public void setUserName(String userName) {
        this.userName = userName;
    }

    /**
     * @return the creation time as stored by {@link #setCreatedAtString(String)}
     */
    public String getCreatedAtString() {
        return createdAt;
    }

    /**
     * Stores the creation time after passing it through
     * {@link #setDateString(String)}, which also updates the timestamp.
     *
     * @param createdAtString the creation time in {@code DateFormatConfig.StandardDateFormat}
     */
    public void setCreatedAtString(String createdAtString) {
        this.createdAt = setDateString(createdAtString);
    }

    /**
     * @return the tweetURL
     */
    public String getTweetURL() {
        return tweetURL;
    }

    /**
     * @param tweetURL the tweetURL to set
     */
    public void setTweetURL(String tweetURL) {
        this.tweetURL = tweetURL;
    }

    /**
     * Returns the main fields joined by commas, without any quoting or escaping.
     *
     * <p>NOTE(review): crisisCode and labelCode_1 are not part of the output;
     * the column set is left as it was because callers may depend on it.
     *
     * @return the comma-joined field values
     */
    @Override
    public String toString() {
        StringBuilder tweetString = new StringBuilder();
        tweetString.append(tweetID).append(",")
                .append(message).append(",")
                .append(createdAt).append(",")
                .append(userID).append(",")
                .append(userName).append(",")
                .append(userURL).append(",")
                .append(tweetURL).append(",")
                .append(crisisName).append(",")
                .append(attributeName_1).append(",")
                .append(attributeCode_1).append(",")
                .append(labelName_1).append(",")
                .append(labelDescription_1).append(",")
                .append(confidence_1).append(",")
                .append(humanLabeled_1);
        return tweetString.toString();
    }

    /**
     * @return the crisisName
     */
    public String getCrisisName() {
        return crisisName;
    }

    /**
     * @param crisisName the crisisName to set
     */
    public void setCrisisName(String crisisName) {
        this.crisisName = crisisName;
    }

    /**
     * @return the ';'-joined attribute names of all labels
     */
    public String getAttributeName_1() {
        return attributeName_1;
    }

    /**
     * @param attributeName the ';'-joined attribute names to set
     */
    public void setAttributeName_1(String attributeName) {
        this.attributeName_1 = attributeName;
    }

    /**
     * @return the ';'-joined attribute codes of all labels
     */
    public String getAttributeCode_1() {
        return attributeCode_1;
    }

    /**
     * @param attributeCode the ';'-joined attribute codes to set
     */
    public void setAttributeCode_1(String attributeCode) {
        this.attributeCode_1 = attributeCode;
    }

    /**
     * @return the ';'-joined label names of all labels
     */
    public String getLabelName_1() {
        return labelName_1;
    }

    /**
     * @param labelName the ';'-joined label names to set
     */
    public void setLabelName_1(String labelName) {
        this.labelName_1 = labelName;
    }

    /**
     * @return the ';'-joined label descriptions of all labels
     */
    public String getLabelDescription_1() {
        return labelDescription_1;
    }

    /**
     * @param labelDescription the ';'-joined label descriptions to set
     */
    public void setLabelDescription_1(String labelDescription) {
        this.labelDescription_1 = labelDescription;
    }

    /**
     * @return the ';'-joined label codes of all labels
     */
    public String getLabelCode_1() {
        return labelCode_1;
    }

    /**
     * @param labelCode the ';'-joined label codes to set
     */
    public void setLabelCode_1(String labelCode) {
        this.labelCode_1 = labelCode;
    }

    /**
     * @return the ';'-joined confidences of all labels
     */
    public String getConfidence_1() {
        return confidence_1;
    }

    /**
     * @param confidence the ';'-joined confidences to set
     */
    public void setConfidence_1(String confidence) {
        this.confidence_1 = confidence;
    }

    /**
     * @return the ';'-joined from-human flags of all labels
     */
    public String getHumanLabeled_1() {
        return humanLabeled_1;
    }

    /**
     * @param humanLabeled the ';'-joined from-human flags to set
     */
    public void setHumanLabeled_1(String humanLabeled) {
        this.humanLabeled_1 = humanLabeled;
    }

    /**
     * Parses a time string written in {@code DateFormatConfig.ISODateFormat},
     * interpreting it in GMT.
     *
     * @param timeString the text to parse, may be {@code null}
     * @return the parsed date, or {@code null} when the input is {@code null}
     *         or cannot be parsed (a warning is logged in both cases)
     */
    public Date getDate(String timeString) {
        // A formatter is created per call because SimpleDateFormat is not thread-safe.
        DateFormat formatter = new SimpleDateFormat(DateFormatConfig.ISODateFormat);
        formatter.setTimeZone(TimeZone.getTimeZone("GMT"));
        if (timeString != null) {
            try {
                return formatter.parse(timeString);
            } catch (ParseException e) {
                logger.error("Parse Error in getting Date string = " + timeString, e);
            }
        }
        logger.warn("[getDate] Warning! returning Date = null for time String = " + timeString);
        return null;
    }

    /**
     * Converts a time string from {@code DateFormatConfig.StandardDateFormat}
     * to {@code DateFormatConfig.ISODateFormat} (rendered in GMT) and records
     * the parsed instant as this tweet's timestamp.
     *
     * <p>Despite its name this method does not store the string; it only
     * updates the timestamp as a side effect.
     *
     * @param timeString the text to convert, may be {@code null}
     * @return the ISO-formatted time; when the input is {@code null} or cannot
     *         be parsed, the input itself is returned and the timestamp is set to 0
     */
    public String setDateString(String timeString) {
        DateFormat dateFormatISO = new SimpleDateFormat(DateFormatConfig.ISODateFormat);
        dateFormatISO.setTimeZone(TimeZone.getTimeZone("GMT"));
        if (timeString != null) {
            try {
                SimpleDateFormat formatter = new SimpleDateFormat(DateFormatConfig.StandardDateFormat);
                Date newDate = formatter.parse(timeString);
                if (newDate != null) {
                    setTimestamp(newDate.getTime());
                }
                return dateFormatISO.format(newDate);
            } catch (ParseException e) {
                logger.error("Error in setting createdAt field = " + timeString, e);
            }
        }
        setTimestamp(0);
        return timeString;
    }

    /**
     * @return the timestamp in milliseconds, or 0 when no creation time could be parsed
     */
    public long getTimestamp() {
        return this.timestamp;
    }

    /**
     * @param timestamp the timestamp to set
     */
    public void setTimestamp(long timestamp) {
        this.timestamp = timestamp;
    }

    /**
     * @return the crisisCode
     */
    public String getCrisisCode() {
        return this.crisisCode;
    }

    /**
     * @param crisisCode the crisisCode to set
     */
    public void setCrisisCode(String crisisCode) {
        this.crisisCode = crisisCode;
    }

    /**
     * Builds a flat key/value view of this tweet in which the properties of
     * the i-th label are stored under keys suffixed with {@code _i}
     * (0-based).
     *
     * @return a new map holding the id, crisis name/code, text and label properties
     */
    public Map<String, Object> prettyPrint() {
        Map<String, Object> obj = new HashMap<String, Object>();
        obj.put("id", this.getTweetID());
        obj.put("crisisName", this.getCrisisName());
        obj.put("crisisCode", this.getCrisisCode());
        obj.put("tweet", this.getMessage());
        for (int i = 0; i < this.getNominalLabels().size(); i++) {
            NominalLabel nb = this.getNominalLabels().get(i);
            obj.put("attribute_name_" + i, nb.attribute_name);
            obj.put("attribute_code_" + i, nb.attribute_code);
            obj.put("label_name_" + i, nb.label_name);
            obj.put("label_description_" + i, nb.label_description);
            obj.put("label_code_" + i, nb.label_code);
            obj.put("confidence_" + i, nb.confidence);
            obj.put("humanLabeled_" + i, nb.from_human);
        }
        return obj;
    }

    /**
     * Creates a placeholder label whose text properties are the string
     * {@code "null"}, appends it to {@code nominal_labels} and returns it.
     *
     * @param crisisCode currently unused
     * @return the placeholder label that was appended
     */
    public NominalLabel createDummyNominalLabels(final String crisisCode) {
        NominalLabel nLabel = new NominalLabel();
        nLabel.attribute_code = NULL_LITERAL;
        nLabel.label_code = NULL_LITERAL;
        nLabel.confidence = 0;
        nLabel.attribute_name = NULL_LITERAL;
        nLabel.label_name = NULL_LITERAL;
        nLabel.attribute_description = NULL_LITERAL;
        nLabel.label_description = NULL_LITERAL;
        nLabel.from_human = false;
        this.nominal_labels.add(nLabel);
        return nLabel;
    }

    /**
     * Populates this instance from tweet JSON without a fallback collection code.
     *
     * @param data the tweet JSON, may be {@code null}
     * @see #toClassifiedTweet(String, String)
     */
    public void toClassifiedTweet(String data) {
        this.toClassifiedTweet(data, null);
    }

    /**
     * Populates this instance from tweet JSON.
     *
     * <p>Fields that are absent or JSON {@code null} are skipped. When the
     * {@code aidr} object or its {@code nominal_labels} array is missing, a
     * placeholder label is created instead. Any exception raised while
     * parsing is logged and swallowed, which leaves the instance partially
     * populated.
     *
     * @param data the tweet JSON; nothing happens when {@code null}
     * @param collectionCode crisis code stored on the AIDR object when the
     *        JSON carries no {@code aidr} section
     */
    public void toClassifiedTweet(String data, String collectionCode) {
        if (data == null) {
            return;
        }
        try {
            JsonReader jsonReader = new JsonReader(new StringReader(data.trim()));
            jsonReader.setLenient(true);
            JsonObject jsonObj = (JsonObject) new JsonParser().parse(jsonReader);

            if (hasValue(jsonObj, "id_str")) {
                this.setTweetID(jsonObj.get("id_str").getAsString());
            }
            if (hasValue(jsonObj, "text")) {
                this.setMessage(jsonObj.get("text").getAsString());
            }
            if (hasValue(jsonObj, "created_at")) {
                this.setCreatedAtString(jsonObj.get("created_at").getAsString());
            }
            if (hasValue(jsonObj, "user")) {
                readUser(jsonObj.get("user").getAsJsonObject());
            }
            if (hasValue(jsonObj, "aidr")) {
                readAidr(jsonObj.get("aidr").getAsJsonObject(), collectionCode);
            } else {
                this.createDummyAIDRField(collectionCode);
            }
        } catch (Exception ex) {
            logger.error("Exception in deserialization, returning null", ex);
        }
    }

    /**
     * Tells whether {@code obj} has a member {@code key} whose value is not JSON null.
     */
    private static boolean hasValue(JsonObject obj, String key) {
        return obj.has(key) && !obj.get(key).isJsonNull();
    }

    /**
     * Returns the member {@code key} as a string, or the text {@code "null"}
     * when it is absent or JSON null.
     */
    private static String textOrNullLiteral(JsonObject obj, String key) {
        return hasValue(obj, key) ? obj.get(key).getAsString() : NULL_LITERAL;
    }

    /**
     * Copies the user id, screen name and URL from the {@code user} object
     * and derives the tweet URL from the screen name.
     */
    private void readUser(JsonObject jsonUserObj) {
        if (hasValue(jsonUserObj, "id")) {
            this.setUserID(jsonUserObj.get("id").getAsString());
        }
        if (hasValue(jsonUserObj, "screen_name")) {
            this.setUserName(jsonUserObj.get("screen_name").getAsString());
            this.setTweetURL("https://twitter.com/" + this.getUserName() + "/status/" + this.getTweetID());
        }
        if (jsonUserObj.get("url") != null) {
            // toString() stores the JSON rendering of the value, so a JSON null
            // becomes the text null rather than raising.
            // NOTE(review): it also keeps the JSON quotes around a real URL -
            // confirm whether consumers rely on that before switching to getAsString().
            this.setUserURL(jsonUserObj.get("url").toString());
        }
    }

    /**
     * Copies the crisis name/code and the labels from the {@code aidr} object;
     * creates a placeholder label when it carries no {@code nominal_labels}.
     */
    private void readAidr(JsonObject aidrObject, String collectionCode) {
        if (hasValue(aidrObject, "crisis_name")) {
            this.setCrisisName(aidrObject.get("crisis_name").getAsString());
            this.getAidr().setCrisisName(this.getCrisisName());
        }
        if (hasValue(aidrObject, "crisis_code")) {
            this.setCrisisCode(aidrObject.get("crisis_code").getAsString());
            this.getAidr().setCrisisCode(this.getCrisisCode());
        }
        if (hasValue(aidrObject, "nominal_labels")) {
            readNominalLabels(aidrObject.get("nominal_labels").getAsJsonArray());
        } else {
            this.createDummyNominalLabels(collectionCode);
        }
    }

    /**
     * Appends one {@link NominalLabel} per array element to
     * {@code nominal_labels}, hands the label list to the AIDR object and
     * stores the ';'-joined value of every label property in the matching
     * {@code *_1} field.
     */
    private void readNominalLabels(JsonArray nominalLabels) {
        StringBuilder allAttributeNames = new StringBuilder();
        StringBuilder allAttributeCodes = new StringBuilder();
        StringBuilder allLabelNames = new StringBuilder();
        StringBuilder allLabelCodes = new StringBuilder();
        StringBuilder allLabelDescriptions = new StringBuilder();
        StringBuilder allConfidences = new StringBuilder();
        StringBuilder humanLabeled = new StringBuilder();

        for (int i = 0; i < nominalLabels.size(); i++) {
            JsonObject label = nominalLabels.get(i).getAsJsonObject();

            // Locals keep the exact float/boolean rendering in the joined
            // strings independent of the field types declared on NominalLabel.
            float confidence = hasValue(label, "confidence") ? label.get("confidence").getAsFloat() : 0;
            boolean fromHuman = hasValue(label, "from_human") ? label.get("from_human").getAsBoolean() : false;

            NominalLabel nLabel = new NominalLabel();
            nLabel.attribute_code = textOrNullLiteral(label, "attribute_code");
            nLabel.label_code = textOrNullLiteral(label, "label_code");
            nLabel.confidence = confidence;
            nLabel.attribute_name = textOrNullLiteral(label, "attribute_name");
            nLabel.label_name = textOrNullLiteral(label, "label_name");
            nLabel.attribute_description = textOrNullLiteral(label, "attribute_description");
            nLabel.label_description = textOrNullLiteral(label, "label_description");
            nLabel.from_human = fromHuman;
            this.nominal_labels.add(nLabel);

            // Separator goes between entries only, never after the last one.
            if (i > 0) {
                allAttributeNames.append(LABEL_SEPARATOR);
                allAttributeCodes.append(LABEL_SEPARATOR);
                allLabelNames.append(LABEL_SEPARATOR);
                allLabelCodes.append(LABEL_SEPARATOR);
                allLabelDescriptions.append(LABEL_SEPARATOR);
                allConfidences.append(LABEL_SEPARATOR);
                humanLabeled.append(LABEL_SEPARATOR);
            }
            allAttributeNames.append(nLabel.attribute_name);
            allAttributeCodes.append(nLabel.attribute_code);
            allLabelNames.append(nLabel.label_name);
            allLabelCodes.append(nLabel.label_code);
            allLabelDescriptions.append(nLabel.label_description);
            allConfidences.append(confidence);
            humanLabeled.append(fromHuman);
        }

        this.getAidr().setNominalLabels(this.getNominalLabels());
        this.setAttributeName_1(allAttributeNames.toString());
        this.setAttributeCode_1(allAttributeCodes.toString());
        this.setLabelName_1(allLabelNames.toString());
        // Label codes were collected but never stored before; keep them in
        // step with the other joined columns.
        this.setLabelCode_1(allLabelCodes.toString());
        this.setLabelDescription_1(allLabelDescriptions.toString());
        this.setConfidence_1(allConfidences.toString());
        this.setHumanLabeled_1(humanLabeled.toString());
    }

    /**
     * Fills the AIDR object for a tweet that carries no {@code aidr} section:
     * stores the collection code and adds a single placeholder label to both
     * {@code nominal_labels} and the AIDR object's label list.
     *
     * @param collectionCode crisis code to store on the AIDR object
     */
    public void createDummyAIDRField(String collectionCode) {
        this.getAidr().setCrisisCode(collectionCode);
        // createDummyNominalLabels already appends to nominal_labels, so it is
        // called once; calling it a second time left two placeholders there.
        NominalLabel dummy = this.createDummyNominalLabels(collectionCode);
        if (this.getAidr().getNominalLabels() != this.nominal_labels) {
            this.getAidr().getNominalLabels().add(dummy);
        }
    }

    /**
     * Populates this instance from a human-labeled document: the tweet
     * fields come from the document's JSON, and when the document carries
     * label data the parsed labels are replaced by the human labels (each
     * with confidence 1). Exceptions are logged and swallowed.
     *
     * @param doc the labeled document; nothing happens when {@code null}
     * @param collectionCode crisis code passed on to
     *        {@link #toClassifiedTweet(String, String)}
     */
    public void toClassifiedTweetFromLabeledDoc(HumanLabeledDocumentDTO doc, String collectionCode) {
        if (doc == null) {
            return;
        }
        try {
            this.toClassifiedTweet(doc.getDoc().getData(), collectionCode);
            if (doc.getLabelData() != null && !doc.getLabelData().isEmpty()) {
                // Drop whatever the JSON parse produced (possibly placeholders).
                this.nominal_labels.clear();
                this.getAidr().getNominalLabels().clear();
                // Parsing may have handed the AIDR object the very same list
                // instance (presumably setNominalLabels keeps the reference -
                // TODO confirm); adding to both would then store each label twice.
                boolean sharedList = this.getAidr().getNominalLabels() == this.nominal_labels;
                for (DocumentNominalLabelDTO label : doc.getLabelData()) {
                    NominalLabel nb = new NominalLabel();
                    nb.from_human = doc.getDoc().getHasHumanLabels();
                    nb.attribute_code = label.getNominalLabelDTO().getNominalAttributeDTO().getCode();
                    nb.attribute_description = label.getNominalLabelDTO().getNominalAttributeDTO().getDescription();
                    nb.attribute_name = label.getNominalLabelDTO().getNominalAttributeDTO().getName();
                    nb.confidence = 1; // default confidence for human labelers = 1.0
                    nb.label_code = label.getNominalLabelDTO().getNominalLabelCode();
                    nb.label_description = label.getNominalLabelDTO().getDescription();
                    nb.label_name = label.getNominalLabelDTO().getName();
                    this.nominal_labels.add(nb);
                    if (!sharedList) {
                        this.getAidr().getNominalLabels().add(nb);
                    }
                }
            }
        } catch (Exception e) {
            logger.error("Exception in parsing labeled document", e);
        }
    }

    /**
     * @return serialized JSON string without pretty printing (default behavior)
     */
    public String toJsonString() {
        return this.toJsonString(false);
    }

    /**
     * Serializes this instance with Gson (nulls included, HTML escaping off)
     * and then unescapes Java-style escape sequences in the result.
     *
     * <p>NOTE(review): the unescape step runs over the whole document, so
     * escaped quotes or line breaks inside string values come out unescaped
     * and the result may not be valid JSON - confirm what consumers expect.
     *
     * @param isPrettyPrinting turn PrettyPrinting on/off
     * @return serialized JSON string, or {@code null} when serialization fails
     */
    public String toJsonString(boolean isPrettyPrinting) {
        Gson serializer = isPrettyPrinting ? PRETTY_GSON : COMPACT_GSON;
        try {
            String jsonString = serializer.toJson(this, ClassifiedTweet.class);
            jsonString = jsonString.replace("\\\\u", "\\u");
            return StringEscapeUtils.unescapeJava(jsonString);
        } catch (Exception e) {
            logger.error("Error while parsing jsonObject to json string", e);
            return null;
        }
    }

    /**
     * @return the embedded AIDR object
     */
    public AidrObject getAidr() {
        return this.aidr;
    }

    /**
     * @param aidr the AIDR object to set
     */
    public void setAidr(AidrObject aidr) {
        this.aidr = aidr;
    }
}