package qa.qcri.aidr.common.filter;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.TimeZone;
import org.apache.log4j.Logger;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.map.JsonMappingException;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.annotate.JsonSerialize;
import qa.qcri.aidr.common.code.DateFormatConfig;
import com.google.gson.Gson;
public class FilterQueryMatcher {
/** Log4j logger for this class. */
private static Logger logger = Logger.getLogger(FilterQueryMatcher.class);
/** Source list of query constraints; consumed one at a time by {@link #getNextQueryObject()}. */
public JsonQueryList queryList;
/** Index into {@code queryList.getConstraints()} of the constraint that {@link #getNextQueryObject()} returns next. */
public int next;
/** Parsed query predicates; {@link #getMatcherResult(ClassifiedFilteredTweet)} requires a tweet to satisfy every entry. */
public ArrayList<QueryJsonObject> matcherArray;
/**
 * Creates a matcher holding a freshly constructed {@link JsonQueryList},
 * an empty matcher array, and a constraint cursor set to index 0.
 */
public FilterQueryMatcher() {
    this.matcherArray = new ArrayList<QueryJsonObject>();
    this.next = 0;
    this.queryList = new JsonQueryList();
}
/**
 * Tests whether a JSON string represents a date query.
 * <p>
 * The text must contain the string form of {@code QueryType.date_query}
 * and must be deserializable by Jackson into a {@link DateQueryJsonObject}.
 *
 * @param queryString JSON text of a single query constraint; may be {@code null}
 * @return {@code true} if both conditions hold; {@code false} otherwise,
 *         including for {@code null} input or when deserialization fails
 *         (the failure is logged at error level)
 */
public boolean isDateQuery(String queryString) {
    // A null string cannot be a query; guard before indexOf would throw.
    if (queryString == null || queryString.indexOf(QueryType.date_query.toString()) < 0) {
        return false;
    }
    try {
        // Only the success of the parse matters; the parsed object is discarded.
        new ObjectMapper().readValue(queryString, DateQueryJsonObject.class);
        return true;
    } catch (Exception e) {
        logger.error("Exception while parsing the date query", e);
        return false;
    }
}
/**
 * Tests whether a JSON string represents a classifier query.
 * <p>
 * The text must contain the string form of {@code QueryType.classifier_query}
 * and must be deserializable by Jackson into a {@link ClassifierQueryJsonObject}.
 *
 * @param queryString JSON text of a single query constraint; may be {@code null}
 * @return {@code true} if both conditions hold; {@code false} otherwise,
 *         including for {@code null} input or when deserialization fails
 *         (the failure is logged once, at error level, with its cause)
 */
public boolean isClassifierQuery(String queryString) {
    // A null string cannot be a query; guard before indexOf would throw.
    if (queryString == null || queryString.indexOf(QueryType.classifier_query.toString()) < 0) {
        return false;
    }
    try {
        // Only the success of the parse matters; the parsed object is discarded.
        new ObjectMapper().readValue(queryString, ClassifierQueryJsonObject.class);
        return true;
    } catch (Exception e) {
        // Logged once with the cause; the former duplicate debug entry added nothing.
        logger.error("Exception in deserializing using Jackson readValue()", e);
        return false;
    }
}
/**
 * Evaluates the tweet against every predicate in {@code matcherArray}.
 *
 * @param tweet the classified tweet to test
 * @return {@code false} as soon as one predicate fails
 *         {@link #isQueryMatch(QueryJsonObject, ClassifiedFilteredTweet)};
 *         {@code true} when all predicates match, when the array is empty,
 *         or when the array is {@code null}
 */
public boolean getMatcherResult(ClassifiedFilteredTweet tweet) {
    if (null == matcherArray) {
        return true;
    }
    for (QueryJsonObject predicate : matcherArray) {
        // Conjunction of all predicates: the first failure decides the outcome.
        if (!isQueryMatch(predicate, tweet)) {
            return false;
        }
    }
    return true;
}
/**
 * Tests a single query predicate against a tweet.
 * <ul>
 * <li>For a {@link DateQueryJsonObject}: with comparator {@code is_after} the
 *     tweet's creation date must compare {@code >=} the query date; with
 *     {@code is_before} it must compare {@code <=}. A tweet without a
 *     creation date (logged as a warning) or a query without a date does
 *     not match.</li>
 * <li>For a {@link ClassifierQueryJsonObject}: the tweet's nominal labels are
 *     scanned as described in
 *     {@link #isClassifierMatch(QueryJsonObject, ClassifiedFilteredTweet)}.</li>
 * </ul>
 * Comparators are compared null-safely, so a query without a comparator no
 * longer raises a {@code NullPointerException}.
 *
 * @param q     the query predicate; {@code null} yields {@code false}
 * @param tweet the classified tweet to test
 * @return {@code true} if the tweet satisfies the predicate; {@code false}
 *         otherwise, including for unrecognized query types or a date query
 *         whose comparator is neither {@code is_after} nor {@code is_before}
 */
public boolean isQueryMatch(QueryJsonObject q, ClassifiedFilteredTweet tweet) {
    if (q instanceof DateQueryJsonObject) {
        final Object comparator = q.getComparator();
        if (ComparatorType.is_after.equals(comparator)) {
            if (null == tweet.getCreatedAt()) {
                logger.warn("No createdAt field in Tweet!!! tweet = " + tweet);
                return false; // default behavior
            }
            // A query without a date cannot be satisfied.
            return q.getDate() != null && tweet.getCreatedAt().compareTo(q.getDate()) >= 0;
        }
        if (ComparatorType.is_before.equals(comparator)) {
            if (null == tweet.getCreatedAt()) {
                logger.warn("No createdAt field in Tweet!!!" + tweet);
                return false; // default behavior
            }
            // A query without a date cannot be satisfied.
            return q.getDate() != null && tweet.getCreatedAt().compareTo(q.getDate()) <= 0;
        }
    }
    if (q instanceof ClassifierQueryJsonObject) {
        return isClassifierMatch(q, tweet);
    }
    return false; // neither a date nor a classifier query
}

/**
 * Scans the tweet's nominal labels for one that satisfies a classifier query.
 * <p>
 * Only labels whose {@code attribute_code} equals the query's classifier code
 * (case-insensitively) are considered. For such a label:
 * <ul>
 * <li>{@code has_confidence}: label confidence must be {@code >=} the query confidence;</li>
 * <li>{@code is}: label code must equal the query label code (case-insensitively)
 *     and the confidence must be sufficient;</li>
 * <li>{@code is_not}: label code must differ from the query label code
 *     and the confidence must be sufficient;</li>
 * <li>otherwise (no comparator, or a label-code comparator where either label
 *     code is {@code null}): the label counts as a match on classifier code
 *     alone, and scanning continues.</li>
 * </ul>
 * The scan stops at the first label that satisfies a comparator; otherwise the
 * outcome of the last label with a matching classifier code is returned.
 *
 * @param q     the classifier query predicate
 * @param tweet the classified tweet to test
 * @return the match outcome; {@code false} when the tweet has no nominal labels
 *         (including a {@code null} label collection) or none has the classifier code
 */
private boolean isClassifierMatch(QueryJsonObject q, ClassifiedFilteredTweet tweet) {
    if (tweet.getNominalLabels() == null) {
        return false; // nothing to match against
    }
    final Object comparator = q.getComparator();
    boolean matchResult = false;
    for (NominalLabel nLabel : tweet.getNominalLabels()) {
        if (q.getClassifierCode() == null || nLabel.attribute_code == null
                || !q.getClassifierCode().equalsIgnoreCase(nLabel.attribute_code)) {
            continue; // label belongs to a different classifier
        }
        // Classifier code matches; the comparator, if applicable, refines the result.
        matchResult = true;
        if (ComparatorType.has_confidence.equals(comparator)) {
            matchResult = nLabel.confidence >= q.getConfidence();
        } else if (ComparatorType.is.equals(comparator)
                && q.getLabelCode() != null && nLabel.label_code != null) {
            matchResult = q.getLabelCode().equalsIgnoreCase(nLabel.label_code)
                    && nLabel.confidence >= q.getConfidence();
        } else if (ComparatorType.is_not.equals(comparator)
                && q.getLabelCode() != null && nLabel.label_code != null) {
            matchResult = !q.getLabelCode().equalsIgnoreCase(nLabel.label_code)
                    && nLabel.confidence >= q.getConfidence();
        } else {
            // No comparator applies: keep the classifier-code match and go on to the next label.
            continue;
        }
        if (matchResult) {
            break; // one satisfying label is enough
        }
    }
    return matchResult;
}
/**
 * Converts the JSON text of one query constraint into a typed query object.
 * Despite its name, this method deserializes.
 * <p>
 * The text is classified with {@link #isDateQuery(String)} and then
 * {@link #isClassifierQuery(String)}, and parsed with a Jackson mapper whose
 * date format is {@code DateFormatConfig.ISODateFormat} in the GMT time zone.
 * If that parse fails, the failure is logged with its cause and a fallback
 * object of the detected type, with only its query type set, is returned.
 *
 * @param queryString JSON text of a single query constraint
 * @return a {@link DateQueryJsonObject} or {@link ClassifierQueryJsonObject},
 *         or {@code null} when the text is recognized as neither
 */
public QueryJsonObject serializeQuery(String queryString) {
    QueryJsonObject queryObject = null;
    ObjectMapper mapper = new ObjectMapper();
    mapper.setSerializationInclusion(JsonSerialize.Inclusion.NON_EMPTY);
    final DateFormat df = new SimpleDateFormat(DateFormatConfig.ISODateFormat);
    df.setTimeZone(TimeZone.getTimeZone("GMT"));
    mapper.setDateFormat(df);
    if (isDateQuery(queryString)) {
        // Fallback returned if the parse below fails.
        queryObject = new DateQueryJsonObject();
        queryObject.setQueryType(QueryType.date_query);
        try {
            queryObject = mapper.readValue(queryString, DateQueryJsonObject.class);
            logger.info("DateQueryObject: " + queryObject.toString());
        } catch (JsonParseException e) {
            logger.error("JsonParseException for DateQueryJsonObject attempt", e);
        } catch (JsonMappingException e) {
            logger.error("JsonMappingException for DateQueryJsonObject attempt", e);
        } catch (IOException e) {
            logger.error("IOException for DateQueryJsonObject attempt", e);
        }
    } else if (isClassifierQuery(queryString)) {
        // Fallback returned if the parse below fails.
        queryObject = new ClassifierQueryJsonObject();
        queryObject.setQueryType(QueryType.classifier_query);
        try {
            queryObject = mapper.readValue(queryString, ClassifierQueryJsonObject.class);
            logger.info("ClassifierQueryObject: " + queryObject.toString());
        } catch (JsonParseException e) {
            // Pass the exception so the cause is logged, as in the date branch.
            logger.error("JsonParseException for ClassifierQueryJsonObject attempt", e);
        } catch (JsonMappingException e) {
            logger.error("JsonMappingException for ClassifierQueryJsonObject attempt", e);
        } catch (IOException e) {
            logger.error("IOException for ClassifierQueryJsonObject attempt", e);
        }
    }
    return queryObject;
}
/**
 * Returns the JSON form of the next unread constraint in {@code queryList}
 * and advances the {@code next} cursor.
 *
 * @return the constraint at index {@code next} serialized with Gson, or
 *         {@code null} when the constraint list is {@code null}, the cursor
 *         is past the last constraint, or serialization fails (in which
 *         case the cursor is not advanced and the failure is logged)
 */
public String getNextQueryObject() {
    Gson gson = new Gson();
    if (queryList.getConstraints() != null && next < queryList.getConstraints().size()) {
        try {
            String retValue = gson.toJson(queryList.getConstraints().get(next), QueryJsonObject.class);
            ++next;
            return retValue;
        } catch (Exception e) {
            // Include the exception so the cause of the failure is not lost.
            logger.error("Failed to serialize queryList: " + queryList.getConstraints().get(next), e);
        }
    }
    return null;
}
/**
 * Fills {@code matcherArray} from the constraints held in {@code queryList}.
 * <p>
 * Constraints are pulled one by one through {@link #getNextQueryObject()}
 * until it returns {@code null}; each JSON string is converted with
 * {@link #serializeQuery(String)} and the result is appended.
 */
public void buildMatcherArray() {
    logger.debug("Attempting to build the Matcher Array from input query");
    for (String json = getNextQueryObject(); json != null; json = getNextQueryObject()) {
        logger.info("Added query to matcher array: " + json);
        matcherArray.add(serializeQuery(json));
    }
}
/**
 * Fills {@code matcherArray} from the constraints of the given query list.
 * <p>
 * Each constraint is serialized to JSON with Gson, converted with
 * {@link #serializeQuery(String)}, and the result is appended.
 *
 * @param qList the query list to read; when it is {@code null} or has no
 *              constraint list, the matcher array is left unchanged
 */
public void buildMatcherArray(JsonQueryList qList) {
    // Same null tolerance as getNextQueryObject(): no constraints means nothing to add.
    if (qList == null || qList.getConstraints() == null) {
        logger.debug("No query constraints supplied, matcher array left unchanged");
        return;
    }
    Gson gson = new Gson();
    for (QueryJsonObject query : qList.getConstraints()) {
        String queryString = gson.toJson(query, QueryJsonObject.class);
        logger.info("Added query to matcher array: " + queryString);
        matcherArray.add(serializeQuery(queryString));
    }
}
/*public static void main(String args[]) {
System.out.println("In main - testing code");
ArrayList<String> temp = new ArrayList<String>();
temp.add("{\"queryType\":\"date_query\",\"comparator\":\"is_before\",\"time\":\"2014-03-04\"}");
temp.add("{\"queryType\":\"date_query\",\"comparator\":\"is_after\",\"time\":\"2013-05-01\"}");
temp.add("{\"queryType\":\"classifier_query\",\"classifier_code\":\"informative_v1\","
+ "\"label_code\":\"030_not_info\","
+ "\"comparator\":\"is\","
+ "\"confidence\":0.5}");
temp.add("{\"queryType\":\"classifier_query\",\"classifier_code\":\"informative_v1\","
+ "\"label_code\":\"030_info\","
+ "\"comparator\":\"is_not\","
+ "\"confidence\":0.64}");
temp.add("{\"queryType\":\"classifier_query\",\"classifier_code\":\"informative_v1\","
+ "\"label_code\":\"null\","
+ "\"comparator\":\"has_confidence\","
+ "\"confidence\":0.75}");
temp.add("{\"queryType\":\"classifier_query\",\"classifier_code\":\"informative_v1\"}");
FilterQueryMatcher test = new FilterQueryMatcher();
Gson gson = new Gson();
for (int i = 0; i < temp.size();i++) {
test.queryList.createConstraint(gson.fromJson(temp.get(i), GenericInputQuery.class));
}
test.buildMatcherArray();
System.out.println("matcherArray size = " + test.matcherArray.size());
// Now to test matcher functionality with test data
String testString1 = "{\"filter_level\":\"medium\",\"text\":\"@wongt0n You in Japan now?\",\"contributors\":null,\"geo\":null,\"retweeted\":false,\"in_reply_to_screen_name\":\"wongt0n\",\"truncated\":false,\"entities\":{\"hashtags\":[],\"symbols\":[],\"urls\":[],\"user_mentions\":[{\"id\":123834194,\"indices\":[0,8],\"screen_name\":\"wongt0n\",\"id_str\":\"123834194\",\"name\":\"Lazy Wong~\"}]},\"lang\":\"en\",\"in_reply_to_status_id_str\":\"421190875260014592\",\"id\":421193072316149761,\"aidr\":{\"features\":[{\"words\":[\"\",\"japan_now\",\"japan\",\"now\",\"_you\",\"you\",\"in_japan\",\"in\",\"you_in\"],\"type\":\"wordvector\"}],\"crisis_code\":\"japan_chem_explosion\",\"nominal_labels\":[],\"doctype\":\"twitter\",\"crisis_name\":\"Explosion at Japan chemical factory\"},\"source\":\"web\",\"in_reply_to_user_id_str\":\"123834194\",\"favorited\":false,\"in_reply_to_status_id\":421190875260014592,\"retweet_count\":0,\"created_at\":\"Thu Jan 09 08:13:48 +0000 2014\",\"in_reply_to_user_id\":123834194,\"favorite_count\":0,\"id_str\":\"421193072316149761\",\"place\":null,\"user\":{\"location\":\"\",\"default_profile\":false,\"profile_background_tile\":true,\"statuses_count\":10045,\"lang\":\"en\",\"profile_link_color\":\"1212E3\",\"profile_banner_url\":\"https://pbs.twimg.com/profile_banners/457120810/1358321901\",\"id\":457120810,\"following\":null,\"protected\":false,\"favourites_count\":7,\"profile_text_color\":\"E60ED4\",\"contributors_enabled\":false,\"verified\":false,\"description\":\"An ordinary SONE who fall in love with Taeyeon+Jessica+Sunny+Tiffany+Hyoyeon+Yuri+Sooyoung+Yoona+Seohyun = SNSD. 
Spazzing and sharing is my vacation on twitter.\",\"name\":\"~~\uC18C\uC2DC\uB77C\uC11C \uD589\uBCF5\uD574\uC694~~\",\"profile_sidebar_border_color\":\"FFFFFF\",\"profile_background_color\":\"EDFAFA\",\"created_at\":\"Sat Jan 07 01:51:54 +0000 2012\",\"default_profile_image\":false,\"followers_count\":47,\"geo_enabled\":false,\"profile_image_url_https\":\"https://pbs.twimg.com/profile_images/378800000219857862/9606b10e2dd7d700111f4c5be7384f63_normal.jpeg\",\"profile_background_image_url\":\"http://a0.twimg.com/profile_background_images/889556219/7456374b70ecfea67145b0214f15a988.jpeg\",\"profile_background_image_url_https\":\"https://si0.twimg.com/profile_background_images/889556219/7456374b70ecfea67145b0214f15a988.jpeg\",\"follow_request_sent\":null,\"url\":null,\"utc_offset\":28800,\"time_zone\":\"Kuala Lumpur\",\"notifications\":null,\"profile_use_background_image\":true,\"friends_count\":127,\"profile_sidebar_fill_color\":\"E1D2F5\",\"screen_name\":\"blueagle90\",\"id_str\":\"457120810\",\"profile_image_url\":\"http://pbs.twimg.com/profile_images/378800000219857862/9606b10e2dd7d700111f4c5be7384f63_normal.jpeg\",\"listed_count\":0,\"is_translator\":false},\"coordinates\":null}";
String testString2 = "{\"filter_level\":\"medium\",\"text\":\"@wongt0n You in Japan now?\",\"contributors\":null,\"geo\":null,\"retweeted\":false,\"in_reply_to_screen_name\":\"wongt0n\",\"truncated\":false,\"entities\":{\"hashtags\":[],\"symbols\":[],\"urls\":[],\"user_mentions\":[{\"id\":123834194,\"indices\":[0,8],\"screen_name\":\"wongt0n\",\"id_str\":\"123834194\",\"name\":\"Lazy Wong~\"}]},\"lang\":\"en\",\"in_reply_to_status_id_str\":\"421190875260014592\",\"id\":421193072316149761,\"aidr\":{\"features\":[{\"words\":[\"\",\"2009\",\"edition\",\"_#mp3\",\"#mp3_#music\",\"408_the\",\"the_end\",\"peas_2009\",\"#mp3\",\"the\",\"edition_black\",\"end_japan\",\"eyed_peas\",\"#music_408\",\"408\",\"japan\",\"japan_edition\",\"black_eyed\",\"#music\",\"eyed\",\"end\",\"black\",\"peas\"],\"type\":\"wordvector\"}],\"crisis_code\":\"japan_chem_explosion\",\"nominal_labels\":[{\"label_name\":\"Not related to crisis\",\"source_id\":289,\"from_human\":false,\"attribute_description\":\"Informative messages enhancing situational awareness, v1.0\",\"label_code\":\"030_not_info\",\"confidence\":0.54,\"label_description\":\"Not related to the crisis\",\"attribute_code\":\"informative_v1\",\"attribute_name\":\"Informative v1.0\"}],\"doctype\":\"twitter\",\"crisis_name\":\"Explosion at Japan chemical factory\"},\"source\":\"web\",\"in_reply_to_user_id_str\":\"123834194\",\"favorited\":false,\"in_reply_to_status_id\":421190875260014592,\"retweet_count\":0,\"created_at\":\"Thu Jan 09 08:13:48 +0000 
2014\",\"in_reply_to_user_id\":123834194,\"favorite_count\":0,\"id_str\":\"421193072316149761\",\"place\":null,\"user\":{\"location\":\"\",\"default_profile\":false,\"profile_background_tile\":true,\"statuses_count\":10045,\"lang\":\"en\",\"profile_link_color\":\"1212E3\",\"profile_banner_url\":\"https://pbs.twimg.com/profile_banners/457120810/1358321901\",\"id\":457120810,\"following\":null,\"protected\":false,\"favourites_count\":7,\"profile_text_color\":\"E60ED4\",\"contributors_enabled\":false,\"verified\":false,\"description\":\"An ordinary SONE who fall in love with Taeyeon+Jessica+Sunny+Tiffany+Hyoyeon+Yuri+Sooyoung+Yoona+Seohyun = SNSD. Spazzing and sharing is my vacation on twitter.\",\"name\":\"~~\uC18C\uC2DC\uB77C\uC11C \uD589\uBCF5\uD574\uC694~~\",\"profile_sidebar_border_color\":\"FFFFFF\",\"profile_background_color\":\"EDFAFA\",\"created_at\":\"Sat Jan 07 01:51:54 +0000 2012\",\"default_profile_image\":false,\"followers_count\":47,\"geo_enabled\":false,\"profile_image_url_https\":\"https://pbs.twimg.com/profile_images/378800000219857862/9606b10e2dd7d700111f4c5be7384f63_normal.jpeg\",\"profile_background_image_url\":\"http://a0.twimg.com/profile_background_images/889556219/7456374b70ecfea67145b0214f15a988.jpeg\",\"profile_background_image_url_https\":\"https://si0.twimg.com/profile_background_images/889556219/7456374b70ecfea67145b0214f15a988.jpeg\",\"follow_request_sent\":null,\"url\":null,\"utc_offset\":28800,\"time_zone\":\"Kuala Lumpur\",\"notifications\":null,\"profile_use_background_image\":true,\"friends_count\":127,\"profile_sidebar_fill_color\":\"E1D2F5\",\"screen_name\":\"blueagle90\",\"id_str\":\"457120810\",\"profile_image_url\":\"http://pbs.twimg.com/profile_images/378800000219857862/9606b10e2dd7d700111f4c5be7384f63_normal.jpeg\",\"listed_count\":0,\"is_translator\":false},\"coordinates\":null}";
String testString3 = "{\"filter_level\":\"medium\",\"text\":\"@wongt0n You in Japan now?\",\"contributors\":null,\"geo\":null,\"retweeted\":false,\"in_reply_to_screen_name\":\"wongt0n\",\"truncated\":false,\"entities\":{\"hashtags\":[],\"symbols\":[],\"urls\":[],\"user_mentions\":[{\"id\":123834194,\"indices\":[0,8],\"screen_name\":\"wongt0n\",\"id_str\":\"123834194\",\"name\":\"Lazy Wong~\"}]},\"lang\":\"en\",\"in_reply_to_status_id_str\":\"421190875260014592\",\"id\":421193072316149761,\"aidr\":{\"features\":[{\"words\":[\"\",\"2009\",\"edition\",\"_#mp3\",\"#mp3_#music\",\"408_the\",\"the_end\",\"peas_2009\",\"#mp3\",\"the\",\"edition_black\",\"end_japan\",\"eyed_peas\",\"#music_408\",\"408\",\"japan\",\"japan_edition\",\"black_eyed\",\"#music\",\"eyed\",\"end\",\"black\",\"peas\"],\"type\":\"wordvector\"}],\"crisis_code\":\"japan_chem_explosion\",\"nominal_labels\":[{\"label_name\":\"Not related to crisis\",\"source_id\":289,\"from_human\":false,\"attribute_description\":\"Informative messages enhancing situational awareness, v1.0\",\"label_code\":\"030_info\",\"confidence\":0.54,\"label_description\":\"Not related to the crisis\",\"attribute_code\":\"informative_v2\",\"attribute_name\":\"Informative v1.0\"}, {\"label_name\":\"Not related to crisis\",\"source_id\":289,\"from_human\":false,\"attribute_description\":\"Informative messages enhancing situational awareness, v1.0\",\"label_code\":\"030_info\",\"confidence\":0.8433459674,\"label_description\":\"Not related to the crisis\",\"attribute_code\":\"informative_v1\",\"attribute_name\":\"Informative v1.0\"}, {\"label_name\":\"Not related to crisis\",\"source_id\":289,\"from_human\":false,\"attribute_description\":\"Mock Attribute for Testing, v1.0\",\"label_code\":\"030_not_info\",\"confidence\":0.20,\"label_description\":\"related to the crisis\",\"attribute_code\":\"mock attribute\",\"attribute_name\":\"Mock Attribute\"}],\"doctype\":\"twitter\",\"crisis_name\":\"Explosion at Japan chemical 
factory\"},\"source\":\"web\",\"in_reply_to_user_id_str\":\"123834194\",\"favorited\":false,\"in_reply_to_status_id\":421190875260014592,\"retweet_count\":0,\"created_at\":\"Thu Jan 09 08:13:48 +0000 2014\",\"in_reply_to_user_id\":123834194,\"favorite_count\":0,\"id_str\":\"421193072316149761\",\"place\":null,\"user\":{\"location\":\"\",\"default_profile\":false,\"profile_background_tile\":true,\"statuses_count\":10045,\"lang\":\"en\",\"profile_link_color\":\"1212E3\",\"profile_banner_url\":\"https://pbs.twimg.com/profile_banners/457120810/1358321901\",\"id\":457120810,\"following\":null,\"protected\":false,\"favourites_count\":7,\"profile_text_color\":\"E60ED4\",\"contributors_enabled\":false,\"verified\":false,\"description\":\"An ordinary SONE who fall in love with Taeyeon+Jessica+Sunny+Tiffany+Hyoyeon+Yuri+Sooyoung+Yoona+Seohyun = SNSD. Spazzing and sharing is my vacation on twitter.\",\"name\":\"~~\uC18C\uC2DC\uB77C\uC11C \uD589\uBCF5\uD574\uC694~~\",\"profile_sidebar_border_color\":\"FFFFFF\",\"profile_background_color\":\"EDFAFA\",\"created_at\":\"Sat Jan 07 01:51:54 +0000 2012\",\"default_profile_image\":false,\"followers_count\":47,\"geo_enabled\":false,\"profile_image_url_https\":\"https://pbs.twimg.com/profile_images/378800000219857862/9606b10e2dd7d700111f4c5be7384f63_normal.jpeg\",\"profile_background_image_url\":\"http://a0.twimg.com/profile_background_images/889556219/7456374b70ecfea67145b0214f15a988.jpeg\",\"profile_background_image_url_https\":\"https://si0.twimg.com/profile_background_images/889556219/7456374b70ecfea67145b0214f15a988.jpeg\",\"follow_request_sent\":null,\"url\":null,\"utc_offset\":28800,\"time_zone\":\"Kuala 
Lumpur\",\"notifications\":null,\"profile_use_background_image\":true,\"friends_count\":127,\"profile_sidebar_fill_color\":\"E1D2F5\",\"screen_name\":\"blueagle90\",\"id_str\":\"457120810\",\"profile_image_url\":\"http://pbs.twimg.com/profile_images/378800000219857862/9606b10e2dd7d700111f4c5be7384f63_normal.jpeg\",\"listed_count\":0,\"is_translator\":false},\"coordinates\":null}";
ClassifiedFilteredTweet testTweet = new ClassifiedFilteredTweet();
testTweet.deserialize(testString2);
int i = 0;
for (NominalLabel t: testTweet.getNominalLabels()) {
System.out.println("i = " + i + ": createAt = " + testTweet.getCreatedAt()
+ ", attribute code = " + t.attribute_code
+ ", label_code = " + t.label_code + ", confidence = " + t.confidence);
++i;
}
System.out.println("[main] Result of matching: " + test.getMatcherResult(testTweet));
}*/
}