package it.acubelab.smaph;
import it.unipi.di.acube.batframework.data.Annotation;
import it.unipi.di.acube.batframework.utils.Pair;
import it.unipi.di.acube.batframework.utils.WikipediaApiInterface;
import it.acubelab.smaph.entityfilters.LibSvmEntityFilter;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URLEncoder;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Vector;
import org.apache.commons.lang3.tuple.ImmutableTriple;
import org.apache.commons.lang3.tuple.Triple;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
/**
 * Accumulates intermediate data recorded while queries are being annotated
 * and renders that data as JSON.
 *
 * <p>
 * Every piece of recorded data is keyed by the query string it belongs to.
 * The {@code add*} methods store data; {@link #toJson(WikipediaApiInterface)}
 * and {@link #getBoldsToQuery(WikipediaApiInterface)} render it. Getters
 * return an empty array for a query that has no data of the requested kind.
 */
public class SmaphAnnotatorDebugger {
    /** Alias of {@link System#out}. */
    public static final PrintStream out = System.out;

    /** query -&gt; list of (source, entity id, bolds) triples. */
    public HashMap<String, List<Triple<String, Integer, HashSet<String>>>> queryToSourceEntityBolds = new HashMap<>();

    /** Queries in the order they were registered; drives {@link #toJson}. */
    private final List<String> processedQueries = new Vector<>();
    /** query -&gt; response stored via {@link #addBingResponseNormalSearch}. */
    private final HashMap<String, JSONObject> bingResponsesNS = new HashMap<String, JSONObject>();
    /** query -&gt; response stored via {@link #addBingResponseWikiSearch}. */
    private final HashMap<String, JSONObject> bingResponsesWS = new HashMap<String, JSONObject>();
    /** query -&gt; list of (bold, rank, min edit distance to the query). */
    private final HashMap<String, List<Triple<String, Integer, Double>>> boldPositionED = new HashMap<>();
    /** query -&gt; bolds stored via {@link #addBoldFilterOutput}. */
    private final HashMap<String, List<String>> boldFilterOutput = new HashMap<>();
    /** query -&gt; list of (bold, concept id of its annotation). */
    private final HashMap<String, List<Pair<String, Integer>>> returnedAnnotations = new HashMap<>();
    /** query -&gt; (source-1 feature triple -&gt; bold it was recorded with). */
    private final HashMap<String, HashMap<Triple<Integer, HashMap<String, Double>, Boolean>, String>> ftrToBoldS1 = new HashMap<>();
    /** query -&gt; list of (wid, features, accepted) for source 1. */
    private final HashMap<String, List<Triple<Integer, HashMap<String, Double>, Boolean>>> entityFeaturesS1 = new HashMap<>();
    /** query -&gt; list of (wid, features, accepted) for source 2. */
    private final HashMap<String, List<Triple<Integer, HashMap<String, Double>, Boolean>>> entityFeaturesS2 = new HashMap<>();
    /** query -&gt; list of (wid, features, accepted) for source 3. */
    private final HashMap<String, List<Triple<Integer, HashMap<String, Double>, Boolean>>> entityFeaturesS3 = new HashMap<>();
    /** query -&gt; list of (rank, url, wid or -1) for source 2. */
    private final HashMap<String, List<Triple<Integer, String, Integer>>> source2SearchResult = new HashMap<>();
    /** query -&gt; list of (rank, url, wid or -1) for source 3. */
    private final HashMap<String, List<Triple<Integer, String, Integer>>> source3SearchResult = new HashMap<>();
    /** query -&gt; set of wids stored via {@link #addResult}. */
    private final HashMap<String, HashSet<Integer>> result = new HashMap<>();
    /** query -&gt; list of (snippet, list of (start, length) pairs). */
    private final HashMap<String, List<Pair<String, Vector<Pair<Integer, Integer>>>>> snippetsToBolds = new HashMap<>();

    /**
     * Registers a query so that {@link #toJson} emits an entry for it.
     *
     * @param query the query string
     */
    public void addProcessedQuery(String query) {
        processedQueries.add(query);
    }

    /**
     * Records the bolds associated with an entity of a given source for a
     * query. If the same (source, entity) pair was already recorded for the
     * query, the new bolds are merged into the existing set.
     *
     * @param query  the query string
     * @param source the source label
     * @param entity the entity id
     * @param bolds  the bolds to record; the set is copied, never retained
     */
    public void addQueryCandidateBolds(String query, String source, int entity,
            HashSet<String> bolds) {
        List<Triple<String, Integer, HashSet<String>>> entries = queryToSourceEntityBolds
                .get(query);
        if (entries == null) {
            entries = new Vector<Triple<String, Integer, HashSet<String>>>();
            queryToSourceEntityBolds.put(query, entries);
        }
        for (Triple<String, Integer, HashSet<String>> entry : entries) {
            if (entry.getLeft().equals(source)
                    && entry.getMiddle().equals(entity)) {
                entry.getRight().addAll(bolds);
                return;
            }
        }
        // Store a private copy: later calls merge into this set, and doing
        // that on the caller's own instance would silently alter its data.
        entries.add(new ImmutableTriple<String, Integer, HashSet<String>>(
                source, entity, new HashSet<String>(bolds)));
    }

    /**
     * Builds the English Wikipedia URL of the page with the given id.
     *
     * @param wid     the page id
     * @param wikiApi used to resolve the id to a title
     * @return the URL, with the title URL-encoded and spaces as {@code %20}
     * @throws RuntimeException wrapping the {@link IOException} raised while
     *                          resolving or encoding the title
     */
    private static String widToUrl(int wid, WikipediaApiInterface wikiApi) {
        try {
            // URLEncoder produces '+' for spaces; replace it with "%20".
            return "http://en.wikipedia.org/wiki/"
                    + URLEncoder.encode(wikiApi.getTitlebyId(wid), "UTF-8")
                            .replace("+", "%20");
        } catch (IOException e) {
            throw new RuntimeException(
                    "Could not build Wikipedia URL for wid " + wid, e);
        }
    }

    /**
     * Renders everything recorded through {@link #addQueryCandidateBolds}.
     *
     * @param wikiApi used to resolve entity ids to titles
     * @return an object whose {@code "dump"} array holds one element per
     *         query, each with its {@code "query"} and {@code "boldsEntity"}
     * @throws JSONException if a value cannot be stored in the JSON object
     * @throws IOException   if a title lookup fails
     */
    public JSONObject getBoldsToQuery(WikipediaApiInterface wikiApi)
            throws JSONException, IOException {
        JSONObject dump = new JSONObject();
        JSONArray mentionEntityDump = new JSONArray();
        dump.put("dump", mentionEntityDump);
        for (String query : queryToSourceEntityBolds.keySet()) {
            JSONObject queryData = new JSONObject();
            mentionEntityDump.put(queryData);
            queryData.put("query", query);
            JSONArray boldsEntity = new JSONArray();
            queryData.put("boldsEntity", boldsEntity);
            for (Triple<String, Integer, HashSet<String>> data : queryToSourceEntityBolds
                    .get(query)) {
                JSONObject entityData = new JSONObject();
                boldsEntity.put(entityData);
                entityData.put("source", data.getLeft());
                entityData.put("wid", data.getMiddle());
                entityData.put("title", wikiApi.getTitlebyId(data.getMiddle()));
                JSONArray bolds = new JSONArray();
                for (String bold : data.getRight()) {
                    bolds.put(bold);
                }
                entityData.put("bolds", bolds);
                entityData.put("url", widToUrl(data.getMiddle(), wikiApi));
            }
        }
        return dump;
    }

    /**
     * Stores the normal-search response for a query, replacing any previous
     * one.
     *
     * @param query        the query string
     * @param bingResponse the response to store
     */
    public void addBingResponseNormalSearch(String query,
            JSONObject bingResponse) {
        this.bingResponsesNS.put(query, bingResponse);
    }

    /**
     * @param query the query string
     * @return the stored normal-search response, or {@code null} if none
     */
    public JSONObject getBingResponseNormalSearch(String query) {
        return this.bingResponsesNS.get(query);
    }

    /**
     * Stores the wiki-search response for a query, replacing any previous
     * one.
     *
     * @param query        the query string
     * @param bingResponse the response to store
     */
    public void addBingResponseWikiSearch(String query, JSONObject bingResponse) {
        this.bingResponsesWS.put(query, bingResponse);
    }

    /**
     * @param query the query string
     * @return the stored wiki-search response, or {@code null} if none
     */
    public JSONObject getBingResponseWikiSearch(String query) {
        return this.bingResponsesWS.get(query);
    }

    /**
     * Records, for each (bold, rank) pair, the bold, its rank and the value
     * of {@code SmaphUtils.getMinEditDist(query, bold)}. Entries are appended
     * to those already recorded for the query.
     *
     * @param query             the query string
     * @param bingBoldsAndRanks the (bold, rank) pairs to record
     */
    public void addBoldPositionEditDistance(String query,
            List<Pair<String, Integer>> bingBoldsAndRanks) {
        List<Triple<String, Integer, Double>> entries = this.boldPositionED
                .get(query);
        if (entries == null) {
            entries = new Vector<Triple<String, Integer, Double>>();
            this.boldPositionED.put(query, entries);
        }
        for (Pair<String, Integer> boldAndRank : bingBoldsAndRanks) {
            entries.add(new ImmutableTriple<>(boldAndRank.first,
                    boldAndRank.second, SmaphUtils.getMinEditDist(query,
                            boldAndRank.first)));
        }
    }

    /**
     * @param query the query string
     * @return one object per recorded bold with keys {@code "bold"},
     *         {@code "rank"} and {@code "editDistance"}; empty if nothing was
     *         recorded for the query
     * @throws JSONException if a value cannot be stored in the JSON object
     */
    public JSONArray getBoldPositionEditDistance(String query)
            throws JSONException {
        JSONArray res = new JSONArray();
        List<Triple<String, Integer, Double>> entries = this.boldPositionED
                .get(query);
        if (entries == null) {
            return res;
        }
        for (Triple<String, Integer, Double> triple : entries) {
            JSONObject tripleJs = new JSONObject();
            res.put(tripleJs);
            tripleJs.put("bold", triple.getLeft());
            tripleJs.put("rank", triple.getMiddle());
            tripleJs.put("editDistance", triple.getRight());
        }
        return res;
    }

    /**
     * Stores the snippets of a query, replacing any previously stored list.
     * The list is stored by reference.
     *
     * @param query          the query string
     * @param snippetsToBold (snippet, list of (start, length) pairs) entries
     */
    public void addSnippets(String query,
            List<Pair<String, Vector<Pair<Integer, Integer>>>> snippetsToBold) {
        this.snippetsToBolds.put(query, snippetsToBold);
    }

    /**
     * @param query the query string
     * @return one object per snippet with keys {@code "snippet"} and
     *         {@code "bold_positions"} (objects with {@code "start"} and
     *         {@code "length"}); empty if nothing was stored for the query
     * @throws JSONException if a value cannot be stored in the JSON object
     */
    public JSONArray getSnippets(String query) throws JSONException {
        JSONArray res = new JSONArray();
        List<Pair<String, Vector<Pair<Integer, Integer>>>> snippets = this.snippetsToBolds
                .get(query);
        if (snippets == null) {
            return res;
        }
        for (Pair<String, Vector<Pair<Integer, Integer>>> snippet : snippets) {
            JSONObject snippetJs = new JSONObject();
            res.put(snippetJs);
            snippetJs.put("snippet", snippet.first);
            JSONArray positionsJs = new JSONArray();
            snippetJs.put("bold_positions", positionsJs);
            for (Pair<Integer, Integer> startAndLength : snippet.second) {
                JSONObject position = new JSONObject();
                positionsJs.put(position);
                position.put("start", startAndLength.first);
                position.put("length", startAndLength.second);
            }
        }
        return res;
    }

    /**
     * Stores the bold-filter output of a query, replacing any previous one.
     * The list is stored by reference.
     *
     * @param query the query string
     * @param bolds the bolds to store
     */
    public void addBoldFilterOutput(String query, List<String> bolds) {
        this.boldFilterOutput.put(query, bolds);
    }

    /**
     * @param query the query string
     * @return the stored bolds as a JSON array; empty if nothing was stored
     *         for the query
     * @throws JSONException declared for interface compatibility
     */
    public JSONArray getBoldFilterOutput(String query) throws JSONException {
        JSONArray res = new JSONArray();
        List<String> bolds = this.boldFilterOutput.get(query);
        if (bolds == null) {
            return res;
        }
        for (String bold : bolds) {
            res.put(bold);
        }
        return res;
    }

    /**
     * Records, for every entry of the map, the key together with the concept
     * of its annotation. Entries are appended to those already recorded for
     * the query.
     *
     * @param query            the query string
     * @param spotToAnnotation map whose keys are recorded as bolds
     */
    public void addReturnedAnnotation(String query,
            HashMap<String, Annotation> spotToAnnotation) {
        List<Pair<String, Integer>> entries = this.returnedAnnotations
                .get(query);
        if (entries == null) {
            entries = new Vector<Pair<String, Integer>>();
            this.returnedAnnotations.put(query, entries);
        }
        for (String bold : spotToAnnotation.keySet()) {
            entries.add(new Pair<>(bold, spotToAnnotation.get(bold)
                    .getConcept()));
        }
    }

    /**
     * @param query   the query string
     * @param wikiApi used to resolve concept ids to titles
     * @return one object per recorded annotation with keys {@code "bold"},
     *         {@code "wid"}, {@code "title"} and {@code "url"}; empty if
     *         nothing was recorded for the query
     * @throws JSONException if a value cannot be stored in the JSON object
     * @throws IOException   if a title lookup fails
     */
    public JSONArray getReturnedAnnotations(String query,
            WikipediaApiInterface wikiApi) throws JSONException, IOException {
        JSONArray res = new JSONArray();
        List<Pair<String, Integer>> entries = this.returnedAnnotations
                .get(query);
        if (entries == null) {
            return res;
        }
        for (Pair<String, Integer> p : entries) {
            JSONObject pairJs = new JSONObject();
            res.put(pairJs);
            pairJs.put("bold", p.first);
            pairJs.put("wid", p.second);
            pairJs.put("title", wikiApi.getTitlebyId(p.second));
            pairJs.put("url", widToUrl(p.second, wikiApi));
        }
        return res;
    }

    /**
     * Records the features of a source-1 entity and remembers the bold it
     * was recorded with.
     *
     * @param query    the query string
     * @param bold     the bold associated with the entity
     * @param wid      the entity id
     * @param features feature name to value; stored by reference and used as
     *                 part of a hash key, so it should not be modified
     *                 afterwards
     * @param accepted the flag emitted as {@code "accepted"}
     */
    public void addEntityFeaturesS1(String query, String bold, int wid,
            HashMap<String, Double> features, boolean accepted) {
        ImmutableTriple<Integer, HashMap<String, Double>, Boolean> ftrTriple = addEntityFeatures(
                this.entityFeaturesS1, query, wid, features, accepted);
        HashMap<Triple<Integer, HashMap<String, Double>, Boolean>, String> ftrToBold = ftrToBoldS1
                .get(query);
        if (ftrToBold == null) {
            ftrToBold = new HashMap<Triple<Integer, HashMap<String, Double>, Boolean>, String>();
            ftrToBoldS1.put(query, ftrToBold);
        }
        ftrToBold.put(ftrTriple, bold);
    }

    /**
     * Records the features of a source-2 entity.
     *
     * @param query    the query string
     * @param wid      the entity id
     * @param features feature name to value; stored by reference
     * @param accepted the flag emitted as {@code "accepted"}
     */
    public void addEntityFeaturesS2(String query, int wid,
            HashMap<String, Double> features, boolean accepted) {
        addEntityFeatures(this.entityFeaturesS2, query, wid, features, accepted);
    }

    /**
     * Records the features of a source-3 entity.
     *
     * @param query    the query string
     * @param wid      the entity id
     * @param features feature name to value; stored by reference
     * @param accepted the flag emitted as {@code "accepted"}
     */
    public void addEntityFeaturesS3(String query, int wid,
            HashMap<String, Double> features, boolean accepted) {
        addEntityFeatures(this.entityFeaturesS3, query, wid, features, accepted);
    }

    /**
     * Appends a (wid, features, accepted) triple to the list kept for the
     * query in the given map, creating the list on first use.
     *
     * @return the triple that was appended
     */
    private ImmutableTriple<Integer, HashMap<String, Double>, Boolean> addEntityFeatures(
            HashMap<String, List<Triple<Integer, HashMap<String, Double>, Boolean>>> source,
            String query, int wid, HashMap<String, Double> features,
            boolean accepted) {
        List<Triple<Integer, HashMap<String, Double>, Boolean>> entries = source
                .get(query);
        if (entries == null) {
            entries = new Vector<Triple<Integer, HashMap<String, Double>, Boolean>>();
            source.put(query, entries);
        }
        ImmutableTriple<Integer, HashMap<String, Double>, Boolean> ftrTriple = new ImmutableTriple<>(
                wid, features, accepted);
        entries.add(ftrTriple);
        return ftrTriple;
    }

    /**
     * Renders the feature triples kept for the query in the given map.
     *
     * @return one object per triple with keys {@code "wid"}, {@code "title"},
     *         {@code "url"}, {@code "features"}, {@code "accepted"} and, when
     *         a bold is known for the triple, {@code "bold"}; empty if the
     *         map has nothing for the query
     * @throws JSONException if a value cannot be stored in the JSON object
     * @throws IOException   if a title lookup fails
     */
    private JSONArray getEntityFeatures(
            HashMap<String, List<Triple<Integer, HashMap<String, Double>, Boolean>>> source,
            String query, WikipediaApiInterface wikiApi) throws JSONException,
            IOException {
        JSONArray res = new JSONArray();
        List<Triple<Integer, HashMap<String, Double>, Boolean>> entries = source
                .get(query);
        if (entries == null) {
            return res;
        }
        // Bolds are only recorded by addEntityFeaturesS1, so this map is
        // absent when the query has features for sources 2/3 but none for
        // source 1.
        HashMap<Triple<Integer, HashMap<String, Double>, Boolean>, String> ftrToBold = ftrToBoldS1
                .get(query);
        for (Triple<Integer, HashMap<String, Double>, Boolean> p : entries) {
            JSONObject pairJs = new JSONObject();
            res.put(pairJs);
            String bold = ftrToBold == null ? null : ftrToBold.get(p);
            if (bold != null) {
                pairJs.put("bold", bold);
            }
            pairJs.put("wid", p.getLeft());
            pairJs.put("title", wikiApi.getTitlebyId(p.getLeft()));
            pairJs.put("url", widToUrl(p.getLeft(), wikiApi));
            JSONObject features = new JSONObject();
            pairJs.put("features", features);
            for (String ftrName : LibSvmEntityFilter.ftrNames) {
                features.put(ftrName, p.getMiddle().get(ftrName));
            }
            pairJs.put("accepted", p.getRight());
        }
        return res;
    }

    /**
     * Appends one (rank, url, wid) triple per URL to the list kept for the
     * query in the given map. The rank is the index of the URL in
     * {@code urls}; the wid is the value mapped to that rank, or -1 when the
     * rank has no (non-null) mapping.
     */
    private void addSourceSearchResult(
            HashMap<String, List<Triple<Integer, String, Integer>>> source,
            String query, HashMap<Integer, Integer> rankToIdNS,
            List<String> urls) {
        List<Triple<Integer, String, Integer>> entries = source.get(query);
        if (entries == null) {
            entries = new Vector<Triple<Integer, String, Integer>>();
            source.put(query, entries);
        }
        for (int rank = 0; rank < urls.size(); rank++) {
            // Single lookup; a null value is treated like a missing key
            // instead of failing while unboxing.
            Integer mappedId = rankToIdNS.get(rank);
            int wid = mappedId != null ? mappedId : -1;
            entries.add(new ImmutableTriple<Integer, String, Integer>(rank,
                    urls.get(rank), wid));
        }
    }

    /**
     * Records the search result URLs of source 2 for a query.
     *
     * @param query      the query string
     * @param rankToIdNS rank (index into {@code urls}) to entity id
     * @param urls       the result URLs, in rank order
     */
    public void addSource2SearchResult(String query,
            HashMap<Integer, Integer> rankToIdNS, List<String> urls) {
        addSourceSearchResult(source2SearchResult, query, rankToIdNS, urls);
    }

    /**
     * Records the search result URLs of source 3 for a query.
     *
     * @param query      the query string
     * @param rankToIdWS rank (index into {@code urls}) to entity id
     * @param urls       the result URLs, in rank order
     */
    public void addSource3SearchResult(String query,
            HashMap<Integer, Integer> rankToIdWS, List<String> urls) {
        addSourceSearchResult(source3SearchResult, query, rankToIdWS, urls);
    }

    /**
     * Renders the (rank, url, wid) triples kept for the query in the given
     * map.
     *
     * @param source  the map to read from
     * @param query   the query string
     * @param wikiApi used to resolve non-negative wids to titles
     * @return one object per triple with keys {@code "rank"}, {@code "wid"},
     *         {@code "title"} and {@code "url"}; empty if the map has nothing
     *         for the query
     * @throws JSONException if a value cannot be stored in the JSON object
     * @throws IOException   if a title lookup fails
     */
    public JSONArray getSourceSearchResult(
            HashMap<String, List<Triple<Integer, String, Integer>>> source,
            String query, WikipediaApiInterface wikiApi) throws JSONException,
            IOException {
        JSONArray res = new JSONArray();
        List<Triple<Integer, String, Integer>> entries = source.get(query);
        if (entries == null) {
            return res;
        }
        for (Triple<Integer, String, Integer> t : entries) {
            JSONObject triple = new JSONObject();
            res.put(triple);
            triple.put("rank", t.getLeft());
            triple.put("wid", t.getRight());
            // Negative wids mark URLs with no associated entity id.
            triple.put("title",
                    t.getRight() >= 0 ? wikiApi.getTitlebyId(t.getRight())
                            : "---not a wikipedia page---");
            triple.put("url", t.getMiddle());
        }
        return res;
    }

    /**
     * Adds an entity id to the result set of a query.
     *
     * @param query the query string
     * @param wid   the entity id
     */
    public void addResult(String query, int wid) {
        HashSet<Integer> wids = this.result.get(query);
        if (wids == null) {
            wids = new HashSet<Integer>();
            this.result.put(query, wids);
        }
        wids.add(wid);
    }

    /**
     * @return one object per result wid of the query with keys {@code "wid"},
     *         {@code "title"} and {@code "url"}; empty if the query has no
     *         results
     * @throws JSONException if a value cannot be stored in the JSON object
     * @throws IOException   if a title lookup fails
     */
    private JSONArray getResults(String query, WikipediaApiInterface wikiApi)
            throws JSONException, IOException {
        JSONArray res = new JSONArray();
        HashSet<Integer> wids = result.get(query);
        if (wids == null) {
            return res;
        }
        for (Integer wid : wids) {
            JSONObject widJs = new JSONObject();
            res.put(widJs);
            widJs.put("wid", wid);
            widJs.put("title", wikiApi.getTitlebyId(wid));
            widJs.put("url", widToUrl(wid, wikiApi));
        }
        return res;
    }

    /**
     * Renders everything recorded for the processed queries.
     *
     * @param wikiApi used to resolve entity ids to titles
     * @return an object with one key per processed query; each value holds
     *         {@code "bingResponseNS"}, {@code "bingResponseWS"},
     *         {@code "phase1"} (with {@code "source1"}, {@code "source2"},
     *         {@code "source3"}) and {@code "results"}
     * @throws JSONException if a value cannot be stored in the JSON object
     * @throws IOException   if a title lookup fails
     */
    public JSONObject toJson(WikipediaApiInterface wikiApi)
            throws JSONException, IOException {
        JSONObject dump = new JSONObject();
        for (String query : processedQueries) {
            JSONObject queryData = new JSONObject();
            dump.put(query, queryData);
            JSONObject phase1 = new JSONObject();
            JSONObject phase1S1 = new JSONObject();
            JSONObject phase1S2 = new JSONObject();
            JSONObject phase1S3 = new JSONObject();
            queryData.put("bingResponseNS", getBingResponseNormalSearch(query));
            queryData.put("bingResponseWS", getBingResponseWikiSearch(query));
            queryData.put("phase1", phase1);
            phase1.put("source1", phase1S1);
            phase1.put("source2", phase1S2);
            phase1.put("source3", phase1S3);

            // Populate phase1 - source1
            phase1S1.put("bolds", getBoldPositionEditDistance(query));
            phase1S1.put("snippets", getSnippets(query));
            phase1S1.put("filteredBolds", getBoldFilterOutput(query));
            phase1S1.put("annotations", getReturnedAnnotations(query, wikiApi));
            phase1S1.put("entityFeatures",
                    getEntityFeatures(this.entityFeaturesS1, query, wikiApi));

            // Populate phase1 - source2
            phase1S2.put("pages",
                    getSourceSearchResult(source2SearchResult, query, wikiApi));
            phase1S2.put("entityFeatures",
                    getEntityFeatures(this.entityFeaturesS2, query, wikiApi));

            // Populate phase1 - source3
            phase1S3.put("pages",
                    getSourceSearchResult(source3SearchResult, query, wikiApi));
            phase1S3.put("entityFeatures",
                    getEntityFeatures(this.entityFeaturesS3, query, wikiApi));

            // Populate results
            queryData.put("results", getResults(query, wikiApi));
        }
        return dump;
    }
}