package doser.entitydisambiguation.algorithms.collective.dbpedia;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.log4j.Logger;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import com.hp.hpl.jena.query.QueryException;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Model;
import doser.entitydisambiguation.algorithms.SurfaceForm;
import doser.entitydisambiguation.backend.DisambiguationMainService;
import doser.entitydisambiguation.knowledgebases.EntityCentricKBDBpedia;
/**
 * Assigns fixed DBpedia resources to surface forms that are weekday names,
 * month names, a few unit words, spelled-out numbers or plain integers.
 *
 * <p>Surface forms are lower-cased and looked up in a static table first. If
 * the table has no entry and the text is an integer, the resource
 * {@code http://dbpedia.org/resource/<n>_(number)} is used when it is present
 * in the knowledge-base index, otherwise the target of its DBpedia redirect
 * (if any). Surface forms matching neither rule are left untouched.
 */
class TimeNumberDisambiguation {

    private static final Logger LOGGER = Logger.getLogger(TimeNumberDisambiguation.class);

    /** Common prefix of every DBpedia resource URI produced by this class. */
    private static final String RESOURCE_PREFIX = "http://dbpedia.org/resource/";

    /** Lower-cased surface form -> DBpedia resource URI. Read-only after class init. */
    private static final Map<String, String> TIMEANDNUMBERS = buildLookup();

    private final EntityCentricKBDBpedia eckb;

    /**
     * @param eckb knowledge base whose Lucene searcher is used to check whether
     *             a generated number URI exists
     */
    public TimeNumberDisambiguation(EntityCentricKBDBpedia eckb) {
        super();
        this.eckb = eckb;
    }

    /**
     * Builds the static lookup table. A plain method is used instead of
     * double-brace initialisation so that no anonymous {@code HashMap}
     * subclass is created.
     */
    private static Map<String, String> buildLookup() {
        final Map<String, String> m = new HashMap<String, String>();

        // Weekdays
        m.put("monday", RESOURCE_PREFIX + "Monday");
        m.put("tuesday", RESOURCE_PREFIX + "Tuesday");
        m.put("wednesday", RESOURCE_PREFIX + "Wednesday");
        m.put("thursday", RESOURCE_PREFIX + "Thursday");
        m.put("friday", RESOURCE_PREFIX + "Friday");
        m.put("saturday", RESOURCE_PREFIX + "Saturday");
        m.put("sunday", RESOURCE_PREFIX + "Sunday");

        // Spelled-out numbers
        m.put("one", RESOURCE_PREFIX + "1_(number)");
        m.put("two", RESOURCE_PREFIX + "2_(number)");
        m.put("three", RESOURCE_PREFIX + "3_(number)");
        m.put("four", RESOURCE_PREFIX + "4_(number)");
        m.put("five", RESOURCE_PREFIX + "5_(number)");
        m.put("six", RESOURCE_PREFIX + "6_(number)");
        // Fixed: was "7(number)" (missing underscore), unlike every other entry.
        m.put("seven", RESOURCE_PREFIX + "7_(number)");
        m.put("eight", RESOURCE_PREFIX + "8_(number)");
        m.put("nine", RESOURCE_PREFIX + "9_(number)");
        m.put("ten", RESOURCE_PREFIX + "10_(number)");
        m.put("eleven", RESOURCE_PREFIX + "11_(number)");
        m.put("twelve", RESOURCE_PREFIX + "12_(number)");
        m.put("thirteen", RESOURCE_PREFIX + "13_(number)");
        m.put("fourteen", RESOURCE_PREFIX + "14_(number)");
        m.put("fifteen", RESOURCE_PREFIX + "15_(number)");
        m.put("sixteen", RESOURCE_PREFIX + "16_(number)");
        m.put("seventeen", RESOURCE_PREFIX + "17_(number)");
        m.put("eighteen", RESOURCE_PREFIX + "18_(number)");
        m.put("nineteen", RESOURCE_PREFIX + "19_(number)");
        m.put("twenty", RESOURCE_PREFIX + "20_(number)");
        m.put("thirty", RESOURCE_PREFIX + "30_(number)");
        m.put("forty", RESOURCE_PREFIX + "40_(number)");
        m.put("fifty", RESOURCE_PREFIX + "50_(number)");
        m.put("sixty", RESOURCE_PREFIX + "60_(number)");
        m.put("seventy", RESOURCE_PREFIX + "70_(number)");
        m.put("eighty", RESOURCE_PREFIX + "80_(number)");
        m.put("ninety", RESOURCE_PREFIX + "90_(number)");
        m.put("hundred", RESOURCE_PREFIX + "100_(number)");

        // Months
        m.put("january", RESOURCE_PREFIX + "January");
        m.put("february", RESOURCE_PREFIX + "February");
        m.put("march", RESOURCE_PREFIX + "March");
        m.put("april", RESOURCE_PREFIX + "April");
        m.put("may", RESOURCE_PREFIX + "May");
        m.put("june", RESOURCE_PREFIX + "June");
        m.put("july", RESOURCE_PREFIX + "July");
        m.put("august", RESOURCE_PREFIX + "August");
        m.put("september", RESOURCE_PREFIX + "September");
        m.put("october", RESOURCE_PREFIX + "October");
        m.put("november", RESOURCE_PREFIX + "November");
        m.put("december", RESOURCE_PREFIX + "December");

        // Units of time and distance (singular and plural where listed)
        m.put("year", RESOURCE_PREFIX + "Year");
        m.put("years", RESOURCE_PREFIX + "Year");
        m.put("mile", RESOURCE_PREFIX + "Mile");
        m.put("miles", RESOURCE_PREFIX + "Mile");
        m.put("hour", RESOURCE_PREFIX + "Hour");
        m.put("hours", RESOURCE_PREFIX + "Hour");
        m.put("second", RESOURCE_PREFIX + "Second");
        m.put("week", RESOURCE_PREFIX + "Week");
        m.put("weeks", RESOURCE_PREFIX + "Week");

        // NOTE(review): not a time/number expression; looks like a hard-coded
        // override kept from the original table - confirm it is still wanted.
        m.put("socialist party", RESOURCE_PREFIX + "Socialist_Party_of_Serbia");

        return Collections.unmodifiableMap(m);
    }

    /**
     * Sets the disambiguated entity on every surface form in {@code reps} that
     * is a known time/number word or an integer with a resolvable DBpedia
     * number resource. Other surface forms are not modified.
     *
     * @param reps surface forms to inspect; modified in place
     */
    void solve(List<SurfaceForm> reps) {
        for (SurfaceForm sf : reps) {
            // Locale.ROOT keeps the lookup independent of the JVM default locale
            // (e.g. Turkish dotless-i would turn "FRIDAY" into "fr\u0131day").
            final String s = sf.getSurfaceForm().toLowerCase(Locale.ROOT);

            final String known = TIMEANDNUMBERS.get(s);
            if (known != null) {
                sf.setDisambiguatedEntity(known);
                continue;
            }
            if (!isInteger(s, 10)) {
                continue;
            }

            final String url = RESOURCE_PREFIX + s + "_(number)";
            if (isInIndex(url)) {
                sf.setDisambiguatedEntity(url);
            } else {
                final String redirect = getRedirect(url);
                if (redirect != null) {
                    sf.setDisambiguatedEntity(redirect);
                }
            }
        }
    }

    /**
     * Tells whether {@code s} consists solely of digits in the given radix,
     * optionally preceded by a single {@code '-'}.
     *
     * @param s     text to test; must not be {@code null}
     * @param radix radix passed to {@link Character#digit(char, int)}
     * @return {@code false} for the empty string and for a lone {@code "-"}
     */
    private static boolean isInteger(String s, int radix) {
        final int start = s.startsWith("-") ? 1 : 0;
        if (s.length() == start) {
            // Empty string or just a minus sign.
            return false;
        }
        for (int i = start; i < s.length(); i++) {
            if (Character.digit(s.charAt(i), radix) < 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether the knowledge-base index contains a document whose
     * {@code Mainlink} field equals {@code url}.
     *
     * @return {@code true} if at least one document matches; {@code false} if
     *         none matches or the search fails with an {@link IOException}
     */
    private boolean isInIndex(String url) {
        final IndexSearcher searcher = this.eckb.getSearcher();
        final Query query = new TermQuery(new Term("Mainlink", url));
        try {
            final TopDocs topdocs = searcher.search(query, 1);
            final ScoreDoc[] hits = topdocs.scoreDocs;
            return hits.length > 0;
        } catch (IOException e) {
            LOGGER.error("Index lookup failed for " + url, e);
            return false;
        }
    }

    /**
     * Looks up the {@code dbpedia-owl:wikiPageRedirects} target of {@code uri}
     * in the DBpedia redirects model.
     *
     * @param uri resource URI to resolve
     * @return URI of the first redirect target, or {@code null} if there is
     *         none or the query fails
     */
    private String getRedirect(String uri) {
        final Model model = DisambiguationMainService.getInstance().getDBpediaRedirects();
        final String query = "SELECT ?label WHERE{ <" + uri
                + "> <http://dbpedia.org/ontology/wikiPageRedirects> ?label. }";
        QueryExecution qexec = null;
        try {
            final com.hp.hpl.jena.query.Query cquery = QueryFactory.create(query);
            qexec = QueryExecutionFactory.create(cquery, model);
            // Read the result inside the try: previously this happened in a
            // finally block and dereferenced a null ResultSet whenever the
            // query itself had thrown.
            final ResultSet results = qexec.execSelect();
            if (results.hasNext()) {
                final QuerySolution sol = results.nextSolution();
                return sol.getResource("label").getURI();
            }
        } catch (final QueryException e) {
            LOGGER.error("Redirect lookup failed for " + uri, e);
        } finally {
            // Release the resources held by the query execution.
            if (qexec != null) {
                qexec.close();
            }
        }
        return null;
    }
}