package doser.entitydisambiguation.algorithms.collective.dbpedia;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.BooleanClause.Occur;
import doser.entitydisambiguation.algorithms.SurfaceForm;
import doser.entitydisambiguation.knowledgebases.EntityCentricKBDBpedia;
import doser.lucene.query.TermQuery;
public class TableColumnFilter {
private EntityCentricKBDBpedia eckb;
private String topic;
TableColumnFilter(EntityCentricKBDBpedia eckb, String topic) {
super();
this.eckb = eckb;
this.topic = topic;
}
public void filter(List<SurfaceForm> reps) {
for (SurfaceForm sf : reps) {
List<String> candidates = sf.getCandidates();
if (candidates.size() > 0) {
String s = performLuceneQuery(candidates, topic);
if (s != null) {
sf.setDisambiguatedEntity(s);
}
}
}
}
private String performLuceneQuery(List<String> candidates, String topic) {
String result = null;
IndexSearcher searcher = eckb.getSearcher();
IndexReader reader = searcher.getIndexReader();
BooleanQuery candidateq = new BooleanQuery();
for (String can : candidates) {
candidateq.add(new TermQuery(new Term("Mainlink", can)), Occur.SHOULD);
}
BooleanQuery q = new BooleanQuery();
q.add(candidateq, Occur.MUST);
q.add(new TermQuery(new Term("LongDescription", topic)), Occur.MUST);
TopDocs t = null;
try {
t = searcher.search(q, candidates.size());
} catch (IOException e) {
e.printStackTrace();
}
if (t != null) {
ScoreDoc[] scoredocs = t.scoreDocs;
if (scoredocs.length == 1) {
try {
result = reader.document(scoredocs[0].doc).get("Mainlink");
} catch (IOException e) {
e.printStackTrace();
}
}
}
return result;
}
}