package doser.entitydisambiguation.algorithms;

import java.io.IOException;
import java.util.LinkedList;
import java.util.List;

import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;

import doser.entitydisambiguation.backend.AbstractDisambiguationTask;
import doser.entitydisambiguation.backend.DisambiguationTaskSingle;
import doser.entitydisambiguation.dpo.DisambiguatedEntity;
import doser.entitydisambiguation.dpo.EntityDisambiguationDPO;
import doser.entitydisambiguation.dpo.Response;
import doser.entitydisambiguation.knowledgebases.EnCenKBCStable;
import doser.entitydisambiguation.knowledgebases.EntityCentricKnowledgeBase;
import doser.entitydisambiguation.knowledgebases.AbstractKnowledgeBase;
import doser.lucene.features.LuceneFeatures;
import doser.lucene.query.LearnToRankClause;
import doser.lucene.query.LearnToRankQuery;

/**
 * Simple disambiguation algorithm which only uses the sense prior, for
 * computer science tables. End of CODE project: used to disambiguate 20
 * percent of table contents.
 *
 * @author Quhfus
 *
 */
public class EntityCentricAlgorithmCSTable extends AbstractDisambiguationAlgorithm {

    /** Knowledge base the query is run against; set by {@link #checkAndSetInputParameter}. */
    private EnCenKBCStable eckb;

    /** Task currently being processed; set by {@link #checkAndSetInputParameter}. */
    private DisambiguationTaskSingle task;

    /**
     * Accepts the task only if it is a {@link DisambiguationTaskSingle} whose
     * knowledge base is an {@link EnCenKBCStable}, and stores both for
     * {@link #processAlgorithm()}.
     *
     * @param task the task to validate
     * @return {@code true} if the task was accepted and stored, {@code false}
     *         otherwise (in which case the fields are left untouched)
     */
    @Override
    public boolean checkAndSetInputParameter(AbstractDisambiguationTask task) {
        AbstractKnowledgeBase kb = task.getKb();
        if (!(task instanceof DisambiguationTaskSingle) || !(kb instanceof EnCenKBCStable)) {
            return false;
        }
        this.eckb = (EnCenKBCStable) kb;
        this.task = (DisambiguationTaskSingle) task;
        return true;
    }

    /**
     * No preparation is needed for this algorithm.
     *
     * @return always {@code true}
     */
    @Override
    protected boolean preDisambiguation() {
        return true;
    }

    /**
     * Runs the sense prior query for the task's entity, converts each hit
     * into a {@link DisambiguatedEntity} and stores the result on the task.
     * <p>
     * No response is set on the task when the search yields no entity. If an
     * {@link IOException} interrupts the search, the error is logged and the
     * entities collected up to that point are still published.
     */
    @Override
    public void processAlgorithm() throws IllegalDisambiguationAlgorithmInputException {
        final EntityDisambiguationDPO dpo = task.getEntityToDisambiguate();
        final Query query = createQuery(dpo, eckb);
        final List<DisambiguatedEntity> disList = new LinkedList<DisambiguatedEntity>();

        final IndexSearcher searcher = eckb.getSearcher();
        try {
            final IndexReader reader = searcher.getIndexReader();
            final TopDocs top = searcher.search(query, task.getReturnNr());
            for (final ScoreDoc hit : top.scoreDocs) {
                disList.add(toEntity(reader.document(hit.doc), hit.score));
            }
        } catch (IOException e) {
            // Pass the throwable itself so the full stack trace is logged.
            Logger.getRootLogger().error("Sense prior search failed", e);
        } finally {
            // NOTE(review): presumably hands back the searcher obtained via
            // getSearcher(); done in finally so it also happens when an
            // unchecked exception escapes the search.
            eckb.release();
        }

        if (!disList.isEmpty()) {
            publishResponse(dpo, disList);
        }
    }

    /**
     * Converts one search hit into a {@link DisambiguatedEntity}.
     *
     * @param doc   the index document of the hit
     * @param score the score assigned to the hit
     * @return the entity; its URI is {@code null} when the score is exactly 0
     */
    private DisambiguatedEntity toEntity(Document doc, float score) {
        final DisambiguatedEntity entity = new DisambiguatedEntity();
        entity.setConfidence(score);
        // A zero score is reported without a URI.
        entity.setEntityUri(score == 0.0f ? null : doc.get("mainlink"));
        entity.setText(doc.get("label"));
        entity.setDescription(doc.get("description"));
        // if (task.isRetrieveDocClasses()) {
        //     entity.setDoc(doc);
        // }
        return entity;
    }

    /**
     * Wraps the entities in a single {@link Response} carrying the selected
     * text and start position of the input, and sets it on the task.
     *
     * @param dpo     the entity that was disambiguated
     * @param disList the disambiguation candidates
     */
    private void publishResponse(EntityDisambiguationDPO dpo, List<DisambiguatedEntity> disList) {
        final Response response = new Response();
        response.setSelectedText(dpo.getSelectedText());
        response.setStartPosition(dpo.getStartPosition());
        response.setDisEntities(disList);

        final List<Response> resList = new LinkedList<Response>();
        resList.add(response);
        task.setResponse(resList);
    }

    /**
     * Builds a learn-to-rank query with a single clause: the sense prior of
     * the selected text, with weight 1.
     *
     * @param dpo the entity to disambiguate; its selected text is queried
     * @param kb  the knowledge base supplying the feature definition
     * @return the query
     */
    private Query createQuery(EntityDisambiguationDPO dpo, EntityCentricKnowledgeBase kb) {
        final LearnToRankQuery query = new LearnToRankQuery();

        // Feature 1
        final LearnToRankClause sensePrior = query.add(
                LuceneFeatures.querySensePrior(dpo.getSelectedText(), kb.getFeatureDefinition()),
                "Feature1", false);
        sensePrior.setWeight(1f);

        return query;
    }
}