package doser.entitydisambiguation.algorithms.rules; import java.io.IOException; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.similarities.DefaultSimilarity; import doser.entitydisambiguation.algorithms.SurfaceForm; import doser.entitydisambiguation.knowledgebases.AbstractKnowledgeBase; import doser.lucene.features.LuceneFeatures; import doser.lucene.query.LearnToRankClause; import doser.lucene.query.LearnToRankQuery; import doser.tools.Inflector; /** * Überprüft ob eine surface form im plural angegeben ist und falls ja überprüfe * den singular * * @author stefan * */ class NoCandidatesCheckPlural extends AbstractRule { NoCandidatesCheckPlural(AbstractKnowledgeBase eckb) { super(eckb); } @Override public boolean applyRule(List<SurfaceForm> rep) { for (SurfaceForm r : rep) { if (r.getCandidates().size() == 0) { String sf = r.getSurfaceForm(); String singular = Inflector.getInstance().singularize(sf); if (!sf.equalsIgnoreCase(singular)) { // Try singular search ArrayList<String> lst = queryLucene(singular); if (lst.size() != 0) { r.setCandidates(lst); } } } } return false; } private ArrayList<String> queryLucene(String surfaceForm) { ArrayList<String> list = new ArrayList<String>(); final IndexSearcher searcher = eckb.getSearcher(); final IndexReader reader = searcher.getIndexReader(); LearnToRankQuery query = new LearnToRankQuery(); List<LearnToRankClause> features = new LinkedList<LearnToRankClause>(); DefaultSimilarity defaultSim = new DefaultSimilarity(); features.add(query.add(LuceneFeatures.queryLabelTerm(surfaceForm, "UniqueLabel", defaultSim), "Feature1", true)); try { final TopDocs top = searcher.search(query, 150); final ScoreDoc[] score = top.scoreDocs; if (score.length <= 5) { for (int i = 0; i < score.length; ++i) { final Document doc = reader.document(score[i].doc); list.add(doc.get("Mainlink")); } } } catch (IOException e) { e.printStackTrace(); } return list; } }