package edu.uncc.cs.watsonsim.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Collections;

import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;

import edu.uncc.cs.watsonsim.Environment;
import edu.uncc.cs.watsonsim.Passage;
import edu.uncc.cs.watsonsim.Score;
import edu.uncc.cs.watsonsim.scorers.Merge;

/**
 * Searcher that turns Lucene hits into {@link Passage} objects.
 *
 * <p>Each hit becomes a passage tagged with the engine name {@code "lucene"},
 * an empty title and text, and the hit's {@code docno} field as its reference.
 * The list is then handed to {@code fillFromSources} before being returned.
 *
 * @author Phani Rahul
 */
public class LucenePassageSearcher extends Searcher {
	/** Name of the score that records the Lucene relevance score of a hit. */
	private static final String SCORE_KEY = "LUCENE_SCORE";
	/** Name of the score that records the zero-based position of a hit. */
	private static final String RANK_KEY = "LUCENE_RANK";

	private final IndexSearcher lucene;
	private final Environment env;

	/**
	 * Builds a searcher around the Lucene index held by the environment and
	 * registers the two score names this searcher attaches to its passages.
	 *
	 * @param env environment supplying the Lucene {@link IndexSearcher} and
	 *            the query helper used by {@link #query(String)}
	 */
	public LucenePassageSearcher(Environment env) {
		super(env);
		this.lucene = env.lucene;
		this.env = env;
		Score.register(SCORE_KEY, -1, Merge.Mean);
		Score.register(RANK_KEY, -1, Merge.Mean);
	}

	/**
	 * Runs the question text against Lucene and wraps every hit in a passage.
	 *
	 * <p>An {@link IOException} during the search is reported on standard
	 * output and does not propagate; whatever passages were collected before
	 * the failure are still passed on to {@code fillFromSources}.
	 *
	 * @param question_text the text to search for
	 * @return the result of {@code fillFromSources} applied to the collected
	 *         passages
	 */
	public List<Passage> query(String question_text) {
		List<Passage> passages = new ArrayList<>();

		try {
			ScoreDoc[] hits = env.simpleLuceneQuery(question_text, MAX_RESULTS);

			// The position in the hit array doubles as the rank, so count it
			// alongside the iteration.
			int rank = 0;
			for (ScoreDoc hit : hits) {
				// Ask Lucene for the "docno" field only.
				Document doc = lucene.doc(hit.doc, Collections.singleton("docno"));
				String reference = doc.get("docno");

				Passage passage = new Passage(
						"lucene",   // Engine
						"",         // Title
						"",         // Text
						reference)  // Reference
						.score(RANK_KEY, (double) rank)        // Rank
						.score(SCORE_KEY, (double) hit.score); // Relevance score
				passages.add(passage);

				rank++;
			}
		} catch (IOException e) {
			System.out.println("Failed to query Lucene. Is the index in the correct location?");
			e.printStackTrace();
		}

		// Fill any missing full text from sources
		return fillFromSources(passages);
	}
}