package edu.uncc.cs.watsonsim.search;

import java.util.ArrayList;
import java.util.List;

import org.apache.log4j.Logger;

import edu.uncc.cs.watsonsim.Environment;
import edu.uncc.cs.watsonsim.Passage;
import edu.uncc.cs.watsonsim.Question;
import edu.uncc.cs.watsonsim.Score;
import edu.uncc.cs.watsonsim.StringUtils;
import edu.uncc.cs.watsonsim.scorers.Merge;
import lemurproject.indri.QueryAnnotation;
import lemurproject.indri.QueryEnvironment;
import lemurproject.indri.ScoredExtentResult;

/**
 * Searcher backed by an Indri index.
 *
 * <p>Each hit is returned as a {@link Passage} carrying the Indri document
 * number as its reference, together with the scores
 * {@code INDRI_ANSWER_RANK}, {@code INDRI_ANSWER_SCORE} and
 * {@code INDRI_ANSWER_PRESENT}.
 *
 * @author Phani Rahul
 */
public class IndriSearcher extends Searcher {
	private final QueryEnvironment q = new QueryEnvironment();
	// Cleared when the searcher is disabled by configuration or when the
	// index could not be opened; query() then returns no results.
	private boolean enabled = true;
	private final Logger log = Logger.getLogger(getClass());
	// When true, "#combine" in the reformulated query is rewritten to "#uw".
	private final boolean strict;

	/**
	 * Set up the Indri query environment.
	 *
	 * <p>Reads two configuration keys from {@code env}:
	 * {@code indri_enabled} (the searcher is disabled when it equals
	 * {@code "false"}) and {@code indri_index} (the Indri index path).
	 * If opening the index fails, the error is logged and the searcher
	 * disables itself instead of propagating the exception.
	 *
	 * @param env    the environment supplying the configuration
	 * @param strict whether to rewrite {@code #combine} to {@code #uw}
	 *               in every query
	 */
	public IndriSearcher(Environment env, boolean strict) {
		super(env);
		this.strict = strict;
		// Compare string contents, not references: a value read from the
		// configuration is not the same object as the literal "false".
		if ("false".equals(env.getConfOrDie("indri_enabled"))) {
			enabled = false;
		} else {
			try {
				q.addIndex(env.getConfOrDie("indri_index"));
			} catch (Exception e) {
				log.error("Setting up the Indri index failed."
						+ " Is the index in the correct location?"
						+ " Is indri_jni included?", e);
				enabled = false;
			}
		}
		Score.register("INDRI_ANSWER_SCORE", -1, Merge.Mean);
		Score.register("INDRI_ANSWER_RANK", -1, Merge.Mean);
		Score.register("INDRI_ANSWER_PRESENT", 0.0, Merge.Sum);
	}

	/**
	 * Run the question against the Indri index.
	 *
	 * <p>The query text is the question's category and text, sanitized and
	 * passed through Indri's {@code reformulateQuery}. At most
	 * {@code MAX_RESULTS} results are requested.
	 *
	 * @param question the question to search for
	 * @return the passages after {@code fillFromSources}; an empty list if
	 *         the searcher is disabled or the Indri query fails
	 */
	public List<Passage> query(Question question) {
		if (!enabled) {
			return new ArrayList<>();
		}

		// Develop the query
		String query = q.reformulateQuery(StringUtils.sanitize(
				question.getCategory() + " " + question.text));
		if (strict) {
			// Literal substitution; no regular expression is needed.
			query = query.replace("#combine", "#uw");
		}

		log.info("Executing query " + query);

		// Fetch the scored results and their document numbers
		ScoredExtentResult[] ser;
		String[] docnos;
		try {
			QueryAnnotation aq = q.runAnnotatedQuery(query, MAX_RESULTS);
			ser = aq.getResults();
			docnos = q.documentMetadata(ser, "docno");
		} catch (Exception e) {
			// If any step fails, report it but don't die.
			log.error("Querying Indri failed. Is the index in the correct location? Is indri_jni included?", e);
			return new ArrayList<>();
		}

		// Compile them into a uniform format. Title and text are left
		// empty here; presumably fillFromSources populates them from the
		// reference (confirm against Searcher).
		List<Passage> results = new ArrayList<>(ser.length);
		for (int i = 0; i < ser.length; i++) {
			results.add(new Passage(
					"indri",    // Engine
					"",         // Title
					"",         // Full Text
					docnos[i])  // Reference
					.score("INDRI_ANSWER_RANK", (double) i) // zero-based position
					.score("INDRI_ANSWER_SCORE", ser[i].score)
					.score("INDRI_ANSWER_PRESENT", 1.0));
		}
		return fillFromSources(results);
	}
}