/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.benchmark.quality; import java.io.IOException; import java.io.PrintWriter; import org.apache.lucene.benchmark.quality.utils.DocNameExtractor; import org.apache.lucene.benchmark.quality.utils.SubmissionReport; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.TopDocs; /** * Main entry point for running a quality benchmark. * <p> * There are two main configurations for running a quality benchmark: <ul> * <li>Against existing judgements.</li> * <li>For submission (e.g. for a contest).</li> * </ul> * The first configuration requires a non null * {@link org.apache.lucene.benchmark.quality.Judge Judge}. * The second configuration requires a non null * {@link org.apache.lucene.benchmark.quality.utils.SubmissionReport SubmissionLogger}. */ public class QualityBenchmark { /** Quality Queries that this quality benchmark would execute. */ protected QualityQuery qualityQueries[]; /** Parser for turning QualityQueries into Lucene Queries. */ protected QualityQueryParser qqParser; /** Index to be searched. */ protected Searcher searcher; /** index field to extract doc name for each search result; used for judging the results. */ protected String docNameField; /** maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging. */ private int maxQueries = Integer.MAX_VALUE; /** maximal number of results to collect for each query. Default: 1000. */ private int maxResults = 1000; /** * Create a QualityBenchmark. * @param qqs quality queries to run. * @param qqParser parser for turning QualityQueries into Lucene Queries. * @param searcher index to be searched. * @param docNameField name of field containing the document name. * This allows to extract the doc name for search results, * and is important for judging the results. */ public QualityBenchmark(QualityQuery qqs[], QualityQueryParser qqParser, Searcher searcher, String docNameField) { this.qualityQueries = qqs; this.qqParser = qqParser; this.searcher = searcher; this.docNameField = docNameField; } /** * Run the quality benchmark. * @param judge the judge that can tell if a certain result doc is relevant for a certain quality query. * If null, no judgements would be made. Usually null for a submission run. * @param submitRep submission report is created if non null. * @param qualityLog If not null, quality run data would be printed for each query. * @return QualityStats of each quality query that was executed. * @throws Exception if quality benchmark failed to run. */ public QualityStats [] execute(Judge judge, SubmissionReport submitRep, PrintWriter qualityLog) throws Exception { int nQueries = Math.min(maxQueries, qualityQueries.length); QualityStats stats[] = new QualityStats[nQueries]; for (int i=0; i<nQueries; i++) { QualityQuery qq = qualityQueries[i]; // generate query Query q = qqParser.parse(qq); // search with this query long t1 = System.currentTimeMillis(); TopDocs td = searcher.search(q,null,maxResults); long searchTime = System.currentTimeMillis()-t1; //most likely we either submit or judge, but check both if (judge!=null) { stats[i] = analyzeQueryResults(qq, q, td, judge, qualityLog, searchTime); } if (submitRep!=null) { submitRep.report(qq,td,docNameField,searcher); } } if (submitRep!=null) { submitRep.flush(); } return stats; } /* Analyze/judge results for a single quality query; optionally log them. */ private QualityStats analyzeQueryResults(QualityQuery qq, Query q, TopDocs td, Judge judge, PrintWriter logger, long searchTime) throws IOException { QualityStats stts = new QualityStats(judge.maxRecall(qq),searchTime); ScoreDoc sd[] = td.scoreDocs; long t1 = System.currentTimeMillis(); // extraction of first doc name we measure also construction of doc name extractor, just in case. DocNameExtractor xt = new DocNameExtractor(docNameField); for (int i=0; i<sd.length; i++) { String docName = xt.docName(searcher,sd[i].doc); long docNameExtractTime = System.currentTimeMillis() - t1; t1 = System.currentTimeMillis(); boolean isRelevant = judge.isRelevant(docName,qq); stts.addResult(i+1,isRelevant, docNameExtractTime); } if (logger!=null) { logger.println(qq.getQueryID()+" - "+q); stts.log(qq.getQueryID()+" Stats:",1,logger," "); } return stts; } /** * @return the maximum number of quality queries to run. Useful at debugging. */ public int getMaxQueries() { return maxQueries; } /** * Set the maximum number of quality queries to run. Useful at debugging. */ public void setMaxQueries(int maxQueries) { this.maxQueries = maxQueries; } /** * @return the maximum number of results to collect for each quality query. */ public int getMaxResults() { return maxResults; } /** * set the maximum number of results to collect for each quality query. */ public void setMaxResults(int maxResults) { this.maxResults = maxResults; } }