/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.byTask.tasks;

import java.io.IOException;

import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;

/**
 * Read index (abstract) task.
 * Sub classes implement withSearch(), withWarm(), withTraverse() and withRetrieve()
 * methods to configure the actual action.
 * <p>Note: All ReadTasks reuse the reader if it is already open.
 * Otherwise a reader is opened at start and closed at the end.
 * <p>
 * The <code>search.num.hits</code> config parameter sets
 * the top number of hits to collect during searching.  If
 * <code>print.hits.field</code> is set, then each hit is
 * printed along with the value of that field.</p>
 *
 * <p>Other side effects: none.
 */
public abstract class ReadTask extends PerfTask {

  /** Source of queries; {@code null} when {@link #withSearch()} returned false at construction. */
  private final QueryMaker queryMaker;

  /**
   * Creates the task and, if {@link #withSearch()} is true, obtains the query maker
   * from {@link #getQueryMaker()}.
   *
   * @param runData run data handed to the {@link PerfTask} super constructor
   */
  public ReadTask(PerfRunData runData) {
    super(runData);
    if (withSearch()) {
      queryMaker = getQueryMaker();
    } else {
      queryMaker = null;
    }
  }

  /**
   * Runs the read task: optionally warms the reader, optionally runs one search, and
   * optionally prints / traverses / retrieves the hits.
   * <p>
   * The reader is released (closed if this method opened it, otherwise decRef'd) in a
   * {@code finally} block, so it is not leaked when any step throws.
   *
   * @return the number of documents loaded while warming, plus one if a search was
   *         performed, plus the value returned by
   *         {@link #withTopDocs(IndexSearcher, Query, TopDocs)}
   */
  @Override
  public int doLogic() throws Exception {
    // open reader or use existing one
    IndexSearcher searcher = getRunData().getIndexSearcher(); // (will incRef the reader)

    final IndexReader reader;
    final boolean closeSearcher;
    if (searcher == null) {
      // open our own reader
      Directory dir = getRunData().getDirectory();
      reader = DirectoryReader.open(dir);
      searcher = new IndexSearcher(reader);
      closeSearcher = true;
    } else {
      // use existing one; this passes +1 ref to us
      reader = searcher.getIndexReader();
      closeSearcher = false;
    }

    try {
      int res = 0;

      // optionally warm and add num docs traversed to count
      if (withWarm()) {
        res += warmReader(reader);
      }

      if (withSearch()) {
        res++;
        res += searchAndTraverse(searcher, reader);
      }

      return res;
    } finally {
      // Always give the reader back, including when warming or searching failed.
      if (closeSearcher) {
        reader.close();
      } else {
        // Release our +1 ref from above
        reader.decRef();
      }
    }
  }

  /**
   * Loads every live document of the reader once.
   *
   * @return the number of non-null documents that were loaded
   */
  private int warmReader(IndexReader reader) throws IOException {
    int loaded = 0;
    final Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int m = 0; m < reader.maxDoc(); m++) {
      // a null liveDocs is treated as "every document is live"
      if (liveDocs == null || liveDocs.get(m)) {
        Document doc = reader.document(m);
        if (doc != null) {
          loaded++;
        }
      }
    }
    return loaded;
  }

  /**
   * Builds one query, runs it, and hands any resulting {@link TopDocs} to
   * {@link #printHits(IndexReader, TopDocs)} and
   * {@link #withTopDocs(IndexSearcher, Query, TopDocs)}.
   *
   * @return the value of {@code withTopDocs}, or 0 when {@link #numHits()} is not
   *         positive or the search produced no {@link TopDocs}
   */
  private int searchAndTraverse(IndexSearcher searcher, IndexReader reader) throws Exception {
    final Query q = queryMaker.makeQuery();
    final Sort sort = getSort();
    final int numHits = numHits();
    if (numHits <= 0) {
      return 0;
    }

    TopDocs hits = null;
    if (withCollector()) {
      // Custom collector path: hits is left null here, so nothing is printed or
      // traversed for this search.
      Collector collector = createCollector();
      searcher.search(q, collector);
    } else if (sort != null) {
      // TODO: instead of always passing false we
      // should detect based on the query; if we make
      // the IndexSearcher search methods that take
      // Weight public again, we can go back to
      // pulling the Weight ourselves:
      // NOTE(review): the call below passes no literal false; the TODO above may
      // be stale - confirm against the TopFieldCollector.create signature in use.
      TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, true, withScore(), withMaxScore());
      searcher.search(q, collector);
      hits = collector.topDocs();
    } else {
      hits = searcher.search(q, numHits);
    }

    if (hits == null) {
      return 0;
    }
    printHits(reader, hits);
    return withTopDocs(searcher, q, hits);
  }

  /**
   * Prints hit statistics and one line per hit to {@code System.out}, but only when the
   * <code>print.hits.field</code> config value is set to a non-empty string.
   */
  private void printHits(IndexReader reader, TopDocs hits) throws IOException {
    final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
    if (printHitsField == null || printHitsField.length() == 0) {
      return;
    }
    System.out.println("totalHits = " + hits.totalHits);
    System.out.println("maxDoc() = " + reader.maxDoc());
    System.out.println("numDocs() = " + reader.numDocs());
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      final int docID = hits.scoreDocs[i].doc;
      final Document doc = reader.document(docID);
      System.out.println(" " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score
          + " " + printHitsField + " =" + doc.get(printHitsField));
    }
  }

  /**
   * Traverses (and optionally retrieves) the top hits of a search.
   * Does nothing unless {@link #withTraverse()} is true. At most
   * {@code Math.min(hits.scoreDocs.length, traversalSize())} hits are visited.
   *
   * @param searcher searcher whose reader is used to retrieve documents
   * @param q the query that produced the hits (not used by this implementation)
   * @param hits the search results to traverse
   * @return one per traversed hit, plus one per non-null retrieved document
   */
  protected int withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
    IndexReader reader = searcher.getIndexReader();
    int res = 0;
    if (withTraverse()) {
      final ScoreDoc[] scoreDocs = hits.scoreDocs;
      int traversalSize = Math.min(scoreDocs.length, traversalSize());

      if (traversalSize > 0) {
        boolean retrieve = withRetrieve();
        for (int m = 0; m < traversalSize; m++) {
          int id = scoreDocs[m].doc;
          res++;
          if (retrieve) {
            Document document = retrieveDoc(reader, id);
            res += document != null ? 1 : 0;
          }
        }
      }
    }
    return res;
  }

  /**
   * Creates the collector used when {@link #withCollector()} is true.
   *
   * @return a {@link TopScoreDocCollector} sized by {@link #numHits()}
   */
  protected Collector createCollector() throws Exception {
    return TopScoreDocCollector.create(numHits());
  }

  /**
   * Loads a single document from the reader.
   *
   * @param ir reader to load from
   * @param id document id
   * @return the result of {@code ir.document(id)}
   */
  protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
    return ir.document(id);
  }

  /**
   * Return query maker used for this task.
   */
  public abstract QueryMaker getQueryMaker();

  /**
   * Return true if search should be performed.
   */
  public abstract boolean withSearch();

  /**
   * Return true if the search should go through the collector returned by
   * {@link #createCollector()}; on that path no hits are printed or traversed.
   * Returns false by default.
   */
  public boolean withCollector() {
    return false;
  }

  /**
   * Return true if warming should be performed.
   */
  public abstract boolean withWarm();

  /**
   * Return true if, with search, results should be traversed.
   */
  public abstract boolean withTraverse();

  /** Whether scores should be computed (only useful with
   *  field sort) */
  public boolean withScore() {
    return true;
  }

  /** Whether maxScores should be computed (only useful with
   *  field sort) */
  public boolean withMaxScore() {
    return true;
  }

  /**
   * Specify the number of hits to traverse.  Tasks should override this if they want to restrict the number
   * of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0.
   * <p>
   * Read task calculates the traversal as: Math.min(hits.length(), traversalSize())
   *
   * @return Integer.MAX_VALUE
   */
  public int traversalSize() {
    return Integer.MAX_VALUE;
  }

  /** Number of hits collected when <code>search.num.hits</code> is not configured. */
  static final int DEFAULT_SEARCH_NUM_HITS = 10;

  /** Value of <code>search.num.hits</code>, read in {@link #setup()}. */
  private int numHits;

  /**
   * Runs the super class setup and then reads <code>search.num.hits</code> from the config,
   * falling back to {@link #DEFAULT_SEARCH_NUM_HITS}.
   */
  @Override
  public void setup() throws Exception {
    super.setup();
    numHits = getRunData().getConfig().get("search.num.hits", DEFAULT_SEARCH_NUM_HITS);
  }

  /**
   * Specify the number of hits to retrieve.  Tasks should override this if they want to restrict the number
   * of hits that are collected during searching. Must be greater than 0.
   *
   * @return 10 by default, or search.num.hits config if set.
   */
  public int numHits() {
    return numHits;
  }

  /**
   * Return true if, with search and results traversing, docs should be retrieved.
   */
  public abstract boolean withRetrieve();

  /**
   * Sort to apply to the search; when non-null the search runs through a
   * {@link TopFieldCollector}.
   *
   * @return {@code null} by default
   */
  protected Sort getSort() {
    return null;
  }

}