ReadTask.java example

Explorer
lucene-solr-master
- lucene
- solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.byTask.tasks;


import java.io.IOException;

import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;


/**
 * Read index (abstract) task.
 * Sub classes implement withSearch(), withWarm(), withTraverse() and withRetrieve()
 * methods to configure the actual action.
 * <p>Note: All ReadTasks reuse the reader if it is already open.
 * Otherwise a reader is opened at start and closed at the end.
 * <p>
 * The <code>search.num.hits</code> config parameter sets
 * the top number of hits to collect during searching.  If
 * <code>print.hits.field</code> is set, then each hit is
 * printed along with the value of that field.</p>
 *
 * <p>Other side effects: none.
 */
public abstract class ReadTask extends PerfTask {

  private final QueryMaker queryMaker;

  public ReadTask(PerfRunData runData) {
    super(runData);
    if (withSearch()) {
      queryMaker = getQueryMaker();
    } else {
      queryMaker = null;
    }
  }
  @Override
  public int doLogic() throws Exception {
    int res = 0;

    // open reader or use existing one
    IndexSearcher searcher = getRunData().getIndexSearcher(); // (will incRef the reader)

    IndexReader reader;

    final boolean closeSearcher;
    if (searcher == null) {
      // open our own reader
      Directory dir = getRunData().getDirectory();
      reader = DirectoryReader.open(dir);
      searcher = new IndexSearcher(reader);
      closeSearcher = true;
    } else {
      // use existing one; this passes +1 ref to us
      reader = searcher.getIndexReader();
      closeSearcher = false;
    }

    // optionally warm and add num docs traversed to count
    if (withWarm()) {
      Document doc = null;
      Bits liveDocs = MultiFields.getLiveDocs(reader);
      for (int m = 0; m < reader.maxDoc(); m++) {
        if (null == liveDocs || liveDocs.get(m)) {
          doc = reader.document(m);
          res += (doc == null ? 0 : 1);
        }
      }
    }

    if (withSearch()) {
      res++;
      Query q = queryMaker.makeQuery();
      Sort sort = getSort();
      TopDocs hits = null;
      final int numHits = numHits();
      if (numHits > 0) {
        if (withCollector() == false) {
          if (sort != null) {
            // TODO: instead of always passing false we
            // should detect based on the query; if we make
            // the IndexSearcher search methods that take
            // Weight public again, we can go back to
            // pulling the Weight ourselves:
            TopFieldCollector collector = TopFieldCollector.create(sort, numHits,
                                                                   true, withScore(),
                                                                   withMaxScore());
            searcher.search(q, collector);
            hits = collector.topDocs();
          } else {
            hits = searcher.search(q, numHits);
          }
        } else {
          Collector collector = createCollector();
          searcher.search(q, collector);
          //hits = collector.topDocs();
        }

        if (hits != null) {
          final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
          if (printHitsField != null && printHitsField.length() > 0) {
            System.out.println("totalHits = " + hits.totalHits);
            System.out.println("maxDoc()  = " + reader.maxDoc());
            System.out.println("numDocs() = " + reader.numDocs());
            for(int i=0;i<hits.scoreDocs.length;i++) {
              final int docID = hits.scoreDocs[i].doc;
              final Document doc = reader.document(docID);
              System.out.println("  " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score + " " + printHitsField + " =" + doc.get(printHitsField));
            }
          }

          res += withTopDocs(searcher, q, hits);
        }
      }
    }

    if (closeSearcher) {
      reader.close();
    } else {
      // Release our +1 ref from above
      reader.decRef();
    }
    return res;
  }

  protected int withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
    IndexReader reader = searcher.getIndexReader();
    int res = 0;
    if (withTraverse()) {
      final ScoreDoc[] scoreDocs = hits.scoreDocs;
      int traversalSize = Math.min(scoreDocs.length, traversalSize());

      if (traversalSize > 0) {
        boolean retrieve = withRetrieve();
        for (int m = 0; m < traversalSize; m++) {
          int id = scoreDocs[m].doc;
          res++;
          if (retrieve) {
            Document document = retrieveDoc(reader, id);
            res += document != null ? 1 : 0;
          }
        }
      }
    }
    return res;
  }

  protected Collector createCollector() throws Exception {
    return TopScoreDocCollector.create(numHits());
  }


  protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
    return ir.document(id);
  }

  /**
   * Return query maker used for this task.
   */
  public abstract QueryMaker getQueryMaker();

  /**
   * Return true if search should be performed.
   */
  public abstract boolean withSearch();

  public boolean withCollector(){
    return false;
  }
  

  /**
   * Return true if warming should be performed.
   */
  public abstract boolean withWarm();

  /**
   * Return true if, with search, results should be traversed.
   */
  public abstract boolean withTraverse();

  /** Whether scores should be computed (only useful with
   *  field sort) */
  public boolean withScore() {
    return true;
  }

  /** Whether maxScores should be computed (only useful with
   *  field sort) */
  public boolean withMaxScore() {
    return true;
  }

  /**
   * Specify the number of hits to traverse.  Tasks should override this if they want to restrict the number
   * of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0.
   * <p>
   * Read task calculates the traversal as: Math.min(hits.length(), traversalSize())
   *
   * @return Integer.MAX_VALUE
   */
  public int traversalSize() {
    return Integer.MAX_VALUE;
  }

  static final int DEFAULT_SEARCH_NUM_HITS = 10;
  private int numHits;

  @Override
  public void setup() throws Exception {
    super.setup();
    numHits = getRunData().getConfig().get("search.num.hits", DEFAULT_SEARCH_NUM_HITS);
  }

  /**
   * Specify the number of hits to retrieve.  Tasks should override this if they want to restrict the number
   * of hits that are collected during searching. Must be greater than 0.
   *
   * @return 10 by default, or search.num.hits config if set.
   */
  public int numHits() {
    return numHits;
  }

  /**
   * Return true if, with search and results traversing, docs should be retrieved.
   */
  public abstract boolean withRetrieve();

  protected Sort getSort() {
    return null;
  }

}