package org.apache.lucene.search; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import org.apache.lucene.util.PriorityQueue; /** * A base class for all collectors that return a {@link TopDocs} output. This * collector allows easy extension by providing a single constructor which * accepts a {@link PriorityQueue} as well as protected members for that * priority queue and a counter of the number of total hits.<br> * Extending classes can override any of the methods to provide their own * implementation, as well as avoid the use of the priority queue entirely by * passing null to {@link #TopDocsCollector(PriorityQueue)}. In that case * however, you might want to consider overriding all methods, in order to avoid * a NullPointerException. */ public abstract class TopDocsCollector<T extends ScoreDoc> extends Collector { // This is used in case topDocs() is called with illegal parameters, or there // simply aren't (enough) results. protected static final TopDocs EMPTY_TOPDOCS = new TopDocs(0, new ScoreDoc[0], Float.NaN); /** * The priority queue which holds the top documents. Note that different * implementations of PriorityQueue give different meaning to 'top documents'. * HitQueue for example aggregates the top scoring documents, while other PQ * implementations may hold documents sorted by other criteria. */ protected PriorityQueue<T> pq; /** The total number of documents that the collector encountered. */ protected int totalHits; protected TopDocsCollector(PriorityQueue<T> pq) { this.pq = pq; } /** * Populates the results array with the ScoreDoc instances. This can be * overridden in case a different ScoreDoc type should be returned. */ protected void populateResults(ScoreDoc[] results, int howMany) { for (int i = howMany - 1; i >= 0; i--) { results[i] = pq.pop(); } } /** * Returns a {@link TopDocs} instance containing the given results. If * <code>results</code> is null it means there are no results to return, * either because there were 0 calls to collect() or because the arguments to * topDocs were invalid. */ protected TopDocs newTopDocs(ScoreDoc[] results, int start) { return results == null ? EMPTY_TOPDOCS : new TopDocs(totalHits, results); } /** The total number of documents that matched this query. */ public int getTotalHits() { return totalHits; } /** Returns the top docs that were collected by this collector. */ public TopDocs topDocs() { // In case pq was populated with sentinel values, there might be less // results than pq.size(). Therefore return all results until either // pq.size() or totalHits. return topDocs(0, totalHits < pq.size() ? totalHits : pq.size()); } /** * Returns the documents in the rage [start .. pq.size()) that were collected * by this collector. Note that if start >= pq.size(), an empty TopDocs is * returned.<br> * This method is convenient to call if the application always asks for the * last results, starting from the last 'page'.<br> * <b>NOTE:</b> you cannot call this method more than once for each search * execution. If you need to call it more than once, passing each time a * different <code>start</code>, you should call {@link #topDocs()} and work * with the returned {@link TopDocs} object, which will contain all the * results this search execution collected. */ public TopDocs topDocs(int start) { // In case pq was populated with sentinel values, there might be less // results than pq.size(). Therefore return all results until either // pq.size() or totalHits. return topDocs(start, totalHits < pq.size() ? totalHits : pq.size()); } /** * Returns the documents in the rage [start .. start+howMany) that were * collected by this collector. Note that if start >= pq.size(), an empty * TopDocs is returned, and if pq.size() - start < howMany, then only the * available documents in [start .. pq.size()) are returned.<br> * This method is useful to call in case pagination of search results is * allowed by the search application, as well as it attempts to optimize the * memory used by allocating only as much as requested by howMany.<br> * <b>NOTE:</b> you cannot call this method more than once for each search * execution. If you need to call it more than once, passing each time a * different range, you should call {@link #topDocs()} and work with the * returned {@link TopDocs} object, which will contain all the results this * search execution collected. */ public TopDocs topDocs(int start, int howMany) { // In case pq was populated with sentinel values, there might be less // results than pq.size(). Therefore return all results until either // pq.size() or totalHits. int size = totalHits < pq.size() ? totalHits : pq.size(); // Don't bother to throw an exception, just return an empty TopDocs in case // the parameters are invalid or out of range. if (start < 0 || start >= size || howMany <= 0) { return newTopDocs(null, start); } // We know that start < pqsize, so just fix howMany. howMany = Math.min(size - start, howMany); ScoreDoc[] results = new ScoreDoc[howMany]; // pq's pop() returns the 'least' element in the queue, therefore need // to discard the first ones, until we reach the requested range. // Note that this loop will usually not be executed, since the common usage // should be that the caller asks for the last howMany results. However it's // needed here for completeness. for (int i = pq.size() - start - howMany; i > 0; i--) { pq.pop(); } // Get the requested results from pq. populateResults(results, howMany); return newTopDocs(results, start); } }