package org.apache.lucene.search; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import org.apache.lucene.index.AtomicReaderContext; /** * A {@link Collector} implementation that collects the top-scoring hits, * returning them as a {@link TopDocs}. This is used by {@link IndexSearcher} to * implement {@link TopDocs}-based search. Hits are sorted by score descending * and then (when the scores are tied) docID ascending. When you create an * instance of this collector you should know in advance whether documents are * going to be collected in doc Id order or not. * * <p><b>NOTE</b>: The values {@link Float#NaN} and * {@link Float#NEGATIVE_INFINITY} are not valid scores. This * collector will not properly collect hits with such * scores. */ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> { // Assumes docs are scored in order. private static class InOrderTopScoreDocCollector extends TopScoreDocCollector { private InOrderTopScoreDocCollector(int numHits) { super(numHits); } @Override public void collect(int doc) throws IOException { float score = scorer.score(); // This collector cannot handle these scores: assert score != Float.NEGATIVE_INFINITY; assert !Float.isNaN(score); totalHits++; if (score <= pqTop.score) { // Since docs are returned in-order (i.e., increasing doc Id), a document // with equal score to pqTop.score cannot compete since HitQueue favors // documents with lower doc Ids. Therefore reject those docs too. return; } pqTop.doc = doc + docBase; pqTop.score = score; pqTop = pq.updateTop(); } @Override public boolean acceptsDocsOutOfOrder() { return false; } } // Assumes docs are scored in order. private static class InOrderPagingScoreDocCollector extends TopScoreDocCollector { private final ScoreDoc after; // this is always after.doc - docBase, to save an add when score == after.score private int afterDoc; private int collectedHits; private InOrderPagingScoreDocCollector(ScoreDoc after, int numHits) { super(numHits); this.after = after; } @Override public void collect(int doc) throws IOException { float score = scorer.score(); // This collector cannot handle these scores: assert score != Float.NEGATIVE_INFINITY; assert !Float.isNaN(score); totalHits++; if (score > after.score || (score == after.score && doc <= afterDoc)) { // hit was collected on a previous page return; } if (score <= pqTop.score) { // Since docs are returned in-order (i.e., increasing doc Id), a document // with equal score to pqTop.score cannot compete since HitQueue favors // documents with lower doc Ids. Therefore reject those docs too. return; } collectedHits++; pqTop.doc = doc + docBase; pqTop.score = score; pqTop = pq.updateTop(); } @Override public boolean acceptsDocsOutOfOrder() { return false; } @Override public void setNextReader(AtomicReaderContext context) { super.setNextReader(context); afterDoc = after.doc - docBase; } @Override protected int topDocsSize() { return collectedHits < pq.size() ? collectedHits : pq.size(); } @Override protected TopDocs newTopDocs(ScoreDoc[] results, int start) { return results == null ? new TopDocs(totalHits, new ScoreDoc[0], Float.NaN) : new TopDocs(totalHits, results); } } // Assumes docs are scored out of order. private static class OutOfOrderTopScoreDocCollector extends TopScoreDocCollector { private OutOfOrderTopScoreDocCollector(int numHits) { super(numHits); } @Override public void collect(int doc) throws IOException { float score = scorer.score(); // This collector cannot handle NaN assert !Float.isNaN(score); totalHits++; if (score < pqTop.score) { // Doesn't compete w/ bottom entry in queue return; } doc += docBase; if (score == pqTop.score && doc > pqTop.doc) { // Break tie in score by doc ID: return; } pqTop.doc = doc; pqTop.score = score; pqTop = pq.updateTop(); } @Override public boolean acceptsDocsOutOfOrder() { return true; } } // Assumes docs are scored out of order. private static class OutOfOrderPagingScoreDocCollector extends TopScoreDocCollector { private final ScoreDoc after; // this is always after.doc - docBase, to save an add when score == after.score private int afterDoc; private int collectedHits; private OutOfOrderPagingScoreDocCollector(ScoreDoc after, int numHits) { super(numHits); this.after = after; } @Override public void collect(int doc) throws IOException { float score = scorer.score(); // This collector cannot handle NaN assert !Float.isNaN(score); totalHits++; if (score > after.score || (score == after.score && doc <= afterDoc)) { // hit was collected on a previous page return; } if (score < pqTop.score) { // Doesn't compete w/ bottom entry in queue return; } doc += docBase; if (score == pqTop.score && doc > pqTop.doc) { // Break tie in score by doc ID: return; } collectedHits++; pqTop.doc = doc; pqTop.score = score; pqTop = pq.updateTop(); } @Override public boolean acceptsDocsOutOfOrder() { return true; } @Override public void setNextReader(AtomicReaderContext context) { super.setNextReader(context); afterDoc = after.doc - docBase; } @Override protected int topDocsSize() { return collectedHits < pq.size() ? collectedHits : pq.size(); } @Override protected TopDocs newTopDocs(ScoreDoc[] results, int start) { return results == null ? new TopDocs(totalHits, new ScoreDoc[0], Float.NaN) : new TopDocs(totalHits, results); } } /** * Creates a new {@link TopScoreDocCollector} given the number of hits to * collect and whether documents are scored in order by the input * {@link Scorer} to {@link #setScorer(Scorer)}. * * <p><b>NOTE</b>: The instances returned by this method * pre-allocate a full array of length * <code>numHits</code>, and fill the array with sentinel * objects. */ public static TopScoreDocCollector create(int numHits, boolean docsScoredInOrder) { return create(numHits, null, docsScoredInOrder); } /** * Creates a new {@link TopScoreDocCollector} given the number of hits to * collect, the bottom of the previous page, and whether documents are scored in order by the input * {@link Scorer} to {@link #setScorer(Scorer)}. * * <p><b>NOTE</b>: The instances returned by this method * pre-allocate a full array of length * <code>numHits</code>, and fill the array with sentinel * objects. */ public static TopScoreDocCollector create(int numHits, ScoreDoc after, boolean docsScoredInOrder) { if (numHits <= 0) { throw new IllegalArgumentException("numHits must be > 0; please use TotalHitCountCollector if you just need the total hit count"); } if (docsScoredInOrder) { return after == null ? new InOrderTopScoreDocCollector(numHits) : new InOrderPagingScoreDocCollector(after, numHits); } else { return after == null ? new OutOfOrderTopScoreDocCollector(numHits) : new OutOfOrderPagingScoreDocCollector(after, numHits); } } ScoreDoc pqTop; int docBase = 0; Scorer scorer; // prevents instantiation private TopScoreDocCollector(int numHits) { super(new HitQueue(numHits, true)); // HitQueue implements getSentinelObject to return a ScoreDoc, so we know // that at this point top() is already initialized. pqTop = pq.top(); } @Override protected TopDocs newTopDocs(ScoreDoc[] results, int start) { if (results == null) { return EMPTY_TOPDOCS; } // We need to compute maxScore in order to set it in TopDocs. If start == 0, // it means the largest element is already in results, use its score as // maxScore. Otherwise pop everything else, until the largest element is // extracted and use its score as maxScore. float maxScore = Float.NaN; if (start == 0) { maxScore = results[0].score; } else { for (int i = pq.size(); i > 1; i--) { pq.pop(); } maxScore = pq.pop().score; } return new TopDocs(totalHits, results, maxScore); } @Override public void setNextReader(AtomicReaderContext context) { docBase = context.docBase; } @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } }