package org.apache.lucene.index.sorter; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TopDocsCollector; import org.apache.lucene.search.TotalHitCountCollector; /** * A {@link Collector} that early terminates collection of documents on a * per-segment basis, if the segment was sorted according to the given * {@link Sort}. * * <p> * <b>NOTE:</b> the {@code Collector} detects sorted segments according to * {@link SortingMergePolicy}, so it's best used in conjunction with it. Also, * it collects up to a specified {@code numDocsToCollect} from each segment, * and therefore is mostly suitable for use in conjunction with collectors such as * {@link TopDocsCollector}, and not e.g. {@link TotalHitCountCollector}. * <p> * <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same * order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs} * will be correct. However the total of {@link TopDocsCollector#getTotalHits() * hit count} will be underestimated since not all matching documents will have * been collected. * <p> * <b>NOTE</b>: This {@code Collector} uses {@link Sort#toString()} to detect * whether a segment was sorted with the same {@code Sort}. This has * two implications: * <ul> * <li>if a custom comparator is not implemented correctly and returns * different identifiers for equivalent instances, this collector will not * detect sorted segments,</li> * <li>if you suddenly change the {@link IndexWriter}'s * {@code SortingMergePolicy} to sort according to another criterion and if both * the old and the new {@code Sort}s have the same identifier, this * {@code Collector} will incorrectly detect sorted segments.</li> * </ul> * * @lucene.experimental */ public class EarlyTerminatingSortingCollector extends Collector { /** The wrapped Collector */ protected final Collector in; /** Sort used to sort the search results */ protected final Sort sort; /** Number of documents to collect in each segment */ protected final int numDocsToCollect; /** Number of documents to collect in the current segment being processed */ protected int segmentTotalCollect; /** True if the current segment being processed is sorted by {@link #sort} */ protected boolean segmentSorted; private int numCollected; /** * Create a new {@link EarlyTerminatingSortingCollector} instance. * * @param in * the collector to wrap * @param sort * the sort you are sorting the search results on * @param numDocsToCollect * the number of documents to collect on each segment. When wrapping * a {@link TopDocsCollector}, this number should be the number of * hits. */ public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) { if (numDocsToCollect <= 0) { throw new IllegalStateException("numDocsToCollect must always be > 0, got " + segmentTotalCollect); } this.in = in; this.sort = sort; this.numDocsToCollect = numDocsToCollect; } @Override public void setScorer(Scorer scorer) throws IOException { in.setScorer(scorer); } @Override public void collect(int doc) throws IOException { in.collect(doc); if (++numCollected >= segmentTotalCollect) { throw new CollectionTerminatedException(); } } @Override public void setNextReader(AtomicReaderContext context) throws IOException { in.setNextReader(context); segmentSorted = SortingMergePolicy.isSorted(context.reader(), sort); segmentTotalCollect = segmentSorted ? numDocsToCollect : Integer.MAX_VALUE; numCollected = 0; } @Override public boolean acceptsDocsOutOfOrder() { return !segmentSorted && in.acceptsDocsOutOfOrder(); } }