/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.util.TimSorter;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
/**
* Sorts documents of a given index by returning a permutation on the document
* IDs.
* @lucene.experimental
*/
final class Sorter {
/** The sort applied by this sorter; the constructor rejects sorts that need scores. */
final Sort sort;

/**
 * Creates a new Sorter to sort the index with {@code sort}.
 *
 * @param sort the sort order to apply
 * @throws IllegalArgumentException if {@code sort} needs relevance scores
 */
Sorter(Sort sort) {
  final boolean usesScores = sort.needsScores();
  if (usesScores) {
    throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
  }
  this.sort = sort;
}
/**
 * A permutation of doc IDs. For every document ID between {@code 0} and
 * {@link IndexReader#maxDoc()}, {@code oldToNew(newToOld(docID))} must
 * return {@code docID}.
 */
abstract static class DocMap {

  /**
   * Return the number of documents in this map. This must be equal to the
   * {@link org.apache.lucene.index.LeafReader#maxDoc() number of documents} of the
   * {@link org.apache.lucene.index.LeafReader} which is sorted.
   */
  abstract int size();

  /**
   * Given a doc ID from the original index, return its ordinal in the
   * sorted index.
   */
  abstract int oldToNew(int docID);

  /**
   * Given the ordinal of a doc ID in the sorted index, return its doc ID in
   * the original index.
   */
  abstract int newToOld(int docID);
}
/**
 * Check consistency of a {@link DocMap}, useful for assertions.
 * <p>
 * For every old doc ID {@code i} in {@code [0, docMap.size())} this verifies
 * that {@code oldToNew(i)} lies in the same range and that
 * {@code newToOld(oldToNew(i)) == i}.
 *
 * @param docMap the mapping to verify
 * @return {@code true} if every doc ID round-trips; {@code false} at the first
 *     violation when assertions are disabled (with assertions enabled the
 *     violation trips an {@code assert} instead)
 */
static boolean isConsistent(DocMap docMap) {
  final int maxDoc = docMap.size();
  for (int i = 0; i < maxDoc; ++i) {
    final int newID = docMap.oldToNew(i);
    // Validate the range BEFORE handing newID to newToOld: an out-of-range
    // value should be reported as an inconsistency rather than be passed to
    // the reverse lookup, which may throw on an invalid index.
    assert newID >= 0 && newID < maxDoc : "doc IDs must be in [0-" + maxDoc + "[, got " + newID;
    if (newID < 0 || newID >= maxDoc) {
      return false;
    }
    final int oldID = docMap.newToOld(newID);
    assert i == oldID : "mapping is inconsistent: " + i + " --oldToNew--> " + newID + " --newToOld--> " + oldID;
    if (i != oldID) {
      return false;
    }
  }
  return true;
}
/** Orders documents by comparing pairs of doc IDs. */
abstract static class DocComparator {

  /**
   * Compare docID1 against docID2. The contract for the return value is the
   * same as {@link Comparator#compare(Object, Object)}.
   *
   * @param docID1 the first doc ID
   * @param docID2 the second doc ID
   */
  public abstract int compare(int docID1, int docID2);
}
/**
 * A {@link TimSorter} that reorders an array of doc IDs in place, using a
 * {@link Sorter.DocComparator} to order the doc IDs stored in the array.
 * The scratch array holds {@code docs.length / 64} entries, the same value
 * that is passed to the {@link TimSorter} constructor.
 */
private static final class DocValueSorter extends TimSorter {

  /** The doc IDs being sorted; mutated in place. */
  private final int[] ids;
  /** Ordering applied to the doc IDs held in {@link #ids}. */
  private final Sorter.DocComparator order;
  /** Scratch space written by {@link #save} and read by {@link #restore} / {@link #compareSaved}. */
  private final int[] scratch;

  DocValueSorter(int[] docs, Sorter.DocComparator comparator) {
    super(docs.length / 64);
    ids = docs;
    order = comparator;
    scratch = new int[docs.length / 64];
  }

  /** Compares the doc IDs stored at slots {@code i} and {@code j}. */
  @Override
  protected int compare(int i, int j) {
    final int left = ids[i];
    final int right = ids[j];
    return order.compare(left, right);
  }

  /** Exchanges the doc IDs stored at slots {@code i} and {@code j}. */
  @Override
  protected void swap(int i, int j) {
    final int held = ids[i];
    ids[i] = ids[j];
    ids[j] = held;
  }

  /** Overwrites slot {@code dest} with the doc ID at slot {@code src}. */
  @Override
  protected void copy(int src, int dest) {
    ids[dest] = ids[src];
  }

  /** Copies {@code len} doc IDs starting at slot {@code i} to the start of the scratch array. */
  @Override
  protected void save(int i, int len) {
    System.arraycopy(ids, i, scratch, 0, len);
  }

  /** Writes scratch entry {@code i} back into slot {@code j}. */
  @Override
  protected void restore(int i, int j) {
    ids[j] = scratch[i];
  }

  /** Compares scratch entry {@code i} against the doc ID at slot {@code j}. */
  @Override
  protected int compareSaved(int i, int j) {
    final int saved = scratch[i];
    final int live = ids[j];
    return order.compare(saved, live);
  }
}
/**
 * Computes the old-to-new permutation over the given comparator.
 *
 * @param maxDoc number of documents; doc IDs {@code 0 .. maxDoc-1} are sorted
 * @param comparator the ordering to apply to doc IDs
 * @return the resulting {@link Sorter.DocMap}, or {@code null} when no pair of
 *     adjacent doc IDs is out of order under {@code comparator}
 */
private static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) {
  // Walk adjacent pairs and stop at the first one that is out of order.
  int next = 1;
  while (next < maxDoc && comparator.compare(next - 1, next) <= 0) {
    ++next;
  }
  if (next >= maxDoc) {
    // every adjacent pair was in order: the index is already sorted
    return null;
  }

  // Start from the identity permutation and sort the doc IDs.
  final int[] permutation = new int[maxDoc];
  for (int doc = 0; doc < maxDoc; ++doc) {
    permutation[doc] = doc;
  }
  // It can be common to sort a reader, add docs, sort it again, ... and in
  // that case timSort can save a lot of time
  new DocValueSorter(permutation, comparator).sort(0, permutation.length);
  // permutation is now the newToOld mapping

  final PackedLongValues packedNewToOld = packMonotonic(permutation);

  // Invert the mapping in place, reading from the packed copy.
  for (int newDoc = 0; newDoc < maxDoc; ++newDoc) {
    permutation[(int) packedNewToOld.get(newDoc)] = newDoc;
  }
  // permutation is now the oldToNew mapping

  final PackedLongValues packedOldToNew = packMonotonic(permutation);

  return new Sorter.DocMap() {
    @Override
    public int size() {
      return maxDoc;
    }

    @Override
    public int newToOld(int docID) {
      return (int) packedNewToOld.get(docID);
    }

    @Override
    public int oldToNew(int docID) {
      return (int) packedOldToNew.get(docID);
    }
  };
}

/**
 * Copies {@code ids} into a {@link PackedLongValues} built with the monotonic
 * builder. The monotonic encoding is used because it wastes very little
 * memory if the index is in random order but can save a lot of memory if the
 * index is already "almost" sorted.
 */
private static PackedLongValues packMonotonic(int[] ids) {
  final PackedLongValues.Builder builder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
  for (int id : ids) {
    builder.add(id);
  }
  return builder.build();
}
/**
 * Returns the native sort type for {@link SortedSetSortField} and {@link SortedNumericSortField},
 * {@link SortField#getType()} otherwise.
 *
 * @param sortField the sort field to inspect
 * @return {@link SortField.Type#STRING} for a {@link SortedSetSortField}, the numeric type of a
 *     {@link SortedNumericSortField}, or the field's own type for anything else
 */
static SortField.Type getSortFieldType(SortField sortField) {
  if (sortField instanceof SortedSetSortField) {
    return SortField.Type.STRING;
  }
  if (sortField instanceof SortedNumericSortField) {
    final SortedNumericSortField numericField = (SortedNumericSortField) sortField;
    return numericField.getNumericType();
  }
  return sortField.getType();
}
/**
 * Wraps a {@link SortedNumericDocValues} as a single-valued view if the field is an instance of
 * {@link SortedNumericSortField}, returns {@link NumericDocValues} for the field otherwise.
 *
 * @param reader the reader to pull doc values from
 * @param sortField the sort field naming the doc-values field
 * @throws IOException if the underlying doc-values lookup throws it
 */
static NumericDocValues getOrWrapNumeric(LeafReader reader, SortField sortField) throws IOException {
  if (!(sortField instanceof SortedNumericSortField)) {
    // plain single-valued numeric field
    return DocValues.getNumeric(reader, sortField.getField());
  }
  final SortedNumericSortField multiValued = (SortedNumericSortField) sortField;
  return SortedNumericSelector.wrap(
      DocValues.getSortedNumeric(reader, multiValued.getField()),
      multiValued.getSelector(),
      multiValued.getNumericType());
}
/**
 * Wraps a {@link SortedSetDocValues} as a single-valued view if the field is an instance of
 * {@link SortedSetSortField}, returns {@link SortedDocValues} for the field otherwise.
 *
 * @param reader the reader to pull doc values from
 * @param sortField the sort field naming the doc-values field
 * @throws IOException if the underlying doc-values lookup throws it
 */
static SortedDocValues getOrWrapSorted(LeafReader reader, SortField sortField) throws IOException {
  if (!(sortField instanceof SortedSetSortField)) {
    // plain single-valued sorted field
    return DocValues.getSorted(reader, sortField.getField());
  }
  final SortedSetSortField multiValued = (SortedSetSortField) sortField;
  return SortedSetSelector.wrap(
      DocValues.getSortedSet(reader, multiValued.getField()),
      multiValued.getSelector());
}
/**
 * Builds a {@link DocComparator} for {@code sortField} over the documents of {@code reader}.
 * The doc values are obtained lazily through suppliers that delegate to
 * {@link #getOrWrapSorted} and {@link #getOrWrapNumeric}.
 *
 * @param reader the reader whose documents are compared
 * @param sortField the sort field to compare on
 * @throws IOException if reading the doc values throws it
 */
static DocComparator getDocComparator(LeafReader reader, SortField sortField) throws IOException {
  final int docCount = reader.maxDoc();
  final SortedDocValuesSupplier sortedValues = () -> getOrWrapSorted(reader, sortField);
  final NumericDocValuesSupplier numericValues = () -> getOrWrapNumeric(reader, sortField);
  return getDocComparator(docCount, sortField, sortedValues, numericValues);
}
/**
 * Supplies a {@link NumericDocValues} instance on demand. Unlike
 * {@code java.util.function.Supplier}, {@link #get()} may throw
 * {@link IOException}.
 */
@FunctionalInterface
interface NumericDocValuesSupplier {
  /**
   * Returns the numeric doc values.
   *
   * @throws IOException if obtaining the doc values fails
   */
  NumericDocValues get() throws IOException;
}
/**
 * Supplies a {@link SortedDocValues} instance on demand. Unlike
 * {@code java.util.function.Supplier}, {@link #get()} may throw
 * {@link IOException}.
 */
@FunctionalInterface
interface SortedDocValuesSupplier {
  /**
   * Returns the sorted doc values.
   *
   * @throws IOException if obtaining the doc values fails
   */
  SortedDocValues get() throws IOException;
}
/**
 * Builds a {@link DocComparator} for one sort field by materializing that field's value for
 * every document into an array of length {@code maxDoc}.
 * <p>
 * We cannot use the {@link FieldComparator} API because that API requires that you send it docIDs
 * in order. Note that this API allocates arrays[maxDoc] to hold the native values needed for
 * comparison, but 1) they are transient (only alive while sorting this one segment), and 2) in
 * the typical index sorting case, they are only used to sort newly flushed segments, which will
 * be smaller than merged segments.
 *
 * @param maxDoc size of the per-document value array
 * @param sortField the sort field; supplies the type, reverse flag and missing value
 * @param sortedProvider source of the doc values, consulted only for the STRING type
 * @param numericProvider source of the doc values, consulted only for the numeric types
 * @return a comparator over doc IDs for this field
 * @throws IOException if reading the doc values throws it
 * @throws IllegalArgumentException if the sort type is not STRING, LONG, INT, DOUBLE or FLOAT
 */
static DocComparator getDocComparator(int maxDoc,
    SortField sortField,
    SortedDocValuesSupplier sortedProvider,
    NumericDocValuesSupplier numericProvider) throws IOException {

  // -1 flips the sign of every comparison when the field is reversed
  final int reverseMul = sortField.getReverse() ? -1 : 1;
  final SortField.Type sortType = getSortFieldType(sortField);

  switch (sortType) {

    case STRING: {
      final SortedDocValues sortedValues = sortedProvider.get();
      // Documents without a value keep this ord: the largest possible ord when
      // STRING_LAST was requested, the smallest possible ord otherwise.
      final int missingOrd = sortField.getMissingValue() == SortField.STRING_LAST
          ? Integer.MAX_VALUE
          : Integer.MIN_VALUE;

      final int[] ordByDoc = new int[maxDoc];
      Arrays.fill(ordByDoc, missingOrd);
      for (int docID = sortedValues.nextDoc(); docID != NO_MORE_DOCS; docID = sortedValues.nextDoc()) {
        ordByDoc[docID] = sortedValues.ordValue();
      }

      return new DocComparator() {
        @Override
        public int compare(int docID1, int docID2) {
          return reverseMul * Integer.compare(ordByDoc[docID1], ordByDoc[docID2]);
        }
      };
    }

    case LONG: {
      final NumericDocValues numericValues = numericProvider.get();
      final long[] longByDoc = new long[maxDoc];
      // Without a configured missing value, absent docs keep the array default (0).
      final Object missing = sortField.getMissingValue();
      if (missing != null) {
        Arrays.fill(longByDoc, (Long) missing);
      }
      for (int docID = numericValues.nextDoc(); docID != NO_MORE_DOCS; docID = numericValues.nextDoc()) {
        longByDoc[docID] = numericValues.longValue();
      }

      return new DocComparator() {
        @Override
        public int compare(int docID1, int docID2) {
          return reverseMul * Long.compare(longByDoc[docID1], longByDoc[docID2]);
        }
      };
    }

    case INT: {
      final NumericDocValues numericValues = numericProvider.get();
      final int[] intByDoc = new int[maxDoc];
      final Object missing = sortField.getMissingValue();
      if (missing != null) {
        Arrays.fill(intByDoc, (Integer) missing);
      }
      for (int docID = numericValues.nextDoc(); docID != NO_MORE_DOCS; docID = numericValues.nextDoc()) {
        // the doc value is exposed as a long; narrow it to int
        intByDoc[docID] = (int) numericValues.longValue();
      }

      return new DocComparator() {
        @Override
        public int compare(int docID1, int docID2) {
          return reverseMul * Integer.compare(intByDoc[docID1], intByDoc[docID2]);
        }
      };
    }

    case DOUBLE: {
      final NumericDocValues numericValues = numericProvider.get();
      final double[] doubleByDoc = new double[maxDoc];
      final Object missing = sortField.getMissingValue();
      if (missing != null) {
        Arrays.fill(doubleByDoc, (Double) missing);
      }
      for (int docID = numericValues.nextDoc(); docID != NO_MORE_DOCS; docID = numericValues.nextDoc()) {
        // the long doc value is reinterpreted as the bit pattern of a double
        doubleByDoc[docID] = Double.longBitsToDouble(numericValues.longValue());
      }

      return new DocComparator() {
        @Override
        public int compare(int docID1, int docID2) {
          return reverseMul * Double.compare(doubleByDoc[docID1], doubleByDoc[docID2]);
        }
      };
    }

    case FLOAT: {
      final NumericDocValues numericValues = numericProvider.get();
      final float[] floatByDoc = new float[maxDoc];
      final Object missing = sortField.getMissingValue();
      if (missing != null) {
        Arrays.fill(floatByDoc, (Float) missing);
      }
      for (int docID = numericValues.nextDoc(); docID != NO_MORE_DOCS; docID = numericValues.nextDoc()) {
        // the low 32 bits of the doc value are reinterpreted as the bit pattern of a float
        floatByDoc[docID] = Float.intBitsToFloat((int) numericValues.longValue());
      }

      return new DocComparator() {
        @Override
        public int compare(int docID1, int docID2) {
          return reverseMul * Float.compare(floatByDoc[docID1], floatByDoc[docID2]);
        }
      };
    }

    default:
      throw new IllegalArgumentException("unhandled SortField.getType()=" + sortField.getType());
  }
}
/**
 * Returns a mapping from the old document ID to its new location in the
 * sorted index. One {@link DocComparator} is built per {@link SortField} of
 * the configured sort, and the comparators are then handed to
 * {@link #sort(int, DocComparator[])}.
 * <p>
 * A return value of {@code null} is allowed and means that
 * {@code reader} is already sorted.
 * <p>
 * <b>NOTE:</b> deleted documents are expected to appear in the mapping as
 * well, they will however be marked as deleted in the sorted view.
 *
 * @param reader the reader whose documents are to be sorted
 * @throws IOException if building a comparator throws it
 */
DocMap sort(LeafReader reader) throws IOException {
  final SortField[] sortFields = sort.getSort();
  final DocComparator[] perField = new DocComparator[sortFields.length];
  int slot = 0;
  for (SortField sortField : sortFields) {
    perField[slot++] = getDocComparator(reader, sortField);
  }
  return sort(reader.maxDoc(), perField);
}
/**
 * Sorts doc IDs {@code 0 .. maxDoc-1} using {@code comparators} in array order:
 * the first comparator that returns a non-zero result decides, and documents
 * that compare equal under all of them are ordered by doc ID.
 *
 * @param maxDoc number of documents to sort
 * @param comparators per-field comparators, most significant first
 * @return the resulting {@link DocMap}, or {@code null} if the documents are already in sorted order
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
DocMap sort(int maxDoc, DocComparator[] comparators) throws IOException {
  return sort(maxDoc, new DocComparator() {
    @Override
    public int compare(int docID1, int docID2) {
      for (DocComparator perField : comparators) {
        final int result = perField.compare(docID1, docID2);
        if (result != 0) {
          return result;
        }
      }
      // docid order tiebreak
      return Integer.compare(docID1, docID2);
    }
  });
}
/**
 * Returns the identifier of this {@link Sorter}, which is the string form of
 * its {@link Sort}.
 * <p>This identifier is similar to {@link Object#hashCode()} and should be
 * chosen so that two instances of this class that sort documents likewise
 * will have the same identifier. On the contrary, this identifier should be
 * different on different {@link Sort sorts}.
 *
 * @return {@code sort.toString()}
 */
public String getID() {
  return this.sort.toString();
}
/** Returns the same string as {@link #getID()}: the string form of the configured sort. */
@Override
public String toString() {
  return sort.toString();
}
/**
 * A placeholder {@link Scorer} constructed with a {@code null} argument.
 * Its accessors return the values of three fields that are initialized here
 * and never reassigned in this block: {@link Scorer#docID()} returns
 * {@code -1}, {@code freq()} returns {@code 1} and {@code score()} returns
 * the default {@code 0.0f}. Calling {@code iterator()} always throws
 * {@link UnsupportedOperationException}.
 */
static final Scorer FAKESCORER = new Scorer(null) {

  float score;
  int doc = -1;
  int freq = 1;

  @Override
  public int docID() {
    return doc;
  }

  // Iteration is not supported on this placeholder.
  @Override
  public DocIdSetIterator iterator() {
    throw new UnsupportedOperationException();
  }

  @Override
  public int freq() throws IOException {
    return freq;
  }

  @Override
  public float score() throws IOException {
    return score;
  }
};
}