package org.apache.lucene.sandbox.queries; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.RandomAccessOrds; import org.apache.lucene.index.SingletonSortedSetDocValues; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; /** * SortField for {@link SortedSetDocValues}. * <p> * A SortedSetDocValues contains multiple values for a field, so sorting with * this technique "selects" a value as the representative sort value for the document. * <p> * By default, the minimum value in the set is selected as the sort value, but * this can be customized. Selectors other than the default do have some limitations * (see below) to ensure that all selections happen in constant-time for performance. * <p> * Like sorting by string, this also supports sorting missing values as first or last, * via {@link #setMissingValue(Object)}. * <p> * Limitations: * <ul> * <li>Fields containing {@link Integer#MAX_VALUE} or more unique values * are unsupported. * <li>Selectors other than the default ({@link Selector#MIN}) require * optional codec support. However several codecs provided by Lucene, * including the current default codec, support this. * </ul> */ public class SortedSetSortField extends SortField { /** Selects a value from the document's set to use as the sort value */ public static enum Selector { /** * Selects the minimum value in the set */ MIN, /** * Selects the maximum value in the set */ MAX, /** * Selects the middle value in the set. * <p> * If the set has an even number of values, the lower of the middle two is chosen. */ MIDDLE_MIN, /** * Selects the middle value in the set. * <p> * If the set has an even number of values, the higher of the middle two is chosen */ MIDDLE_MAX } private final Selector selector; /** * Creates a sort, possibly in reverse, by the minimum value in the set * for the document. * @param field Name of field to sort by. Must not be null. * @param reverse True if natural order should be reversed. */ public SortedSetSortField(String field, boolean reverse) { this(field, reverse, Selector.MIN); } /** * Creates a sort, possibly in reverse, specifying how the sort value from * the document's set is selected. * @param field Name of field to sort by. Must not be null. * @param reverse True if natural order should be reversed. * @param selector custom selector for choosing the sort value from the set. * <p> * NOTE: selectors other than {@link Selector#MIN} require optional codec support. */ public SortedSetSortField(String field, boolean reverse, Selector selector) { super(field, SortField.Type.CUSTOM, reverse); if (selector == null) { throw new NullPointerException(); } this.selector = selector; } /** Returns the selector in use for this sort */ public Selector getSelector() { return selector; } @Override public int hashCode() { return 31 * super.hashCode() + selector.hashCode(); } @Override public boolean equals(Object obj) { if (this == obj) return true; if (!super.equals(obj)) return false; if (getClass() != obj.getClass()) return false; SortedSetSortField other = (SortedSetSortField) obj; if (selector != other.selector) return false; return true; } @Override public String toString() { StringBuilder buffer = new StringBuilder(); buffer.append("<sortedset" + ": \"").append(getField()).append("\">"); if (getReverse()) buffer.append('!'); if (missingValue != null) { buffer.append(" missingValue="); buffer.append(missingValue); } buffer.append(" selector="); buffer.append(selector); return buffer.toString(); } /** * Set how missing values (the empty set) are sorted. * <p> * Note that this must be {@link #STRING_FIRST} or {@link #STRING_LAST}. */ @Override public void setMissingValue(Object missingValue) { if (missingValue != STRING_FIRST && missingValue != STRING_LAST) { throw new IllegalArgumentException("For SORTED_SET type, missing value must be either STRING_FIRST or STRING_LAST"); } this.missingValue = missingValue; } @Override public FieldComparator<?> getComparator(int numHits, int sortPos) throws IOException { return new FieldComparator.TermOrdValComparator(numHits, getField(), missingValue == STRING_LAST) { @Override protected SortedDocValues getSortedDocValues(AtomicReaderContext context, String field) throws IOException { SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field); if (sortedSet.getValueCount() >= Integer.MAX_VALUE) { throw new UnsupportedOperationException("fields containing more than " + (Integer.MAX_VALUE-1) + " unique terms are unsupported"); } if (sortedSet instanceof SingletonSortedSetDocValues) { // it's actually single-valued in practice, but indexed as multi-valued, // so just sort on the underlying single-valued dv directly. // regardless of selector type, this optimization is safe! return ((SingletonSortedSetDocValues) sortedSet).getSortedDocValues(); } else if (selector == Selector.MIN) { return new MinValue(sortedSet); } else { if (sortedSet instanceof RandomAccessOrds == false) { throw new UnsupportedOperationException("codec does not support random access ordinals, cannot use selector: " + selector); } RandomAccessOrds randomOrds = (RandomAccessOrds) sortedSet; switch(selector) { case MAX: return new MaxValue(randomOrds); case MIDDLE_MIN: return new MiddleMinValue(randomOrds); case MIDDLE_MAX: return new MiddleMaxValue(randomOrds); case MIN: default: throw new AssertionError(); } } } }; } /** Wraps a SortedSetDocValues and returns the first ordinal (min) */ static class MinValue extends SortedDocValues { final SortedSetDocValues in; MinValue(SortedSetDocValues in) { this.in = in; } @Override public int getOrd(int docID) { in.setDocument(docID); return (int) in.nextOrd(); } @Override public void lookupOrd(int ord, BytesRef result) { in.lookupOrd(ord, result); } @Override public int getValueCount() { return (int) in.getValueCount(); } @Override public int lookupTerm(BytesRef key) { return (int) in.lookupTerm(key); } } /** Wraps a SortedSetDocValues and returns the last ordinal (max) */ static class MaxValue extends SortedDocValues { final RandomAccessOrds in; MaxValue(RandomAccessOrds in) { this.in = in; } @Override public int getOrd(int docID) { in.setDocument(docID); final int count = in.cardinality(); if (count == 0) { return -1; } else { return (int) in.ordAt(count-1); } } @Override public void lookupOrd(int ord, BytesRef result) { in.lookupOrd(ord, result); } @Override public int getValueCount() { return (int) in.getValueCount(); } @Override public int lookupTerm(BytesRef key) { return (int) in.lookupTerm(key); } } /** Wraps a SortedSetDocValues and returns the middle ordinal (or min of the two) */ static class MiddleMinValue extends SortedDocValues { final RandomAccessOrds in; MiddleMinValue(RandomAccessOrds in) { this.in = in; } @Override public int getOrd(int docID) { in.setDocument(docID); final int count = in.cardinality(); if (count == 0) { return -1; } else { return (int) in.ordAt((count-1) >>> 1); } } @Override public void lookupOrd(int ord, BytesRef result) { in.lookupOrd(ord, result); } @Override public int getValueCount() { return (int) in.getValueCount(); } @Override public int lookupTerm(BytesRef key) { return (int) in.lookupTerm(key); } } /** Wraps a SortedSetDocValues and returns the middle ordinal (or max of the two) */ static class MiddleMaxValue extends SortedDocValues { final RandomAccessOrds in; MiddleMaxValue(RandomAccessOrds in) { this.in = in; } @Override public int getOrd(int docID) { in.setDocument(docID); final int count = in.cardinality(); if (count == 0) { return -1; } else { return (int) in.ordAt(count >>> 1); } } @Override public void lookupOrd(int ord, BytesRef result) { in.lookupOrd(ord, result); } @Override public int getValueCount() { return (int) in.getValueCount(); } @Override public int lookupTerm(BytesRef key) { return (int) in.lookupTerm(key); } } }