/** * Copyright 2014 National University of Ireland, Galway. * * This file is part of the SIREn project. Project and contact information: * * https://github.com/rdelbru/SIREn * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.sindice.siren.search.node; import java.io.IOException; import java.util.Comparator; import java.util.LinkedList; import org.apache.lucene.analysis.NumericTokenStream; import org.apache.lucene.document.FieldType.NumericType; import org.apache.lucene.index.FilteredTermsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.ToStringUtils; /** * A {@link NodePrimitiveQuery} that matches numeric values within a * specified range. * * <p> * * To use this, you must first index the * numeric values using {@link NumericField} (expert: {@link * NumericTokenStream}). If your terms are instead textual, * you should use {@link NodeTermRangeQuery}.</p> * * <p>You create a new {@link NodeNumericRangeQuery} with the static * factory methods, eg: * * <pre> * Query q = NodeNumericRangeQuery.newFloatRange("weight", 0.03f, 0.10f, true, true); * </pre> * * matches all documents whose float valued "weight" field * ranges from 0.03 to 0.10, inclusive. * * <p>The performance of {@link NodeNumericRangeQuery} is much better * than the corresponding {@link NodeTermRangeQuery} because the * number of terms that must be searched is usually far * fewer, thanks to trie indexing, described below.</p> * * <p>You can optionally specify a <a * href="#precisionStepDesc"><code>precisionStep</code></a> * when creating this query. This is necessary if you've * changed this configuration from its default (4) during * indexing. Lower values consume more disk space but speed * up searching. Suitable values are between <b>1</b> and * <b>8</b>. A good starting point to test is <b>4</b>, * which is the default value for all <code>Numeric*</code> * classes. See <a href="#precisionStepDesc">below</a> for * details. * * <p>This query defaults to {@linkplain * MultiNodeTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} and never relies on * uses {@linkplain MultiNodeTermQuery#CONSTANT_SCORE_FILTER_REWRITE}. Good * performance is expected for * 32 bit (int/float) ranges with precisionStep ≤8 and 64 * bit (long/double) ranges with precisionStep ≤6. * In the other cases, bad performance has to be expected as the * number of terms is likely to be high. * * <p> See {@link NumericRangeQuery} for more information on how it works. * * <p> Code taken from {@link NumericRangeQuery} and adapted for SIREn. **/ public final class NodeNumericRangeQuery<T extends Number> extends MultiNodeTermQuery { private final String pstepDatatype; private NodeNumericRangeQuery(final String field, final int precisionStep, final NumericType dataType, final T min, final T max, final boolean minInclusive, final boolean maxInclusive) { super(field); if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); this.precisionStep = precisionStep; this.min = min; this.max = max; this.minInclusive = minInclusive; this.maxInclusive = maxInclusive; this.datatype = dataType; pstepDatatype = dataType.toString() + precisionStep; // For bigger precisionSteps this query likely // hits too many terms, so set to CONSTANT_SCORE_FILTER right off // (especially as the FilteredTermsEnum is costly if wasted only for AUTO tests because it // creates new enums from IndexReader for each sub-range) switch (dataType) { case LONG: case DOUBLE: // TODO: to uncomment when filter-based rewrite method is implemented // this.setRewriteMethod( (precisionStep > 6) ? // CONSTANT_SCORE_FILTER_REWRITE : // CONSTANT_SCORE_AUTO_REWRITE_DEFAULT // ); this.setRewriteMethod(CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); break; case INT: case FLOAT: // TODO: to uncomment when filter-based rewrite method is implemented // this.setRewriteMethod( (precisionStep > 8) ? // CONSTANT_SCORE_FILTER_REWRITE : // CONSTANT_SCORE_AUTO_REWRITE_DEFAULT // ); this.setRewriteMethod(CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); break; default: // should never happen throw new IllegalArgumentException("Invalid numeric NumericType"); } // shortcut if upper bound == lower bound if (min != null && min.equals(max)) { this.setRewriteMethod(CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); } } /** * Factory that creates a <code>SirenNumericRangeQuery</code>, that queries a <code>long</code> * range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>. * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. */ public static NodeNumericRangeQuery<Long> newLongRange(final String field, final int precisionStep, final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive) { return new NodeNumericRangeQuery<Long>(field, precisionStep, NumericType.LONG, min, max, minInclusive, maxInclusive); } /** * Factory that creates a <code>SirenNumericRangeQuery</code>, that queries a <code>long</code> * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. */ public static NodeNumericRangeQuery<Long> newLongRange(final String field, final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive) { return new NodeNumericRangeQuery<Long>(field, NumericUtils.PRECISION_STEP_DEFAULT, NumericType.LONG, min, max, minInclusive, maxInclusive); } /** * Factory that creates a <code>SirenNumericRangeQuery</code>, that queries a <code>int</code> * range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>. * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. */ public static NodeNumericRangeQuery<Integer> newIntRange(final String field, final int precisionStep, final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive) { return new NodeNumericRangeQuery<Integer>(field, precisionStep, NumericType.INT, min, max, minInclusive, maxInclusive); } /** * Factory that creates a <code>SirenNumericRangeQuery</code>, that queries a <code>int</code> * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. */ public static NodeNumericRangeQuery<Integer> newIntRange(final String field, final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive) { return new NodeNumericRangeQuery<Integer>(field, NumericUtils.PRECISION_STEP_DEFAULT, NumericType.INT, min, max, minInclusive, maxInclusive); } /** * Factory that creates a <code>SirenNumericRangeQuery</code>, that queries a <code>double</code> * range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>. * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. */ public static NodeNumericRangeQuery<Double> newDoubleRange(final String field, final int precisionStep, final Double min, final Double max, final boolean minInclusive, final boolean maxInclusive) { return new NodeNumericRangeQuery<Double>(field, precisionStep, NumericType.DOUBLE, min, max, minInclusive, maxInclusive); } /** * Factory that creates a <code>SirenNumericRangeQuery</code>, that queries a <code>double</code> * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. */ public static NodeNumericRangeQuery<Double> newDoubleRange(final String field, final Double min, final Double max, final boolean minInclusive, final boolean maxInclusive) { return new NodeNumericRangeQuery<Double>(field, NumericUtils.PRECISION_STEP_DEFAULT, NumericType.DOUBLE, min, max, minInclusive, maxInclusive); } /** * Factory that creates a <code>SirenNumericRangeQuery</code>, that queries a <code>float</code> * range using the given <a href="#precisionStepDesc"><code>precisionStep</code></a>. * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. */ public static NodeNumericRangeQuery<Float> newFloatRange(final String field, final int precisionStep, final Float min, final Float max, final boolean minInclusive, final boolean maxInclusive) { return new NodeNumericRangeQuery<Float>(field, precisionStep, NumericType.FLOAT, min, max, minInclusive, maxInclusive); } /** * Factory that creates a <code>SirenNumericRangeQuery</code>, that queries a <code>float</code> * range using the default <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to <code>null</code>. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. */ public static NodeNumericRangeQuery<Float> newFloatRange(final String field, final Float min, final Float max, final boolean minInclusive, final boolean maxInclusive) { return new NodeNumericRangeQuery<Float>(field, NumericUtils.PRECISION_STEP_DEFAULT, NumericType.FLOAT, min, max, minInclusive, maxInclusive); } @Override @SuppressWarnings("unchecked") protected TermsEnum getTermsEnum(final Terms terms, final AttributeSource atts) throws IOException { // very strange: java.lang.Number itsself is not Comparable, but all subclasses used here are return (min != null && max != null && ((Comparable<T>) min).compareTo(max) > 0) ? TermsEnum.EMPTY : new NumericRangeTermsEnum(terms.iterator(null)); } /** Returns <code>true</code> if the lower endpoint is inclusive */ public boolean includesMin() { return minInclusive; } /** Returns <code>true</code> if the upper endpoint is inclusive */ public boolean includesMax() { return maxInclusive; } /** Returns the lower value of this range query */ public T getMin() { return min; } /** Returns the upper value of this range query */ public T getMax() { return max; } /** Returns the precision step. */ public int getPrecisionStep() { return precisionStep; } @Override public String toString(final String field) { final StringBuffer sb = new StringBuffer(); sb.append(minInclusive ? '[' : '{') .append((min == null) ? "*" : min.toString()) .append(" TO ") .append((max == null) ? "*" : max.toString()) .append(maxInclusive ? ']' : '}') .append(ToStringUtils.boost(this.getBoost())); return this.wrapToStringWithDatatype(sb).toString(); } @Override @SuppressWarnings({"rawtypes"}) public final boolean equals(final Object o) { if (o==this) return true; if (!super.equals(o)) return false; if (o instanceof NodeNumericRangeQuery) { final NodeNumericRangeQuery q=(NodeNumericRangeQuery)o; return ( (q.min == null ? min == null : q.min.equals(min)) && (q.max == null ? max == null : q.max.equals(max)) && minInclusive == q.minInclusive && maxInclusive == q.maxInclusive && precisionStep == q.precisionStep ); } return false; } @Override public final int hashCode() { int hash = super.hashCode(); hash += precisionStep^0x64365465; if (min != null) hash += min.hashCode()^0x14fa55fb; if (max != null) hash += max.hashCode()^0x733fa5fe; return hash + (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+ (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe); } // members (package private, to be also fast accessible by NumericRangeTermEnum) final int precisionStep; final NumericType datatype; final T min, max; final boolean minInclusive,maxInclusive; // used to handle float/double infinity correctly static final long LONG_NEGATIVE_INFINITY = NumericUtils.doubleToSortableLong(Double.NEGATIVE_INFINITY); static final long LONG_POSITIVE_INFINITY = NumericUtils.doubleToSortableLong(Double.POSITIVE_INFINITY); static final int INT_NEGATIVE_INFINITY = NumericUtils.floatToSortableInt(Float.NEGATIVE_INFINITY); static final int INT_POSITIVE_INFINITY = NumericUtils.floatToSortableInt(Float.POSITIVE_INFINITY); /** * Subclass of FilteredTermsEnum for enumerating all terms that match the * sub-ranges for trie range queries, using flex API. * <p> * WARNING: This term enumeration is not guaranteed to be always ordered by * {@link Term#compareTo}. * The ordering depends on how {@link NumericUtils#splitLongRange} and * {@link NumericUtils#splitIntRange} generates the sub-ranges. For * {@link MultiTermQuery} ordering is not relevant. */ private final class NumericRangeTermsEnum extends FilteredTermsEnum { private BytesRef currentLowerBound, currentUpperBound; private final LinkedList<BytesRef> rangeBounds = new LinkedList<BytesRef>(); private final Comparator<BytesRef> termComp; NumericRangeTermsEnum(final TermsEnum tenum) throws IOException { super(tenum); switch (datatype) { case LONG: case DOUBLE: { // lower long minBound; if (datatype == NumericType.LONG) { minBound = (min == null) ? Long.MIN_VALUE : min.longValue(); } else { assert datatype == NumericType.DOUBLE; minBound = (min == null) ? LONG_NEGATIVE_INFINITY : NumericUtils.doubleToSortableLong(min.doubleValue()); } if (!minInclusive && min != null) { if (minBound == Long.MAX_VALUE) break; minBound++; } // upper long maxBound; if (datatype == NumericType.LONG) { maxBound = (max == null) ? Long.MAX_VALUE : max.longValue(); } else { assert datatype == NumericType.DOUBLE; maxBound = (max == null) ? LONG_POSITIVE_INFINITY : NumericUtils.doubleToSortableLong(max.doubleValue()); } if (!maxInclusive && max != null) { if (maxBound == Long.MIN_VALUE) break; maxBound--; } NumericUtils.splitLongRange(new NumericUtils.LongRangeBuilder() { @Override public final void addRange(final BytesRef minPrefixCoded, final BytesRef maxPrefixCoded) { final BytesRef min = new BytesRef(pstepDatatype); min.append(minPrefixCoded); final BytesRef max = new BytesRef(pstepDatatype); max.append(maxPrefixCoded); rangeBounds.add(min); rangeBounds.add(max); } }, precisionStep, minBound, maxBound); break; } case INT: case FLOAT: { // lower int minBound; if (datatype == NumericType.INT) { minBound = (min == null) ? Integer.MIN_VALUE : min.intValue(); } else { assert datatype == NumericType.FLOAT; minBound = (min == null) ? INT_NEGATIVE_INFINITY : NumericUtils.floatToSortableInt(min.floatValue()); } if (!minInclusive && min != null) { if (minBound == Integer.MAX_VALUE) break; minBound++; } // upper int maxBound; if (datatype == NumericType.INT) { maxBound = (max == null) ? Integer.MAX_VALUE : max.intValue(); } else { assert datatype == NumericType.FLOAT; maxBound = (max == null) ? INT_POSITIVE_INFINITY : NumericUtils.floatToSortableInt(max.floatValue()); } if (!maxInclusive && max != null) { if (maxBound == Integer.MIN_VALUE) break; maxBound--; } NumericUtils.splitIntRange(new NumericUtils.IntRangeBuilder() { @Override public final void addRange(final BytesRef minPrefixCoded, final BytesRef maxPrefixCoded) { final BytesRef min = new BytesRef(pstepDatatype); min.append(minPrefixCoded); final BytesRef max = new BytesRef(pstepDatatype); max.append(maxPrefixCoded); rangeBounds.add(min); rangeBounds.add(max); } }, precisionStep, minBound, maxBound); break; } default: // should never happen throw new IllegalArgumentException("Invalid NumericType"); } termComp = this.getComparator(); } private void nextRange() { assert rangeBounds.size() % 2 == 0; currentLowerBound = rangeBounds.removeFirst(); assert currentUpperBound == null || termComp.compare(currentUpperBound, currentLowerBound) <= 0 : "The current upper bound must be <= the new lower bound"; currentUpperBound = rangeBounds.removeFirst(); } @Override protected final BytesRef nextSeekTerm(final BytesRef term) throws IOException { while (rangeBounds.size() >= 2) { this.nextRange(); // if the new upper bound is before the term parameter, the sub-range is never a hit if (term != null && termComp.compare(term, currentUpperBound) > 0) continue; // never seek backwards, so use current term if lower bound is smaller return (term != null && termComp.compare(term, currentLowerBound) > 0) ? term : currentLowerBound; } // no more sub-range enums available assert rangeBounds.isEmpty(); currentLowerBound = currentUpperBound = null; return null; } @Override protected final AcceptStatus accept(final BytesRef term) { while (currentUpperBound == null || termComp.compare(term, currentUpperBound) > 0) { if (rangeBounds.isEmpty()) return AcceptStatus.END; // peek next sub-range, only seek if the current term is smaller than next lower bound if (termComp.compare(term, rangeBounds.getFirst()) < 0) return AcceptStatus.NO_AND_SEEK; // step forward to next range without seeking, as next lower range bound is less or equal current term this.nextRange(); } return AcceptStatus.YES; } } }