package org.apache.lucene.analysis; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.document.NumericField; // for javadocs import org.apache.lucene.search.NumericRangeQuery; // for javadocs import org.apache.lucene.search.NumericRangeFilter; // for javadocs import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; /** * <b>Expert:</b> This class provides a {@link TokenStream} * for indexing numeric values that can be used by {@link * NumericRangeQuery} or {@link NumericRangeFilter}. * * <p>Note that for simple usage, {@link NumericField} is * recommended. {@link NumericField} disables norms and * term freqs, as they are not usually needed during * searching. If you need to change these settings, you * should use this class. * * <p>See {@link NumericField} for capabilities of fields * indexed numerically.</p> * * <p>Here's an example usage, for an <code>int</code> field: * * <pre> * Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value)); * field.setOmitNorms(true); * field.setOmitTermFreqAndPositions(true); * document.add(field); * </pre> * * <p>For optimal performance, re-use the TokenStream and Field instance * for more than one document: * * <pre> * NumericTokenStream stream = new NumericTokenStream(precisionStep); * Field field = new Field(name, stream); * field.setOmitNorms(true); * field.setOmitTermFreqAndPositions(true); * Document document = new Document(); * document.add(field); * * for(all documents) { * stream.setIntValue(value) * writer.addDocument(document); * } * </pre> * * <p>This stream is not intended to be used in analyzers; * it's more for iterating the different precisions during * indexing a specific numeric value.</p> * <p><b>NOTE</b>: as token streams are only consumed once * the document is added to the index, if you index more * than one numeric field, use a separate <code>NumericTokenStream</code> * instance for each.</p> * * <p>See {@link NumericRangeQuery} for more details on the * <a * href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a> * parameter as well as how numeric fields work under the hood.</p> * * @lucene.experimental * * @since 2.9 */ public final class NumericTokenStream extends TokenStream { /** The full precision token gets this token type assigned. */ public static final String TOKEN_TYPE_FULL_PREC = "fullPrecNumeric"; /** The lower precision tokens gets this token type assigned. */ public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric"; /** <b>Expert:</b> Use this attribute to get the details of the currently generated token * @lucene.experimental * @since 4.0 */ public interface NumericTermAttribute extends Attribute { /** Returns current shift value, undefined before first token */ int getShift(); /** Returns {@link NumericTokenStream}'s raw value as {@code long} */ long getRawValue(); /** Returns value size in bits (32 for {@code float}, {@code int}; 64 for {@code double}, {@code long}) */ int getValueSize(); } private static final class NumericAttributeFactory extends AttributeFactory { private final AttributeFactory delegate; private NumericTokenStream ts = null; NumericAttributeFactory(AttributeFactory delegate) { this.delegate = delegate; } @Override public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) { if (attClass == NumericTermAttribute.class) return new NumericTermAttributeImpl(ts); if (CharTermAttribute.class.isAssignableFrom(attClass)) throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute."); return delegate.createAttributeInstance(attClass); } } private static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute,TermToBytesRefAttribute { private final NumericTokenStream ts; public NumericTermAttributeImpl(NumericTokenStream ts) { this.ts = ts; } public int toBytesRef(BytesRef bytes) { try { assert ts.valSize == 64 || ts.valSize == 32; return (ts.valSize == 64) ? NumericUtils.longToPrefixCoded(ts.value, ts.shift, bytes) : NumericUtils.intToPrefixCoded((int) ts.value, ts.shift, bytes); } catch (IllegalArgumentException iae) { // return empty token before first bytes.length = 0; return 0; } } public int getShift() { return ts.shift; } public long getRawValue() { return ts.value; } public int getValueSize() { return ts.valSize; } @Override public void clear() { // this attribute has no contents to clear } @Override public boolean equals(Object other) { return other == this; } @Override public int hashCode() { return System.identityHashCode(this); } @Override public void copyTo(AttributeImpl target) { // this attribute has no contents to copy } @Override public Object clone() { // cannot throw CloneNotSupportedException (checked) throw new UnsupportedOperationException(); } } /** * Creates a token stream for numeric values using the default <code>precisionStep</code> * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized, * before using set a value using the various set<em>???</em>Value() methods. */ public NumericTokenStream() { this(NumericUtils.PRECISION_STEP_DEFAULT); } /** * Creates a token stream for numeric values with the specified * <code>precisionStep</code>. The stream is not yet initialized, * before using set a value using the various set<em>???</em>Value() methods. */ public NumericTokenStream(final int precisionStep) { super(new NumericAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY)); // we must do this after the super call :( ((NumericAttributeFactory) getAttributeFactory()).ts = this; addAttribute(NumericTermAttribute.class); this.precisionStep = precisionStep; if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); shift = -precisionStep; } /** * Expert: Creates a token stream for numeric values with the specified * <code>precisionStep</code> using the given * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}. * The stream is not yet initialized, * before using set a value using the various set<em>???</em>Value() methods. */ public NumericTokenStream(AttributeFactory factory, final int precisionStep) { super(new NumericAttributeFactory(factory)); // we must do this after the super call :( ((NumericAttributeFactory) getAttributeFactory()).ts = this; addAttribute(NumericTermAttribute.class); this.precisionStep = precisionStep; if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); shift = -precisionStep; } /** * Initializes the token stream with the supplied <code>long</code> value. * @param value the value, for which this TokenStream should enumerate tokens. * @return this instance, because of this you can use it the following way: * <code>new Field(name, new NumericTokenStream(precisionStep).setLongValue(value))</code> */ public NumericTokenStream setLongValue(final long value) { this.value = value; valSize = 64; shift = -precisionStep; return this; } /** * Initializes the token stream with the supplied <code>int</code> value. * @param value the value, for which this TokenStream should enumerate tokens. * @return this instance, because of this you can use it the following way: * <code>new Field(name, new NumericTokenStream(precisionStep).setIntValue(value))</code> */ public NumericTokenStream setIntValue(final int value) { this.value = value; valSize = 32; shift = -precisionStep; return this; } /** * Initializes the token stream with the supplied <code>double</code> value. * @param value the value, for which this TokenStream should enumerate tokens. * @return this instance, because of this you can use it the following way: * <code>new Field(name, new NumericTokenStream(precisionStep).setDoubleValue(value))</code> */ public NumericTokenStream setDoubleValue(final double value) { this.value = NumericUtils.doubleToSortableLong(value); valSize = 64; shift = -precisionStep; return this; } /** * Initializes the token stream with the supplied <code>float</code> value. * @param value the value, for which this TokenStream should enumerate tokens. * @return this instance, because of this you can use it the following way: * <code>new Field(name, new NumericTokenStream(precisionStep).setFloatValue(value))</code> */ public NumericTokenStream setFloatValue(final float value) { this.value = NumericUtils.floatToSortableInt(value); valSize = 32; shift = -precisionStep; return this; } @Override public void reset() { if (valSize == 0) throw new IllegalStateException("call set???Value() before usage"); shift = -precisionStep; } @Override public boolean incrementToken() { if (valSize == 0) throw new IllegalStateException("call set???Value() before usage"); shift += precisionStep; if (shift >= valSize) { // reset so the attribute still works after exhausted stream shift -= precisionStep; return false; } clearAttributes(); // the TermToBytesRefAttribute is directly accessing shift & value. typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC); posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0); return true; } @Override public String toString() { final StringBuilder sb = new StringBuilder("(numeric,valSize=").append(valSize); sb.append(",precisionStep=").append(precisionStep).append(')'); return sb.toString(); } // members private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); int shift, valSize = 0; // valSize==0 means not initialized private final int precisionStep; long value = 0L; }