package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.document.NumericField; // for javadocs
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
import org.apache.lucene.search.NumericRangeFilter; // for javadocs
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/**
* <b>Expert:</b> This class provides a {@link TokenStream}
* for indexing numeric values that can be used by {@link
* NumericRangeQuery} or {@link NumericRangeFilter}.
*
* <p>Note that for simple usage, {@link NumericField} is
* recommended. {@link NumericField} disables norms and
* term freqs, as they are not usually needed during
* searching. If you need to change these settings, you
* should use this class.
*
* <p>See {@link NumericField} for capabilities of fields
* indexed numerically.</p>
*
* <p>Here's an example usage, for an <code>int</code> field:
*
* <pre>
* Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
* field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true);
* document.add(field);
* </pre>
*
* <p>For optimal performance, re-use the TokenStream and Field instance
* for more than one document:
*
* <pre>
* NumericTokenStream stream = new NumericTokenStream(precisionStep);
* Field field = new Field(name, stream);
* field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true);
* Document document = new Document();
* document.add(field);
*
* for(all documents) {
* stream.setIntValue(value);
* writer.addDocument(document);
* }
* </pre>
*
* <p>This stream is not intended to be used in analyzers;
* it's more for iterating the different precisions during
* indexing a specific numeric value.</p>
* <p><b>NOTE</b>: as token streams are only consumed once
* the document is added to the index, if you index more
* than one numeric field, use a separate <code>NumericTokenStream</code>
* instance for each.</p>
*
* <p>See {@link NumericRangeQuery} for more details on the
* <a
* href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
* parameter as well as how numeric fields work under the hood.</p>
*
* @lucene.experimental
*
* @since 2.9
*/
public final class NumericTokenStream extends TokenStream {

  /** The full precision token gets this token type assigned. */
  public static final String TOKEN_TYPE_FULL_PREC  = "fullPrecNumeric";

  /** The lower precision tokens get this token type assigned. */
  public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";

  /** <b>Expert:</b> Use this attribute to get the details of the currently generated token.
   * @lucene.experimental
   * @since 4.0
   */
  public interface NumericTermAttribute extends Attribute {
    /** Returns current shift value, undefined before first token */
    int getShift();
    /** Returns {@link NumericTokenStream}'s raw value as {@code long} */
    long getRawValue();
    /** Returns value size in bits (32 for {@code float}, {@code int}; 64 for {@code double}, {@code long}) */
    int getValueSize();
  }

  /**
   * Attribute factory that hands out a {@link NumericTermAttributeImpl} bound to the
   * owning stream for {@link NumericTermAttribute}, rejects {@link CharTermAttribute}
   * (and anything assignable to it), and forwards every other attribute class to the
   * wrapped delegate factory.
   */
  private static final class NumericAttributeFactory extends AttributeFactory {
    private final AttributeFactory delegate;
    // Set by the NumericTokenStream constructor right after its super(...) call,
    // because "this" cannot be passed while the super constructor is being invoked.
    private NumericTokenStream ts = null;

    NumericAttributeFactory(AttributeFactory delegate) {
      this.delegate = delegate;
    }

    @Override
    public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
      if (attClass == NumericTermAttribute.class) {
        return new NumericTermAttributeImpl(ts);
      }
      if (CharTermAttribute.class.isAssignableFrom(attClass)) {
        throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute.");
      }
      return delegate.createAttributeInstance(attClass);
    }
  }

  /**
   * Attribute implementation that holds no state of its own: shift, raw value and
   * value size are read directly from the owning {@link NumericTokenStream}.
   * Consequently {@link #clear()} and {@link #copyTo(AttributeImpl)} do nothing,
   * equality is identity based, and cloning is not supported.
   */
  private static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute,TermToBytesRefAttribute {
    private final NumericTokenStream ts;

    public NumericTermAttributeImpl(NumericTokenStream ts) {
      this.ts = ts;
    }

    /**
     * Writes the prefix coded form of the stream's current value and shift into
     * {@code bytes} and returns what the {@link NumericUtils} encoder returns.
     * While no token has been produced yet, {@code bytes} is set to length 0
     * and 0 is returned.
     */
    public int toBytesRef(BytesRef bytes) {
      assert ts.valSize == 64 || ts.valSize == 32;
      if (ts.shift < 0) {
        // Before the first token the shift is still -precisionStep; answer with the
        // empty token directly instead of provoking an exception from the encoder.
        return emptyTerm(bytes);
      }
      try {
        return (ts.valSize == 64) ?
          NumericUtils.longToPrefixCoded(ts.value, ts.shift, bytes) :
          NumericUtils.intToPrefixCoded((int) ts.value, ts.shift, bytes);
      } catch (IllegalArgumentException iae) {
        // Fallback kept from the original contract: any shift the encoder rejects
        // yields an empty token rather than a failure.
        return emptyTerm(bytes);
      }
    }

    /** Marks {@code bytes} as empty and returns 0, the result reported for "no token". */
    private static int emptyTerm(BytesRef bytes) {
      bytes.length = 0;
      return 0;
    }

    public int getShift() { return ts.shift; }
    public long getRawValue() { return ts.value; }
    public int getValueSize() { return ts.valSize; }

    @Override
    public void clear() {
      // this attribute has no contents to clear
    }

    @Override
    public boolean equals(Object other) {
      return other == this;
    }

    @Override
    public int hashCode() {
      return System.identityHashCode(this);
    }

    @Override
    public void copyTo(AttributeImpl target) {
      // this attribute has no contents to copy
    }

    @Override
    public Object clone() {
      // cannot throw CloneNotSupportedException (checked)
      throw new UnsupportedOperationException();
    }
  }

  /**
   * Creates a token stream for numeric values using the default <code>precisionStep</code>
   * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized;
   * before using it, set a value using one of the set<em>???</em>Value() methods.
   */
  public NumericTokenStream() {
    this(NumericUtils.PRECISION_STEP_DEFAULT);
  }

  /**
   * Creates a token stream for numeric values with the specified
   * <code>precisionStep</code>. The stream is not yet initialized;
   * before using it, set a value using one of the set<em>???</em>Value() methods.
   * @throws IllegalArgumentException if <code>precisionStep</code> is smaller than 1
   */
  public NumericTokenStream(final int precisionStep) {
    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, precisionStep);
  }

  /**
   * Expert: Creates a token stream for numeric values with the specified
   * <code>precisionStep</code> using the given
   * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
   * The stream is not yet initialized;
   * before using it, set a value using one of the set<em>???</em>Value() methods.
   * @throws IllegalArgumentException if <code>precisionStep</code> is smaller than 1
   */
  public NumericTokenStream(AttributeFactory factory, final int precisionStep) {
    super(new NumericAttributeFactory(factory));
    // we must do this after the super call :(
    ((NumericAttributeFactory) getAttributeFactory()).ts = this;
    addAttribute(NumericTermAttribute.class);
    if (precisionStep < 1) {
      throw new IllegalArgumentException("precisionStep must be >=1");
    }
    this.precisionStep = precisionStep;
    shift = -precisionStep;
  }

  /**
   * Initializes the token stream with the supplied <code>long</code> value.
   * @param value the value, for which this TokenStream should enumerate tokens.
   * @return this instance, because of this you can use it the following way:
   * <code>new Field(name, new NumericTokenStream(precisionStep).setLongValue(value))</code>
   */
  public NumericTokenStream setLongValue(final long value) {
    return init(value, 64);
  }

  /**
   * Initializes the token stream with the supplied <code>int</code> value.
   * @param value the value, for which this TokenStream should enumerate tokens.
   * @return this instance, because of this you can use it the following way:
   * <code>new Field(name, new NumericTokenStream(precisionStep).setIntValue(value))</code>
   */
  public NumericTokenStream setIntValue(final int value) {
    return init(value, 32);
  }

  /**
   * Initializes the token stream with the supplied <code>double</code> value.
   * @param value the value, for which this TokenStream should enumerate tokens.
   * @return this instance, because of this you can use it the following way:
   * <code>new Field(name, new NumericTokenStream(precisionStep).setDoubleValue(value))</code>
   */
  public NumericTokenStream setDoubleValue(final double value) {
    return init(NumericUtils.doubleToSortableLong(value), 64);
  }

  /**
   * Initializes the token stream with the supplied <code>float</code> value.
   * @param value the value, for which this TokenStream should enumerate tokens.
   * @return this instance, because of this you can use it the following way:
   * <code>new Field(name, new NumericTokenStream(precisionStep).setFloatValue(value))</code>
   */
  public NumericTokenStream setFloatValue(final float value) {
    return init(NumericUtils.floatToSortableInt(value), 32);
  }

  /**
   * Stores the raw value and its size in bits and rewinds the shift so that the
   * next {@link #incrementToken()} starts at shift 0 again.
   */
  private NumericTokenStream init(final long rawValue, final int valueSize) {
    this.value = rawValue;
    this.valSize = valueSize;
    this.shift = -precisionStep;
    return this;
  }

  /** Fails if none of the set<em>???</em>Value() methods has been called yet. */
  private void ensureValueSet() {
    if (valSize == 0) {
      throw new IllegalStateException("call set???Value() before usage");
    }
  }

  /**
   * Rewinds the stream so that the tokens of the current value are enumerated again.
   * @throws IllegalStateException if no value has been set
   */
  @Override
  public void reset() {
    ensureValueSet();
    shift = -precisionStep;
  }

  /**
   * Advances to the next precision of the current value by adding
   * <code>precisionStep</code> to the shift. The token at shift 0 gets type
   * {@link #TOKEN_TYPE_FULL_PREC} and position increment 1, all further tokens get
   * {@link #TOKEN_TYPE_LOWER_PREC} and position increment 0.
   * @return <code>false</code> once the shift would reach the value size in bits
   * @throws IllegalStateException if no value has been set
   */
  @Override
  public boolean incrementToken() {
    ensureValueSet();
    shift += precisionStep;
    if (shift >= valSize) {
      // reset so the attribute still works after exhausted stream
      shift -= precisionStep;
      return false;
    }

    clearAttributes();
    // the TermToBytesRefAttribute is directly accessing shift & value.
    final boolean fullPrecision = (shift == 0);
    typeAtt.setType(fullPrecision ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC);
    posIncrAtt.setPositionIncrement(fullPrecision ? 1 : 0);
    return true;
  }

  /** Returns a short description containing the value size and the precision step. */
  @Override
  public String toString() {
    return "(numeric,valSize=" + valSize + ",precisionStep=" + precisionStep + ')';
  }

  // members
  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);

  // current shift in bits (negative while no token has been produced) and
  // size of the value in bits
  int shift, valSize = 0; // valSize==0 means not initialized
  private final int precisionStep;

  // raw value; read directly by NumericTermAttributeImpl
  long value = 0L;
}