/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.analysis.attributes;
import org.apache.lucene.analysis.NumericTokenStream.NumericTermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.FieldType.NumericType;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.sindice.siren.analysis.NumericTokenizer;
import org.sindice.siren.search.node.NodeNumericRangeQuery;
/**
* <b>Expert:</b> This class provides an {@link Attribute} for the
* {@link NumericTokenizer} for indexing numeric values that can be used by {@link
* NodeNumericRangeQuery}.
* <p>
* This attribute provides a stream of tokens which iterates over
* the different precisions of a given numeric value.
* <p>
* The string representation of each precision is prefixed by:
* <ul>
* <li> the numeric type of the value;
* <li> the precision step;
* </ul>
* This prefix is in fact encoding the numeric type and precision step inside
* the dictionary. This prefix is necessary for two reasons:
* <ul>
* <li> it prevents values of different numeric types from overlapping, and
* therefore avoids false positives;
* <li> it enables better clustering of the values of a particular numeric type
* in the dictionary.
* </ul>
*/
public interface NodeNumericTermAttribute extends Attribute {
/**
* Returns the numeric type of the value.
*
* @return the {@link NumericType} of the value
*/
NumericType getNumericType();
/**
* Returns the current shift value.
* <p>
* Undefined before the first call to
* {@link #incrementShift(CharTermAttribute)}.
*
* @return the current shift value
*/
int getShift();
/**
* Returns the value size in bits (32 for {@code float}, {@code int}; 64 for
* {@code double}, {@code long}).
*
* @return the value size in bits
*/
int getValueSize();
/**
* Sets the precision step.
*
* @param precisionStep the precision step to use
*/
void setPrecisionStep(int precisionStep);
/**
* Returns the precision step.
*
* @return the precision step
*/
int getPrecisionStep();
/**
* Initialises this attribute.
*
* @param numericType the numeric type of the value
* @param value the numeric value, passed as a {@code long}.
* NOTE(review): {@code float} and {@code double} values are presumably
* converted to a sortable integer representation by the caller; confirm
* against the implementation.
* @param valSize presumably the value size in bits, as later reported by
* {@link #getValueSize()}; confirm against the implementation
*/
void init(NumericType numericType, long value, int valSize);
/**
* Resets the current shift value to 0.
*/
void resetShift();
/**
* Increments the shift and generates the next token.
* <p>
* The original Lucene's {@link NumericTermAttribute} implements
* {@link TermToBytesRefAttribute}. There is a conflict problem with the
* {@link CharTermAttribute} used in higher-level SIREn's analyzers, which also
* implements {@link TermToBytesRefAttribute}.
* The problem is that the {@link AttributeSource} is not able to choose
* between the two when asked for an attribute implementing
* {@link TermToBytesRefAttribute}, e.g., in TermsHashPerField.
* <p>
* The current solution is to fill the {@link BytesRef} attribute of the
* {@link CharTermAttribute} with the encoded numeric value.
*
* @param termAtt the {@link CharTermAttribute} to fill with the encoded
* numeric value
* @return True if there are still tokens, false if we reach the end of the
* stream.
*/
boolean incrementShift(CharTermAttribute termAtt);
}