/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.analysis;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.sindice.siren.analysis.NumericAnalyzer.NumericParser;
import org.sindice.siren.analysis.attributes.NodeNumericTermAttribute;
/**
* This class provides a TokenStream for indexing numeric values that is used in
* {@link NumericAnalyzer}.
*
* <p>
*
* This tokenizer expects to receive a string representation of a numeric value
* as input. It parses the input using {@link NumericParser#parseAndConvert(Reader)},
* and uses a {@link NodeNumericTermAttribute} to generate the numeric token.
*/
public class NumericTokenizer extends Tokenizer {

  /** Attribute that holds the parsed value and produces one term per shift. */
  private final NodeNumericTermAttribute numericAtt = this.addAttribute(NodeNumericTermAttribute.class);
  /** Receives the term generated by {@link NodeNumericTermAttribute#incrementShift}. */
  private final CharTermAttribute termAtt = this.addAttribute(CharTermAttribute.class);
  /** Set to the full-precision or lower-precision token type, depending on the shift. */
  private final TypeAttribute typeAtt = this.addAttribute(TypeAttribute.class);
  /** Set to 1 for the token at shift 0 and to 0 for every other token. */
  private final PositionIncrementAttribute posIncrAtt = this.addAttribute(PositionIncrementAttribute.class);

  /** Parser used to turn the content of the reader into a <code>long</code>. */
  private final NumericParser<?> parser;

  /**
   * <code>true</code> once the input has been parsed and the numeric attribute
   * has been initialised for the current input; cleared by {@link #reset()}.
   */
  private boolean isInitialised = false;

  /**
   * Creates a token stream for numeric values with the specified
   * <code>precisionStep</code>, using the default attribute factory.
   *
   * @param input the reader providing the string representation of the value
   * @param parser the parser used to read and convert the value
   * @param precisionStep the precision step given to the numeric attribute
   */
  public NumericTokenizer(final Reader input,
                          final NumericParser<? extends Number> parser,
                          final int precisionStep) {
    this(input, parser, precisionStep, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
  }

  /**
   * Expert: Creates a token stream for numeric values with the specified
   * <code>precisionStep</code> using the given
   * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
   *
   * @param input the reader providing the string representation of the value
   * @param parser the parser used to read and convert the value
   * @param precisionStep the precision step given to the numeric attribute
   * @param factory the factory used to create the attributes of this stream
   */
  public NumericTokenizer(final Reader input,
                          final NumericParser<? extends Number> parser,
                          final int precisionStep,
                          final AttributeFactory factory) {
    super(factory, input);
    this.parser = parser;
    numericAtt.setPrecisionStep(precisionStep);
  }

  /**
   * Resets this tokenizer so that the next call to {@link #incrementToken()}
   * parses the input again.
   *
   * @throws IOException if the parent implementation fails to reset
   */
  @Override
  public void reset() throws IOException {
    // The TokenStream contract requires overriding implementations to chain to
    // the parent. In Lucene releases where Tokenizer#reset() is the step that
    // makes the pending reader available through 'input', omitting this call
    // leaves 'input' unusable when incrementToken() tries to parse it.
    super.reset();
    isInitialised = false;
  }

  /**
   * Parses the input on the first call after construction or {@link #reset()},
   * then emits one token per call by incrementing the shift of the numeric
   * attribute.
   *
   * @return the value returned by
   *         {@link NodeNumericTermAttribute#incrementShift} for this call
   * @throws IOException if the parser fails to read the input
   */
  @Override
  public final boolean incrementToken() throws IOException {
    // initialise the numeric attribute lazily, once per input
    if (!isInitialised) {
      final long value = parser.parseAndConvert(this.input);
      numericAtt.init(parser.getNumericType(), value, parser.getValueSize());
      isInitialised = true;
    }

    // this will only clear all other attributes in this TokenStream
    this.clearAttributes();

    // increment the shift and generate next token
    final boolean hasNext = numericAtt.incrementShift(termAtt);

    // set other attributes after the call to incrementShift since getShift
    // is undefined before first call
    final boolean isFullPrecision = numericAtt.getShift() == 0;
    typeAtt.setType(isFullPrecision ? NumericTokenStream.TOKEN_TYPE_FULL_PREC
                                    : NumericTokenStream.TOKEN_TYPE_LOWER_PREC);
    posIncrAtt.setPositionIncrement(isFullPrecision ? 1 : 0);

    return hasNext;
  }

}