package org.fnlp.app.lucene;
import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
public abstract class FilteringTokenFilter extends TokenFilter {
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
private boolean enablePositionIncrements; // no init needed, as ctor enforces setting value!
public FilteringTokenFilter(boolean enablePositionIncrements, TokenStream input){
super(input);
this.enablePositionIncrements = enablePositionIncrements;
}
/** Override this method and return if the current input token should be returned by {@link #incrementToken}. */
protected abstract boolean accept() throws IOException;
@Override
public final boolean incrementToken() throws IOException {
if (enablePositionIncrements) {
int skippedPositions = 0;
while (input.incrementToken()) {
if (accept()) {
if (skippedPositions != 0) {
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
}
return true;
}
skippedPositions += posIncrAtt.getPositionIncrement();
}
} else {
while (input.incrementToken()) {
if (accept()) {
return true;
}
}
}
// reached EOS -- return false
return false;
}
/**
* @see #setEnablePositionIncrements(boolean)
*/
public boolean getEnablePositionIncrements() {
return enablePositionIncrements;
}
/**
* If true, this TokenFilter will preserve
* positions of the incoming tokens (ie, accumulate and
* set position increments of the removed tokens).
* Generally, true is best as it does not
* lose information (positions of the original tokens)
* during indexing.
*
*
When set, when a token is stopped
* (omitted), the position increment of the following
* token is incremented.
*
*
NOTE: be sure to also
* set org.apache.lucene.queryparser.classic.QueryParser#setEnablePositionIncrements if
* you use QueryParser to create queries.
*/
public void setEnablePositionIncrements(boolean enable) {
this.enablePositionIncrements = enable;
}
}