package org.xbib.elasticsearch.index.analysis.baseform;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeSource;
import org.xbib.elasticsearch.common.fsa.Dictionary;
import java.io.IOException;
import java.nio.charset.CharacterCodingException;
import java.util.LinkedList;
/**
 * Token filter that adds dictionary-derived forms to a token stream.
 *
 * <p>For every token delivered by the wrapped stream, the term text is passed to
 * {@link Dictionary#lookup}. When the lookup yields a non-null, non-empty result, the
 * original token is emitted first and the looked-up form is emitted on the following
 * call to {@link #incrementToken()}, with all attributes of the original token restored
 * and the position increment set to {@code 0}.
 *
 * <p>If {@code respectKeywords} is set, tokens whose {@link KeywordAttribute} reports
 * {@code true} are passed through without a dictionary lookup.
 */
public class BaseformTokenFilter extends TokenFilter {

    /** Looked-up forms that still have to be emitted for the most recent input token. */
    private final LinkedList<String> pendingForms;

    /** Source of the looked-up forms. */
    private final Dictionary dictionary;

    /** Whether keyword-flagged tokens bypass the dictionary lookup. */
    private final boolean respectKeywords;

    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);

    private final KeywordAttribute keywordAttribute = addAttribute(KeywordAttribute.class);

    private final PositionIncrementAttribute positionAttribute =
            addAttribute(PositionIncrementAttribute.class);

    /** Attribute snapshot of the input token that the pending forms belong to. */
    private AttributeSource.State savedState;

    /**
     * Creates a filter on top of the given stream.
     *
     * @param input           the token stream to wrap
     * @param dictionary      the dictionary queried for each term
     * @param respectKeywords {@code true} to leave keyword-flagged tokens untouched
     */
    protected BaseformTokenFilter(TokenStream input, Dictionary dictionary, boolean respectKeywords) {
        super(input);
        this.respectKeywords = respectKeywords;
        this.dictionary = dictionary;
        this.pendingForms = new LinkedList<>();
    }

    /**
     * Emits either a pending looked-up form or the next token of the wrapped stream.
     *
     * @return {@code true} if a token was produced, {@code false} once the wrapped stream is exhausted
     * @throws IOException              if the wrapped stream or the dictionary lookup fails
     * @throws IllegalArgumentException if forms are pending but no attribute snapshot was captured
     */
    @Override
    public final boolean incrementToken() throws IOException {
        if (pendingForms.isEmpty()) {
            return advanceInput();
        }
        if (savedState == null) {
            throw new IllegalArgumentException("current is null");
        }
        String form = pendingForms.pop();
        // Start from the original token's attributes, then overwrite term and position
        // so the form is stacked on the same position as the original token.
        restoreState(savedState);
        positionAttribute.setPositionIncrement(0);
        termAttribute.setEmpty().append(form);
        return true;
    }

    /**
     * Pulls the next token from the wrapped stream and, unless it is a respected keyword,
     * queues its looked-up form and snapshots the attributes for the later replay.
     */
    private boolean advanceInput() throws IOException {
        if (!input.incrementToken()) {
            return false;
        }
        boolean untouchableKeyword = respectKeywords && keywordAttribute.isKeyword();
        if (!untouchableKeyword) {
            queueLookupResult();
            if (!pendingForms.isEmpty()) {
                savedState = captureState();
            }
        }
        return true;
    }

    /**
     * Looks up the current term in the dictionary and queues the result when it is
     * neither {@code null} nor empty.
     */
    private void queueLookupResult() throws CharacterCodingException {
        CharSequence term = String.valueOf(termAttribute.buffer(), 0, termAttribute.length());
        CharSequence found = dictionary.lookup(term);
        if (found == null || found.length() == 0) {
            return;
        }
        pendingForms.addLast(found.toString());
    }

    /** Resets the wrapped stream and discards pending forms and the saved snapshot. */
    @Override
    public void reset() throws IOException {
        super.reset();
        savedState = null;
        pendingForms.clear();
    }

    /**
     * Compares pending forms, dictionary and the keyword flag.
     *
     * <p>NOTE(review): the pending forms are mutable per-stream state, so the outcome can
     * change while a stream is being consumed.
     */
    @Override
    public boolean equals(Object object) {
        if (!(object instanceof BaseformTokenFilter)) {
            return false;
        }
        BaseformTokenFilter that = (BaseformTokenFilter) object;
        if (!pendingForms.equals(that.pendingForms)) {
            return false;
        }
        if (!dictionary.equals(that.dictionary)) {
            return false;
        }
        return respectKeywords == that.respectKeywords;
    }

    /** XOR of the hash codes of the pending forms, the dictionary and the keyword flag. */
    @Override
    public int hashCode() {
        int hash = pendingForms.hashCode();
        hash ^= dictionary.hashCode();
        hash ^= Boolean.hashCode(respectKeywords);
        return hash;
    }
}