package org.apache.solr.analysis.author;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Token filter that adds alternative spellings of author names to the stream
 * as they are indexed (it was initially called AuthorAutoSynonymFilter).
 *
 * <p>Each eligible input token is passed through unchanged. If
 * {@code AuthorUtils.getAsciiTransliteratedVariants} returns at least one
 * variant for the token's term, every variant is then emitted as an extra
 * token that copies the original token's captured attribute state, but with
 * the term replaced by the variant, a position increment of 0 and the type
 * {@code AuthorUtils.AUTHOR_TRANSLITERATED}.
 */
public final class AuthorTransliterationFilter extends TokenFilter {

  public static final Logger log = LoggerFactory.getLogger(AuthorTransliterationFilter.class);

  /** Token type to process; {@code null} means every token is processed. */
  private final String tokenType;

  private final CharTermAttribute termAtt;
  private final PositionIncrementAttribute posIncrAtt;
  private final TypeAttribute typeAtt;

  /** Variants generated for the most recent eligible token, or {@code null} if none yet. */
  private List<String> transliterationStack;

  /** Number of entries of {@link #transliterationStack} that still have to be emitted. */
  private int pending;

  /** Attribute state captured at the token the pending variants were generated from. */
  private AttributeSource.State current;

  /**
   * Creates the filter.
   *
   * @param input the token stream to wrap
   * @param tokenType only tokens whose type equals this value get variants;
   *     pass {@code null} to process all tokens
   */
  public AuthorTransliterationFilter(TokenStream input, String tokenType) {
    super(input);
    this.termAtt = addAttribute(CharTermAttribute.class);
    this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    this.typeAtt = addAttribute(TypeAttribute.class);
    this.tokenType = tokenType;
    this.transliterationStack = null;
    this.pending = 0;
  }

  /**
   * Emits the next pending variant if there is one, otherwise advances the
   * wrapped stream and queues variants for the new token when it is eligible.
   *
   * @return {@code true} if a token (original or variant) was produced,
   *     {@code false} once the wrapped stream is exhausted
   * @throws IOException if the wrapped stream fails
   * @see org.apache.lucene.analysis.TokenStream#incrementToken()
   */
  @Override
  public boolean incrementToken() throws IOException {
    if (pending > 0) {
      // Variants are consumed from the end of the list towards its start.
      String variant = transliterationStack.get(--pending);
      restoreState(current);
      termAtt.setEmpty();
      termAtt.append(variant);
      // Stack the variant on the same position as the original token.
      posIncrAtt.setPositionIncrement(0);
      typeAtt.setType(AuthorUtils.AUTHOR_TRANSLITERATED);
      return true;
    }

    if (!input.incrementToken()) {
      return false;
    }

    // Eligibility is decided once. Calling equals() on the non-null tokenType
    // (rather than on the token's type) keeps the check safe even if an
    // upstream filter left the type attribute null, and avoids re-testing the
    // type at all when every token is to be processed.
    boolean eligible = tokenType == null || tokenType.equals(typeAtt.type());
    if (eligible && genVariants()) {
      // Remember the original token so each variant can copy its attributes.
      current = captureState();
    }
    return true;
  }

  /**
   * Looks up the transliterated variants of the current term and, when there
   * are any, queues them for emission.
   *
   * @return {@code true} if at least one variant was queued
   */
  private boolean genVariants() {
    List<String> variants = AuthorUtils.getAsciiTransliteratedVariants(termAtt.toString());
    if (variants == null || variants.isEmpty()) {
      return false;
    }
    transliterationStack = variants;
    pending = variants.size();
    return true;
  }

  /**
   * Resets the wrapped stream and discards any queued variants and captured state.
   *
   * @throws IOException if resetting the wrapped stream fails
   */
  @Override
  public void reset() throws IOException {
    super.reset();
    transliterationStack = null;
    pending = 0;
    current = null;
  }
}