package org.apache.solr.analysis.author;

import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Token filter that emits query-time variations of an author name.
 *
 * <p>Every token coming from the wrapped stream is passed through unchanged. When
 * {@link AuthorQueryVariations#getQueryVariationsInclRegex(String)} yields variations for
 * that token's text, each variation is emitted afterwards as an additional token with a
 * position increment of {@code 0} and the type {@link AuthorUtils#AUTHOR_QUERY_VARIANT}.
 *
 * <p>rchyla: I have decided to make all the author search use wildcards instead of regexes
 * (WHENEVER it is possible), therefore this filter will translate the regex into wildcards
 * (where possible).
 */
public final class AuthorQueryVariationsFilter extends TokenFilter {

  public static final Logger log = LoggerFactory.getLogger(AuthorQueryVariationsFilter.class);

  /** Suffix that marks a variation as a candidate for wildcard translation. */
  private static final String REGEX_ANY = ".*";

  /** Variations still to be emitted for the most recent input token (first in, first out). */
  private final Deque<String> pendingVariations = new ArrayDeque<String>();

  private final CharTermAttribute termAtt;
  private final PositionIncrementAttribute posIncrAtt;
  private final TypeAttribute typeAtt;

  /** Attribute state of the input token whose variations are currently queued. */
  private AttributeSource.State savedState;

  /**
   * Creates a filter over the given stream.
   *
   * @param input the token stream to wrap
   */
  public AuthorQueryVariationsFilter(TokenStream input) {
    super(input);
    this.termAtt = addAttribute(CharTermAttribute.class);
    this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    this.typeAtt = addAttribute(TypeAttribute.class);
  }

  /**
   * Emits the next queued variation if there is one; otherwise advances the wrapped stream
   * and queues the variations of the new token.
   *
   * @return {@code false} once the wrapped stream is exhausted and nothing is queued,
   *     {@code true} otherwise
   * @throws IOException if the wrapped stream fails to advance
   */
  @Override
  public boolean incrementToken() throws IOException {
    if (!pendingVariations.isEmpty()) {
      emitPendingVariation();
      return true;
    }

    if (!input.incrementToken()) {
      return false;
    }

    // The state only needs to be remembered when there is something to emit after this token.
    if (genVariations()) {
      savedState = captureState();
    }
    return true;
  }

  /** Turns the head of the queue into the current token, based on the saved input state. */
  private void emitPendingVariation() {
    String variation = pendingVariations.removeFirst();
    // Restore first so that attributes not overwritten below mirror the input token.
    restoreState(savedState);
    termAtt.setEmpty().append(variation);
    posIncrAtt.setPositionIncrement(0);
    typeAtt.setType(AuthorUtils.AUTHOR_QUERY_VARIANT);
  }

  /**
   * Queues the variations of the current term.
   *
   * @return {@code true} if at least one variation was queued, {@code false} if none exist
   */
  private boolean genVariations() {
    String authorName = termAtt.toString();
    HashSet<String> variations = AuthorQueryVariations.getQueryVariationsInclRegex(authorName);
    if (variations.isEmpty()) {
      return false;
    }
    for (String variation : variations) {
      pendingVariations.addLast(toWildcardIfPossible(variation));
    }
    return true;
  }

  /**
   * Rewrites a variation into wildcard syntax when it ends with {@code ".*"} and the text in
   * front of that suffix does not contain the two characters {@code \b}. In that case every
   * occurrence of {@code ".*"} is replaced by {@code "*"}; otherwise the variation is
   * returned as is.
   */
  private static String toWildcardIfPossible(String variation) {
    if (!variation.endsWith(REGEX_ANY)) {
      return variation;
    }
    String head = variation.substring(0, variation.length() - 2);
    if (head.contains("\\b")) {
      return variation;
    }
    return variation.replace(REGEX_ANY, "*");
  }

  /**
   * Resets the wrapped stream and discards any queued variations and the saved state.
   *
   * @throws IOException if resetting the wrapped stream fails
   */
  @Override
  public void reset() throws IOException {
    super.reset();
    pendingVariations.clear();
    savedState = null;
  }
}