package weiweiwang.github.search.analysis;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.util.Version;

import java.io.Reader;

/**
 * Analyzer that splits text on whitespace, lower-cases each token and then
 * passes the tokens through an {@link NGramTokenFilter} configured with the
 * gram sizes given at construction time.
 *
 * <p>Instances are immutable once constructed: the Lucene version and both
 * gram sizes are fixed for the lifetime of the analyzer.
 *
 * @author wangweiwei
 * Date: 8/5/12
 * Time: 4:46 PM
 */
public class NGramAnalyzer extends Analyzer {

    /** Lucene compatibility version handed to the tokenizer and the lower-case filter. */
    protected final Version matchVersion;

    /** Smallest gram size passed to the n-gram filter (always >= 1). */
    private final int minGram;

    /** Largest gram size passed to the n-gram filter (always >= minGram). */
    private final int maxGram;

    /**
     * Creates an analyzer producing n-grams within the given size range.
     *
     * <p>Arguments are checked here so that a misconfigured analyzer is rejected
     * when it is created rather than later, when the first token stream is built.
     *
     * @param version Lucene compatibility version; must not be {@code null}
     * @param minGram smallest gram size; must be at least 1
     * @param maxGram largest gram size; must not be smaller than {@code minGram}
     * @throws NullPointerException     if {@code version} is {@code null}
     * @throws IllegalArgumentException if {@code minGram < 1} or {@code minGram > maxGram}
     */
    public NGramAnalyzer(Version version, int minGram, int maxGram) {
        if (version == null) {
            throw new NullPointerException("version must not be null");
        }
        if (minGram < 1) {
            throw new IllegalArgumentException(
                    "minGram must be greater than zero, got " + minGram);
        }
        if (minGram > maxGram) {
            throw new IllegalArgumentException(
                    "minGram must not be greater than maxGram, got minGram=" + minGram
                            + ", maxGram=" + maxGram);
        }
        this.matchVersion = version;
        this.minGram = minGram;
        this.maxGram = maxGram;
    }

    /**
     * Builds the analysis chain: whitespace tokenizer, then lower-case filter,
     * then n-gram filter.
     *
     * @param fieldName name of the field being analyzed; not used, the same chain
     *                  is built for every field
     * @param reader    source of the text to tokenize
     * @return components wrapping the tokenizer and the final n-gram filter
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new WhitespaceTokenizer(matchVersion, reader);
        // Lower-case before n-gramming so that every emitted gram is already normalized.
        TokenStream lowerCased = new LowerCaseFilter(matchVersion, source);
        TokenStream grams = new NGramTokenFilter(lowerCased, minGram, maxGram);
        return new TokenStreamComponents(source, grams);
    }
}