package org.fastcatsearch.ir.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Reader;
/**
 * Extracts fixed-length 4-gram tokens from the input
 * (the tokenizer below is configured with min = max = 4).
 * */
public class FourGramWordAnalyzer extends Analyzer {

    private static final Logger logger = LoggerFactory.getLogger(FourGramWordAnalyzer.class);

    public FourGramWordAnalyzer() {
    }

    /**
     * Builds the token-stream pipeline for this analyzer.
     *
     * @param fieldName name of the field being analyzed (unused here)
     * @param reader    character source to tokenize
     * @return components whose source emits fixed-length 4-grams
     *         (min = max = 4) and whose sink normalizes them via
     *         {@link StandardFilter}
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // min and max gram length are both 4, so only 4-grams are produced.
        // NOTE(review): the trailing boolean flag's meaning is defined by
        // NGramWordTokenizer — confirm its semantics against that class.
        NGramWordTokenizer source = new NGramWordTokenizer(reader, 4, 4, true);
        TokenFilter sink = new StandardFilter(source);
        return new TokenStreamComponents(source, sink);
    }
}