package org.ansj.lucene4;
import java.io.Reader;
import java.util.Set;
import org.ansj.lucene.util.AnsjTokenizer;
import org.ansj.splitWord.analysis.ToAnalysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
/**
 * Lucene {@link Analyzer} whose token stream is produced by an
 * {@link AnsjTokenizer} wrapping a {@link ToAnalysis} segmenter.
 *
 * <p>The stop-word set and the stemming flag held by this analyzer are handed
 * unchanged to every tokenizer it creates.
 */
public class AnsjAnalysis extends Analyzer {

    /** Whether word stems should be analyzed; forwarded to the tokenizer. */
    boolean pstemming;

    /** Stop words forwarded to the tokenizer; stays {@code null} unless supplied. */
    public Set<String> filter;

    /**
     * Creates an analyzer with a stop-word set and a stemming flag.
     *
     * @param filter
     *            stop words
     * @param pstemming
     *            whether to analyze word stems
     */
    public AnsjAnalysis(Set<String> filter, boolean pstemming) {
        this.filter = filter;
        this.pstemming = pstemming;
    }

    /**
     * Creates an analyzer with no stop-word set.
     *
     * @param pstemming
     *            whether to analyze word stems, i.e. convert singular/plural
     *            forms and tenses
     */
    public AnsjAnalysis(boolean pstemming) {
        // No stop words supplied: the filter field keeps its null default.
        this(null, pstemming);
    }

    /** Creates an analyzer with no stop-word set and stemming switched off. */
    public AnsjAnalysis() {
        this(null, false);
    }

    /**
     * Builds the tokenizer chain for a field.
     *
     * @param fieldName
     *            name of the field being analyzed; not used by this implementation
     * @param reader
     *            source of the text; given both to the segmenter and to the tokenizer
     * @return components whose source is the newly created {@link AnsjTokenizer}
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName, final Reader reader) {
        ToAnalysis segmenter = new ToAnalysis(reader);
        Tokenizer source = new AnsjTokenizer(segmenter, reader, filter, pstemming);
        return new TokenStreamComponents(source);
    }
}