package experiments.collective.entdoccentric.query;

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

/**
 * Analyzer that tokenizes input with {@link CalbCTokenizer}, drops the words in
 * {@link #STOP_WORDS_SET} and then applies Porter stemming to the remaining
 * tokens.
 *
 * <p>
 * NOTE(review): the stop set contains lower-case words and
 * {@link PorterStemFilter} is documented to expect lower-case input. This
 * chain has no explicit lower-casing filter, so presumably
 * {@code CalbCTokenizer} takes care of it; confirm against the tokenizer.
 */
public class PositionalPorterStopAnalyzer extends Analyzer {

    /**
     * An unmodifiable set containing some common English words that are usually
     * not useful for searching.
     */
    public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;

    /** Lucene version handed to the tokenizer and the stop filter. */
    private final Version matchVersion;

    /**
     * Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}).
     *
     * @param matchVersion
     *            Lucene version to match; it is passed on unchanged to the
     *            tokenizer and the stop filter created by this analyzer
     */
    public PositionalPorterStopAnalyzer(Version matchVersion) {
        super();
        this.matchVersion = matchVersion;
    }

    /**
     * Creates the analysis chain for a field: {@link CalbCTokenizer} as the
     * source, followed by a {@link StopFilter} over {@link #STOP_WORDS_SET} and
     * a {@link PorterStemFilter}.
     *
     * @param fieldName
     *            name of the field being analyzed; not used, every field gets
     *            the same chain
     * @param reader
     *            reader supplying the text to tokenize
     * @return the components wrapping the tokenizer and the filtered stream
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        final CalbCTokenizer src = new CalbCTokenizer(matchVersion, reader);
        // Stop words are removed before stemming so that the unstemmed stop
        // set is compared against unstemmed tokens.
        final TokenStream withoutStopWords = new StopFilter(matchVersion, src, STOP_WORDS_SET);
        final TokenStream stemmed = new PorterStemFilter(withoutStopWords);
        // The stock TokenStreamComponents already forwards setReader to the
        // source tokenizer, so no subclass is needed here.
        return new TokenStreamComponents(src, stemmed);
    }
}