package com.tistory.devyongsik.analyzer;

import java.io.IOException;
import java.util.Map;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.tistory.devyongsik.analyzer.dictionary.DictionaryFactory;

/**
 * Token filter that drops every token whose term text is a key of the
 * stop-word dictionary map supplied by {@link DictionaryFactory}.
 *
 * <p>When position increments are enabled (they are disabled by default), the
 * position increment of each emitted token is enlarged by the summed
 * increments of the stop words that were skipped immediately before it.
 */
public class KoreanStopFilter extends TokenFilter {

	private static final Logger logger = LoggerFactory.getLogger(KoreanStopFilter.class);

	// NOTE(review): this static map is re-assigned by every constructor call, so
	// all live filters share whatever map the most recent construction fetched.
	// Kept as-is to preserve behavior — confirm whether that sharing is intended.
	private static Map<String, String> stopWordsMap = null;

	private boolean enablePositionIncrements = false;

	private final CharTermAttribute charTermAtt;
	private final PositionIncrementAttribute posIncrAtt;

	/**
	 * Creates a stop filter on top of the given stream and loads the stop-word
	 * dictionary map from the {@link DictionaryFactory}.
	 *
	 * @param input the upstream token stream to filter
	 */
	protected KoreanStopFilter(TokenStream input) {
		super(input);

		logger.info("init KoreanStopFilter");

		// addAttribute returns the already-registered instance or registers a new
		// one; getAttribute would throw if the upstream stream never registered
		// the attribute.
		charTermAtt = addAttribute(CharTermAttribute.class);
		posIncrAtt = addAttribute(PositionIncrementAttribute.class);

		DictionaryFactory dictionaryFactory = DictionaryFactory.getFactory();
		stopWordsMap = dictionaryFactory.getStopWordDictionaryMap();
	}

	/**
	 * Sets whether the increments of skipped stop words are added to the next
	 * emitted token's position increment.
	 *
	 * @param enable {@code true} to carry skipped increments forward
	 */
	public void setEnablePositionIncrements(boolean enable) {
		this.enablePositionIncrements = enable;
	}

	/**
	 * @return {@code true} if skipped increments are carried forward to the
	 *         next emitted token
	 */
	public boolean getEnablePositionIncrements() {
		return enablePositionIncrements;
	}

	/**
	 * Advances the upstream stream to the next token that is not a stop word.
	 *
	 * @return {@code true} if a non-stop-word token is available, {@code false}
	 *         once the upstream stream is exhausted
	 * @throws IOException if the upstream stream fails while advancing
	 */
	@Override
	public final boolean incrementToken() throws IOException {
		logger.debug("incrementToken KoreanStopFilter");

		// return the first non-stop word found
		int skippedPositions = 0;

		while (input.incrementToken()) {
			// Materialize the term text and the lookup once per token.
			final String term = charTermAtt.toString();
			final boolean isStopWord = stopWordsMap.containsKey(term);

			logger.debug("원래 리턴 될 TermAtt : {} , stopWordDic.isExist : {}", term, isStopWord);

			if (!isStopWord) {
				if (enablePositionIncrements) {
					posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
				}
				return true;
			}

			// Remember how many positions the dropped token occupied.
			skippedPositions += posIncrAtt.getPositionIncrement();
		}

		return false;
	}
}