package com.tistory.devyongsik.analyzer;
import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.junit.Before;
import org.junit.Test;
import com.google.common.collect.Lists;
import com.tistory.devyongsik.analyzer.dictionary.DictionaryFactory;
import com.tistory.devyongsik.analyzer.util.AnalyzerTestUtil;
import com.tistory.devyongsik.analyzer.util.TestToken;
/**
 * Test that runs {@link KoreanAnalyzer} over a short Korean input after installing a
 * custom noun dictionary, and checks the tokens it produces against a fixed list.
 *
 * <p>The helpers {@code getToken}, {@code collectExtractedNouns} and {@code verify} are
 * not defined in this class; presumably they are inherited from {@link AnalyzerTestUtil}.
 */
public class AnalyzerTest extends AnalyzerTestUtil {

    /** Tokens handed to {@code verify} as its first argument; re-created before every test. */
    private List<TestToken> nouns;

    /** Factory on which the custom noun dictionary is installed for the test. */
    private DictionaryFactory dictionaryFactory;

    /**
     * Resets the token list and obtains the dictionary factory before each test.
     */
    @Before
    public void initDictionary() {
        nouns = Lists.newArrayList();
        // NOTE(review): getFactory() looks like it returns a shared instance, so the custom
        // dictionary installed by a test may remain visible to later tests -- confirm.
        dictionaryFactory = DictionaryFactory.getFactory();
    }

    /**
     * Analyzes the text "고속도로" with a custom noun dictionary containing "고속도로",
     * "고속" and "도로", then verifies the collected tokens.
     *
     * @throws Exception if analysis or token collection fails
     */
    @Test
    public void testCase1() throws Exception {
        // Only the keys are populated; every entry is stored with a null value.
        Map<String, String> customNounDictionaryMap = new HashMap<String, String>();
        customNounDictionaryMap.put("고속도로", null);
        customNounDictionaryMap.put("고속", null);
        customNounDictionaryMap.put("도로", null);
        dictionaryFactory.setCustomNounDictionaryMap(customNounDictionaryMap);

        // The two numeric arguments are presumably the start and end offsets of the
        // term within the input -- confirm against getToken.
        nouns.add(getToken("고속도로", 0, 4));
        nouns.add(getToken("고속도", 0, 3));
        nouns.add(getToken("고속", 0, 2));
        nouns.add(getToken("속도", 1, 3));

        StringReader reader = new StringReader("고속도로");
        // NOTE(review): the meaning of the boolean constructor flag is not visible here.
        Analyzer analyzer = new KoreanAnalyzer(true);
        List<TestToken> extractedTokens;
        try {
            TokenStream stream = analyzer.tokenStream("dummy", reader);
            stream.reset();
            // NOTE(review): end()/close() are never called on the stream in this method;
            // confirm whether collectExtractedNouns takes care of that.
            extractedTokens = collectExtractedNouns(stream);
        } finally {
            // Release the analyzer even when tokenization throws, so a failing run
            // does not leak it.
            analyzer.close();
        }

        verify(nouns, extractedTokens);
    }
}