package com.tistory.devyongsik.analyzer;

import java.io.StringReader;
import java.util.List;

import org.apache.lucene.analysis.TokenStream;
import org.junit.Before;
import org.junit.Test;

import com.google.common.collect.Lists;
import com.tistory.devyongsik.analyzer.util.AnalyzerTestUtil;
import com.tistory.devyongsik.analyzer.util.TestToken;

/**
 * Tests for {@link KoreanMorphEngine} when it is plugged into a
 * {@link KoreanNounFilter} that wraps a {@link KoreanCharacterTokenizer}.
 *
 * <p>Each test registers the tokens it expects in {@code nouns}, runs the
 * analysis chain over an input string and hands both lists to the inherited
 * {@code verify} helper.
 *
 * @author need4spd, need4spd@cplanet.co.kr, 2011. 10. 14.
 */
public class KoreanMorphEngineTest extends AnalyzerTestUtil {

    /** Tokens the current test expects; re-created before every test. */
    private List<TestToken> nouns = null;

    /** Engines handed to the {@link KoreanNounFilter}; re-created before every test. */
    private List<Engine> engines = null;

    /**
     * Resets the expected-token list and builds an engine list that contains
     * only a fresh {@link KoreanMorphEngine}.
     */
    @Before
    public void initDictionary() {
        nouns = Lists.newArrayList();
        engines = Lists.newArrayList();
        engines.add(new KoreanMorphEngine());
    }

    /**
     * Analyzes a purely Korean sentence and checks the extracted tokens
     * against the five expected entries registered below.
     */
    @Test
    public void testCase1() throws Exception {
        // NOTE(review): the two ints are presumably start/end offsets - confirm in AnalyzerTestUtil.
        nouns.add(getToken("기본사전이변경", 0, 7));
        nouns.add(getToken("기본", 0, 2));
        nouns.add(getToken("전이", 3, 5));
        nouns.add(getToken("변경", 5, 7));
        nouns.add(getToken("기본사전이변경되었습니다", 0, 12));

        analyzeAndVerify("기본사전이변경되었습니다");
    }

    /**
     * Analyzes an input that mixes Latin and Korean characters and checks the
     * extracted tokens against the two expected entries registered below.
     */
    @Test
    public void testCase2() throws Exception {
        nouns.add(getToken("worldcup", 0, 8));
        nouns.add(getToken("경기장", 8, 11));

        analyzeAndVerify("worldcup경기장");
    }

    /**
     * Runs {@code text} through the tokenizer/filter chain, collects the
     * resulting tokens and passes them, together with {@code nouns}, to the
     * inherited {@code verify} helper.
     *
     * <p>The stream is closed in a {@code finally} block so that it is released
     * even when resetting the stream or collecting the tokens throws.
     *
     * @param text the input to analyze
     * @throws Exception if the token stream or an inherited helper fails
     */
    private void analyzeAndVerify(String text) throws Exception {
        TokenStream stream =
                new KoreanNounFilter(new KoreanCharacterTokenizer(new StringReader(text)), engines);

        List<TestToken> extractedTokens;
        try {
            stream.reset();
            extractedTokens = collectExtractedNouns(stream);
        } finally {
            stream.close();
        }

        // Verification runs after the stream is closed, as in the original tests.
        verify(nouns, extractedTokens);
    }
}