package com.tistory.devyongsik.analyzer;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.junit.Before;
import org.junit.Test;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.tistory.devyongsik.analyzer.dictionary.DictionaryFactory;
import com.tistory.devyongsik.analyzer.util.AnalyzerTestUtil;
import com.tistory.devyongsik.analyzer.util.TestToken;
/**
 * Checks that a {@link KoreanNounFilter} configured with a single
 * {@link KoreanCompoundNounEngine} emits both the whole compound noun and the
 * component nouns registered for it in the compound dictionary.
 */
public class KoreanCompoundNounEngineTest extends AnalyzerTestUtil {

    /** Text fed to the tokenizer; also the key registered in the compound dictionary. */
    private static final String COMPOUND_NOUN = "월드컵조직위원회분과위";

    /** Tokens the filter is expected to produce, in the order handed to {@code verify}. */
    private final List<TestToken> expectedNouns = Lists.newArrayList();

    private final StringReader reader = new StringReader(COMPOUND_NOUN);

    /** Engines passed to the {@link KoreanNounFilter} under test. */
    private final List<Engine> engines = new ArrayList<Engine>();

    private DictionaryFactory dictionaryFactory;

    /**
     * Builds the expected token list and obtains the dictionary factory.
     *
     * <p>The two integers given to {@code getToken} match the character positions of each
     * word inside {@link #COMPOUND_NOUN} (presumably start/end offsets - confirm against
     * {@code AnalyzerTestUtil}).
     */
    @Before
    public void initDictionary() {
        expectedNouns.add(getToken("분과위", 8, 11));
        expectedNouns.add(getToken("위원회", 5, 8));
        expectedNouns.add(getToken("조직", 3, 5));
        expectedNouns.add(getToken("월드컵", 0, 3));
        expectedNouns.add(getToken(COMPOUND_NOUN, 0, 11));

        dictionaryFactory = DictionaryFactory.getFactory();
    }

    /**
     * Registers the component nouns for {@link #COMPOUND_NOUN}, runs the filter over the
     * input and compares the collected tokens with the expected ones.
     *
     * @throws Exception if the token stream fails while being reset, read or closed
     */
    @Test
    public void testCompoundNounExtract() throws Exception {
        List<String> components = Lists.newArrayList();
        components.add("분과위");
        components.add("위원회");
        components.add("조직");
        components.add("월드컵");

        Map<String, List<String>> compoundDictionary = Maps.newHashMap();
        compoundDictionary.put(COMPOUND_NOUN, components);
        // NOTE(review): getFactory() presumably returns a shared instance, in which case this
        // map stays installed for tests that run afterwards - confirm.
        dictionaryFactory.setCompoundDictionaryMap(compoundDictionary);

        createEngines();

        TokenStream stream = new KoreanNounFilter(new KoreanCharacterTokenizer(reader), engines);
        List<TestToken> extractedTokens;
        try {
            stream.reset();
            extractedTokens = collectExtractedNouns(stream);
        } finally {
            // Release the stream (and the reader behind it) even when reading fails.
            stream.close();
        }

        verify(expectedNouns, extractedTokens);
    }

    /** Adds the compound noun engine, the only engine exercised by this test. */
    private void createEngines() {
        engines.add(new KoreanCompoundNounEngine());
    }
}