package com.tistory.devyongsik.analyzer;

import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.junit.Before;
import org.junit.Test;

import com.google.common.collect.Lists;
import com.tistory.devyongsik.analyzer.dictionary.DictionaryFactory;
import com.tistory.devyongsik.analyzer.util.AnalyzerTestUtil;
import com.tistory.devyongsik.analyzer.util.TestToken;

/**
 * Tests for {@link KoreanLongestNounEngine}, driven through a
 * {@link KoreanNounFilter} wrapped around a {@link KoreanCharacterTokenizer}.
 *
 * <p>Every test follows the same steps: install a custom noun dictionary,
 * create the engine, list the expected tokens, analyze an input string and
 * compare the two lists with {@code verify}.
 *
 * <p>NOTE(review): the dictionary factory is obtained through a static getter,
 * so a custom dictionary installed by one test may still be visible to later
 * tests unless they overwrite it -- confirm against {@code DictionaryFactory}.
 */
public class KoreanLongestNounEngineTest extends AnalyzerTestUtil {

    /** Expected tokens for the current test; reset before each test. */
    private List<TestToken> nouns = null;

    /** Factory used to install the custom noun dictionary for each test. */
    private DictionaryFactory dictionaryFactory = null;

    /** Engines handed to the noun filter; reset before each test. */
    private List<Engine> engines = null;

    /** Creates fresh expectation/engine lists and fetches the dictionary factory. */
    @Before
    public void initDictionary() {
        nouns = Lists.newArrayList();
        engines = Lists.newArrayList();
        dictionaryFactory = DictionaryFactory.getFactory();
    }

    /** Input is exactly the longest dictionary word: only that word is expected. */
    @Test
    public void testCase1() throws Exception {
        registerCustomNouns("서울지방", "경찰청", "서울지방경찰청");
        createEngines();

        nouns.add(getToken("서울지방경찰청", 0, 7));

        verify(nouns, analyzeWithLongestNounEngine("서울지방경찰청"));
    }

    /** Longest dictionary word followed by one extra character ("을"). */
    @Test
    public void testCase2() throws Exception {
        registerCustomNouns("서울지방", "경찰청", "서울지방경찰청");
        createEngines();

        nouns.add(getToken("서울지방경찰청", 0, 7));
        nouns.add(getToken("서울지방경찰청을", 0, 8));

        verify(nouns, analyzeWithLongestNounEngine("서울지방경찰청을"));
    }

    /** Longest dictionary word followed by one extra character ("읔"). */
    @Test
    public void testCase3() throws Exception {
        registerCustomNouns("서울지방", "경찰청", "서울지방경찰청");
        createEngines();

        nouns.add(getToken("서울지방경찰청", 0, 7));
        nouns.add(getToken("서울지방경찰청읔", 0, 8));

        verify(nouns, analyzeWithLongestNounEngine("서울지방경찰청읔"));
    }

    /** Longest dictionary word preceded by one extra character ("읔"). */
    @Test
    public void testCase4() throws Exception {
        registerCustomNouns("서울지방", "경찰청", "서울지방경찰청");
        createEngines();

        nouns.add(getToken("서울지방경찰청", 1, 8));
        nouns.add(getToken("읔서울지방경찰청", 0, 8));

        verify(nouns, analyzeWithLongestNounEngine("읔서울지방경찰청"));
    }

    /** Input is two dictionary words concatenated; the compound itself is not in the dictionary. */
    @Test
    public void testCase5() throws Exception {
        registerCustomNouns("삼성전자", "연수원");
        createEngines();

        nouns.add(getToken("연수원", 4, 7));
        nouns.add(getToken("삼성전자", 0, 4));
        nouns.add(getToken("삼성전자연수원", 0, 7));

        verify(nouns, analyzeWithLongestNounEngine("삼성전자연수원"));
    }

    /** Dictionary holds both short words and longer words that contain them. */
    @Test
    public void testCase6() throws Exception {
        registerCustomNouns("검색", "엔진", "검색엔진", "개발", "개발자");
        createEngines();

        nouns.add(getToken("개발자", 4, 7));
        nouns.add(getToken("검색엔진", 0, 4));
        nouns.add(getToken("검색엔진개발자", 0, 7));

        verify(nouns, analyzeWithLongestNounEngine("검색엔진개발자"));
    }

    /**
     * An extra character ("읔") sits in the middle of the input, so the
     * dictionary entry "상품판매출장소" does not appear contiguously in it.
     */
    @Test
    public void testCase8() throws Exception {
        registerCustomNouns("출장소", "상품", "판매", "상품판매출장소");
        createEngines();

        nouns.add(getToken("출장소", 5, 8));
        nouns.add(getToken("상품", 0, 2));
        nouns.add(getToken("판매", 2, 4));
        nouns.add(getToken("상품판매읔출장소", 0, 8));

        verify(nouns, analyzeWithLongestNounEngine("상품판매읔출장소"));
    }

    /** Adds the engine under test to the list handed to the noun filter. */
    private void createEngines() {
        engines.add(new KoreanLongestNounEngine());
    }

    /**
     * Installs a custom noun dictionary containing exactly the given words.
     *
     * @param words dictionary entries; each is stored with a {@code null} value
     */
    private void registerCustomNouns(String... words) {
        Map<String, String> customNounDictionaryMap = new HashMap<String, String>();
        for (String word : words) {
            customNounDictionaryMap.put(word, null);
        }
        dictionaryFactory.setCustomNounDictionaryMap(customNounDictionaryMap);
    }

    /**
     * Runs {@code text} through the tokenizer and noun filter using the
     * currently configured engines and returns the collected tokens.
     *
     * @param text input to analyze
     * @return tokens gathered by {@code collectExtractedNouns}
     * @throws Exception if resetting, reading or closing the stream fails
     */
    private List<TestToken> analyzeWithLongestNounEngine(String text) throws Exception {
        TokenStream stream =
                new KoreanNounFilter(new KoreanCharacterTokenizer(new StringReader(text)), engines);
        try {
            stream.reset();
            return collectExtractedNouns(stream);
        } finally {
            // Close the stream even when reset() or token collection throws.
            stream.close();
        }
    }
}