package com.tistory.devyongsik.analyzer;
import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.junit.Before;
import org.junit.Test;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.tistory.devyongsik.analyzer.dictionary.DictionaryFactory;
import com.tistory.devyongsik.analyzer.util.AnalyzerTestUtil;
import com.tistory.devyongsik.analyzer.util.TestToken;
/**
 * Tests for {@link KoreanBaseNounEngine}, exercised through a
 * {@link KoreanNounFilter} wrapped around a {@link KoreanCharacterTokenizer}.
 *
 * <p>Each test installs a custom noun dictionary on the shared
 * {@link DictionaryFactory}, lists the tokens it expects, and then compares
 * them with what the filter actually emits.
 *
 * <p>The two numeric arguments passed to {@code getToken} line up with the
 * character positions of the term inside the input text (presumably start and
 * end offsets; confirm against {@code AnalyzerTestUtil}).
 */
public class KoreanBaseNounEngineTest extends AnalyzerTestUtil {

    /** Expected tokens for the test currently running. */
    private List<TestToken> nouns = null;

    /** Engines handed to the {@link KoreanNounFilter} under test. */
    private List<Engine> engines = null;

    private DictionaryFactory dictionaryFactory;

    /** Resets the expected-token list and the engine list before every test. */
    @Before
    public void initDictionary() {
        nouns = Lists.newArrayList();
        dictionaryFactory = DictionaryFactory.getFactory();
        engines = Lists.newArrayList();
    }

    /** Dictionary nouns embedded in one unbroken run of characters. */
    @Test
    public void testCase1() throws Exception {
        registerDictionary("사랑", "회사", "동료", "동산");

        nouns.add(getToken("동산", 8, 10));
        nouns.add(getToken("동료", 6, 8));
        nouns.add(getToken("회사", 4, 6));
        nouns.add(getToken("사랑", 0, 2));
        nouns.add(getToken("사랑하고회사동료동산", 0, 10));

        analyzeAndVerify("사랑하고회사동료동산");
    }

    /**
     * Dictionary entries that share a prefix ("경", "경찰", "경찰청").
     * Note that "방" is registered but is not among the expected tokens.
     */
    @Test
    public void testCase2() throws Exception {
        registerDictionary("서울", "지방", "경찰", "경찰청", "경", "방");

        nouns.add(getToken("경찰청", 4, 7));
        nouns.add(getToken("경찰", 4, 6));
        nouns.add(getToken("경", 4, 5));
        nouns.add(getToken("지방", 2, 4));
        nouns.add(getToken("서울", 0, 2));
        nouns.add(getToken("서울지방경찰청", 0, 7));

        analyzeAndVerify("서울지방경찰청");
    }

    /** Two multi-character nouns that together make up the whole input. */
    @Test
    public void testCase3() throws Exception {
        registerDictionary("삼성전자", "연수원");

        nouns.add(getToken("연수원", 4, 7));
        nouns.add(getToken("삼성전자", 0, 4));
        nouns.add(getToken("삼성전자연수원", 0, 7));

        analyzeAndVerify("삼성전자연수원");
    }

    /** A single-character entry ("검") that is a prefix of another entry ("검색"). */
    @Test
    public void testCase4() throws Exception {
        registerDictionary("검", "검색", "엔진", "개발자");

        nouns.add(getToken("개발자", 4, 7));
        nouns.add(getToken("엔진", 2, 4));
        nouns.add(getToken("검색", 0, 2));
        nouns.add(getToken("검", 0, 1));
        nouns.add(getToken("검색엔진개발자", 0, 7));

        analyzeAndVerify("검색엔진개발자");
    }

    /** A sentence with several space-separated words, each with a trailing suffix. */
    @Test
    public void testCase5() throws Exception {
        registerDictionary("색인", "방법", "실시", "사용");

        nouns.add(getToken("실시", 18, 20));
        nouns.add(getToken("실시합니다", 18, 23));
        nouns.add(getToken("색인", 14, 16));
        nouns.add(getToken("색인을", 14, 17));
        nouns.add(getToken("사용", 9, 11));
        nouns.add(getToken("사용해서", 9, 13));
        nouns.add(getToken("방법", 5, 7));
        nouns.add(getToken("방법을", 5, 8));
        nouns.add(getToken("여러가지", 0, 4));

        analyzeAndVerify("여러가지 방법을 사용해서 색인을 실시합니다.");
    }

    /** A character that is not in the dictionary ("읔") sitting between two nouns. */
    @Test
    public void testCase6() throws Exception {
        registerDictionary("상품", "판매", "출장소");

        nouns.add(getToken("출장소", 5, 8));
        nouns.add(getToken("판매", 2, 4));
        nouns.add(getToken("상품", 0, 2));
        nouns.add(getToken("상품판매읔출장소", 0, 8));

        analyzeAndVerify("상품판매읔출장소");
    }

    /** A character that is not in the dictionary ("읔") at the very end of the input. */
    @Test
    public void testCase7() throws Exception {
        registerDictionary("검색", "엔진", "개발자");

        nouns.add(getToken("개발자", 4, 7));
        nouns.add(getToken("엔진", 2, 4));
        nouns.add(getToken("검색", 0, 2));
        nouns.add(getToken("검색엔진개발자읔", 0, 8));

        analyzeAndVerify("검색엔진개발자읔");
    }

    /** Input that is exactly one dictionary noun; only a single token is expected. */
    @Test
    public void testCase8() throws Exception {
        registerDictionary("랑콤", "엔진");

        nouns.add(getToken("랑콤", 0, 2));

        analyzeAndVerify("랑콤");
    }

    /**
     * Installs the given words as the custom noun dictionary and then creates
     * the engines, in that order.
     *
     * @param words dictionary keys; every key is stored with a {@code null} value
     */
    private void registerDictionary(String... words) {
        Map<String, String> customNounDictionaryMap = new HashMap<String, String>();
        for (String word : words) {
            customNounDictionaryMap.put(word, null);
        }
        dictionaryFactory.setCustomNounDictionaryMap(customNounDictionaryMap);
        createEngines();
    }

    /**
     * Runs {@code text} through the tokenizer and noun filter and verifies the
     * collected tokens against {@link #nouns}.
     *
     * @param text input handed to the tokenizer
     * @throws Exception if the token stream fails while being reset, consumed or closed
     */
    private void analyzeAndVerify(String text) throws Exception {
        TokenStream stream = new KoreanNounFilter(
                new KoreanCharacterTokenizer(new StringReader(text)), engines);
        List<TestToken> extractedTokens;
        try {
            stream.reset();
            extractedTokens = collectExtractedNouns(stream);
        } finally {
            // Close even when reset or collection throws, so the stream is not leaked.
            stream.close();
        }
        verify(nouns, extractedTokens);
    }

    /** Adds the engine under test to the list passed to the filter. */
    private void createEngines() {
        engines.add(new KoreanBaseNounEngine());
    }
}