package org.fastcatsearch.ir.dictionary;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.fastcatsearch.ir.io.CharVector;
import org.junit.Assert;
import org.junit.Test;
/**
 * Round-trip and lookup tests for the dictionary implementations in this package
 * ({@code SynonymDictionary}, {@code MapDictionary}, {@code CustomDictionary},
 * {@code SetDictionary} and {@code SpaceDictionary}).
 *
 * <p>Most tests follow the same shape: build a dictionary, serialize it to a byte array,
 * read it back, serialize the restored copy again, read that second serialization back,
 * and finally compare the two serialized forms. Intermediate results are printed to stdout
 * for manual inspection.
 */
public class DictionaryTest {

    /**
     * Serializes a synonym dictionary, restores it into a second instance and prints the
     * restored map and word set. Finally looks up a mixed-case word and prints the result.
     */
    @Test
    public void testSynonymDictionary() throws IOException {
        SynonymDictionary dictionary = new SynonymDictionary(true);
        dictionary.addEntry("마우스", new String[] { "mouse, 로지텍" });
        dictionary.addEntry(null, new String[] { "엘지모니터, monitor, 광시야각, 마우스" });

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        dictionary.writeTo(baos);
        byte[] data = baos.toByteArray();

        ByteArrayInputStream bais = new ByteArrayInputStream(data);
        SynonymDictionary dictionary2 = new SynonymDictionary();
        dictionary2.readFrom(bais);

        System.out.println("---- synonym map---");
        Map<CharVector, CharVector[]> map = dictionary2.map();
        for (Entry<CharVector, CharVector[]> entry : map.entrySet()) {
            System.out.println(entry.getKey() + ": " + join(entry.getValue()));
        }

        System.out.println("---- word set---");
        for (CharVector cv : dictionary2.getWordSet()) {
            System.out.println(cv);
        }

        // NOTE(review): this lookup goes against the original dictionary, not the restored
        // one - confirm that is intended. join() tolerates a missing entry (null).
        String word = "MONitor";
        CharVector[] r = dictionary.map().get(new CharVector(word));
        System.out.println(word + " >> " + join(r));
    }

    /**
     * Joins the string form of each element with {@code ", "}.
     *
     * @param list the elements to join; may be {@code null} (e.g. a failed map lookup)
     * @return the joined text, an empty string for an empty array, or {@code "null"} when
     *         {@code list} itself is {@code null}
     */
    private String join(CharVector[] list) {
        if (list == null) {
            // A missed lookup should still be printable instead of aborting the test with an NPE.
            return "null";
        }
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < list.length; i++) {
            if (i > 0) {
                result.append(", ");
            }
            result.append(list[i].toString());
        }
        return result.toString();
    }

    /**
     * Asserts that two serialized forms have the same length and prints every byte pair,
     * flagging positions that differ.
     *
     * <p>Exact byte equality is not asserted (the {@code assertArrayEquals} check was
     * disabled in this test); mismatches are only reported on stdout.
     *
     * @param first  bytes produced by the first serialization
     * @param second bytes produced by serializing the restored dictionary
     */
    private void assertSameLengthAndPrintDiff(byte[] first, byte[] second) {
        Assert.assertEquals(first.length, second.length);
        for (int i = 0; i < second.length; i++) {
            System.out.println(first[i] + ":" + second[i]);
            if (first[i] != second[i]) {
                System.out.println(">>>>>>>>>>>>>>>>");
            }
        }
    }

    /**
     * Writes a map dictionary, reads it back, writes the restored copy and reads that copy
     * back again, checking that the key can be looked up at every stage.
     */
    @Test
    public void testMapDictionary() throws IOException {
        boolean ignoreCase = true;
        MapDictionary dictionary = new MapDictionary(ignoreCase);
        dictionary.addEntry("마우스", new String[] { "mouse", "로지텍" });
        dictionary.addEntry("모니터", new String[] { "엘지모니터", "monitor", "광시야각" });

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        dictionary.writeTo(out);
        out.close();

        CharVector[] result = dictionary.getUnmodifiableMap().get(new CharVector("모니터"));
        Assert.assertNotNull("entry missing in original dictionary", result);
        System.out.println(result[0] + "," + result[1]);
        byte[] buffer = out.toByteArray();

        // Read it back.
        ByteArrayInputStream bais = new ByteArrayInputStream(buffer);
        MapDictionary dictionary2 = new MapDictionary(bais, ignoreCase);
        bais.close();
        CharVector[] result2 = dictionary2.getUnmodifiableMap().get(new CharVector("모니터"));
        Assert.assertNotNull("entry missing after first read", result2);
        System.out.println(result2[0] + "," + result2[1]);

        // Write it out again.
        ByteArrayOutputStream out2 = new ByteArrayOutputStream();
        dictionary2.writeTo(out2);
        out2.close();
        byte[] buffer2 = out2.toByteArray();

        // Read it back again - from the re-serialized bytes, so the second write is
        // actually exercised (previously the first buffer was read a second time).
        ByteArrayInputStream bais2 = new ByteArrayInputStream(buffer2);
        MapDictionary dictionary3 = new MapDictionary(bais2, ignoreCase);
        bais2.close();
        CharVector[] result3 = dictionary3.getUnmodifiableMap().get(new CharVector("모니터"));
        Assert.assertNotNull("entry missing after second read", result3);
        System.out.println(result3[0] + "," + result3[1]);

        assertSameLengthAndPrintDiff(buffer, buffer2);
    }

    /**
     * Same round trip as {@link #testMapDictionary()}, for a custom dictionary whose entry
     * values mix strings and integers.
     */
    @Test
    public void testCustomDictionary() throws IOException {
        boolean ignoreCase = true;
        CustomDictionary dictionary = new CustomDictionary(ignoreCase);
        dictionary.addEntry("마우스", new Object[] { "mouse", "로지텍", 1 });
        dictionary.addEntry("모니터", new Object[] { 0, "엘지모니터", "monitor", "광시야각", 2 });

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        dictionary.writeTo(out);
        out.close();

        Object[] result = dictionary.map().get(new CharVector("모니터"));
        Assert.assertNotNull("entry missing in original dictionary", result);
        System.out.println(result.length + ":" + result[0]);
        System.out.println(result[0] + "," + result[1]);
        byte[] buffer = out.toByteArray();

        // Read it back.
        ByteArrayInputStream bais = new ByteArrayInputStream(buffer);
        CustomDictionary dictionary2 = new CustomDictionary(bais, ignoreCase);
        bais.close();
        Object[] result2 = dictionary2.getUnmodifiableMap().get(new CharVector("모니터"));
        Assert.assertNotNull("entry missing after first read", result2);
        System.out.println(result2[0] + "," + result2[1]);

        // Write it out again.
        ByteArrayOutputStream out2 = new ByteArrayOutputStream();
        dictionary2.writeTo(out2);
        out2.close();
        byte[] buffer2 = out2.toByteArray();

        // Read it back again - from the re-serialized bytes, so the second write is
        // actually exercised (previously the first buffer was read a second time).
        ByteArrayInputStream bais2 = new ByteArrayInputStream(buffer2);
        CustomDictionary dictionary3 = new CustomDictionary(bais2, ignoreCase);
        bais2.close();
        Object[] result3 = dictionary3.getUnmodifiableMap().get(new CharVector("모니터"));
        Assert.assertNotNull("entry missing after second read", result3);
        System.out.println(result3[0] + "," + result3[1]);

        assertSameLengthAndPrintDiff(buffer, buffer2);
    }

    /**
     * Round-trips a set dictionary holding three terms and prints, at every stage, whether
     * each term is reported as contained. Membership is printed only, not asserted.
     */
    @Test
    public void testHashSet() throws IOException {
        SetDictionary dictionary = new SetDictionary();
        String[] terms = new String[] { "삼성", "LG", "애플" };
        for (String term : terms) {
            dictionary.addEntry(term, null);
        }

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        dictionary.writeTo(out);
        out.close();

        for (String term : terms) {
            boolean contains = dictionary.getUnmodifiableSet().contains(new CharVector(term));
            System.out.println("is set has term " + term + " ? " + contains);
        }
        byte[] buffer = out.toByteArray();

        // Read it back.
        ByteArrayInputStream bais = new ByteArrayInputStream(buffer);
        SetDictionary dictionary2 = new SetDictionary(bais, true);
        bais.close();
        for (String term : terms) {
            boolean contains = dictionary2.getUnmodifiableSet().contains(new CharVector(term));
            System.out.println("is set2 has term " + term + " ? " + contains);
        }

        // Write it out again.
        ByteArrayOutputStream out2 = new ByteArrayOutputStream();
        dictionary2.writeTo(out2);
        out2.close();
        byte[] buffer2 = out2.toByteArray();

        // Read it back again - from the re-serialized bytes, so the second write is
        // actually exercised (previously the first buffer was read a second time).
        ByteArrayInputStream bais2 = new ByteArrayInputStream(buffer2);
        SetDictionary dictionary3 = new SetDictionary(bais2, true);
        bais2.close();
        for (String term : terms) {
            boolean contains = dictionary3.getUnmodifiableSet().contains(new CharVector(term));
            System.out.println("is set3 has term " + term + " ? " + contains);
        }

        assertSameLengthAndPrintDiff(buffer, buffer2);
    }

    /**
     * Same round trip as {@link #testHashSet()}, but without adding any entries first.
     */
    @Test
    public void testTagProbDictionary() throws IOException {
        SetDictionary dictionary = new SetDictionary();
        String[] terms = new String[] { "삼성", "LG", "애플" };
        // No entries are added here (the addEntry call for each term is disabled), so the
        // dictionary that gets serialized is whatever a freshly constructed one contains.

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        dictionary.writeTo(out);
        out.close();

        for (String term : terms) {
            boolean contains = dictionary.getUnmodifiableSet().contains(new CharVector(term));
            System.out.println("is set has term " + term + " ? " + contains);
        }
        byte[] buffer = out.toByteArray();

        // Read it back.
        ByteArrayInputStream bais = new ByteArrayInputStream(buffer);
        SetDictionary dictionary2 = new SetDictionary(bais, true);
        bais.close();
        for (String term : terms) {
            boolean contains = dictionary2.getUnmodifiableSet().contains(new CharVector(term));
            System.out.println("is set2 has term " + term + " ? " + contains);
        }

        // Write it out again.
        ByteArrayOutputStream out2 = new ByteArrayOutputStream();
        dictionary2.writeTo(out2);
        out2.close();
        byte[] buffer2 = out2.toByteArray();

        // Read it back again - from the re-serialized bytes, so the second write is
        // actually exercised (previously the first buffer was read a second time).
        ByteArrayInputStream bais2 = new ByteArrayInputStream(buffer2);
        SetDictionary dictionary3 = new SetDictionary(bais2, true);
        bais2.close();
        for (String term : terms) {
            boolean contains = dictionary3.getUnmodifiableSet().contains(new CharVector(term));
            System.out.println("is set3 has term " + term + " ? " + contains);
        }

        assertSameLengthAndPrintDiff(buffer, buffer2);
    }

    /**
     * Adds comma-separated word groups to a space dictionary and asserts that looking up
     * the concatenated form returns the individual words in order.
     */
    @Test
    public void testSpaceDictionary() {
        SpaceDictionary spaceDictionary = new SpaceDictionary(true);
        spaceDictionary.addEntry("nike, air", null);
        spaceDictionary.addEntry("진, 청바지", null);
        spaceDictionary.addEntry("맥북, 프로, 레티나", null);

        for (Map.Entry<CharVector, CharVector[]> entry : spaceDictionary.map().entrySet()) {
            System.out.println(entry.getKey() + " : " + join(entry.getValue()));
        }

        // Mixed-case key on purpose: the dictionary was created with ignoreCase = true.
        CharVector[] result = spaceDictionary.map().get(new CharVector("nIKEAIR"));
        Assert.assertNotNull("no entry for nIKEAIR", result);
        Assert.assertEquals("nike", result[0].toString());
        Assert.assertEquals("air", result[1].toString());

        result = spaceDictionary.map().get(new CharVector("진청바지"));
        Assert.assertNotNull("no entry for 진청바지", result);
        Assert.assertEquals("진", result[0].toString());
        Assert.assertEquals("청바지", result[1].toString());

        result = spaceDictionary.map().get(new CharVector("맥북프로레티나"));
        Assert.assertNotNull("no entry for 맥북프로레티나", result);
        Assert.assertEquals("맥북", result[0].toString());
        Assert.assertEquals("프로", result[1].toString());
        Assert.assertEquals("레티나", result[2].toString());
    }
}