package org.fastcatsearch.ir.dictionary;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.fastcatsearch.ir.io.CharVector;
import org.fastcatsearch.ir.io.DataInput;
import org.fastcatsearch.ir.io.DataOutput;
import org.fastcatsearch.ir.util.CharVectorHashSet;
import org.fastcatsearch.plugin.analysis.AnalysisPluginSetting.ColumnSetting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Dictionary that maps a keyword to an array of column values.
 *
 * <p>Each column value is either a single {@link CharVector} or, when the column has a
 * separator configured, a {@code CharVector[]} holding the split parts. Every value word
 * that is added through {@link #addEntry} is also collected in a word set.
 *
 * <p>Binary layout produced by {@link #writeTo} and consumed by {@link #readFrom}:
 * <pre>
 *   vint  entryCount
 *   for each entry:
 *     ustring key
 *     vint    valueCount
 *     for each value:
 *       byte type   (0 = no value, 1 = single CharVector, 2 = CharVector[])
 *       type 1: ustring
 *       type 2: vint length, then that many ustrings
 *   vint  wordCount
 *   for each word: ustring
 * </pre>
 */
public class CustomDictionary extends SourceDictionary {
    private static final Logger logger = LoggerFactory.getLogger(CustomDictionary.class);

    /** Type marker for a value slot that holds nothing serializable. */
    private static final int TYPE_NONE = 0;
    /** Type marker for a value stored as a single CharVector. */
    private static final int TYPE_SINGLE = 1;
    /** Type marker for a value stored as a CharVector[]. */
    private static final int TYPE_ARRAY = 2;

    private Set<CharVector> wordSet;
    private Map<CharVector, Object[]> map;

    /** Creates an empty, case-sensitive dictionary. */
    public CustomDictionary() {
        this(false);
    }

    /**
     * Creates an empty dictionary.
     *
     * @param ignoreCase passed to the superclass and to the word set
     */
    public CustomDictionary(boolean ignoreCase) {
        super(ignoreCase);
        map = new HashMap<CharVector, Object[]>();
        wordSet = new CharVectorHashSet(ignoreCase);
    }

    /**
     * Loads a dictionary from a binary file written by {@link #writeTo}.
     *
     * <p>A missing file or a read failure is logged, not thrown; the dictionary is then
     * left with whatever was loaded before the failure (empty if nothing was read).
     *
     * @param file       dictionary file to read
     * @param ignoreCase passed to the superclass and to the word set
     */
    public CustomDictionary(File file, boolean ignoreCase) {
        super(ignoreCase);
        // Initialize both collections up front so the instance is usable even if loading fails.
        map = new HashMap<CharVector, Object[]>();
        wordSet = new CharVectorHashSet(ignoreCase);
        if (!file.exists()) {
            logger.error("사전파일이 존재하지 않습니다. file={}", file.getAbsolutePath());
            return;
        }
        InputStream is = null;
        try {
            is = new FileInputStream(file);
            readFrom(is);
        } catch (IOException e) {
            logger.error("", e);
        } finally {
            // Close on every path; the original leaked the stream when readFrom threw.
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    logger.error("", e);
                }
            }
        }
    }

    /**
     * Loads a dictionary from a stream written by {@link #writeTo}. The stream is not closed.
     *
     * <p>A read failure is logged, not thrown.
     *
     * @param is         stream to read from
     * @param ignoreCase passed to the superclass and to the word set
     */
    public CustomDictionary(InputStream is, boolean ignoreCase) {
        super(ignoreCase);
        // Initialize both collections up front so a failed read does not leave null fields.
        map = new HashMap<CharVector, Object[]>();
        wordSet = new CharVectorHashSet(ignoreCase);
        try {
            readFrom(is);
        } catch (IOException e) {
            logger.error("", e);
        }
    }

    /** @return the live (mutable) set of value words */
    public Set<CharVector> getWordSet() {
        return wordSet;
    }

    /** @return a read-only view of the keyword-to-values map */
    public Map<CharVector, Object[]> getUnmodifiableMap() {
        return Collections.unmodifiableMap(map);
    }

    /** @return the live (mutable) keyword-to-values map */
    public Map<CharVector, Object[]> map() {
        return map;
    }

    /** Replaces the keyword-to-values map with the given instance (no copy is made). */
    public void setMap(Map<CharVector, Object[]> map) {
        this.map = map;
    }

    /**
     * Serializes the map and the word set in the layout described on the class.
     *
     * @param out destination stream; not closed by this method
     * @throws IOException if writing fails
     */
    @Override
    public void writeTo(OutputStream out) throws IOException {
        DataOutput output = (DataOutput) new OutputStreamDataOutput(out);

        // write size of map
        output.writeVInt(map.size());
        // write key and value map
        for (Map.Entry<CharVector, Object[]> entry : map.entrySet()) {
            // write key
            CharVector key = entry.getKey();
            output.writeUString(key.array(), key.start(), key.length());
            // write values
            Object[] values = entry.getValue();
            output.writeVInt(values.length);
            for (Object value : values) {
                if (value instanceof CharVector) {
                    output.writeByte(TYPE_SINGLE);
                    CharVector v = (CharVector) value;
                    output.writeUString(v.array(), v.start(), v.length());
                } else if (value instanceof CharVector[]) {
                    output.writeByte(TYPE_ARRAY);
                    CharVector[] list = (CharVector[]) value;
                    output.writeVInt(list.length);
                    for (CharVector v : list) {
                        output.writeUString(v.array(), v.start(), v.length());
                    }
                } else {
                    // Always emit a type byte: readFrom reads one per value slot, so
                    // skipping it (as before) would misalign everything that follows.
                    output.writeByte(TYPE_NONE);
                }
            }
        }

        // write word set
        output.writeVInt(wordSet.size());
        for (CharVector word : wordSet) {
            output.writeUString(word.array(), word.start(), word.length());
        }
    }

    /**
     * Replaces the current map and word set with the content read from {@code in}.
     *
     * @param in source stream in the layout described on the class; not closed by this method
     * @throws IOException if reading fails
     */
    @Override
    public void readFrom(InputStream in) throws IOException {
        DataInput input = new InputStreamDataInput(in);

        map = new HashMap<CharVector, Object[]>();
        int size = input.readVInt();
        for (int entryInx = 0; entryInx < size; entryInx++) {
            CharVector key = new CharVector(input.readUString());
            int valueLength = input.readVInt();
            Object[] values = new Object[valueLength];
            for (int valueInx = 0; valueInx < valueLength; valueInx++) {
                int type = input.readByte();
                if (type == TYPE_SINGLE) {
                    values[valueInx] = new CharVector(input.readUString());
                } else if (type == TYPE_ARRAY) {
                    int len = input.readVInt();
                    CharVector[] list = new CharVector[len];
                    for (int j = 0; j < len; j++) {
                        list[j] = new CharVector(input.readUString());
                    }
                    // The original read the array but never stored it, losing the value.
                    values[valueInx] = list;
                }
                // Any other type marker carries no payload; the slot stays null.
            }
            map.put(key, values);
        }

        wordSet = new CharVectorHashSet(ignoreCase);
        size = input.readVInt();
        for (int entryInx = 0; entryInx < size; entryInx++) {
            wordSet.add(new CharVector(input.readUString()));
        }
    }

    /**
     * Adds (or replaces) the entry for {@code keyword}.
     *
     * <p>The keyword is trimmed and stripped of whitespace before being used as the key;
     * a null or blank keyword is ignored. For column {@code i}, if the matching column
     * setting has a non-empty separator the value is split on it (the separator is handed
     * to {@link String#split(String)}, i.e. interpreted as a regular expression) and stored
     * as a {@code CharVector[]} of trimmed parts; otherwise it is stored as one
     * {@code CharVector}. All stored words are also added to the word set.
     *
     * @param keyword           entry key; null or blank means "do nothing"
     * @param values            column values; each is converted with {@code toString()}
     * @param columnSettingList per-column settings; may be null or shorter than
     *                          {@code values}, in which case the affected columns are
     *                          treated as having no separator
     */
    @Override
    public void addEntry(String keyword, Object[] values, List<ColumnSetting> columnSettingList) {
        if (keyword == null) {
            return;
        }
        keyword = keyword.trim();
        if (keyword.length() == 0) {
            return;
        }

        CharVector cv = new CharVector(keyword).removeWhitespaces();
        Object[] list = new Object[values.length];
        for (int i = 0; i < values.length; i++) {
            String value = values[i].toString();

            // addSourceLineEntry passes no column settings, so tolerate a missing list/entry.
            String separator = null;
            if (columnSettingList != null && i < columnSettingList.size()) {
                ColumnSetting columnSetting = columnSettingList.get(i);
                if (columnSetting != null) {
                    separator = columnSetting.getSeparator();
                }
            }

            // If a separator exists, split and store as CharVector[]; otherwise store a single CharVector.
            if (separator != null && separator.length() > 0) {
                String[] e = value.split(separator);
                CharVector[] el = new CharVector[e.length];
                for (int j = 0; j < e.length; j++) {
                    el[j] = new CharVector(e[j].trim());
                    wordSet.add(el[j]);
                }
                list[i] = el;
            } else {
                CharVector val = new CharVector(value);
                list[i] = val;
                wordSet.add(val);
            }
        }
        map.put(cv, list);
    }

    /**
     * Parses one tab-separated source line and adds it as an entry without column settings.
     *
     * <p>A line with exactly two fields is added as {@code keyword -> [value]}. A line with
     * a single field is forwarded with a null keyword, which {@link #addEntry} ignores.
     * Lines with more than two fields are skipped.
     *
     * @param line raw source line
     */
    @Override
    public void addSourceLineEntry(String line) {
        String[] kv = line.split("\t");
        if (kv.length == 1) {
            String value = kv[0].trim();
            // No keyword available: addEntry returns immediately for a null keyword.
            addEntry(null, new Object[] { value }, null);
        } else if (kv.length == 2) {
            String keyword = kv[0].trim();
            String value = kv[1].trim();
            addEntry(keyword, new Object[] { value }, null);
        }
    }

    /**
     * Adopts the map and word set of another {@code CustomDictionary} (by reference).
     *
     * @param object the dictionary to take the data from
     * @throws IllegalArgumentException if {@code object} is null or not a {@code CustomDictionary}
     */
    @Override
    public void reload(Object object) throws IllegalArgumentException {
        if (object instanceof CustomDictionary) {
            CustomDictionary customDictionary = (CustomDictionary) object;
            this.map = customDictionary.map();
            // Keep the word set in step with the map; the original left the old set in place.
            this.wordSet = customDictionary.getWordSet();
        } else {
            throw new IllegalArgumentException("Reload dictionary argument error. argument = " + object);
        }
    }

    /** Clears the superclass state, the word set and the map. */
    @Override
    public void clear() {
        super.clear();
        wordSet.clear();
        map.clear();
    }

    /** Replaces the word set with the given instance (no copy is made). */
    public void setWordSet(Set<CharVector> wordSet) {
        this.wordSet = wordSet;
    }
}