package org.fastcatsearch.ir.dictionary;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.fastcatsearch.ir.io.CharVector;
import org.fastcatsearch.ir.io.DataInput;
import org.fastcatsearch.ir.io.DataOutput;
import org.fastcatsearch.ir.util.CharVectorHashSet;
import org.fastcatsearch.plugin.analysis.AnalysisPluginSetting.ColumnSetting;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
/**
 * A {@link MapDictionary} whose entries are whitespace-delimited phrases.
 *
 * <p>For every added entry the phrase with all whitespace removed is used as the key
 * handed to the parent dictionary, and the whitespace-separated tokens are used as its
 * values. In addition, a separate word set collects the compacted key plus every token
 * that is not made up solely of ASCII characters.
 *
 * <p>The word set is serialized after the parent's data by {@link #writeTo(OutputStream)}
 * and restored by {@link #readFrom(InputStream)}.
 */
public class SpaceDictionary extends MapDictionary {

    /** Regex for a single whitespace character; separates the tokens of an entry. */
    private final static String DELIMITER = "\\s";

    /** Precompiled {@link #DELIMITER}, so {@link #addEntry} does not recompile it on every call. */
    private static final Pattern DELIMITER_PATTERN = Pattern.compile(DELIMITER);

    /** Finds strings consisting only of ASCII characters (the empty string included). */
    private static final Pattern ptn = Pattern.compile("^[\\x00-\\x7F]*$");

    // Intentionally declared WITHOUT an initializer. Field initializers run after the
    // superclass constructor returns, so an initializer here would overwrite anything
    // assigned while the superclass constructor was running (presumably through the
    // overridden readFrom - confirm against MapDictionary).
    private Set<CharVector> wordSet;

    /** Creates an empty, case-sensitive dictionary. */
    public SpaceDictionary() {
        this(false);
    }

    /**
     * Creates an empty dictionary.
     *
     * @param ignoreCase passed to the parent dictionary and to the word set
     */
    public SpaceDictionary(boolean ignoreCase) {
        super(ignoreCase);
        if (wordSet == null) {
            wordSet = new CharVectorHashSet(ignoreCase);
        }
    }

    /**
     * Creates a dictionary from a file, delegating the loading to the parent constructor.
     *
     * @param file       dictionary file handed to the parent constructor
     * @param ignoreCase passed to the parent dictionary and to the word set
     */
    public SpaceDictionary(File file, boolean ignoreCase) {
        super(file, ignoreCase);
        // If the parent constructor returned without populating the word set, fall back
        // to an empty one so later calls (addEntry, clear, writeTo, ...) do not hit null.
        if (wordSet == null) {
            wordSet = new CharVectorHashSet(ignoreCase);
        }
    }

    /**
     * Creates a dictionary from a stream, delegating the loading to the parent constructor.
     *
     * @param is         stream handed to the parent constructor
     * @param ignoreCase passed to the parent dictionary and to the word set
     */
    public SpaceDictionary(InputStream is, boolean ignoreCase) {
        super(is, ignoreCase);
        // Same fallback as the File constructor: never leave the word set null.
        if (wordSet == null) {
            wordSet = new CharVectorHashSet(ignoreCase);
        }
    }

    /**
     * Returns the live, mutable word set backing this dictionary.
     *
     * @return the internal word set (not a copy)
     */
    public Set<CharVector> getWordSet() {
        return wordSet;
    }

    /**
     * Replaces the word set with the given instance; no copy is made.
     *
     * @param wordSet the set to use from now on
     */
    public void setWordSet(Set<CharVector> wordSet) {
        this.wordSet = wordSet;
    }

    /**
     * Returns a read-only view of the word set.
     *
     * @return an unmodifiable view backed by the internal word set
     */
    public Set<CharVector> getUnmodifiableWordSet() {
        return Collections.unmodifiableSet(wordSet);
    }

    /**
     * Adds one phrase to the dictionary.
     *
     * <p>The phrase with all whitespace removed is added to the word set and is used as
     * the key passed to the parent; the whitespace-separated tokens are passed as the
     * values. Each trimmed token that is not purely ASCII is also added to the word set.
     *
     * @param word       the phrase, tokens separated by whitespace
     * @param values     not used by this implementation; the tokens of {@code word} are
     *                   passed to the parent instead
     * @param columnList column settings forwarded to the parent unchanged
     */
    @Override
    public void addEntry(String word, Object[] values, List<ColumnSetting> columnList) {
        String keyword = DELIMITER_PATTERN.matcher(word).replaceAll("");
        wordSet.add(new CharVector(keyword));

        String[] tokens = DELIMITER_PATTERN.split(word);
        super.addEntry(keyword, tokens, columnList);

        for (String token : tokens) {
            String trimmed = token.trim();
            // Skip ASCII-only tokens (this also skips empty tokens produced by
            // consecutive whitespace); only tokens with non-ASCII characters are kept.
            if (!ptn.matcher(trimmed).find()) {
                wordSet.add(new CharVector(trimmed));
            }
        }
    }

    /**
     * Writes the parent's data, then the word set as a VInt count followed by one
     * UString per word.
     *
     * @param out destination stream
     * @throws IOException if writing fails
     */
    @Override
    public void writeTo(OutputStream out) throws IOException {
        super.writeTo(out);
        DataOutput output = new OutputStreamDataOutput(out);
        // number of words in the word set
        output.writeVInt(wordSet.size());
        // the words themselves
        for (CharVector value : wordSet) {
            output.writeUString(value.array(), value.start(), value.length());
        }
    }

    /**
     * Reads the parent's data, then rebuilds the word set from a VInt count followed by
     * that many UStrings. Any previous word set is discarded.
     *
     * @param in source stream
     * @throws IOException if reading fails
     */
    @Override
    public void readFrom(InputStream in) throws IOException {
        super.readFrom(in);
        DataInput input = new InputStreamDataInput(in);
        wordSet = new CharVectorHashSet(ignoreCase);
        int size = input.readVInt();
        for (int entryInx = 0; entryInx < size; entryInx++) {
            wordSet.add(new CharVector(input.readUString()));
        }
    }

    /**
     * Replaces this dictionary's contents with those of another {@code SpaceDictionary}.
     * The word set instance of the given dictionary is adopted as-is, not copied.
     *
     * @param object the dictionary to take the contents from
     * @throws IllegalArgumentException if {@code object} is null or not a {@code SpaceDictionary}
     */
    @Override
    public void reload(Object object) throws IllegalArgumentException {
        // instanceof is false for null, so no separate null check is needed.
        if (!(object instanceof SpaceDictionary)) {
            throw new IllegalArgumentException("Reload dictionary argument error. argument = " + object);
        }
        super.reload(object);
        SpaceDictionary spaceDictionary = (SpaceDictionary) object;
        this.wordSet = spaceDictionary.getWordSet();
    }

    /** Clears the parent's data and empties the word set. */
    @Override
    public void clear() {
        super.clear();
        wordSet.clear();
    }
}