// Copyright 2013 Thomas Müller
// This file is part of MarMoT, which is licensed under GPLv3.
package marmot.morph;
import java.util.List;
import java.util.Set;
import marmot.util.CollectableSet;
import marmot.util.LineIterator;
import marmot.util.StringUtils;
import marmot.util.SymbolTable;
import marmot.util.Trie;
/**
 * A {@link HashDictionary} that consults a trie whenever the superclass has
 * no entry for a word form.
 *
 * <p>The trie is filled in {@link #init(MorphDictionaryOptions)} from the
 * file named by the options. Forms are normalized and, if the reverse option
 * is set, reversed before insertion and before lookup (presumably so that
 * forms sharing a suffix share a trie path -- confirm against {@code Trie}).
 */
public class SuffixDictionary extends HashDictionary {

    private static final long serialVersionUID = 1L;

    /** Trie of (possibly reversed) forms; assigned only by {@code init}. */
    private Trie<CollectableSet> trie_;

    /** Creates a dictionary by delegating to the table constructor with {@code null}. */
    public SuffixDictionary() {
        this(null);
    }

    /**
     * Creates a dictionary on top of the given symbol table.
     *
     * @param table symbol table handed to the superclass constructor
     */
    public SuffixDictionary(SymbolTable<String> table) {
        super(table);
    }

    /**
     * Looks up the tag indexes of a word form.
     *
     * <p>The superclass result is returned when it is non-null. Otherwise the
     * form is normalized, optionally reversed, and looked up in the trie.
     *
     * @param word_form the form to look up
     * @return the indexes found, or {@code null} if neither the superclass
     *         nor the trie yields content for the form
     */
    @Override
    public int[] getIndexes(String word_form) {
        int[] exact_match = super.getIndexes(word_form);
        if (exact_match != null) {
            return exact_match;
        }

        // Apply the same normalize/reverse steps that init() applied to the keys.
        String key = StringUtils.normalize(word_form, getOptions().getNormalize());
        if (getOptions().getReverse()) {
            key = StringUtils.reverse(key);
        }

        CollectableSet content = trie_.getContent(key);
        if (content == null) {
            return null;
        }
        Set<Object> values = content.getValue();
        return toArray(values);
    }

    /**
     * Copies the elements of a set into an {@code int} array in the set's
     * iteration order. Every element is cast to {@link Integer}.
     */
    private int[] toArray(Set<Object> set) {
        int[] result = new int[set.size()];
        int next = 0;
        for (Object element : set) {
            result[next++] = (Integer) element;
        }
        return result;
    }

    /**
     * Initializes the superclass and then builds the trie from the file named
     * by {@code options.getFilename()}.
     *
     * <p>For every non-empty line, the first field is the form. Each
     * configured column index that is greater than zero and inside the line
     * is read as a tag, mapped to an index via the symbol table, and added to
     * the trie node of the form. Afterwards content is propagated through the
     * trie using {@code options.getLimit()}.
     *
     * @param options dictionary options (file name, column indexes,
     *                normalization, reversal and limit)
     */
    @Override
    public void init(MorphDictionaryOptions options) {
        super.init(options);

        LineIterator lines = new LineIterator(options.getFilename());
        trie_ = new Trie<CollectableSet>();
        int[] columns = options.getIndexes();
        SymbolTable<String> symbols = getTable();

        while (lines.hasNext()) {
            List<String> fields = lines.next();
            if (fields.isEmpty()) {
                continue;
            }

            String key = StringUtils.normalize(fields.get(0), options.getNormalize());
            if (options.getReverse()) {
                key = StringUtils.reverse(key);
            }
            Trie<CollectableSet> node = trie_.addWord(key, null);

            for (int column : columns) {
                // Column 0 holds the form itself; columns past the end are ignored.
                if (column <= 0 || column >= fields.size()) {
                    continue;
                }
                String tag = fields.get(column);
                // NOTE(review): the 'true' flag presumably inserts unseen tags -- confirm in SymbolTable.
                int tag_index = symbols.toIndex(tag, true);
                node.add(new CollectableSet(tag_index));
            }
        }

        trie_.propagateContent(options.getLimit());
    }
}