// Copyright 2013 Thomas Müller
// This file is part of MarMoT, which is licensed under GPLv3.
package marmot.morph;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import marmot.util.LineIterator;
import marmot.util.StringUtils;
import marmot.util.SymbolTable;
public class HashDictionary extends MorphDictionary {
private static final long serialVersionUID = 1L;
private HashMap<String, int[]> index_map_;
public HashDictionary() {
this(null);
}
public HashDictionary(SymbolTable<String> table) {
super(table);
}
@Override
public void init(MorphDictionaryOptions options) {
super.init(options);
LineIterator iterator = new LineIterator(options.getFilename());
Map<String, Set<Integer>> map = new HashMap<String, Set<Integer>>();
int[] row_indexes = options.getIndexes();
SymbolTable<String> table = getTable();
while (iterator.hasNext()) {
List<String> line = iterator.next();
if (!line.isEmpty()) {
String form = line.get(0);
form = StringUtils.normalize(form, options.getNormalize());
Set<Integer> indexes = map.get(form);
if (indexes == null) {
indexes = new HashSet<Integer>();
map.put(form, indexes);
}
for (int row_index : row_indexes) {
if (row_index > 0 && row_index < line.size()) {
String tag = line.get(row_index);
int index = table.toIndex(tag, true);
indexes.add(index);
}
}
}
}
index_map_ = new HashMap<String, int[]>();
for (Map.Entry<String, Set<Integer>> entry : map.entrySet()) {
String form = entry.getKey();
Set<Integer> set = entry.getValue();
if (set.size() <= options.getLimit()) {
int[] indexes = new int[set.size()];
int i = 0;
for (int index : set) {
indexes[i++] = index;
}
index_map_.put(form, indexes);
}
}
}
@Override
public int[] getIndexes(String form) {
form = StringUtils.normalize(form, getOptions().getNormalize());
return index_map_.get(form);
}
public int size() {
return index_map_.size();
}
}