package edu.stanford.nlp.coref.statistical;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
/**
 * Maps a {@code Counter<K>} to a {@link CompressedFeatureVector} (parallel lists of integer
 * ids and double values), assigning each distinct key a stable integer id so compressed
 * vectors take up much less memory than the original counters.
 * <p>
 * Not thread-safe: both maps are mutated during {@link #compress}.
 * @author Kevin Clark
 */
public class Compressor<K> implements Serializable {
  private static final long serialVersionUID = 364548642855692442L;

  /** key -> integer id assigned on first sight. */
  private final Map<K, Integer> index = new HashMap<>();
  /** integer id -> key, for decompression. */
  private final Map<Integer, K> inverse = new HashMap<>();

  public Compressor() {
  }

  /**
   * Compresses {@code counts} into parallel id/value lists, registering any previously
   * unseen keys in the index as a side effect.
   */
  public CompressedFeatureVector compress(Counter<K> counts) {
    List<Integer> ids = new ArrayList<>(counts.size());
    List<Double> vals = new ArrayList<>(counts.size());
    for (Map.Entry<K, Double> entry : counts.entrySet()) {
      ids.add(idOf(entry.getKey()));
      vals.add(entry.getValue());
    }
    return new CompressedFeatureVector(ids, vals);
  }

  /** Returns the id for {@code key}, assigning a fresh one (the current index size) if unseen. */
  private int idOf(K key) {
    Integer id = index.get(key);
    if (id == null) {
      id = index.size();
      inverse.put(id, key);
      index.put(key, id);
    }
    return id;
  }

  /** Reconstructs a {@link Counter} from a vector previously built by this compressor. */
  public Counter<K> uncompress(CompressedFeatureVector cvf) {
    Counter<K> counts = new ClassicCounter<>();
    int n = cvf.keys.size();
    for (int i = 0; i < n; i++) {
      counts.incrementCount(inverse.get(cvf.keys.get(i)), cvf.values.get(i));
    }
    return counts;
  }
}