package edu.uncc.cs.watsonsim.nlp;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.lucene.codecs.bloom.MurmurHash2;
import com.carrotsearch.hppc.IntIntOpenHashMap;
/**
 * A memory-efficient, approximate String-to-int map.
 *
 * <p>Only the 32-bit hash of each key is stored (hash -&gt; int); the key
 * Strings themselves are never retained. Consequently two different Strings
 * whose hashes collide share a single entry, which is what makes the map
 * "approximate".
 *
 * <p>When iterating, the String for each entry is recovered by walking a
 * shared dictionary and reporting every dictionary word whose hash is present
 * in the map. Keys that were inserted but do not occur in the dictionary are
 * therefore not reported by the iterator.
 *
 * @author Sean
 */
public class ApproxStringIntMap implements Iterable<Pair<String, Integer>> {
    /** Dictionary walked during iteration to recover candidate String keys. */
    StringStack dict;
    /** Backing store, keyed by the hash of each String key. */
    IntIntOpenHashMap map = new IntIntOpenHashMap();

    /**
     * Hash a key to the int used as its slot in the backing map.
     *
     * <p>The bytes are always taken as UTF-8 so that the hash of a given
     * String does not depend on the platform default charset.
     */
    private int hash(String x) {
        byte[] b = x.getBytes(StandardCharsets.UTF_8);
        return MurmurHash2.hash(b, 0, 0, b.length);
    }

    /** Create an approximate String-int map using a shared dictionary */
    public ApproxStringIntMap(StringStack dictionary) {
        dict = dictionary;
    }

    /** @return the number of distinct key hashes currently stored */
    public int size() {
        return map.size();
    }

    /** @return {@code true} when no entries are stored */
    public boolean isEmpty() {
        return size() == 0;
    }

    /**
     * Test whether the hash of {@code key} is present. Because only hashes
     * are stored, this can report {@code true} for a String that was never
     * inserted but collides with one that was.
     */
    public boolean containsKey(String key) {
        return map.containsKey(hash(key));
    }

    /**
     * Look up the value stored under the hash of {@code key}.
     *
     * @return whatever the backing map returns for that hash (for HPPC this
     *         is expected to be the int default, 0, when the hash is absent
     *         - confirm against the HPPC version in use)
     */
    public int get(String key) {
        return map.get(hash(key));
    }

    /**
     * Store {@code value} under the hash of {@code key}.
     *
     * @return the backing map's result for the put (presumably the previous
     *         value for that hash - confirm against the HPPC version in use)
     */
    public int put(String key, int value) {
        return map.put(hash(key), value);
    }

    /**
     * Add {@code amount} to the value stored under the hash of {@code key},
     * delegating to the backing map's {@code addTo}.
     *
     * @return the backing map's result for the addition
     */
    public int addTo(String key, int amount) {
        return map.addTo(hash(key), amount);
    }

    /**
     * Remove the entry stored under the hash of {@code key}.
     *
     * @return the backing map's result for the removal
     */
    public int remove(String key) {
        return map.remove(hash(key));
    }

    /** Remove every entry. The shared dictionary is left untouched. */
    public void clear() {
        map.clear();
    }

    /**
     * Iterate the entries in this map - linear in complexity to the vocabulary
     * size!
     *
     * <p>Every word of the dictionary is hashed and checked against the map,
     * so the cost depends on the dictionary, not on {@link #size()}.
     */
    @Override
    public Iterator<Pair<String, Integer>> iterator() {
        return new StringIntMapIterator();
    }

    /**
     * Lazily scans the dictionary and yields a (word, value) pair for each
     * dictionary word whose hash is present in the enclosing map.
     */
    private class StringIntMapIterator implements Iterator<Pair<String, Integer>> {
        private final Iterator<String> dictIterator = dict.iterator();
        /** Entry found by hasNext() but not yet handed out; null if none. */
        private Pair<String, Integer> nextItem;

        @Override
        public boolean hasNext() {
            // Advance through the dictionary until a word with a stored hash
            // is found, or the dictionary is exhausted.
            while (nextItem == null && dictIterator.hasNext()) {
                String key = dictIterator.next();
                int keyHash = hash(key); // hash once for both lookup and fetch
                if (map.containsKey(keyHash)) {
                    nextItem = Pair.of(key, map.get(keyHash));
                }
            }
            return nextItem != null;
        }

        /**
         * @throws NoSuchElementException if the dictionary holds no further
         *         word whose hash is present in the map
         */
        @Override
        public Pair<String, Integer> next() {
            // Look ahead here as well, so next() works even when the caller
            // did not call hasNext() first.
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            Pair<String, Integer> item = nextItem;
            nextItem = null;
            return item;
        }
    }
}