package skywriting.examples.grep;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
/**
 * Output collector that keeps one in-memory {@link HashMap} per partition and
 * writes a partition's entries to its own {@link DataOutputStream} once the
 * map has grown past a configurable size.
 *
 * <p>The {@code maps} and {@code comb} members assigned in the constructor are
 * not declared in this class; presumably they are inherited from
 * {@code AbstractOutputCollector} (not visible here, to be confirmed).
 *
 * @param <K> key type, written with {@code Writable.write}
 * @param <V> value type, written with {@code Writable.write}
 */
public class PartialHashOutputCollector<K extends Writable, V extends Writable> extends AbstractOutputCollector<K, V> {
    /** A partition is written out by {@link #collect} once its size exceeds this value. */
    int flushThresh;

    /** Output streams indexed by partition number; entry {@code i} receives partition {@code i}. */
    DataOutputStream[] os;

    /**
     * Creates a collector with {@code numMaps} empty partitions.
     *
     * @param out            one output stream per partition, indexed by partition number
     * @param numMaps        number of partitions (hash maps) to create
     * @param flushThreshold partition size that must be exceeded before
     *                       {@link #collect} writes the partition out
     * @param combiner       combiner stored in {@code comb}; how it is applied is
     *                       decided by the superclass (not visible here)
     */
    public PartialHashOutputCollector(DataOutputStream[] out, int numMaps, int flushThreshold, Combiner<V> combiner) {
        flushThresh = flushThreshold;
        os = out;
        comb = combiner;
        maps = new ArrayList<Map<K, V>>(numMaps);
        for (int i = 0; i < numMaps; i++) {
            maps.add(new HashMap<K, V>());
        }
    }

    /**
     * Hands the pair to the superclass and then writes out the key's partition
     * if that partition now holds more than {@code flushThresh} entries.
     *
     * @param key   key to collect
     * @param value value to collect
     * @throws IOException if the superclass call or writing the partition fails
     */
    @Override
    public void collect(K key, V value) throws IOException {
        // NOTE(review): presumably this stores/combines the pair in the
        // matching partition map; the implementation is not visible here.
        super.collect(key, value);

        int nMaps = maps.size();

        // Work out which HashMap (partition) this key belongs to.
        // Take the remainder BEFORE the absolute value: negating
        // Integer.MIN_VALUE overflows and stays negative, which used to yield
        // a negative index. |hc % nMaps| is always < nMaps, so Math.abs is
        // safe here, and the result matches the previous formula for every
        // other hash code.
        int hc = key.hashCode();
        int targetMap = Math.abs(hc % nMaps);

        if (maps.get(targetMap).size() > flushThresh) {
            flush(targetMap);
        }
    }

    /**
     * Writes out and clears every partition, in partition order.
     *
     * @throws IOException if writing any entry fails
     */
    public void flushAll() throws IOException {
        for (int i = 0; i < maps.size(); i++) {
            flush(i);
        }
    }

    /**
     * Writes the entries of one partition to its stream and then empties the
     * partition. The underlying stream itself is not flushed or closed here.
     *
     * @param mapID partition number
     * @throws IOException if writing any entry fails
     */
    private void flush(int mapID) throws IOException {
        dump(mapID);
        maps.get(mapID).clear();
    }

    /**
     * Writes every entry of one partition to {@code os[mapID]}, key first and
     * value second, in the map's iteration order. The map is left unchanged.
     *
     * @param mapID partition number
     * @throws IOException if writing any entry fails
     */
    private void dump(int mapID) throws IOException {
        Map<K, V> partition = maps.get(mapID);
        DataOutputStream target = os[mapID];
        for (Map.Entry<K, V> entry : partition.entrySet()) {
            entry.getKey().write(target);
            entry.getValue().write(target);
        }
    }
}