package skywriting.examples.skyhout.common;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
/**
 * Output collector that keeps one in-memory hash map per output and writes a
 * map out to that output's {@link SequenceFile.Writer} whenever the map grows
 * beyond a configurable number of entries.
 *
 * <p>Each output {@code i} is written to the path {@code /out/i} on the
 * supplied task file system. Entries are passed through
 * {@link Combiner#combineFinal} immediately before being appended, and a map is
 * emptied after it has been written.
 *
 * <p>NOTE(review): the per-output maps ({@code maps}) and
 * {@code collectWithIndex} are inherited from {@code AbstractOutputCollector};
 * presumably the latter combines the value into the map selected by the
 * partitioner and returns that map's index -- confirm against the superclass.
 *
 * @param <K> key type written to the output files
 * @param <V> type of the values handed to {@link #collect}
 * @param <C> type of the intermediate (combined) values held in memory
 * @param <R> value type written to the output files
 */
public class PartialHashOutputCollector<K extends Writable, V, C, R extends Writable> extends AbstractOutputCollector<K, V, C, R> {

    /** A map is flushed once its size is strictly greater than this value. */
    private final int flushThresh;

    /** One writer per output, indexed like the inherited {@code maps}. */
    private final SequenceFile.Writer[] writers;

    /** Number of outputs, as reported by the task file system at construction. */
    private final int numOutputs;

    /** Produces the final output value for each entry when a map is written. */
    private final Combiner<K, C, V, R> combiner;

    /**
     * Creates a collector that partitions keys with a {@link HashPartitioner}.
     *
     * @param fs             task file system providing the outputs
     * @param conf           Hadoop configuration passed to each writer
     * @param keyClass       class of the keys written to the outputs
     * @param valueClass     class of the values written to the outputs
     * @param flushThreshold map size above which a map is flushed
     * @param combiner       combiner used for the in-memory and final values
     * @throws IOException if an output writer cannot be created
     */
    public PartialHashOutputCollector(SkywritingTaskFileSystem fs, Configuration conf, Class<K> keyClass, Class<R> valueClass, int flushThreshold, Combiner<K, C, V, R> combiner) throws IOException {
        this(fs, conf, keyClass, valueClass, flushThreshold, combiner, new HashPartitioner<K, V>());
    }

    /**
     * Creates a collector with an explicit partitioner.
     *
     * <p>If creating any writer fails, the writers that were already opened are
     * closed before the failure is propagated, so a failed construction does
     * not leak open outputs.
     *
     * @param fs             task file system providing the outputs
     * @param conf           Hadoop configuration passed to each writer
     * @param keyClass       class of the keys written to the outputs
     * @param valueClass     class of the values written to the outputs
     * @param flushThreshold map size above which a map is flushed
     * @param combiner       combiner used for the in-memory and final values
     * @param partitioner    partitioner handed to the superclass
     * @throws IOException if an output writer cannot be created
     */
    public PartialHashOutputCollector(SkywritingTaskFileSystem fs, Configuration conf, Class<K> keyClass, Class<R> valueClass, int flushThreshold, Combiner<K, C, V, R> combiner, Partitioner<K,V> partitioner) throws IOException {
        super(fs.numOutputs(), partitioner, combiner);
        this.numOutputs = fs.numOutputs();
        this.flushThresh = flushThreshold;
        this.combiner = combiner;
        this.writers = new SequenceFile.Writer[numOutputs];

        boolean allOpened = false;
        try {
            for (int i = 0; i < numOutputs; i++) {
                this.maps.add(new HashMap<K, C>());
                this.writers[i] = new SequenceFile.Writer(fs, conf, new Path("/out/" + i), keyClass, valueClass);
            }
            allOpened = true;
        } finally {
            if (!allOpened) {
                // The caller never receives this object, so nobody else can
                // release the writers that were opened before the failure.
                closeQuietly(this.writers);
            }
        }
    }

    /**
     * Collects one key/value pair and flushes the affected map if it has grown
     * beyond the flush threshold.
     *
     * @param key   key of the pair
     * @param value value of the pair
     * @throws IOException if collecting or flushing fails
     */
    @Override
    public void collect(K key, V value) throws IOException {
        int mapIndex = super.collectWithIndex(key, value);
        if (this.maps.get(mapIndex).size() > flushThresh) {
            flush(mapIndex);
        }
    }

    /**
     * Flushes every map and then closes every writer.
     *
     * <p>The writers are closed even when flushing fails; in that case the
     * flush failure is the exception that propagates.
     *
     * @throws IOException if flushing or closing fails
     */
    public void close() throws IOException {
        boolean flushed = false;
        try {
            this.flushAll();
            flushed = true;
        } finally {
            if (flushed) {
                this.closeWriters();
            } else {
                // Flushing already failed: release the writers without letting
                // a secondary close failure mask the original exception.
                closeQuietly(this.writers);
            }
        }
    }

    /**
     * Writes out and empties the map of every output.
     *
     * @throws IOException if writing to an output fails
     */
    public void flushAll() throws IOException {
        for (int i = 0; i < this.numOutputs; i++) {
            flush(i);
        }
    }

    /**
     * Writes the given map to its writer and then clears it.
     *
     * @param mapID index of the map (and writer) to flush
     * @throws IOException if writing to the output fails
     */
    private void flush(int mapID) throws IOException {
        dump(mapID);
        maps.get(mapID).clear();
    }

    /**
     * Closes every writer. All writers are attempted even if one of them fails;
     * the first failure encountered is rethrown once the rest have been closed.
     *
     * @throws IOException the first failure raised while closing a writer
     */
    public void closeWriters() throws IOException {
        IOException firstFailure = null;
        for (SequenceFile.Writer w : this.writers) {
            try {
                w.close();
            } catch (IOException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /**
     * Appends every entry of the given map to the matching writer, converting
     * each in-memory value with {@link Combiner#combineFinal}. The map itself
     * is left untouched.
     *
     * @param mapID index of the map (and writer) to write
     * @throws IOException if appending to the output fails
     */
    private void dump(int mapID) throws IOException {
        SequenceFile.Writer out = writers[mapID];
        for (Map.Entry<K, C> entry : maps.get(mapID).entrySet()) {
            out.append(entry.getKey(), this.combiner.combineFinal(entry.getKey(), entry.getValue()));
        }
    }

    /**
     * Best-effort close used on failure paths: skips slots that were never
     * filled and ignores close failures so that the exception already in
     * flight is the one the caller sees.
     *
     * @param toClose writers to close; entries may be {@code null}
     */
    private static void closeQuietly(SequenceFile.Writer[] toClose) {
        for (SequenceFile.Writer w : toClose) {
            if (w == null) {
                continue;
            }
            try {
                w.close();
            } catch (IOException ignored) {
                // Intentionally ignored: a more relevant exception is already
                // propagating to the caller.
            }
        }
    }
}