/*
* Copyright (c) 2010 Chris Smowton <chris.smowton@cl.cam.ac.uk>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
package uk.co.mrry.mercator.mapreduce;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.TreeMap;
import java.util.Map.Entry;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.serializer.Serializer;
import org.apache.hadoop.io.serializer.WritableSerialization;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.Mapper.Context;
public class SWMapperOutputCollector<K extends Writable, V extends Writable>
extends org.apache.hadoop.mapreduce.RecordWriter<K,V> {
private final Partitioner<K,V> partitioner;
private final ArrayList<TreeMap<K, List<V>>> partitions;
private final int numPartitions;
private final WritableSerialization serialization;
private final FileOutputStream[] outputStreams;
@SuppressWarnings("unchecked")
public SWMapperOutputCollector(Partitioner partitioner, FileOutputStream[] outputs)
throws IOException, ClassNotFoundException {
numPartitions = outputs.length;
outputStreams = outputs;
partitions = new ArrayList<TreeMap<K, List<V>>>(numPartitions);
serialization = new WritableSerialization();
this.partitioner = partitioner;
for (int i = 0; i < outputs.length; ++i) {
partitions.add(new TreeMap<K, List<V>>());
}
}
@Override
public void write(K key, V value) throws IOException, InterruptedException {
TreeMap<K, List<V>> partition = partitions.get(partitioner.getPartition(key, value, numPartitions));
List<V> partitionList = partition.get(key);
if (partitionList == null) {
partitionList = new LinkedList<V>();
partition.put(key, partitionList);
}
partitionList.add(value);
}
@Override
public void close(TaskAttemptContext context
) throws IOException,InterruptedException {
// XXX: Hack to get around generics.
Serializer<Writable> serializer = serialization.getSerializer(null);
for (int i = 0; i < numPartitions; ++i) {
serializer.open(outputStreams[i]);
for (Entry<K, List<V>> e : partitions.get(i).entrySet()) {
for (V v : e.getValue()) {
serializer.serialize(e.getKey());
serializer.serialize(v);
}
}
serializer.close();
}
}
}