package com.github.elazarl.multireducers;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.util.ReflectionUtils;
import java.io.IOException;
import java.util.*;
/**
 * An {@link OutputFormat} that forwards each record to one of several delegate output formats.
 *
 * <p>Delegates are registered on a job with
 * {@link #addOutputFormat(Job, Class, Property...)}. Each registration stores the delegate's
 * class name and a serialized set of configuration overrides in the job configuration; the
 * two lists are parallel, so entry {@code i} of one belongs to entry {@code i} of the other.
 *
 * <p>Whenever a delegate is invoked, its overrides are temporarily written into the context's
 * {@link Configuration} and the previous values are restored afterwards, including when the
 * delegate throws.
 *
 * <p>Records are routed by {@code PerReducerOutputKey.targetReducer}, which is used as the
 * index of the delegate writer; the delegate receives {@code key.data} as its key.
 *
 * @param <V> value type of the records written through this format
 */
public class MultiOutputFormat<V> extends OutputFormat<PerReducerOutputKey, V>
        implements Configurable {

    /** Configuration key holding the comma-separated class names of the delegate formats. */
    private static final String MULTI_OUTPUT_FORMATS =
            "com.github.elazarl.multireducers.output.formats";

    /** Delegate output formats, instantiated by {@link #setConf(Configuration)}. */
    private OutputFormat[] outputFormats;

    /** Configuration overrides per delegate; element {@code i} applies to delegate {@code i}. */
    private List<Map<String, String>> outputProperties;

    /** Configuration handed to {@link #setConf(Configuration)}. */
    private Configuration conf;

    /**
     * A single configuration override (key/value pair) that is applied while the delegate
     * output format it was registered with is being invoked.
     */
    public static class Property {
        private final String key;
        private final String value;

        /**
         * @param key   configuration key to override
         * @param value value the key takes while the delegate runs
         */
        public Property(String key, String value) {
            this.key = key;
            this.value = value;
        }
    }

    /**
     * Creates an override for the {@code mapred.output.dir} configuration key.
     *
     * @param path value to store under {@code mapred.output.dir}
     * @return the corresponding property
     */
    public static Property outputPath(String path) {
        return new Property("mapred.output.dir", path);
    }

    /**
     * Creates an override for the {@code mapreduce.output.basename} configuration key.
     *
     * @param baseName value to store under {@code mapreduce.output.basename}
     * @return the corresponding property
     */
    public static Property outputBaseName(String baseName) {
        return new Property("mapreduce.output.basename", baseName);
    }

    /** Collects the given properties into a map; a later duplicate key replaces an earlier one. */
    private static Map<String, String> propertiesToMap(Property... properties) {
        Map<String, String> byKey = Maps.newHashMap();
        for (Property property : properties) {
            byKey.put(property.key, property.value);
        }
        return byKey;
    }

    /**
     * Appends a delegate output format, together with its configuration overrides, to the
     * lists stored in the job's configuration.
     *
     * @param job          job whose configuration is updated
     * @param outputFormat delegate output format class to append
     * @param properties   overrides applied to the configuration while this delegate runs
     */
    public static void addOutputFormat(Job job, Class<? extends OutputFormat> outputFormat,
                                       Property... properties) {
        Configuration jobConf = job.getConfiguration();

        // Read the lists with getTrimmedStrings (an array) rather than
        // getTrimmedStringCollection: the two lists are matched by position, so duplicates
        // (e.g. the same format class registered twice) must never be collapsed.
        List<String> formatNames =
                Lists.newArrayList(jobConf.getTrimmedStrings(MULTI_OUTPUT_FORMATS));
        formatNames.add(outputFormat.getName());
        jobConf.setStrings(MULTI_OUTPUT_FORMATS,
                formatNames.toArray(new String[formatNames.size()]));

        List<String> serializedProps = Lists.newArrayList(
                jobConf.getTrimmedStrings(MultiJob.OUTPUT_FORMAT_PROPERTIES));
        serializedProps.add(MapToProperties.serialize(propertiesToMap(properties)));
        jobConf.setStrings(MultiJob.OUTPUT_FORMAT_PROPERTIES,
                serializedProps.toArray(new String[serializedProps.size()]));
    }

    /**
     * Creates one record writer per delegate and returns a writer that dispatches every
     * record to the delegate writer selected by {@code key.targetReducer}.
     *
     * @param context task context; its configuration is temporarily modified while each
     *                delegate is invoked
     * @return the dispatching record writer
     * @throws IOException          if a delegate fails to create its writer
     * @throws InterruptedException if a delegate is interrupted
     */
    @Override
    public RecordWriter<PerReducerOutputKey, V> getRecordWriter(final TaskAttemptContext context)
            throws IOException, InterruptedException {
        final RecordWriter[] writers = new RecordWriter[outputFormats.length];
        for (int i = 0; i < outputFormats.length; i++) {
            Map<String, String> saved = pushConfiguration(i, context);
            try {
                writers[i] = outputFormats[i].getRecordWriter(context);
            } finally {
                popConfiguration(saved, context);
            }
        }
        return new RecordWriter<PerReducerOutputKey, V>() {
            @SuppressWarnings("unchecked")
            @Override
            public void write(PerReducerOutputKey key, V value) throws IOException,
                    InterruptedException {
                int target = key.targetReducer;
                Map<String, String> saved = pushConfiguration(target, context);
                try {
                    writers[target].write(key.data, value);
                } finally {
                    popConfiguration(saved, context);
                }
            }

            /**
             * Closes every delegate writer. An {@link IOException} from one writer does not
             * prevent the remaining writers from being closed; the first such failure is
             * rethrown once all writers have been attempted.
             */
            @Override
            public void close(TaskAttemptContext context) throws IOException,
                    InterruptedException {
                IOException firstFailure = null;
                for (int i = 0; i < writers.length; i++) {
                    Map<String, String> saved = pushConfiguration(i, context);
                    try {
                        writers[i].close(context);
                    } catch (IOException e) {
                        // Keep going so the other writers still get closed.
                        if (firstFailure == null) {
                            firstFailure = e;
                        }
                    } finally {
                        popConfiguration(saved, context);
                    }
                }
                if (firstFailure != null) {
                    throw firstFailure;
                }
            }
        };
    }

    /**
     * Runs {@code checkOutputSpecs} on every delegate, each with its own overrides applied.
     *
     * @param context job context; its configuration is temporarily modified per delegate
     * @throws IOException          if a delegate rejects the output specification
     * @throws InterruptedException if a delegate is interrupted
     */
    @Override
    public void checkOutputSpecs(JobContext context) throws IOException,
            InterruptedException {
        for (int i = 0; i < outputFormats.length; i++) {
            Map<String, String> saved = pushConfiguration(i, context);
            try {
                outputFormats[i].checkOutputSpecs(context);
            } finally {
                popConfiguration(saved, context);
            }
        }
    }

    /**
     * Obtains the committer of every delegate, each with its own overrides applied, and wraps
     * them in a {@code MultiOutputCommitter}.
     *
     * @param context task context; its configuration is temporarily modified per delegate
     * @return a committer wrapping the delegates' committers, in delegate order
     * @throws IOException          if a delegate fails to create its committer
     * @throws InterruptedException if a delegate is interrupted
     */
    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        final OutputCommitter[] committers = new OutputCommitter[outputFormats.length];
        for (int i = 0; i < committers.length; i++) {
            Map<String, String> saved = pushConfiguration(i, context);
            try {
                committers[i] = outputFormats[i].getOutputCommitter(context);
            } finally {
                popConfiguration(saved, context);
            }
        }
        return new MultiOutputCommitter(Arrays.asList(committers));
    }

    /**
     * Writes the overrides of delegate {@code i} into the context's configuration.
     *
     * @param i       index of the delegate whose overrides are applied
     * @param context context whose configuration is modified
     * @return the values the overridden keys had before ({@code null} for keys that were
     *         not set); pass it to {@link #popConfiguration(Map, JobContext)} to undo
     * @throws IndexOutOfBoundsException if no overrides were registered for index {@code i}
     */
    private Map<String, String> pushConfiguration(int i, JobContext context) {
        if (i < 0 || i >= outputProperties.size()) {
            throw new IndexOutOfBoundsException("No output properties registered for output #"
                    + i + "; " + outputProperties.size() + " property set(s) configured");
        }
        Configuration target = context.getConfiguration();
        Map<String, String> saved = Maps.newHashMap();
        for (Map.Entry<String, String> override : outputProperties.get(i).entrySet()) {
            saved.put(override.getKey(), target.get(override.getKey()));
            target.set(override.getKey(), override.getValue());
        }
        return saved;
    }

    /**
     * Restores the configuration values captured by {@link #pushConfiguration(int, JobContext)}.
     * Keys that had no value before the push are unset.
     *
     * @param saved   snapshot returned by the matching push
     * @param context context whose configuration is restored
     */
    private void popConfiguration(Map<String, String> saved, JobContext context) {
        Configuration target = context.getConfiguration();
        for (Map.Entry<String, String> previous : saved.entrySet()) {
            if (previous.getValue() == null) {
                target.unset(previous.getKey());
            } else {
                target.set(previous.getKey(), previous.getValue());
            }
        }
    }

    /**
     * Stores the configuration, deserializes the per-delegate overrides and instantiates the
     * delegate output formats listed in it.
     *
     * @param conf configuration to read the delegate classes and overrides from
     */
    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;

        outputProperties = Lists.newArrayList();
        // Array-based read keeps duplicates and order, so indices stay aligned with the
        // class list below.
        for (String serialized : conf.getTrimmedStrings(MultiJob.OUTPUT_FORMAT_PROPERTIES)) {
            outputProperties.add(MapToProperties.deserialize(serialized));
        }

        Class<?>[] formatClasses = conf.getClasses(MULTI_OUTPUT_FORMATS);
        outputFormats = new OutputFormat[formatClasses.length];
        for (int i = 0; i < formatClasses.length; i++) {
            outputFormats[i] =
                    (OutputFormat) ReflectionUtils.newInstance(formatClasses[i], conf);
        }
    }

    /** @return the configuration passed to {@link #setConf(Configuration)} */
    @Override
    public Configuration getConf() {
        return conf;
    }
}