package com.github.elazarl.multireducers;
import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer;
import org.apache.hadoop.util.ReflectionUtils;
import java.io.IOException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
/**
 * MultiReducer receives a {@link PerMapperOutputKey} as key and forwards the record to the
 * delegate reducer selected by the key's {@code targetReducer} index.
 *
 * <p>The delegate reducer classes are read from the job configuration under {@link #conf_key()}.
 * Each delegate is instantiated in {@link #setup}, driven reflectively through its own
 * {@code setup}/{@code reduce}/{@code cleanup} methods, and given a dedicated context whose
 * {@code write} wraps the emitted key in a {@link PerReducerOutputKey} carrying the delegate's index.
 */
public class MultiReducer<KEYOUT, VALUEOUT> extends Reducer<PerMapperOutputKey, PerMapperOutputValue, KEYOUT, VALUEOUT> {
    /** Configuration key under which the delegate reducer classes are listed. */
    public static final String CONF_KEY = "com.github.elazarl.multireducers.reducers";
    // NOTE(review): the two keys below are not read anywhere in this class; presumably they name
    // the delegates' input key/value classes for other parts of the project - confirm with callers.
    public static final String INPUT_KEY_CLASSES = "com.github.elazarl.multireducers.reducer.input.key";
    public static final String INPUT_VALUE_CLASSES = "com.github.elazarl.multireducers.reducer.input.value";

    /** Delegate reducer instances, indexed by target reducer number. */
    private List<Reducer> reducers;
    /** Reflective handle on each delegate's {@code reduce} method, parallel to {@link #reducers}. */
    private List<Method> reduces;
    /** Reflective handle on each delegate's {@code cleanup} method, parallel to {@link #reducers}. */
    private List<Method> cleanups;
    /** Per-delegate contexts that tag every written key with the delegate's index. */
    private List<Reducer<PerMapperOutputKey, PerMapperOutputValue, KEYOUT, VALUEOUT>.Context> contexts;
    /**
     * Values of {@code MultiJob.OUTPUT_FORMAT_PATH}, or one empty string per delegate when that
     * setting is absent. Populated in {@link #setup}; not read elsewhere in this class.
     */
    private List<String> outputPaths;

    /**
     * Returns the configuration key used to look up the delegate reducer classes.
     *
     * @return {@link #CONF_KEY} in this implementation
     */
    protected String conf_key() {
        return CONF_KEY;
    }

    /**
     * Dispatches one key group to the delegate reducer selected by {@code key.targetReducer}.
     *
     * <p>The delegate receives {@code key.data} as its key, a lazily unwrapped view of the values
     * (each element is the wrapper's {@code data}) and the context returned by
     * {@link #getContextForReducer}.
     *
     * @param key     wrapper holding the delegate index and the delegate's real key
     * @param values  wrapped values belonging to this key
     * @param context the context of this reduce task
     * @throws IndexOutOfBoundsException if {@code key.targetReducer} does not name a configured delegate
     */
    @Override
    protected void reduce(PerMapperOutputKey key, Iterable<PerMapperOutputValue> values, Context context)
            throws IOException, InterruptedException {
        int i = key.targetReducer;
        // ArrayList.get would raise the same exception type, but without saying which
        // routing index was at fault or how many delegates exist.
        if (i < 0 || i >= reduces.size()) {
            throw new IndexOutOfBoundsException(
                    "targetReducer index " + i + " is out of range; " + reduces.size() + " reducer(s) configured");
        }
        // The unwrapped payloads are the delegate's *input* values, so they are exposed as plain
        // Objects rather than being cast to this reducer's VALUEOUT type parameter.
        Iterable<Object> unwrappedValues = Iterables.transform(values, new Function<PerMapperOutputValue, Object>() {
            @Override
            public Object apply(PerMapperOutputValue input) {
                return input.data;
            }
        });
        Methods.invoke(reduces.get(i), reducers.get(i), key.data, unwrappedValues, getContextForReducer(context, i));
    }

    /**
     * Returns the context handed to the delegate reducer at index {@code i}.
     *
     * <p>This implementation ignores {@code context} and returns the wrapper created in
     * {@link #setup}.
     *
     * @param context the context of this reduce task
     * @param i       index of the delegate reducer
     * @return the context to pass to that delegate
     */
    protected Context getContextForReducer(Context context, int i) {
        return contexts.get(i);
    }

    /**
     * Instantiates every configured delegate reducer, builds its index-tagging context, runs the
     * delegate's {@code setup} and caches its {@code reduce} and {@code cleanup} methods.
     *
     * @param context the context of this reduce task
     */
    @SuppressWarnings("unchecked")
    @Override
    protected void setup(final Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        outputPaths = Lists.newArrayList(conf.getTrimmedStringCollection(MultiJob.OUTPUT_FORMAT_PATH));
        Class<Reducer>[] reducersClass = (Class<Reducer>[]) conf.getClasses(conf_key());
        int reducerCount = reducersClass.length;
        reducers = new ArrayList<Reducer>(reducerCount);
        cleanups = new ArrayList<Method>(reducerCount);
        reduces = new ArrayList<Method>(reducerCount);
        contexts = new ArrayList<Reducer<PerMapperOutputKey, PerMapperOutputValue, KEYOUT, VALUEOUT>.Context>(reducerCount);
        if (outputPaths.isEmpty()) {
            // No paths configured: pad with one empty string per delegate.
            Iterables.addAll(outputPaths, Iterables.limit(Iterables.cycle(""), reducerCount));
        }
        // Only needed as the enclosing instance for the inner Context subclasses created below.
        WrappedReducer wrappedReducer = new WrappedReducer();
        for (int i = 0; i < reducerCount; i++) {
            Class<Reducer> reducerClass = reducersClass[i];
            Reducer reducer = ReflectionUtils.newInstance(reducerClass, conf);
            final int finalI = i;
            WrappedReducer.Context myContext = wrappedReducer.new Context(context) {
                @Override
                public void write(Object key, Object value) throws IOException, InterruptedException {
                    // Tag the emitted key with the index of the delegate that produced it.
                    context.write((KEYOUT) new PerReducerOutputKey(finalI, key), (VALUEOUT) value);
                }
            };
            // The context must be registered before getContextForReducer(context, i) is called below.
            contexts.add(myContext);
            reducers.add(reducer);
            Methods.invoke(Methods.get(reducerClass, "setup", Context.class), reducer, getContextForReducer(context, i));
            cleanups.add(Methods.get(reducerClass, "cleanup", Context.class));
            reduces.add(Methods.getWithNameMatches(reducerClass, "reduce"));
        }
    }

    /**
     * Invokes {@code cleanup} on every delegate reducer, in index order, with its own context.
     *
     * @param context the context of this reduce task
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (int i = 0; i < reducers.size(); i++) {
            Methods.invoke(cleanups.get(i), reducers.get(i), getContextForReducer(context, i));
        }
    }
}