package com.github.elazarl.multireducers;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.map.WrappedMapper;
import org.apache.hadoop.util.ReflectionUtils;
import java.io.IOException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
/**
* MultiMapper would run multiple mappers job, each with its own mapper.
*/
public class MultiMapper<KEYIN, VALUEIN> extends Mapper<KEYIN, VALUEIN, PerMapperOutputKey, PerMapperOutputValue> {
public static final String CONF_KEY = "com.github.elazarl.multireducers.mappers";
private List<TaskAttemptContext> contexts;
@SuppressWarnings("unchecked")
@Override
protected void map(KEYIN key, VALUEIN value, final Context context) throws IOException, InterruptedException {
for (int i = 0; i < mappers.size(); i++) {
Methods.invoke(maps.get(i), mappers.get(i), key, value, contexts.get(i));
}
}
@SuppressWarnings("unchecked")
@Override
protected void setup(final Context context) throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
@SuppressWarnings("unchecked")
Class<Mapper>[] mappersClass = (Class<Mapper>[]) conf.getClasses(CONF_KEY);
mappers = new ArrayList<Mapper>(mappersClass.length);
cleanups = new ArrayList<Method>(mappersClass.length);
maps = new ArrayList<Method>(mappersClass.length);
WrappedMapper wrappedMapper = new WrappedMapper();
contexts = Lists.newArrayList();
int[] redirectToReducer = context.getConfiguration().getInts(MultiJob.REDIRECT_TO_REDUCER);
for (int i = 0; i < mappersClass.length; i++) {
Class<Mapper> mapperClass = mappersClass[i];
final int finalI = redirectToReducer[i];
WrappedMapper.Context myContext = wrappedMapper.new Context(context) {
@Override
public void write(Object k, Object v) throws IOException, InterruptedException {
context.write(new PerMapperOutputKey(finalI, k),
new PerMapperOutputValue(finalI, v));
}
};
contexts.add(myContext);
Mapper mapper = ReflectionUtils.newInstance(mapperClass, conf);
mappers.add(mapper);
Methods.invoke(Methods.get(mapperClass, "setup", Context.class), mapper, myContext);
cleanups.add(Methods.get(mapperClass, "cleanup", Context.class));
maps.add(Methods.getWithNameMatches(mapperClass, "map"));
}
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
for (int i = 0; i < mappers.size(); i++) {
Methods.invoke(cleanups.get(i), mappers.get(i), contexts.get(i));
}
}
List<Mapper> mappers;
List<Method> maps;
List<Method> cleanups;
}