package com.skp.experiment.common.mapreduce;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterator;
import org.apache.mahout.math.map.OpenHashMap;
import com.skp.experiment.common.parameter.DefaultOptionCreator;
/**
 * Wrapper {@link Mapper} for jobs that need side data: during {@code setup} it loads
 * every {@code <K, V>} pair from one or more sequence files into in-memory maps.
 *
 * <p>The sequence file paths are read from the configuration property
 * {@link #REFERENCE_PATHS} and split with {@code DefaultOptionCreator.COMMA_DELIMETER}.
 * One {@link OpenHashMap} is created per path, in the order the paths are listed, and
 * appended to {@link #references}.
 *
 * <p>NOTE(review): the loaded state is kept in static fields, so mapper instances that
 * run in the same JVM share it, and every {@code setup} call replaces the list.
 *
 * @param <KEYIN> input key type of the wrapped map task
 * @param <VALUEIN> input value type of the wrapped map task
 * @param <KEYOUT> output key type of the wrapped map task
 * @param <VALUEOUT> output value type of the wrapped map task
 * @param <K> key type stored in the reference sequence files
 * @param <V> value type stored in the reference sequence files
 */
@SuppressWarnings("rawtypes")
public class ReferenceMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT, K extends WritableComparable, V extends Writable>
    extends Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {

  /** Configuration key holding the delimited list of reference sequence file paths. */
  public static final String REFERENCE_PATHS = ReferenceMapper.class.getName() + ".referencePaths";

  /** Loaded reference maps, one per path, in the order the paths were given. */
  @SuppressWarnings("rawtypes")
  protected static List<OpenHashMap> references;

  /**
   * Destination object handed to {@code ReflectionUtils.copy} for keys. It is never
   * assigned in this class.
   * NOTE(review): presumably a null destination makes Hadoop allocate a fresh instance
   * per copy; if a subclass assigns this field, all stored keys would likely alias one
   * object - confirm before relying on it.
   */
  protected static Object newKey;

  /** Destination object handed to {@code ReflectionUtils.copy} for values; see {@link #newKey}. */
  protected static Object newValue;

  /**
   * Resets {@link #references}, loads the sequence files named by
   * {@link #REFERENCE_PATHS} (if the property is set) and prints every loaded entry to
   * standard output as {@code <1-based map index>:<key>:<value>}.
   *
   * @param context the task context supplying the job configuration
   * @throws IOException if a reference sequence file cannot be read
   * @throws InterruptedException propagated from {@code Mapper.setup}
   */
  @Override
  protected void setup(Context context)
      throws IOException, InterruptedException {
    super.setup(context);
    references = new ArrayList<OpenHashMap>();
    Configuration conf = context.getConfiguration();
    String referencesDir = conf.get(REFERENCE_PATHS);
    if (referencesDir != null) {
      fetchKeyValues(conf, referencesDir.split(DefaultOptionCreator.COMMA_DELIMETER));
    }
    // NOTE(review): dumps every loaded entry to stdout; looks like leftover debugging
    // and can be expensive for large reference files - confirm whether it is still wanted.
    int i = 0;
    for (OpenHashMap<K, V> item : references) {
      i++;
      for (Entry<K, V> e : item.entrySet()) {
        System.out.println(i + ":" + e.getKey() + ":" + e.getValue());
      }
    }
  }

  /**
   * Reads every pair from each given sequence file and appends one populated map per
   * path to {@link #references}, in argument order.
   *
   * @param conf configuration used to open the files and to copy the Writables
   * @param pathsDir sequence file paths to load
   * @throws IOException if a file cannot be read or closed
   */
  @SuppressWarnings("unchecked")
  protected void fetchKeyValues(Configuration conf, String... pathsDir) throws IOException {
    // Tolerate being called before setup() has created the list.
    if (references == null) {
      references = new ArrayList<OpenHashMap>();
    }
    for (String pathDir : pathsDir) {
      // Fill the map we just appended rather than references.get(i), so the method
      // stays correct when the list already holds maps from an earlier call.
      OpenHashMap<K, V> reference = new OpenHashMap<K, V>();
      references.add(reference);
      // The iterator is created with instance reuse enabled (second argument), which
      // is why each key and value is copied before being stored.
      SequenceFileIterator<K, V> iter =
          new SequenceFileIterator<K, V>(new Path(pathDir), true, conf);
      try {
        while (iter.hasNext()) {
          Pair<K, V> row = iter.next();
          reference.put(ReflectionUtils.copy(conf, row.getFirst(), (K) newKey),
              ReflectionUtils.copy(conf, row.getSecond(), (V) newValue));
        }
      } finally {
        // Release the underlying reader even when copying or reading fails mid-file.
        iter.close();
      }
    }
  }

  /**
   * Empties every loaded reference map and then the list itself.
   *
   * @param context the task context
   * @throws IOException propagated from {@code Mapper.cleanup}
   * @throws InterruptedException propagated from {@code Mapper.cleanup}
   */
  @Override
  protected void cleanup(Context context)
      throws IOException, InterruptedException {
    super.cleanup(context);
    // Nothing to release if setup() never initialised the list.
    if (references == null) {
      return;
    }
    for (OpenHashMap<K, V> h : references) {
      h.clear();
    }
    references.clear();
  }
}