package com.github.elazarl.multireducers;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.util.ReflectionUtils;
import java.io.IOException;
/**
* Compares PerMapperOutputKey keys according to their defined comparator, or using
* the default RawComparator if nothing is defined.
*/
public class MultiComparator implements RawComparator<PerInternalMapper>, Configurable{
public static final String CONF_KEY = "com.github.elazarl.multireducers.comparators";
private DataInputBuffer rhsBuffer = new DataInputBuffer();
private DataInputBuffer lhsBuffer = new DataInputBuffer();
private PerInternalMapper lhs = new PerInternalMapper(0, null);
private PerInternalMapper rhs = new PerInternalMapper(0, null);
public static abstract class NoComparator implements RawComparator{}
@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
lhsBuffer.reset(b1, s1, l1);
rhsBuffer.reset(b2, s2, l2);
try {
int lhsReducerNum = WritableUtils.readVInt(lhsBuffer);
int rhsReducerNum = WritableUtils.readVInt(rhsBuffer);
int vintLen = WritableUtils.getVIntSize(lhsReducerNum);
if (lhsReducerNum != rhsReducerNum) {
return lhsReducerNum-rhsReducerNum;
}
if (comparators[rhsReducerNum] != null) {
return comparators[lhsReducerNum].compare(b1, s1+vintLen, l1-vintLen,
b2, s2+vintLen, l2-vintLen);
}
lhsBuffer.reset(b1, s1, l1);
rhsBuffer.reset(b2, s2, l2);
PerInternalMapper rhsPerInternalMapper = rhsDeserializer.deserialize(rhs);
PerInternalMapper lhsPerInternalMapper = lhsDeserializer.deserialize(lhs);
return lhsPerInternalMapper.compareTo(rhsPerInternalMapper);
} catch (IOException e) {
throw new RuntimeException("cannot have IOError on in memory buffer", e);
}
}
@Override
public int compare(PerInternalMapper rhs, PerInternalMapper lhs) {
return rhs.compareTo(lhs);
}
@Override
public void setConf(Configuration conf) {
this.conf = conf;
MultiSerializer multiSerializer = new MultiSerializer();
multiSerializer.setConf(conf);
Class<?>[] comparatorClasses = conf.getClasses(CONF_KEY);
comparators = new RawComparator[comparatorClasses.length];
for (int i = 0; i < comparatorClasses.length; i++) {
if (!comparatorClasses[i].equals(NoComparator.class)) {
comparators[i] = (RawComparator) ReflectionUtils.newInstance(comparatorClasses[i], conf);
}
}
lhsDeserializer = multiSerializer.getDeserializer(PerMapperOutputKey.class);
rhsDeserializer = multiSerializer.getDeserializer(PerMapperOutputKey.class);
try {
lhsDeserializer.open(lhsBuffer);
rhsDeserializer.open(rhsBuffer);
} catch (IOException e) {
throw new RuntimeException("cannot have IOError on in memory buffer", e);
}
}
@Override
public Configuration getConf() {
return conf;
}
private Configuration conf;
Deserializer<PerInternalMapper> lhsDeserializer;
Deserializer<PerInternalMapper> rhsDeserializer;
RawComparator[] comparators;
}