package org.archive.hadoop.streaming;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
/**
 * Reducer that passes through at most one record per CDX key.
 *
 * <p>For each invocation of {@link #reduce} the key's string form is compared
 * with the key remembered from the previous non-skipped invocation; when they
 * are equal nothing is emitted. Otherwise only the first value of the group
 * (if there is one) is written to the output, paired with the key.
 */
public class CdxDedupReducer implements Reducer<Text, Text, Text, Text> {

    /**
     * String form of the key handled by the most recent {@code reduce} call
     * that was not skipped as a duplicate; {@code null} until the first call.
     */
    protected String lastCdx = null;

    /** No job configuration is read by this reducer. */
    @Override
    public void configure(JobConf job) {
        // Intentionally empty.
    }

    /** This reducer holds no resources, so there is nothing to release. */
    @Override
    public void close() throws IOException {
        // Intentionally empty.
    }

    /**
     * Emits the key with the first value of its group, unless the key's
     * string form equals the one remembered in {@link #lastCdx}.
     *
     * @param key      the CDX key for this group
     * @param values   the values grouped under {@code key}; only the first
     *                 one is consumed (presumably an empty value — confirm
     *                 against the mapper)
     * @param output   collector receiving the single surviving record
     * @param reporter progress reporter (unused)
     * @throws IOException if the collector fails to write the record
     */
    @Override
    public void reduce(Text key, Iterator<Text> values,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        String currentCdx = key.toString();

        boolean isNewKey = (lastCdx == null) || !lastCdx.equals(currentCdx);
        if (isNewKey) {
            // One record per key: take the first value and ignore the rest.
            if (values.hasNext()) {
                Text firstValue = values.next();
                output.collect(key, firstValue);
            }
            // Remember the key only after the record was written successfully.
            lastCdx = currentCdx;
        }
    }
}