package org.wikipedia.miner.extract.steps.labelOccurrences;
import java.io.IOException;
import org.apache.avro.mapred.AvroCollector;
import org.apache.avro.mapred.AvroReducer;
import org.apache.avro.mapred.Pair;
import org.apache.hadoop.mapred.Reporter;
import org.wikipedia.miner.extract.model.struct.LabelOccurrences;
/**
 * Sums partial {@link LabelOccurrences} counts emitted for a label into a single total.
 *
 * <p>The same accumulation logic serves both the combine and reduce phases; the two
 * concrete subclasses below differ only in what {@link #isReducer()} returns. In the
 * final reduce phase, labels whose summed link occurrence count is zero are counted
 * as false positives and dropped from the output; everything else is counted as a
 * true positive and emitted. The combine phase emits all totals unfiltered.
 */
public abstract class CombinerOrReducer extends AvroReducer<CharSequence, LabelOccurrences, Pair<CharSequence, LabelOccurrences>> {

	/** Hadoop counters tracking how many labels were discarded vs. kept in the reduce phase. */
	public enum Counts { falsePositives, truePositives }

	/** @return {@code true} when running as the final reduce phase, so filtering and counters apply. */
	public abstract boolean isReducer();

	@Override
	public void reduce(CharSequence label, Iterable<LabelOccurrences> partials,
			AvroCollector<Pair<CharSequence, LabelOccurrences>> collector,
			Reporter reporter) throws IOException {

		// Accumulate every partial count for this label into one fresh total.
		LabelOccurrences total = new LabelOccurrences(0, 0, 0, 0);
		for (LabelOccurrences partial : partials) {
			total.setLinkDocCount(partial.getLinkDocCount() + total.getLinkDocCount());
			total.setLinkOccCount(partial.getLinkOccCount() + total.getLinkOccCount());
			total.setTextDocCount(partial.getTextDocCount() + total.getTextDocCount());
			total.setTextOccCount(partial.getTextOccCount() + total.getTextOccCount());
		}

		if (isReducer()) {
			// A label never seen as a link anywhere is a false positive — count it and drop it.
			boolean everLinked = total.getLinkOccCount() != 0;
			reporter.getCounter(everLinked ? Counts.truePositives : Counts.falsePositives).increment(1L);
			if (!everLinked) {
				return;
			}
		}

		collector.collect(new Pair<CharSequence, LabelOccurrences>(label, total));
	}

	/** Combine phase: passes all summed totals through without filtering. */
	public static class Combiner extends CombinerOrReducer {

		@Override
		public boolean isReducer() {
			return false;
		}
	}

	/** Reduce phase: filters out never-linked labels and updates the counters. */
	public static class Reducer extends CombinerOrReducer {

		@Override
		public boolean isReducer() {
			return true;
		}
	}
}