package org.wikipedia.miner.extract.steps.pageDepth;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.avro.mapred.AvroCollector;
import org.apache.avro.mapred.AvroReducer;
import org.apache.avro.mapred.Pair;
import org.apache.hadoop.mapred.Reporter;
import org.wikipedia.miner.extract.model.struct.PageDepthSummary;
/**
 * Shared merge logic for the combiner and reducer of the page-depth step.
 *
 * <p>All partial {@link PageDepthSummary} records that arrive for one page id are
 * folded into a single summary holding:
 * <ul>
 *   <li>the smallest non-null depth found among the partials (or {@code null} if none had one),</li>
 *   <li>the {@code depthForwarded} flag of the partial that supplied that depth, and</li>
 *   <li>the child ids of every partial, concatenated in iteration order.</li>
 * </ul>
 *
 * <p>Subclasses only decide, via {@link #isReducer()}, whether the reducer-only
 * post-processing (counter updates and child-list pruning) is applied.
 */
public abstract class DepthCombinerOrReducer extends AvroReducer<Integer, PageDepthSummary, Pair<Integer, PageDepthSummary>> {

    /** Job counters; they are only incremented when {@link #isReducer()} returns true. */
    public enum Counts { unforwarded, withDepth, withoutDepth }

    /**
     * @return true when this instance runs as the final reducer, false when it runs as a combiner
     */
    public abstract boolean isReducer();

    /**
     * Merges every partial summary for {@code pageId} and emits exactly one combined summary.
     *
     * @param pageId    id of the page the partial summaries belong to
     * @param partials  partial summaries grouped under {@code pageId}
     * @param collector sink handed to {@code InitialDepthMapper.collect} for the merged record
     * @param reporter  source of the {@link Counts} counters (used in reducer mode only)
     * @throws IOException declared by the reduce contract; presumably raised when emitting fails
     */
    @Override
    public void reduce(Integer pageId, Iterable<PageDepthSummary> partials,
            AvroCollector<Pair<Integer, PageDepthSummary>> collector,
            Reporter reporter) throws IOException {

        Integer shallowest = null;
        boolean forwarded = false;
        List<Integer> mergedChildren = new ArrayList<Integer>();

        for (PageDepthSummary summary : partials) {

            // Appending an empty list is a no-op, so no emptiness check is needed here.
            mergedChildren.addAll(summary.getChildIds());

            if (summary.getDepth() == null)
                continue;

            // Strict comparison: when depths tie, the first partial seen keeps its flag.
            // NOTE(review): this makes the flag depend on iteration order for equal depths -- confirm intended.
            boolean improves = shallowest == null || shallowest > summary.getDepth();
            if (improves) {
                shallowest = summary.getDepth().intValue();
                forwarded = summary.getDepthForwarded();
            }
        }

        List<Integer> outgoingChildren = mergedChildren;

        if (isReducer()) {
            if (shallowest == null) {
                // This page has not been reached yet; it is passed on unchanged below.
                reporter.getCounter(Counts.withoutDepth).increment(1);
            } else {
                // Depth forwarding is only required for pages that have children.
                forwarded = forwarded || mergedChildren.isEmpty();

                // Once the depth has been forwarded, the children no longer need to be tracked.
                if (forwarded)
                    outgoingChildren = new ArrayList<Integer>();

                reporter.getCounter(Counts.withDepth).increment(1);
                if (!forwarded)
                    reporter.getCounter(Counts.unforwarded).increment(1);
            }
        }

        InitialDepthMapper.collect(pageId, new PageDepthSummary(shallowest, forwarded, outgoingChildren), collector);
    }

    /** Combiner variant: merges partials only, without counters or child-list pruning. */
    public static class DepthCombiner extends DepthCombinerOrReducer {

        @Override
        public boolean isReducer() {
            return false;
        }
    }

    /** Reducer variant: merges partials, updates {@link Counts}, and prunes forwarded child lists. */
    public static class DepthReducer extends DepthCombinerOrReducer {

        @Override
        public boolean isReducer() {
            return true;
        }
    }
}