package skywriting.examples.skyhout.kmeans;

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.io.Text;
import org.apache.mahout.clustering.kmeans.Cluster;
import org.apache.mahout.clustering.kmeans.KMeansInfo;
import org.apache.mahout.common.distance.DistanceMeasure;

import skywriting.examples.skyhout.common.Combiner;

/**
 * Combiner for the reduce side of a k-means iteration.
 *
 * <p>Partial {@link KMeansInfo} values for one cluster key are merged pairwise by
 * {@link #combine}; {@link #combineFinal} then folds the merged value into the
 * matching cluster from the previous iteration and records whether that cluster
 * reports convergence.
 *
 * <p>The instance keeps a running "all converged" flag and mutates the clusters
 * held in the map it was given, so it is stateful across calls.
 */
public class KMeansReducerCombiner implements Combiner<Text, KMeansInfo, KMeansInfo, Cluster> {

    /**
     * Starts as {@code true} and becomes {@code false} as soon as any cluster
     * handled by {@link #combineFinal} reports that it has not converged.
     */
    private boolean allConverged;

    /** Clusters from the previous iteration, looked up by {@code key.toString()}. */
    private final Map<String, Cluster> oldClusterMap;

    /** Distance measure handed to {@code Cluster.computeConvergence}. */
    private final DistanceMeasure measure;

    /** Convergence threshold handed to {@code Cluster.computeConvergence}. */
    private final double convergenceDelta;

    /**
     * Creates a combiner over the given set of existing clusters.
     *
     * @param oldClusterMap    existing clusters keyed by identifier; the map is used
     *                         directly (not copied) and its clusters are modified by
     *                         {@link #combineFinal}; must not be {@code null}
     * @param measure          distance measure used for the convergence check; must
     *                         not be {@code null}
     * @param convergenceDelta threshold used for the convergence check
     * @throws NullPointerException if {@code oldClusterMap} or {@code measure} is
     *                              {@code null}
     */
    public KMeansReducerCombiner(Map<String, Cluster> oldClusterMap, DistanceMeasure measure,
            double convergenceDelta) {
        this.oldClusterMap = java.util.Objects.requireNonNull(oldClusterMap, "oldClusterMap");
        this.measure = java.util.Objects.requireNonNull(measure, "measure");
        this.convergenceDelta = convergenceDelta;
        this.allConverged = true;
    }

    /**
     * Reports the accumulated convergence state.
     *
     * @return {@code true} if every cluster passed to {@link #combineFinal} so far
     *         reported convergence (also {@code true} when none has been processed)
     */
    public boolean areAllConverged() {
        return this.allConverged;
    }

    /**
     * Merges two partial results by adding both to a fresh, empty {@link Cluster}
     * and reading the accumulated point count and point total back out.
     *
     * @param oldValue the value accumulated so far
     * @param newValue the value to merge in
     * @return a new {@link KMeansInfo} holding the merged count and total
     */
    @Override
    public KMeansInfo combine(KMeansInfo oldValue, KMeansInfo newValue) {
        // A scratch cluster is used purely as an accumulator; it is discarded afterwards.
        Cluster cluster = new Cluster();
        cluster.addPoints(oldValue.getPoints(), oldValue.getPointTotal());
        cluster.addPoints(newValue.getPoints(), newValue.getPointTotal());
        return new KMeansInfo(cluster.getNumPoints(), cluster.getPointTotal());
    }

    /**
     * Produces the initial accumulated value for a key from its first input.
     *
     * @param initVal the first value seen for a key
     * @return a new {@link KMeansInfo} built from the same point count and total
     */
    @Override
    public KMeansInfo combineInit(KMeansInfo initVal) {
        return new KMeansInfo(initVal.getPoints(), initVal.getPointTotal());
    }

    /**
     * Folds the fully combined value into the existing cluster for {@code key},
     * runs the convergence check on it, and updates the running all-converged flag.
     *
     * @param key    cluster identifier; its string form is the lookup key
     * @param oldVal the combined value for this cluster
     * @return the existing cluster instance (from the map) after the points were added
     * @throws IllegalStateException if no existing cluster is registered under
     *                               {@code key}
     * @throws IOException           declared on the signature; this method does not
     *                               throw it explicitly
     */
    public Cluster combineFinal(Text key, KMeansInfo oldVal) throws IOException {
        System.out.println("Processing cluster " + key);

        String clusterId = key.toString();
        Cluster cluster = oldClusterMap.get(clusterId);
        if (cluster == null) {
            // Fail with the offending key instead of an anonymous NullPointerException below.
            throw new IllegalStateException("No existing cluster found for key: " + clusterId);
        }

        cluster.addPoints(oldVal.getPoints(), oldVal.getPointTotal());
        boolean clusterConverged = cluster.computeConvergence(this.measure, convergenceDelta);
        // Non-short-circuit AND: once false, the flag stays false.
        allConverged &= clusterConverged;
        return cluster;
    }
}