package com.alimama.quanjingmonitor.kmeans;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class KMeansReducer extends Reducer<Text, Vector, Text, Cluster> {
private Map<String, Cluster> clusterMap;
private double convergenceDelta;
@Override
protected void reduce(Text key, Iterable<Vector> values, Context context)
throws IOException, InterruptedException {
Vector cluster = new Vector();
Vector cluster_default = new Vector();
boolean isset=false;
boolean ismerger=false;
for (Vector value : values) {
if(value.getNumPoints()>0)
{
cluster.merger(value);
ismerger=true;
}else{
cluster_default.merger(value);
isset=true;
}
}
if(!ismerger&&isset)
{
cluster.merger(cluster_default);
}
Cluster clusterconv = clusterMap.get(key.toString());
boolean converged = cluster.distiance(clusterconv.getCenter())<this.convergenceDelta;
if (converged) {
context.getCounter("Clustering", "Converged Clusters").increment(1);
}
Cluster clu=new Cluster(cluster, Integer.parseInt(key.toString()));
clu.setConverged(converged);
context.write(new Text(key), clu);
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
super.setup(context);
Configuration conf = context.getConfiguration();
try {
this.convergenceDelta = Double.parseDouble(conf.get(KMeansDriver.CLUSTER_CONVERGENCE_KEY));
this.clusterMap = new HashMap<String, Cluster>();
String path = conf.get(KMeansDriver.CLUSTER_PATH_KEY);
if (path.length() > 0) {
Collection<Cluster> clusters = new ArrayList<Cluster>();
KmeansPublic.configureWithClusterInfo(conf, new Path(path), clusters);
setClusterMap(clusters);
if (clusterMap.isEmpty()) {
throw new IllegalStateException("Cluster is empty!");
}
}
} catch (Throwable e) {
throw new IllegalStateException(e);
}
}
private void setClusterMap(Collection<Cluster> clusters) {
clusterMap = new HashMap<String, Cluster>();
for (Cluster cluster : clusters) {
clusterMap.put(String.valueOf(cluster.getId()), cluster);
}
clusters.clear();
}
}