package com.alimama.quanjingmonitor.kmeans;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.PriorityQueue;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskID;

/**
 * Reducer that condenses the clusters of each key into a short list, keeps the
 * {@link #limit} keys with the highest total point count, and writes at most
 * {@link #outputrecord} clusters from the retained keys during {@link #cleanup}.
 *
 * <p>Nothing is written to the context from {@link #reduce}; all output is
 * produced in {@link #cleanup}.
 */
public class KMeansGroupReducer extends Reducer<Text, Cluster, Text, Cluster> {

    /** The condensed clusters collected for a single reduce key. */
    public static class Clusterlist {
        /** Clusters kept for this key, in the order they were created. */
        ArrayList<Cluster> list = new ArrayList<Cluster>();

        /** Copy of the reduce key these clusters belong to. */
        Text key;

        @Override
        public String toString() {
            return "Clusterlist [ key=" + key + ",list=" + list.toString() + "]";
        }

        /**
         * Sums the point counts of all cluster centers in this list.
         *
         * @return total number of points over all clusters of this key
         */
        public int count() {
            int rtn = 0;
            for (Cluster cl : this.list) {
                rtn += cl.getCenter().getNumPoints();
            }
            return rtn;
        }
    }

    /**
     * Retained per-key lists. Built in {@link #setup} with the reverse of
     * {@link #cmp}, so the head of the queue is the list with the smallest count.
     */
    PriorityQueue<Clusterlist> res;

    /** Orders lists by {@link Clusterlist#count()} descending (largest first). */
    static Comparator<Clusterlist> cmp = new Comparator<Clusterlist>() {
        @Override
        public int compare(Clusterlist o1, Clusterlist o2) {
            int t1 = o1.count();
            int t2 = o2.count();
            if (t1 == t2) {
                return 0;
            }
            if (t1 < t2) {
                return 1;
            }
            return -1;
        }
    };

    /** Base id for emitted clusters; derived from the task id in {@link #setup}. */
    int Index = 0;

    /** Maximum number of per-key lists retained in {@link #res}. */
    int limit = 256;

    /** Maximum number of clusters written by {@link #cleanup}. */
    int outputrecord = 32;

    /**
     * Creates the bounded queue and derives the cluster id base from the task id.
     *
     * @param context task context supplied by the framework
     * @throws IOException          propagated from the superclass
     * @throws InterruptedException propagated from the superclass
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        // Reversed comparator => smallest count at the head, which reduce() evicts first.
        this.res = new PriorityQueue<Clusterlist>(limit, Collections.reverseOrder(cmp));
        TaskID taskId = context.getTaskAttemptID().getTaskID();
        // Presumably spaces the ids of different reduce tasks apart -- confirm with consumers.
        this.Index = taskId.getId() * 10000;
    }

    /**
     * Writes clusters from the retained lists in rounds: round {@code i} visits
     * every retained list and emits its {@code i}-th cluster when that cluster's
     * center has at least 10 points. Writing stops once {@link #outputrecord}
     * clusters have been emitted, or after a round in which nothing was written.
     *
     * <p>Each emitted cluster is rebuilt from the stored center with the id
     * {@code Index + n}, where {@code n} is the number of clusters written so far.
     *
     * @param context task context the clusters are written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // NOTE(review): PriorityQueue iteration order is unspecified, so the lists are
        // not visited by size here -- confirm that this is acceptable.
        ArrayList<Clusterlist> clusters_list = new ArrayList<Clusterlist>();
        for (Clusterlist list : this.res) {
            clusters_list.add(list);
        }
        System.out.println(clusters_list.size() + "##################");

        if (outputrecord <= 0) {
            return;
        }

        int index = 0;
        int writecount = 0;
        while (true) {
            boolean iswrite = false;
            for (Clusterlist list : clusters_list) {
                if (list.list.size() <= index) {
                    continue;
                }
                Cluster tmp = list.list.get(index);
                if (tmp.getCenter().getNumPoints() < 10) {
                    continue;
                }
                Cluster w = new Cluster(tmp.getCenter(), Index + writecount);
                System.out.println(list.key + "\t" + w.toString());
                context.write(list.key, w);
                writecount++;
                // Stop after exactly `outputrecord` writes. The previous check
                // (writecount++ > outputrecord) let two extra records through.
                if (writecount >= outputrecord) {
                    return;
                }
                iswrite = true;
            }
            index++;
            // NOTE(review): a round where every candidate is skipped (too few points)
            // also ends the loop, even if later positions hold larger clusters.
            if (!iswrite) {
                break;
            }
        }
    }

    /**
     * Condenses all clusters of one key into a short {@link Clusterlist} and
     * offers it to the bounded queue {@link #res}.
     *
     * <p>While the list is short, a new entry is started when the list is empty
     * or when more than 40 points have been merged since the last entry was
     * started; otherwise the value is merged into the most recent entry. Once
     * the list is longer than {@code eachMaxSize}, every further value is merged
     * into a randomly chosen entry.
     *
     * @param key     reduce key; copied before being stored
     * @param values  clusters grouped under {@code key}
     * @param context task context, used only to report progress
     * @throws IOException          declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void reduce(Text key, Iterable<Cluster> values, Context context)
            throws IOException, InterruptedException {
        Clusterlist list = new Clusterlist();
        // Stored as a copy -- presumably because the framework reuses the key instance.
        list.key = new Text(key.toString());

        int eachMaxSize = 3;
        int last_size = 0;
        for (Cluster value : values) {
            int listsize = list.list.size();
            if (listsize > eachMaxSize) {
                // NOTE(review): with '>' the list reaches eachMaxSize + 1 entries, and the
                // random index below never selects the last one -- confirm intent.
                int index = (int) (Math.random() * 100000) % eachMaxSize;
                list.list.get(index).getCenter().merger(value.getCenter());
            } else if (listsize <= 0 || last_size > 40) {
                last_size = 0;
                list.list.add(new Cluster(value));
            } else {
                list.list.get(listsize - 1).getCenter().merger(value.getCenter());
                last_size += value.getCenter().getNumPoints();
            }
            context.progress();
        }
        System.out.println(">>>>>" + list.toString());

        if (this.res.size() < limit) {
            this.res.add(list);
        } else if (cmp.compare(res.peek(), list) > 0) {
            // New list has a larger count than the current minimum: insert it and
            // drop the smallest so the queue stays at `limit` entries.
            this.res.add(list);
            this.res.poll();
        }
    }
}