/* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.alimama.quanjingmonitor.kmeans;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RawKeyValueIterator;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.StatusReporter;
import org.apache.hadoop.mapreduce.TaskAttemptID;
public class KMeansClusterReduce extends Reducer<Text, Text, Text, Text> {
private final Collection<Cluster> clusters = new ArrayList<Cluster>();
private Map<String, Cluster> clusterMap;
ParseVector parse=new ParseVector();
private void setClusterMap(Collection<Cluster> clusters) {
clusterMap = new HashMap<String, Cluster>();
for (Cluster cluster : clusters) {
clusterMap.put(String.valueOf(cluster.getId()), cluster);
}
clusters.clear();
}
int rep=2;
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
super.setup(context);
this.clusters.clear();
Configuration conf = context.getConfiguration();
parse.setup(conf);
this.rep=conf.getInt(KMeansDriver.CLUSTER_CONVERGENCE_ABTEST_REP, 2);
try {
String clusterPath = conf.get(KMeansDriver.CLUSTER_PATH_KEY);
if (clusterPath != null && clusterPath.length() > 0) {
KmeansPublic.configureWithClusterInfo(conf, new Path(clusterPath), clusters);
if (clusters.isEmpty()) {
throw new IllegalStateException(
"No clusters found. Check your -c path.");
}
this.setClusterMap(clusters);
}
} catch (Throwable e) {
throw new IllegalStateException(e);
}
}
Comparator<String> cmp=new Comparator<String>() {
@Override
public int compare(String o1, String o2) {
String[] cols1=o1.toString().split("@abtest@");
String[] cols2=o2.toString().split("@abtest@");
double t1=Double.parseDouble(cols1[0]);
double t2=Double.parseDouble(cols2[0]);
return t1 == t2 ? 0 : t1 > t2 ? 1 : -1;
}
};
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
Cluster clu=clusterMap.get(key.toString());
int numberSelect=1;
int limit=1;
if(clu!=null)
{
limit=Math.min(clu.getNumselect()*this.rep*100, 100000);
numberSelect=clu.getNumselect();
System.out.println("key:"+key+","+numberSelect);
}else{
System.out.println("can nott found key:"+key);
}
if(limit<5000)
{
limit=5000;
}
PriorityQueue<String> res= new PriorityQueue<String>(limit,Collections.reverseOrder(cmp));
for (Text value : values) {
if (res.size() < limit) {
res.add(value.toString());
} else if (cmp.compare(res.peek(),value.toString()) > 0) {
res.add(value.toString());
res.poll();
}
}
ArrayList<String> list=new ArrayList<String>(res);
Collections.sort(list,cmp);
comPair[] writelist=new comPair[numberSelect];
int end=list.size();
ArrayList<String> left=new ArrayList<String>(res);
for(int i=0;i<end;i++)
{
String s=list.get(i);
System.out.println("111>>"+s);
String[] cols=s.split("@abtest@");
String line=cols[1];
Vector group=parse.parseVector(line);
for(int j=0;j<writelist.length;j++)
{
if(writelist[j]==null)
{
comPair p=new comPair();
p.s1=s;
p.v1=group;
writelist[j]=p;
s=null;
break;
}
boolean deny=writelist[j].v1.Deny(group);
double dis=writelist[j].v1.distiance(group);
System.out.println("222>>"+dis);
if(!deny&&writelist[j].distance>dis)
{
writelist[j].distance=dis;
String s_tmp=writelist[j].s2;
Vector group_tmp=writelist[j].v2;
writelist[j].s2=s;
writelist[j].v2=group;
s=s_tmp;
group=group_tmp;
if(s_tmp==null)
{
break;
}
}
}
if(s!=null)
{
left.add(s);
}
}
int end2=left.size();
for(int i=0;i<end2;i++)
{
String s=left.get(i);
String[] cols=s.split("@abtest@");
String line=cols[1];
Vector group=parse.parseVector(line);
boolean isset=false;
for(int j=0;j<writelist.length;j++)
{
if(writelist[j]==null||writelist[j].s2!=null)
{
continue;
}
double dis=writelist[j].v1.distiance(group);
if(writelist[j].distance>dis)
{
System.out.println("333>>"+s);
isset=true;
writelist[j].distance=dis;
String s_tmp=writelist[j].s2;
Vector group_tmp=writelist[j].v2;
writelist[j].s2=s;
writelist[j].v2=group;
if(s_tmp==null)
{
break;
}
s=s_tmp;
group=group_tmp;
}
}
if(!isset)
{
break;
}
}
for(int i=0;i<writelist.length;i++)
{
if(writelist[i]!=null&&writelist[i].s2!=null)
{
int rrr=(int) ((Math.random()*10000)%2);
int rrr2=(rrr+1)%2;
System.out.println(writelist[i].toString());
context.write(key, new Text(writelist[i].distance+"\t"+i+"\trep"+rrr+"_1\t"+writelist[i].s1));
context.write(key, new Text(writelist[i].distance+"\t"+i+"\trep"+rrr2+"_2\t"+writelist[i].s2));
}
}
}
public static class comPair{
public String s1=null;
@Override
public String toString() {
return "comPair [distance="+distance+",s1=" + s1 + ", s2=" + s2 + "]";
}
public String s2=null;
public Vector v1=null;
public Vector v2=null;
public double distance=Integer.MAX_VALUE;
}
}