package com.alimama.quanjingmonitor.kmeans;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.io.Writable;
public class Vector implements Writable{
/** Column values keyed by column name; a plain LinkedHashMap, so iteration follows insertion order. */
LinkedHashMap<String,Vector_val> vector=new LinkedHashMap<String, Vector_val>();
/** Point counter; merger() adds the other vector's counter to it and write() serializes it first. */
private long numPoints = 0;
/** Creates a vector with no columns and a point counter of 0. */
public Vector(){};
/**
 * Overwrites the point counter.
 *
 * @param l new counter value
 */
protected void setNumPoints(long l) {
this.numPoints = l;
}
/**
 * @return the current point counter
 */
long getNumPoints() {
return numPoints;
}
/**
 * Restores this vector from the layout produced by {@link #write(DataOutput)}:
 * the point counter, the number of columns, then for every column its name,
 * an int type tag (1 = String_val, 2 = Number_val) and the value's own payload.
 * Columns held before the call are discarded.
 *
 * @param in stream positioned at the start of a serialized vector
 * @throws IOException if reading fails or a column carries an unknown type tag
 */
@Override
public void readFields(DataInput in) throws IOException {
    this.vector.clear();
    this.numPoints = in.readLong();
    int size = in.readInt();
    for (int i = 0; i < size; i++) {
        String sk = in.readUTF();
        int type = in.readInt();
        Vector_val val;
        if (type == 1) {
            val = new String_val();
        } else if (type == 2) {
            val = new Number_val();
        } else {
            // The payload length of an unknown type cannot be determined, so skipping it
            // would leave the stream misaligned and silently corrupt every later column.
            throw new IOException("unknown value type " + type + " for column " + sk);
        }
        val.readFields(in);
        this.vector.put(sk, val);
    }
}
/**
 * Serializes this vector: the point counter, the number of columns, then for
 * every column its name followed by an int type tag (1 for String_val, 2 for
 * Number_val) and the value's own payload.
 *
 * @param out destination stream
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    out.writeLong(numPoints);
    out.writeInt(vector.size());
    for (Entry<String, Vector_val> column : vector.entrySet()) {
        Vector_val value = column.getValue();
        out.writeUTF(column.getKey());
        if (value instanceof String_val) {
            out.writeInt(1);
            ((String_val) value).write(out);
        } else if (value instanceof Number_val) {
            out.writeInt(2);
            ((Number_val) value).write(out);
        }
    }
}
/**
 * @return the point counter followed by the column map's own string form
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("numPoints:");
    text.append(this.numPoints);
    text.append(",");
    text.append(vector.toString());
    return text.toString();
}
/**
 * Ad-hoc demo: builds three small string-only vectors and prints the distance
 * from the first to each of the other two, scaled by 100.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    Vector a = new Vector();
    Vector b = new Vector();
    Vector c = new Vector();

    // Three single-valued columns: a and c hold the same value, b holds the next one.
    for (int i = 0; i < 3; i++) {
        String col = "s_" + i;
        a.add(col, String.valueOf(i), 1);
        b.add(col, String.valueOf(i + 1), 1);
        c.add(col, String.valueOf(i), 1);
    }

    // One multi-valued column with weight 100.
    String[] valuesOfA = {"1", "2", "3", "4"};
    String[] valuesOfB = {"1", "2", "3", "3"};
    String[] valuesOfC = {"1", "4", "5", "6"};
    for (String value : valuesOfA) {
        a.add("abcd", value, 100);
    }
    for (String value : valuesOfB) {
        b.add("abcd", value, 100);
    }
    for (String value : valuesOfC) {
        c.add("abcd", value, 100);
    }

    System.out.println(a.distiance(b) * 100);
    System.out.println(a.distiance(c) * 100);
}
/**
 * Folds another vector into this one. The point counters are added; a column
 * this vector does not have yet receives a copy of the other vector's value;
 * a column present in both is merged value-into-value when both sides are of
 * the same kind (string or number) and is left untouched otherwise.
 *
 * @param vector the vector to merge in; it is not modified
 */
public void merger(Vector vector) {
    this.numPoints += vector.numPoints;
    for (Entry<String, Vector_val> incoming : vector.vector.entrySet()) {
        String column = incoming.getKey();
        Vector_val theirs = incoming.getValue();
        Vector_val mine = this.vector.get(column);
        if (mine == null) {
            this.vector.put(column, theirs.copy());
        } else if (theirs instanceof String_val && mine instanceof String_val) {
            ((String_val) mine).merger((String_val) theirs);
        } else if (theirs instanceof Number_val && mine instanceof Number_val) {
            ((Number_val) mine).merger((Number_val) theirs);
        }
    }
}
/**
 * Records one occurrence of a string value in a column, creating the column
 * on first use.
 *
 * @param col    column name
 * @param val    value to count
 * @param weight weight given to the column when it is created; ignored if the column already exists
 * @throws ClassCastException if {@code col} already holds a numeric value
 */
public void add(String col, String val, double weight) {
    String_val column = (String_val) vector.get(col);
    if (column != null) {
        column.add(val);
        return;
    }
    String_val created = new String_val();
    created.setWeight(weight);
    vector.put(col, created);
    created.add(val);
}
/**
 * Same as the string {@code add}, but additionally flags the column as
 * "must be very alike" on every call.
 *
 * @param col    column name
 * @param val    value to count
 * @param weight weight given to the column when it is created; ignored if the column already exists
 * @throws ClassCastException if {@code col} already holds a numeric value
 */
public void addImportand(String col, String val, double weight) {
    Vector_val existing = vector.get(col);
    String_val column;
    if (existing == null) {
        column = new String_val();
        column.setWeight(weight);
        vector.put(col, column);
    } else {
        column = (String_val) existing;
    }
    column.setMustVeryLike(true);
    column.add(val);
}
/**
 * Adds one numeric sample to a column, creating the column on first use.
 *
 * @param col    column name
 * @param val    sample to accumulate
 * @param weight weight given to the column when it is created; ignored if the column already exists
 * @throws ClassCastException if {@code col} already holds a string value
 */
public void add(String col, double val, double weight) {
    Number_val column = (Number_val) vector.get(col);
    if (column != null) {
        column.add(val);
        return;
    }
    Number_val created = new Number_val();
    created.setWeight(weight);
    vector.put(col, created);
    created.add(val);
}
/**
 * Same as the numeric {@code add}, but additionally flags the column as
 * "must be very alike" and stores {@code diff} as its likeness number on
 * every call.
 *
 * @param col    column name
 * @param val    sample to accumulate
 * @param weight weight given to the column when it is created; ignored if the column already exists
 * @param diff   value stored via {@code setMustveryLikeNumber}
 * @throws ClassCastException if {@code col} already holds a string value
 */
public void addImporatnt(String col, double val, double weight, double diff) {
    Vector_val existing = vector.get(col);
    Number_val column;
    if (existing == null) {
        column = new Number_val();
        column.setWeight(weight);
        vector.put(col, column);
    } else {
        column = (Number_val) existing;
    }
    column.setMustVeryLike(true);
    column.setMustveryLikeNumber(diff);
    column.add(val);
}
/**
 * Decides whether {@code v} must be rejected as a match for this vector.
 * <p>
 * Only columns present in both vectors with the same kind of value are
 * considered. The result is {@code true} as soon as one such column's own
 * {@code Deny} check fails. Otherwise the accumulated totals ({@code getVal()},
 * i.e. sums rather than averages) of the shared numeric columns are checked:
 * the result is {@code true} when either side's total over the ordinary
 * columns is below 0.5 in magnitude, or either side's total over the columns
 * flagged "must be very alike" is below 0.01 in magnitude. A total that never
 * received a contribution stays 0 and therefore also yields {@code true}.
 *
 * @param v the vector to compare against
 * @return {@code true} if {@code v} is rejected
 */
public boolean Deny(Vector v) {
    double plainTotalThis = 0d;
    double plainTotalOther = 0d;
    double importantTotalThis = 0d;
    double importantTotalOther = 0d;

    for (Entry<String, Vector_val> column : this.vector.entrySet()) {
        Vector_val mine = column.getValue();
        Vector_val theirs = v.vector.get(column.getKey());
        if (theirs == null) {
            continue;
        }

        if (mine instanceof String_val && theirs instanceof String_val) {
            if (((String_val) mine).Deny((String_val) theirs)) {
                return true;
            }
        } else if (mine instanceof Number_val && theirs instanceof Number_val) {
            Number_val mineNum = (Number_val) mine;
            Number_val theirsNum = (Number_val) theirs;
            boolean important = mineNum.isMustVeryLike() || theirsNum.isMustVeryLike();
            if (important) {
                importantTotalThis += mineNum.getVal();
                importantTotalOther += theirsNum.getVal();
            } else {
                plainTotalThis += mineNum.getVal();
                plainTotalOther += theirsNum.getVal();
            }
            if (mineNum.Deny(theirsNum)) {
                return true;
            }
        }
    }

    boolean plainTooSmall = Math.abs(plainTotalThis) < 0.5 || Math.abs(plainTotalOther) < 0.5;
    boolean importantTooSmall =
            Math.abs(importantTotalThis) < 0.01 || Math.abs(importantTotalOther) < 0.01;
    return plainTooSmall || importantTooSmall;
}
/**
 * Weighted distance between this vector and {@code v}: the square root of
 * the sum, over every column present in both vectors with the same kind of
 * value, of {@code weight * d^2}, where {@code d} is the per-column distance
 * and {@code weight} is taken from this vector's column. Columns missing on
 * the other side, or of a different kind there, contribute nothing.
 *
 * @param v the vector to measure against
 * @return the weighted distance; 0 when no column contributes
 */
public double distiance(Vector v) {
    double total = 0;
    for (Entry<String, Vector_val> column : this.vector.entrySet()) {
        Vector_val mine = column.getValue();
        Vector_val theirs = v.vector.get(column.getKey());
        if (theirs == null) {
            continue;
        }

        double weight;
        double columnDistance;
        if (mine instanceof String_val && theirs instanceof String_val) {
            weight = mine.getWeight();
            columnDistance = ((String_val) mine).distiance((String_val) theirs);
        } else if (mine instanceof Number_val && theirs instanceof Number_val) {
            weight = mine.getWeight();
            columnDistance = ((Number_val) mine).distiance((Number_val) theirs);
        } else {
            continue;
        }
        total += weight * Math.pow(columnDistance, 2);
    }
    return Math.sqrt(total);
}
/**
 * Settings shared by every column value: a weight (default 1), a
 * "must be very alike" flag (default false) and the number that goes with
 * that flag (default 0). Serialized as weight, number, flag.
 */
private static class Vector_val implements Writable
{
    private double weight = 1;
    private boolean mustVeryLike = false;
    private double mustveryLikeNumber = 0;

    /** @return the column weight */
    public double getWeight() {
        return this.weight;
    }

    /** @param weight the new column weight */
    public void setWeight(double weight) {
        this.weight = weight;
    }

    /** @return whether the column is flagged "must be very alike" */
    public boolean isMustVeryLike() {
        return this.mustVeryLike;
    }

    /** @param mustVeryLike the new state of the "must be very alike" flag */
    public void setMustVeryLike(boolean mustVeryLike) {
        this.mustVeryLike = mustVeryLike;
    }

    /** @return the number stored alongside the "must be very alike" flag */
    public double getMustveryLikeNumber() {
        return this.mustveryLikeNumber;
    }

    /** @param mustveryLikeNumber the number to store alongside the flag */
    public void setMustveryLikeNumber(double mustveryLikeNumber) {
        this.mustveryLikeNumber = mustveryLikeNumber;
    }

    /** @return a new base value carrying the same three settings */
    public Vector_val copy()
    {
        Vector_val duplicate = new Vector_val();
        duplicate.setWeight(this.weight);
        duplicate.setMustVeryLike(this.mustVeryLike);
        duplicate.setMustveryLikeNumber(this.mustveryLikeNumber);
        return duplicate;
    }

    /** Reads weight, number and flag, in that order. */
    @Override
    public void readFields(DataInput in) throws IOException {
        weight = in.readDouble();
        mustveryLikeNumber = in.readDouble();
        mustVeryLike = in.readBoolean();
    }

    /** Writes weight, number and flag, in that order. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeDouble(weight);
        out.writeDouble(mustveryLikeNumber);
        out.writeBoolean(mustVeryLike);
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Vector_val [weight=");
        text.append(weight);
        text.append("]");
        return text.toString();
    }
}
/**
 * Categorical column value: counts how often each distinct string was added.
 * <p>
 * At most {@code MAX_SIZE} distinct strings are retained. The backing map is
 * an access-ordered {@link LinkedHashMap}, so once the cap is exceeded the
 * entry that was read or written least recently is dropped. {@code count} is
 * the total number of occurrences added and is not reduced by such evictions.
 */
private static class String_val extends Vector_val implements Writable{
    private final static float LOADFACTOR = 0.75f;
    private final static int MAX_SIZE = 64;
    /** Occurrence count per distinct string, capped at MAX_SIZE entries (least recently accessed evicted first). */
    Map<String,Integer> val=new LinkedHashMap<String,Integer>((int) Math.ceil(MAX_SIZE / LOADFACTOR) + 1, LOADFACTOR, true) {
        private static final long serialVersionUID = 1L;
        @Override
        protected boolean removeEldestEntry(Map.Entry<String,Integer> eldest) {
            return size() > MAX_SIZE;
        }
    };
    /** Total number of occurrences added, directly or through merger(). */
    public int count=0;

    /**
     * Reads the base settings, then the total count, the number of entries
     * and each (string, count) pair. Previously held entries are discarded.
     */
    @Override
    public void readFields(DataInput arg0) throws IOException {
        super.readFields(arg0);
        this.val.clear();
        this.count = arg0.readInt();
        int size = arg0.readInt();
        for (int i = 0; i < size; i++) {
            String kstr = arg0.readUTF();
            int cnt = arg0.readInt();
            this.val.put(kstr, cnt);
        }
    }

    /**
     * Writes the base settings, then the total count, the number of entries
     * and each (string, count) pair in the map's current iteration order.
     */
    @Override
    public void write(DataOutput arg0) throws IOException {
        super.write(arg0);
        arg0.writeInt(this.count);
        arg0.writeInt(this.val.size());
        for (Entry<String, Integer> e : this.val.entrySet()) {
            arg0.writeUTF(e.getKey());
            arg0.writeInt(e.getValue());
        }
    }

    /** @return an independent value with the same settings, total count and entries */
    public String_val copy()
    {
        String_val rtn = new String_val();
        rtn.merger(this);
        return rtn;
    }

    /**
     * Adds the other value's counts to this one and takes over its weight,
     * flag and likeness number.
     *
     * @param v the value to merge in
     */
    public void merger(String_val v)
    {
        this.count += v.count;
        this.setWeight(v.getWeight());
        this.setMustVeryLike(v.isMustVeryLike());
        this.setMustveryLikeNumber(v.getMustveryLikeNumber());
        for (Entry<String,Integer> e : v.val.entrySet())
        {
            this.addval(e.getKey(), e.getValue());
        }
    }

    /** Increases the stored count of {@code strval} by {@code num}, starting from 0 if absent. */
    private void addval(String strval, int num)
    {
        Integer oldval = this.val.get(strval);
        if (oldval == null)
        {
            oldval = 0;
        }
        this.val.put(strval, oldval + num);
    }

    /**
     * Records one occurrence of {@code strval}.
     *
     * @param strval the string to count
     */
    public void add(String strval)
    {
        this.addval(strval, 1);
        this.count += 1;
    }

    /**
     * Returns the retained string with the highest occurrence count. When
     * several strings share the highest count, the one latest in the map's
     * iteration order wins.
     *
     * @return the most frequent string, or the text {@code "null"} when no string is retained
     */
    public String getTopField()
    {
        String top = null;
        int topCount = 0;
        for (Entry<String, Integer> e : this.val.entrySet())
        {
            int occurrences = e.getValue();
            if (top == null || topCount <= occurrences)
            {
                top = e.getKey();
                // Remember the best count seen so far; without this every entry would
                // replace the previous one and the last entry would always be returned.
                topCount = occurrences;
            }
        }
        return String.valueOf(top);
    }

    /**
     * Rejects the other value when this one is flagged "must be very alike"
     * and the two most frequent strings differ.
     *
     * @param obj_val_cmp_str the value to compare against
     * @return {@code true} if the other value is rejected
     */
    public boolean Deny(String_val obj_val_cmp_str)
    {
        if (!this.isMustVeryLike())
        {
            return false;
        }
        return !this.getTopField().equals(obj_val_cmp_str.getTopField());
    }

    /**
     * Cosine distance between the two count vectors: 1 minus the cosine
     * similarity of the per-string occurrence counts.
     * See http://www.ruanyifeng.com/blog/2013/03/cosine_similarity.html
     * <p>
     * Returns 1 when either side has a total count of 0 or when the two sides
     * share no string.
     * <p>
     * NOTE: the lookups below go through {@code get} on access-ordered maps,
     * so computing a distance changes the eviction order of both operands.
     *
     * @param obj_val_cmp_str the value to compare against
     * @return a distance between 0 (same direction) and 1 (nothing in common)
     */
    public double distiance(String_val obj_val_cmp_str)
    {
        if (this.count == 0 || obj_val_cmp_str.count == 0)
        {
            return 1;
        }
        HashSet<String> allwords = new HashSet<String>();
        allwords.addAll(this.val.keySet());
        allwords.addAll(obj_val_cmp_str.val.keySet());
        double sumall = 0;
        double powa = 0;
        double powb = 0;
        for (String words : allwords)
        {
            Integer found = this.val.get(words);
            int tf = (found == null) ? 0 : found.intValue();
            Integer foundCmp = obj_val_cmp_str.val.get(words);
            int tfCmp = (foundCmp == null) ? 0 : foundCmp.intValue();
            // Widen before multiplying: an int product overflows once both counts are large.
            sumall += (double) tf * tfCmp;
            powa += Math.pow(tf, 2);
            powb += Math.pow(tfCmp, 2);
        }
        if (powa == 0 || powb == 0 || sumall == 0)
        {
            return 1;
        }
        double powab = Math.sqrt(powa) * Math.sqrt(powb);
        double rtn = Math.abs(sumall / powab);
        return 1 - rtn;
    }

    /** A (string, count) pair used only to sort entries for toString(). */
    public static class forsort{
        @Override
        public String toString() {
            return "" + key + ":" + cnt + "";
        }
        String key;
        int cnt;
    }

    /**
     * @return the six most frequent entries (highest count first) and the total count
     */
    @Override
    public String toString() {
        ArrayList<forsort> list = new ArrayList<forsort>();
        for (Entry<String, Integer> e : this.val.entrySet())
        {
            forsort s = new forsort();
            s.cnt = e.getValue();
            s.key = e.getKey();
            list.add(s);
        }
        // Descending by count.
        Collections.sort(list, new Comparator<forsort>() {
            @Override
            public int compare(forsort o1, forsort o2) {
                long t1 = o1.cnt;
                long t2 = o2.cnt;
                return t1 == t2 ? 0 : t1 < t2 ? 1 : -1;
            }
        });
        ArrayList<forsort> print = new ArrayList<forsort>();
        int index = 0;
        for (forsort s : list)
        {
            if (index++ > 5)
            {
                break;
            }
            print.add(s);
        }
        return "[val=" + print + ", count=" + count + "]";
    }
}
/**
 * Numeric column value: a running total of the added samples and the number
 * of samples, from which the average is derived.
 */
private static class Number_val extends Vector_val implements Writable
{
    /** Running total of all added samples. */
    public double val=0;
    /** Number of samples added, directly or through merger(). */
    public int count=0;

    /** @return the running total (not the average) */
    public double getVal() {
        return val;
    }

    /** @return an independent value with the same settings, total and sample count */
    public Number_val copy()
    {
        Number_val rtn = new Number_val();
        rtn.merger(this);
        return rtn;
    }

    /**
     * Adds the other value's total and sample count to this one and takes
     * over its weight, flag and likeness number.
     *
     * @param v the value to merge in
     */
    public void merger(Number_val v)
    {
        this.setWeight(v.getWeight());
        this.setMustVeryLike(v.isMustVeryLike());
        this.setMustveryLikeNumber(v.getMustveryLikeNumber());
        this.val += v.val;
        this.count += v.count;
    }

    /**
     * Adds one sample.
     *
     * @param v the sample
     */
    public void add(double v)
    {
        this.val += v;
        this.count++;
    }

    /** @return total divided by sample count, or 0 when no sample was added */
    public double avg()
    {
        if (count == 0)
        {
            return 0;
        }
        return this.val / this.count;
    }

    /**
     * Rejects the other value when this one is flagged "must be very alike"
     * and the two averages differ by at least twice the likeness number.
     *
     * @param obj_val_cmp_str the value to compare against
     * @return {@code true} if the other value is rejected
     */
    public boolean Deny(Number_val obj_val_cmp_str)
    {
        if (!this.isMustVeryLike())
        {
            return false;
        }
        double diff = Math.abs(this.avg() - obj_val_cmp_str.avg());
        return !(diff < (this.getMustveryLikeNumber() * 2));
    }

    /**
     * Distance between the two averages a and b, computed as
     * {@code 1 - |a*b / (a^2 + b^2 - a*b)|}. Equal non-zero averages give 0.
     * Returns 1 when either side's running total is at most 0.001 or when
     * the denominator is 0.
     *
     * @param obj_val_cmp_str the value to compare against
     * @return the distance
     */
    public double distiance(Number_val obj_val_cmp_str)
    {
        //a.b / (|a|^2 + |b|^2 - a.b)
        double a = this.avg();
        double b = obj_val_cmp_str.avg();
        // Both operands are checked; the second test used to repeat this.val.
        if (this.val <= 0.001 || obj_val_cmp_str.val <= 0.001)
        {
            return 1;
        }
        double ab = a * b;
        double maxval = a * a + b * b - ab;
        if (maxval == 0)
        {
            return 1;
        }
        double rtn = Math.abs(ab / maxval);
        return 1 - rtn;
    }

    /** @return the average as text */
    @Override
    public String toString() {
        return String.valueOf(this.avg());
    }

    /**
     * Reads the base settings (weight, likeness number, flag), then the
     * sample count and the running total.
     * <p>
     * NOTE: the base settings were previously not serialized for numeric
     * values, so they fell back to their defaults after every round trip.
     * Data written before this change does not contain them and cannot be
     * read with this method.
     */
    @Override
    public void readFields(DataInput arg0) throws IOException {
        super.readFields(arg0);
        this.count = arg0.readInt();
        this.val = arg0.readDouble();
    }

    /**
     * Writes the base settings (weight, likeness number, flag), then the
     * sample count and the running total.
     */
    @Override
    public void write(DataOutput arg0) throws IOException {
        super.write(arg0);
        arg0.writeInt(this.count);
        arg0.writeDouble(this.val);
    }
}
}