package com.yahoo.labs.samoa.evaluation.measures; /* * #%L * SAMOA * %% * Copyright (C) 2010 RWTH Aachen University, Germany * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import java.util.ArrayList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.yahoo.labs.samoa.moa.cluster.Clustering; import com.yahoo.labs.samoa.moa.core.DataPoint; import com.yahoo.labs.samoa.moa.evaluation.MeasureCollection; import com.yahoo.labs.samoa.moa.evaluation.MembershipMatrix; public class EntropyCollection extends MeasureCollection{ private static final Logger logger = LoggerFactory.getLogger(EntropyCollection.class); @Override protected String[] getNames() { return new String[]{"GT cross entropy","FC cross entropy","Homogeneity","Completeness","V-Measure","VarInformation"}; } @Override protected boolean[] getDefaultEnabled() { return new boolean[]{false, false, false, false, false, false}; } @Override public void evaluateClustering(Clustering fclustering, Clustering hClustering, ArrayList<DataPoint> points) throws Exception { MembershipMatrix mm = new MembershipMatrix(fclustering, points); int numClasses = mm.getNumClasses(); int numCluster = fclustering.size()+1; int n = mm.getTotalEntries(); double FCentropy = 0; if(numCluster > 1){ for (int fc = 0; fc < numCluster; fc++){ double weight = mm.getClusterSum(fc)/(double)n; if(weight > 0) FCentropy+= weight * Math.log10(weight); } FCentropy/=(-1*Math.log10(numCluster)); } logger.debug("FC entropy: {}", FCentropy); double GTentropy = 0; if(numClasses > 1){ for (int hc = 0; hc < numClasses; hc++){ double weight = mm.getClassSum(hc)/(double)n; if(weight > 0) GTentropy+= weight * Math.log10(weight); } GTentropy/=(-1*Math.log10(numClasses)); } logger.debug("GT entropy: {}", GTentropy); //cluster based entropy double FCcrossEntropy = 0; for (int fc = 0; fc < numCluster; fc++){ double e = 0; int clusterWeight = mm.getClusterSum(fc); if(clusterWeight>0){ for (int hc = 0; hc < numClasses; hc++) { double p = mm.getClusterClassWeight(fc, hc)/(double)clusterWeight; if(p!=0){ e+=p * Math.log10(p); } } FCcrossEntropy+=((clusterWeight/(double)n) * e); } } if(numCluster > 1){ FCcrossEntropy/=-1*Math.log10(numCluster); } addValue("FC cross entropy", 1-FCcrossEntropy); logger.debug("FC cross entropy: {}", 1 - FCcrossEntropy); //class based entropy double GTcrossEntropy = 0; for (int hc = 0; hc < numClasses; hc++){ double e = 0; int classWeight = mm.getClassSum(hc); if(classWeight>0){ for (int fc = 0; fc < numCluster; fc++) { double p = mm.getClusterClassWeight(fc, hc)/(double)classWeight; if(p!=0){ e+=p * Math.log10(p); } } } GTcrossEntropy+=((classWeight/(double)n) * e); } if(numClasses > 1) GTcrossEntropy/=-1*Math.log10(numClasses); addValue("GT cross entropy", 1-GTcrossEntropy); logger.debug("GT cross entropy: {}", 1 - GTcrossEntropy); double homogeneity; if(FCentropy == 0) homogeneity = 1; else homogeneity = 1 - FCcrossEntropy/FCentropy; //TODO set err values for now, needs to be debugged if(homogeneity > 1 || homogeneity < 0) addValue("Homogeneity",-1); else addValue("Homogeneity",homogeneity); double completeness; if(GTentropy == 0) completeness = 1; else completeness = 1 - GTcrossEntropy/GTentropy; addValue("Completeness",completeness); double beta = 1; double vmeasure = (1+ beta)*homogeneity*completeness/(beta *homogeneity+completeness); if(vmeasure > 1 || homogeneity < 0) addValue("V-Measure",-1); else addValue("V-Measure",vmeasure); double mutual = 0; for (int i = 0; i < numCluster; i++){ for (int j = 0; j < numClasses; j++) { if(mm.getClusterClassWeight(i, j)==0) continue; double m = Math.log10(mm.getClusterClassWeight(i, j)/(double)mm.getClusterSum(i)/(double)mm.getClassSum(j)*(double)n); m*= mm.getClusterClassWeight(i, j)/(double)n; logger.debug("( {} / {}): ",m, m); mutual+=m; } } if(numClasses > 1) mutual/=Math.log10(numClasses); double varInfo = 1; if(FCentropy + GTentropy > 0) varInfo = 2*mutual/(FCentropy + GTentropy); logger.debug("mutual: {} / VI: {}", mutual, varInfo); addValue("VarInformation", varInfo); } }