/* * Copyright 2004-2010 Information & Software Engineering Group (188/1) * Institute of Software Technology and Interactive Systems * Vienna University of Technology, Austria * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package at.tuwien.ifs.somtoolbox.layers.quality; import java.util.Collection; import java.util.HashMap; import java.util.List; import org.apache.commons.lang.mutable.MutableDouble; import at.tuwien.ifs.somtoolbox.apps.viewer.GeneralUnitPNode; import at.tuwien.ifs.somtoolbox.data.SOMLibClassInformation; import at.tuwien.ifs.somtoolbox.input.SOMLibFileFormatException; import at.tuwien.ifs.somtoolbox.visualization.clustering.ClusterNode; public class EntropyAndPurityCalculator { private double entropy; private double purity; // FIXME implement normalised mutual information // http://nlp.stanford.edu/IR-book/html/htmledition/evaluation-of-clustering-1.html public EntropyAndPurityCalculator(List<ClusterNode> clusters, SOMLibClassInformation classInfo) { // for all clusters int clusterCounter = 0; double[] halfWeightedEntropyValues = new double[clusters.size()]; double[] halfWeightedPurityValues = new double[clusters.size()]; int vectorCounter = 0; for (ClusterNode clusterNode : clusters) { int numberOfInputs = clusterNode.getNumberOfInputs(); vectorCounter += numberOfInputs; LabelledCounterMapToBePutInCOMMONS lcm = new LabelledCounterMapToBePutInCOMMONS(classInfo.classNames()); // for all nodes in cluster GeneralUnitPNode[] nodes = clusterNode.getNodes(); for (GeneralUnitPNode node : nodes) { String inputNames[] = node.getUnit().getMappedInputNames(); if (inputNames != null) { // for all inputs mapped onto node for (String inputName : inputNames) { String className = null; try { className = classInfo.getClassName(inputName); } catch (SOMLibFileFormatException e) { e.printStackTrace(); } lcm.increment(className); } } } // System.out.println("In cluster " + clusterCounter + ": " + lcm.getCounter()); double entropySum = 0; double purity = Double.MIN_VALUE; for (String key : lcm.keySet()) { // System.out.println("\t" + key + ": " + lcm.get(key) + " / " + lcm.getCounter()); double classProbability = lcm.get(key).doubleValue() / lcm.getCounter(); double entropy = entropy(classProbability); // System.out.println("\tent: " + entropy); entropySum += entropy; if (classProbability > purity) { purity = classProbability; } } // System.out.println("Cluster entropy: " + entropySum); // System.out.println("Cluster purity: " + purity); double halfWeightedEntropy = entropySum * lcm.getCounter(); double halfWeightedPurity = purity * lcm.getCounter(); // System.out.println("Weighted cluster entropy (" + lcm.getCounter() + "): " + halfWeightedEntropy); // System.out.println("Weighted cluster purity (" + lcm.getCounter() + "): " + halfWeightedPurity); // counting clusters halfWeightedEntropyValues[clusterCounter] = halfWeightedEntropy; halfWeightedPurityValues[clusterCounter] = halfWeightedPurity; clusterCounter++; } // finally, we build the some over the normalised values (that is divided by the toal number of vectors) double finalEntValue = 0d; double finalPurityValue = 0d; for (int i = 0; i < clusterCounter; i++) { double normalisedEntropy = halfWeightedEntropyValues[i] / vectorCounter; double normalisedPurity = halfWeightedPurityValues[i] / vectorCounter; // System.out.println("norm: " + normalisedEntropy); finalEntValue += normalisedEntropy; finalPurityValue += normalisedPurity; } // System.out.println("sum over entropy norms: " + finalEntValue); // System.out.println("sum over purity norms: " + finalPurityValue); entropy = finalEntValue; purity = finalPurityValue; } // FIXME see to it that these are used from cm imports // FIXME see to it that these are used from cm imports // FIXME see to it that these are used from cm imports public static double entropy(double value) { double val = -getLog2(value) * value; val = !Double.isInfinite(val) ? val : 0d; return !Double.isNaN(val) ? val : 0d; } // FIXME see to it that these are used from cm imports // FIXME see to it that these are used from cm imports // FIXME see to it that these are used from cm imports public static double getLog2(double value) { return Math.log(value) / Math.log(2.0); } public double getEntropy() { return entropy; } public double getPurity() { return purity; } public class LabelledCounterMapToBePutInCOMMONS { // FIXME use generics for aa vaere alfa private HashMap<String, MutableDouble> map; private double counter; /** here we count the sum of all entries */ public double getCounter() { return counter; } public LabelledCounterMapToBePutInCOMMONS() { map = new HashMap<String, MutableDouble>(); counter = 0; } public LabelledCounterMapToBePutInCOMMONS(String[] labels) { this(); for (String label : labels) { map.put(label, new MutableDouble(0)); } } public int size() { return map.size(); } @Override public String toString() { StringBuilder sb = new StringBuilder(); for (int i = 0; i < map.keySet().size(); i++) { sb.append(map.keySet().toArray()[i] + " " + map.values().toArray()[i] + "\n"); } return sb.toString(); } /** increment the counter for the given key */ public void increment(String key) { this.map.get(key).increment(); counter++; } /** increment the counter for the given key this one checks for existence */ public void incrementOrAdd(String key) { MutableDouble v = this.map.get(key); if (v == null) { this.map.put(key, new MutableDouble(1)); } else { increment(key); } counter++; } public Collection<MutableDouble> entrySet() { return this.map.values(); } public Collection<String> keySet() { return this.map.keySet(); } public MutableDouble get(String key) { return this.map.get(key); } } }