/*
 * This file is part of ELKI:
 * Environment for Developing KDD-Applications Supported by Index-Structures
 *
 * Copyright (C) 2017
 * ELKI Development Team
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package de.lmu.ifi.dbs.elki.evaluation.clustering;

import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;

import net.jafama.FastMath;

/**
 * Entropy based measures.
 *
 * References:
 * <p>
 * Meilă, M.<br />
 * Comparing clusterings by the variation of information<br />
 * Learning theory and kernel machines
 * </p>
 *
 * @author Sascha Goldhofer
 * @since 0.5.0
 */
@Reference(authors = "Meilă, M.", //
    title = "Comparing clusterings by the variation of information", //
    booktitle = "Learning theory and kernel machines", //
    url = "http://dx.doi.org/10.1007/978-3-540-45167-9_14")
public class Entropy {
  /**
   * Entropy in first
   */
  protected double entropyFirst = -1.0;

  /**
   * Entropy in second
   */
  protected double entropySecond = -1.0;

  /**
   * Joint entropy
   */
  protected double entropyJoint = -1.0;

  /**
   * Constructor.
   *
   * @param table Contingency table
   */
  protected Entropy(ClusterContingencyTable table) {
    super();
    // The last row and column of the contingency table hold the marginal
    // sums; the bottom-right cell holds the total number of objects.
    double norm = 1.0 / table.contingency[table.size1][table.size2];
    {
      entropyFirst = 0.0;
      // iterate over first clustering
      for(int i1 = 0; i1 < table.size1; i1++) {
        if(table.contingency[i1][table.size2] > 0) {
          double probability = norm * table.contingency[i1][table.size2];
          entropyFirst -= probability * FastMath.log(probability);
        }
      }
    }
    {
      entropySecond = 0.0;
      // iterate over second clustering
      for(int i2 = 0; i2 < table.size2; i2++) {
        if(table.contingency[table.size1][i2] > 0) {
          double probability = norm * table.contingency[table.size1][i2];
          entropySecond -= probability * FastMath.log(probability);
        }
      }
    }
    {
      entropyJoint = 0.0;
      for(int i1 = 0; i1 < table.size1; i1++) {
        for(int i2 = 0; i2 < table.size2; i2++) {
          if(table.contingency[i1][i2] > 0) {
            double probability = norm * table.contingency[i1][i2];
            entropyJoint -= probability * FastMath.log(probability);
          }
        }
      }
    }
  }

  /**
   * Get the entropy of the first clustering, using the natural logarithm
   * (not normalized).
   *
   * @return Entropy of first clustering
   */
  public double entropyFirst() {
    return entropyFirst;
  }

  /**
   * Get the entropy of the second clustering, using the natural logarithm
   * (not normalized).
   *
   * @return Entropy of second clustering
   */
  public double entropySecond() {
    return entropySecond;
  }

  /**
   * Get the joint entropy of both clusterings (not normalized).
   *
   * @return Joint entropy of both clusterings
   */
  public double entropyJoint() {
    return entropyJoint;
  }

  /**
   * Get the conditional entropy of the first clustering given the second
   * (not normalized, 0 = equal).
   *
   * @return Conditional entropy of first clustering
   */
  public double entropyConditionalFirst() {
    return (entropyJoint() - entropySecond());
  }
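  /*
   * The two conditional-entropy getters rely on the standard identities
   * H(X|Y) = H(X,Y) - H(Y) and H(Y|X) = H(X,Y) - H(X); mutual information
   * below then follows as I(X;Y) = H(X) + H(Y) - H(X,Y).
   */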
  /**
   * Get the conditional entropy of the second clustering given the first
   * (not normalized, 0 = equal).
   *
   * @return Conditional entropy of second clustering
   */
  public double entropyConditionalSecond() {
    return (entropyJoint() - entropyFirst());
  }

  /**
   * Get Powers entropy (normalized, 0 = equal): Powers = 1 - NMI_Sum.
   *
   * @return Powers
   */
  public double entropyPowers() {
    return (2 * entropyJoint() / (entropyFirst() + entropySecond()) - 1);
  }

  /**
   * Get the mutual information (not normalized, 0 = unequal).
   *
   * @return Mutual information
   */
  public double entropyMutualInformation() {
    return (entropyFirst() + entropySecond() - entropyJoint());
  }

  /**
   * Get the joint-normalized mutual information (normalized, 0 = unequal).
   *
   * @return Joint Normalized Mutual information
   */
  public double entropyNMIJoint() {
    if(entropyJoint() == 0) {
      return 0;
    }
    return (entropyMutualInformation() / entropyJoint());
  }

  /**
   * Get the min-normalized mutual information (normalized, 0 = unequal).
   *
   * @return Min Normalized Mutual information
   */
  public double entropyNMIMin() {
    return (entropyMutualInformation() / Math.min(entropyFirst(), entropySecond()));
  }

  /**
   * Get the max-normalized mutual information (normalized, 0 = unequal).
   *
   * @return Max Normalized Mutual information
   */
  public double entropyNMIMax() {
    return (entropyMutualInformation() / Math.max(entropyFirst(), entropySecond()));
  }

  /**
   * Get the sum-normalized mutual information (normalized, 0 = unequal).
   *
   * @return Sum Normalized Mutual information
   */
  public double entropyNMISum() {
    return (2 * entropyMutualInformation() / (entropyFirst() + entropySecond()));
  }

  /**
   * Get the sqrt-normalized mutual information (normalized, 0 = unequal).
   *
   * @return Sqrt Normalized Mutual information
   */
  public double entropyNMISqrt() {
    if(entropyFirst() * entropySecond() <= 0) {
      return entropyMutualInformation();
    }
    return (entropyMutualInformation() / FastMath.sqrt(entropyFirst() * entropySecond()));
  }

  /**
   * Get the variation of information (not normalized, 0 = equal).
   *
   * @return Variation of information
   */
  public double variationOfInformation() {
    return (2 * entropyJoint() - (entropyFirst() + entropySecond()));
  }

  /**
   * Get the normalized variation of information (normalized, 0 = equal):
   * NVI = 1 - NMI_Joint.
   *
   * <p>
   * Nguyen, X. V. and Epps, J. and Bailey, J.<br />
   * Information theoretic measures for clusterings comparison: is a correction
   * for chance necessary?<br />
   * In: Proc. ICML '09 Proceedings of the 26th Annual International Conference
   * on Machine Learning
   * </p>
   *
   * @return Normalized Variation of information
   */
  @Reference(authors = "Nguyen, X. V. and Epps, J. and Bailey, J.", //
      title = "Information theoretic measures for clusterings comparison: is a correction for chance necessary?", //
      booktitle = "Proc. ICML '09 Proceedings of the 26th Annual International Conference on Machine Learning", //
      url = "http://dx.doi.org/10.1145/1553374.1553511")
  public double normalizedVariationOfInformation() {
    return (1.0 - (entropyMutualInformation() / entropyJoint()));
  }
}
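
/*
 * A minimal standalone sketch of the computation above, assuming only a plain
 * contingency table with marginal sums in the last row and column (the layout
 * the constructor expects). The class name and the example table are
 * illustrative, not part of the ELKI API; Math.log stands in for FastMath.log
 * to keep the sketch dependency-free.
 */
class EntropyExampleSketch {
  public static void main(String[] args) {
    // Rows: clusters of the first clustering; columns: second clustering.
    // Last row/column hold the marginal sums, bottom-right the total.
    int[][] contingency = { //
        { 4, 1, 5 }, //
        { 1, 4, 5 }, //
        { 5, 5, 10 } };
    final int size1 = 2, size2 = 2;
    double norm = 1.0 / contingency[size1][size2];
    double h1 = 0., h2 = 0., hj = 0.;
    for(int i1 = 0; i1 < size1; i1++) { // entropy of the first clustering
      if(contingency[i1][size2] > 0) {
        double p = norm * contingency[i1][size2];
        h1 -= p * Math.log(p);
      }
    }
    for(int i2 = 0; i2 < size2; i2++) { // entropy of the second clustering
      if(contingency[size1][i2] > 0) {
        double p = norm * contingency[size1][i2];
        h2 -= p * Math.log(p);
      }
    }
    for(int i1 = 0; i1 < size1; i1++) { // joint entropy
      for(int i2 = 0; i2 < size2; i2++) {
        if(contingency[i1][i2] > 0) {
          double p = norm * contingency[i1][i2];
          hj -= p * Math.log(p);
        }
      }
    }
    double mi = h1 + h2 - hj; // mutual information
    System.out.println("H(first) = " + h1 + ", H(second) = " + h2);
    System.out.println("H(joint) = " + hj + ", I = " + mi);
    System.out.println("VI = " + (2 * hj - h1 - h2));
    System.out.println("NMI_joint = " + (mi / hj));
  }
}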