/* * This file is part of ELKI: * Environment for Developing KDD-Applications Supported by Index-Structures * * Copyright (C) 2017 * ELKI Development Team * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package de.lmu.ifi.dbs.elki.evaluation.clustering; import de.lmu.ifi.dbs.elki.evaluation.clustering.ClusterContingencyTable.Util; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** * Set matching purity measures. * * References: * <p> * Zhao, Y. and Karypis, G.<br /> * Criterion functions for document clustering: Experiments and analysis<br /> * University of Minnesota, Department of Computer Science, Technical Report * 01-40, 2001 * </p> * <p> * Meilă, M<br /> * Comparing clusterings<br /> * University of Washington, Seattle, Technical Report 418, 2002 * </p> * <p> * Steinbach, M. and Karypis, G. and Kumar, V.<br /> * A comparison of document clustering techniques<br /> * KDD workshop on text mining, 2000 * </p> * <p> * E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo <br /> * A comparison of extrinsic clustering evaluation metrics based on formal * constraints<br /> * Inf. Retrieval, vol. 12, no. 5, pp. 461–486, 2009 * </p> * * @author Sascha Goldhofer * @since 0.5.0 */ @Reference(authors = "Meilă, M", // title = "Comparing clusterings", // booktitle = "University of Washington, Seattle, Technical Report 418, 2002", // url = "http://www.stat.washington.edu/mmp/Papers/compare-colt.pdf") public class SetMatchingPurity { /** * Result cache */ protected double smPurity = -1.0, smInversePurity = -1.0, smFFirst = -1.0, smFSecond = -1.0; /** * Constructor. * * @param table Contingency table */ protected SetMatchingPurity(ClusterContingencyTable table) { super(); int numobj = table.contingency[table.size1][table.size2]; { smPurity = 0.0; smFFirst = 0.0; // iterate first clustering for(int i1 = 0; i1 < table.size1; i1++) { double precisionMax = 0.0; double fMax = 0.0; for(int i2 = 0; i2 < table.size2; i2++) { precisionMax = Math.max(precisionMax, (1.0 * table.contingency[i1][i2])); fMax = Math.max(fMax, (2.0 * table.contingency[i1][i2]) / (table.contingency[i1][table.size2] + table.contingency[table.size1][i2])); // / numobj)); } smPurity += (precisionMax / numobj); smFFirst += (table.contingency[i1][table.size2] / (double) table.contingency[table.size1][table.size2]) * fMax; // * contingency[i1][size2]/numobj; } } { smInversePurity = 0.0; smFSecond = 0.0; // iterate second clustering for(int i2 = 0; i2 < table.size2; i2++) { double recallMax = 0.0; double fMax = 0.0; for(int i1 = 0; i1 < table.size1; i1++) { recallMax = Math.max(recallMax, (1.0 * table.contingency[i1][i2])); fMax = Math.max(fMax, (2.0 * table.contingency[i1][i2]) / (table.contingency[i1][table.size2] + table.contingency[table.size1][i2])); // / numobj)); } smInversePurity += (recallMax / numobj); smFSecond += (table.contingency[table.size1][i2] / (double) table.contingency[table.size1][table.size2]) * fMax; // * contingency[i1][size2]/numobj; } } } /** * Get the set matchings purity (first:second clustering) (normalized, 1 = * equal) * * @return purity */ @Reference(authors = "Zhao, Y. and Karypis, G.", // title = "Criterion functions for document clustering: Experiments and analysis", // booktitle = "University of Minnesota, Department of Computer Science, Technical Report 01-40, 2001", // url = "http://www-users.cs.umn.edu/~karypis/publications/Papers/PDF/vscluster.pdf") public double purity() { return smPurity; } /** * Get the set matchings inverse purity (second:first clustering) (normalized, * 1 = equal) * * @return Inverse purity */ public double inversePurity() { return smInversePurity; } /** * Get the set matching F1-Measure * * <p> * Steinbach, M. and Karypis, G. and Kumar, V.<br /> * A comparison of document clustering techniques<br /> * KDD workshop on text mining, 2000 * </p> * * @return Set Matching F1-Measure */ @Reference(authors = "Steinbach, M. and Karypis, G. and Kumar, V.", // title = "A comparison of document clustering techniques", // booktitle = "KDD workshop on text mining, 2000", // url = "http://www-users.itlabs.umn.edu/~karypis/publications/Papers/PDF/doccluster.pdf") public double f1Measure() { return Util.f1Measure(purity(), inversePurity()); } /** * Get the Van Rijsbergen’s F measure (asymmetric) for first clustering * * <p> * E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo <br /> * A comparison of extrinsic clustering evaluation metrics based on formal * constraints<br /> * Inf. Retrieval, vol. 12, no. 5, pp. 461–486, 2009 * </p> * * @return Set Matching F-Measure of first clustering */ @Reference(authors = "E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo", // title = "A comparison of extrinsic clustering evaluation metrics based on formal constraints", // booktitle = "Inf. Retrieval, vol. 12, no. 5", // url = "http://dx.doi.org/10.1007/s10791-009-9106-z") public double fMeasureFirst() { return smFFirst; } /** * Get the Van Rijsbergen’s F measure (asymmetric) for second clustering * * <p> * E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo <br /> * A comparison of extrinsic clustering evaluation metrics based on formal * constraints<br /> * Inf. Retrieval, vol. 12, no. 5, pp. 461–486, 2009 * </p> * * @return Set Matching F-Measure of second clustering */ @Reference(authors = "E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo", // title = "A comparison of extrinsic clustering evaluation metrics based on formal constraints", // booktitle = "Inf. Retrieval, vol. 12, no. 5", // url = "http://dx.doi.org/10.1007/s10791-009-9106-z") public double fMeasureSecond() { return smFSecond; } }