/*
* This file is part of ELKI:
* Environment for Developing KDD-Applications Supported by Index-Structures
*
* Copyright (C) 2017
* ELKI Development Team
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.lmu.ifi.dbs.elki.evaluation.clustering;
import java.util.Collection;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
import de.lmu.ifi.dbs.elki.evaluation.scores.ScoreEvaluation;
import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest;
import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DistanceResultAdapter;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.result.EvaluationResult;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultHierarchy;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.io.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
 * Evaluate a clustering result by comparing it to an existing cluster label.
 *
 * For every clustering found in a newly added result, a
 * {@link ClusterContingencyTable} against a reference clustering is computed
 * and attached to the result hierarchy as a {@link ScoreResult}. The reference
 * is an already existing clustering whose short name is recognized by
 * {@code isReferenceResult}; if none exists, the configured reference
 * algorithm is run on the database.
 *
 * @author Erich Schubert
 * @since 0.4.0
 *
 * @apiviz.landmark
 * @apiviz.uses ClusterContingencyTable
 * @apiviz.has EvaluateClustering.ScoreResult oneway - - «create»
 */
@Alias("de.lmu.ifi.dbs.elki.evaluation.paircounting.EvaluatePairCountingFMeasure")
public class EvaluateClustering implements Evaluator {
  /**
   * Logger for debug output.
   */
  private static final Logging LOG = Logging.getLogger(EvaluateClustering.class);

  /**
   * Parameter to obtain the reference clustering. Defaults to a flat label
   * clustering.
   */
  public static final OptionID REFERENCE_ID = new OptionID("paircounting.reference", "Reference clustering to compare with. Defaults to a by-label clustering.");

  /**
   * Parameter flag for special noise handling.
   */
  public static final OptionID NOISE_ID = new OptionID("paircounting.noisespecial", "Use special handling for noise clusters.");

  /**
   * Parameter flag controlling self-pairing.
   *
   * NOTE(review): the option description reads "Enable self-pairing", but
   * {@link Parameterizer#makeInstance()} passes the <em>negated</em> flag value
   * to the constructor, so setting this flag switches self-pairing off.
   * Confirm which of the two is intended before changing either.
   */
  public static final OptionID SELFPAIR_ID = new OptionID("paircounting.selfpair", "Enable self-pairing for cluster comparison.");

  /**
   * Reference algorithm, run only when no existing reference clustering is
   * found in the result hierarchy.
   */
  private final ClusteringAlgorithm<?> referencealg;

  /**
   * Apply special handling to noise "clusters".
   */
  private final boolean noiseSpecialHandling;

  /**
   * Use self-pairing in pair-counting measures
   */
  private final boolean selfPairing;

  /**
   * Constructor.
   *
   * @param referencealg Reference clustering
   * @param noiseSpecialHandling Noise handling flag
   * @param selfPairing Self-pairing flag
   */
  public EvaluateClustering(ClusteringAlgorithm<?> referencealg, boolean noiseSpecialHandling, boolean selfPairing) {
    super();
    this.referencealg = referencealg;
    this.noiseSpecialHandling = noiseSpecialHandling;
    this.selfPairing = selfPairing;
  }

  /**
   * Evaluate given a cluster (of positive elements) and a scoring list.
   *
   * The members of the cluster are used as the positive set, and the ranking
   * is iterated in its own order.
   *
   * @param eval Evaluation method
   * @param clus Cluster object
   * @param ranking Object ranking
   * @return Score
   */
  public static double evaluateRanking(ScoreEvaluation eval, Cluster<?> clus, DoubleDBIDList ranking) {
    DBIDsTest positives = new DBIDsTest(DBIDUtil.ensureSet(clus.getIDs()));
    DistanceResultAdapter adapter = new DistanceResultAdapter(ranking.iter());
    return eval.evaluate(positives, adapter);
  }

  /**
   * Evaluate all clusterings contained in a new result against a reference
   * clustering.
   *
   * Results that are themselves reference clusterings are skipped. If no
   * reference clustering exists yet, the reference algorithm is run on the
   * database; if that yields no clustering, a warning is logged and nothing
   * is evaluated.
   *
   * @param hier Result hierarchy
   * @param newResult Newly added result
   */
  @Override
  public void processNewResult(ResultHierarchy hier, Result newResult) {
    // We may just have added this result.
    if(newResult instanceof Clustering && isReferenceResult((Clustering<?>) newResult)) {
      return;
    }
    Database db = ResultUtil.findDatabase(hier);
    List<Clustering<?>> crs = Clustering.getClusteringResults(newResult);
    if(crs == null || crs.isEmpty()) {
      return;
    }
    // Try to find an existing reference clustering (globally)
    Collection<Clustering<?>> global = ResultUtil.filterResults(hier, db, Clustering.class);
    Clustering<?> refc = firstReference(global);
    // Try to find an existing reference clustering (locally)
    if(refc == null) {
      Collection<Clustering<?>> local = ResultUtil.filterResults(hier, newResult, Clustering.class);
      refc = firstReference(local);
    }
    if(refc == null) {
      // Compute the reference clustering
      LOG.debug("Generating a new reference clustering.");
      Result refres = referencealg.run(db);
      List<Clustering<?>> refcrs = Clustering.getClusteringResults(refres);
      // Guard against null in the same way as for the evaluated result above.
      if(refcrs == null || refcrs.isEmpty()) {
        LOG.warning("Reference algorithm did not return a clustering result!");
        return;
      }
      if(refcrs.size() > 1) {
        LOG.warning("Reference algorithm returned more than one result!");
      }
      // Only the first clustering is used as reference.
      refc = refcrs.get(0);
    }
    else {
      LOG.debug("Using existing clustering: " + refc.getLongName() + " " + refc.getShortName());
    }
    for(Clustering<?> c : crs) {
      // Never compare the reference clustering with itself.
      if(c == refc) {
        continue;
      }
      evaluteResult(db, c, refc);
    }
  }

  /**
   * Find the first clustering in a collection that qualifies as reference.
   *
   * @param candidates Clusterings to inspect, in iteration order
   * @return first reference clustering, or {@code null} if there is none
   */
  private Clustering<?> firstReference(Collection<Clustering<?>> candidates) {
    for(Clustering<?> candidate : candidates) {
      if(isReferenceResult(candidate)) {
        return candidate;
      }
    }
    return null;
  }

  /**
   * Evaluate a clustering result.
   *
   * Builds a contingency table (reference clustering first, evaluated
   * clustering second), wraps it into a {@link ScoreResult} and attaches that
   * result below the evaluated clustering in the database's hierarchy.
   *
   * @param db Database
   * @param c Clustering
   * @param refc Reference clustering
   */
  protected void evaluteResult(Database db, Clustering<?> c, Clustering<?> refc) {
    ClusterContingencyTable contmat = new ClusterContingencyTable(selfPairing, noiseSpecialHandling);
    contmat.process(refc, c);
    ScoreResult sr = new ScoreResult(contmat);
    sr.addHeader(c.getLongName());
    db.getHierarchy().add(c, sr);
  }

  /**
   * Test if a clustering result is a valid reference result.
   *
   * The decision is based solely on the short name of the clustering.
   *
   * @param t Clustering to test.
   * @return {@code true} if it is considered to be a reference result.
   */
  private boolean isReferenceResult(Clustering<?> t) {
    // FIXME: don't hard-code strings
    final String name = t.getShortName();
    // Literal-first equals keeps this null-safe for unnamed clusterings.
    return "bylabel-clustering".equals(name) //
        || "bymodel-clustering".equals(name) //
        || "allinone-clustering".equals(name) //
        || "allinnoise-clustering".equals(name);
  }

  /**
   * Result object holding the clustering evaluation measures derived from a
   * cluster contingency table.
   *
   * @author Erich Schubert
   *
   * @apiviz.composedOf ClusterContingencyTable
   */
  public static class ScoreResult extends EvaluationResult {
    /**
     * Cluster contingency table
     */
    protected ClusterContingencyTable contmat;

    /**
     * Constructor.
     *
     * Registers one measurement group per measure family (pair counting,
     * entropy, BCubed, set matching, edit distance, Gini), each filled from
     * the given contingency table.
     *
     * @param contmat score result
     */
    public ScoreResult(ClusterContingencyTable contmat) {
      super("Cluster-Evaluation", "cluster-evaluation");
      this.contmat = contmat;

      PairCounting paircount = contmat.getPaircount();
      MeasurementGroup g = newGroup("Pair counting measures");
      g.addMeasure("Jaccard", paircount.jaccard(), 0, 1, false);
      g.addMeasure("F1-Measure", paircount.f1Measure(), 0, 1, false);
      g.addMeasure("Precision", paircount.precision(), 0, 1, false);
      g.addMeasure("Recall", paircount.recall(), 0, 1, false);
      g.addMeasure("Rand", paircount.randIndex(), 0, 1, false);
      // NOTE(review): the adjusted Rand index can be negative in general; if
      // the 0 and 1 arguments denote the value range, confirm the lower bound.
      g.addMeasure("ARI", paircount.adjustedRandIndex(), 0, 1, false);
      g.addMeasure("FowlkesMallows", paircount.fowlkesMallows(), 0, 1, false);

      Entropy entropy = contmat.getEntropy();
      g = newGroup("Entropy based measures");
      g.addMeasure("NMI Joint", entropy.entropyNMIJoint(), 0, 1, false);
      g.addMeasure("NMI Sqrt", entropy.entropyNMISqrt(), 0, 1, false);

      BCubed bcubed = contmat.getBCubed();
      g = newGroup("BCubed-based measures");
      g.addMeasure("F1-Measure", bcubed.f1Measure(), 0, 1, false);
      g.addMeasure("Recall", bcubed.recall(), 0, 1, false);
      g.addMeasure("Precision", bcubed.precision(), 0, 1, false);

      SetMatchingPurity setm = contmat.getSetMatching();
      g = newGroup("Set-Matching-based measures");
      g.addMeasure("F1-Measure", setm.f1Measure(), 0, 1, false);
      g.addMeasure("Purity", setm.purity(), 0, 1, false);
      g.addMeasure("Inverse Purity", setm.inversePurity(), 0, 1, false);

      EditDistance edit = contmat.getEdit();
      g = newGroup("Editing-distance measures");
      g.addMeasure("F1-Measure", edit.f1Measure(), 0, 1, false);
      g.addMeasure("Precision", edit.editDistanceFirst(), 0, 1, false);
      g.addMeasure("Recall", edit.editDistanceSecond(), 0, 1, false);

      MeanVariance gini = contmat.averageSymmetricGini();
      g = newGroup("Gini measures");
      // The sample standard deviation is only requested for more than one
      // observation; otherwise 0 is shown in the label.
      g.addMeasure("Mean +-" + FormatUtil.NF4.format(gini.getCount() > 1. ? gini.getSampleStddev() : 0.), gini.getMean(), 0, 1, false);
    }

    /**
     * Get the contingency table
     *
     * @return the contingency table
     */
    public ClusterContingencyTable getContingencyTable() {
      return contmat;
    }

    @Override
    public boolean visualizeSingleton() {
      return true;
    }
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer extends AbstractParameterizer {
    /**
     * Reference algorithm, as configured via {@link #REFERENCE_ID}.
     */
    protected ClusteringAlgorithm<?> referencealg = null;

    /**
     * Value of the {@link #NOISE_ID} flag.
     */
    protected boolean noiseSpecialHandling = false;

    /**
     * Value of the {@link #SELFPAIR_ID} flag (negated in
     * {@link #makeInstance()}).
     */
    protected boolean selfPairing = false;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      ObjectParameter<ClusteringAlgorithm<?>> referencealgP = new ObjectParameter<>(REFERENCE_ID, ClusteringAlgorithm.class, ByLabelOrAllInOneClustering.class);
      if(config.grab(referencealgP)) {
        referencealg = referencealgP.instantiateClass(config);
      }
      Flag noiseSpecialHandlingF = new Flag(NOISE_ID);
      if(config.grab(noiseSpecialHandlingF)) {
        noiseSpecialHandling = noiseSpecialHandlingF.getValue();
      }
      Flag selfPairingF = new Flag(SELFPAIR_ID);
      if(config.grab(selfPairingF)) {
        selfPairing = selfPairingF.getValue();
      }
    }

    @Override
    protected EvaluateClustering makeInstance() {
      // NOTE(review): the flag value is negated here, so an unset flag yields
      // selfPairing == true in the evaluator. This contradicts the "Enable"
      // wording of the option description; kept as is, since changing it
      // would alter the default results.
      return new EvaluateClustering(referencealg, noiseSpecialHandling, !selfPairing);
    }
  }
}