ClusterContingencyTableTest.java example

Explorer
elki-master
/*
 * This file is part of ELKI:
 * Environment for Developing KDD-Applications Supported by Index-Structures
 *
 * Copyright (C) 2017
 * ELKI Development Team
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package de.lmu.ifi.dbs.elki.evaluation.paircounting;

import static org.junit.Assert.assertEquals;

import org.junit.Test;

import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest;
import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering;
import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.TrivialAllInOne;
import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.TrivialAllNoise;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.evaluation.clustering.ClusterContingencyTable;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;

/**
 * Validate {@link ClusterContingencyTable} with respect to its ability to compare
 * data clusterings.
 *
 * @author Erich Schubert
 * @since 0.2
 */
public class ClusterContingencyTableTest {
  // the following values depend on the data set used!
  String dataset = "elki/testdata/unittests/hierarchical-3d2d1d.csv";

  // size of the data set
  int shoulds = 600;

  /**
   * Validate {@link ClusterContingencyTable} with respect to its ability to
   * compare data clusterings.
   *
   * @throws ParameterException on errors.
   */
  @Test
  public void testCompareDatabases() {
    Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds);

    // verify data set size.
    Relation<?> rel = db.getRelation(TypeUtil.ANY);
    assertEquals("Data set size not as expected", shoulds, rel.size());

    // run all-in-one
    TrivialAllInOne allinone = new TrivialAllInOne();
    Clustering<Model> rai = allinone.run(db);

    // run all-in-noise
    TrivialAllNoise allinnoise = new TrivialAllNoise();
    Clustering<Model> ran = allinnoise.run(db);

    // run by-label
    ByLabelClustering bylabel = new ByLabelClustering();
    Clustering<?> rbl = bylabel.run(db);

    assertEquals(1.0, computeFMeasure(rai, rai, false), Double.MIN_VALUE);
    assertEquals(1.0, computeFMeasure(ran, ran, false), Double.MIN_VALUE);
    assertEquals(1.0, computeFMeasure(rbl, rbl, false), Double.MIN_VALUE);

    assertEquals(0.009950248756218905, computeFMeasure(ran, rbl, true), Double.MIN_VALUE);
    assertEquals(0.0033277870216306157, computeFMeasure(rai, ran, true), Double.MIN_VALUE);

    assertEquals(0.5 /* 0.3834296724470135 */, computeFMeasure(rai, rbl, false), Double.MIN_VALUE);
  }

  private double computeFMeasure(Clustering<?> c1, Clustering<?> c2, boolean noise) {
    ClusterContingencyTable ct = new ClusterContingencyTable(true, noise);
    ct.process(c1, c2);
    return ct.getPaircount().f1Measure();
  }
}