/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.output.metrics; import org.carrot2.core.Cluster; import org.junit.Test; import org.carrot2.shaded.guava.common.collect.Lists; /** * Test cases for {@link IClusteringMetric}. */ public class ContaminationMetricTest extends IdealPartitioningBasedMetricTest { @Test public void testWorstCaseH() { assertThat(ContaminationMetric.calculateWorstCaseH(0, 1)).isEqualTo(0); assertThat(ContaminationMetric.calculateWorstCaseH(1, 1)).isEqualTo(0); assertThat(ContaminationMetric.calculateWorstCaseH(2, 1)).isEqualTo(0); assertThat(ContaminationMetric.calculateWorstCaseH(2, 2)).isEqualTo(1); assertThat(ContaminationMetric.calculateWorstCaseH(8, 4)).isEqualTo(24); assertThat(ContaminationMetric.calculateWorstCaseH(6, 4)).isEqualTo(13); } @Test public void testEmptyCluster() { check(new Cluster(), null); } @Test public void testTrivialCluster() { check(new Cluster("test", documentWithPartitions("test")), 0.0); } @Test public void testPureCluster() { check(pureCluster(), 0.0); } @Test public void testPartiallyContaminatedCluster() { check(partiallyContaminatedCluster(), 0.75); } @Test public void testFullyContaminatedCluster() { check(fullyContaminatedCluster(), 1.0); } @Test public void testHardClustersWithOverlappingPartitions() { // Second cluster is fully contaminated even though it perfectly matches // second partition. This is because the partition itself is "contaminated" // by sharing one document with the first partition. check(hardClustersWithOverlappingPartitions(), 0.0, 1.0); } @Test public void testHardPartitionsOverlappingClusters() { check(overlappingClustersWithHardPartitions(), 1.0, 0.0); } @Test public void testOverlappingPartitionsOverlappingClusters() { // Again, clusters are penalized because partitions themselves are // "contaminated", see comment above. check(overlappingClustersWithOverlappingPartitions(), 0.75, 1.0); } @Test public void testAllDocumentsInOtherTopics() { final Cluster otherTopics = clusterWithPartitions("t1", "t2", "t3"); otherTopics.setOtherTopics(true); check(otherTopics, null); } @Test public void testIdealClustering() { check(idealClusters(), 0.0, 0.0); } private void check(Cluster cluster, Double expectedContamination) { check(new Cluster [] { cluster }, expectedContamination); } private void check(Cluster [] clusters, Double... expectedContaminations) { final ContaminationMetric metric = new ContaminationMetric(); metric.documents = getAllDocuments(clusters); metric.clusters = Lists.newArrayList(clusters); metric.calculate(); for (int i = 0; i < clusters.length; i++) { assertThat( clusters[i].<Object> getAttribute(ContaminationMetric.CONTAMINATION)) .isEqualTo(expectedContaminations[i]); } } @Override protected String [] getClusterMetricKeys() { return new String [] { ContaminationMetric.CONTAMINATION }; } }