/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.core; import static org.carrot2.core.test.assertions.Carrot2CoreAssertions.assertThatDocuments; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.List; import org.carrot2.util.tests.CarrotTestCase; import org.fest.assertions.Assertions; import org.junit.Test; import org.carrot2.shaded.guava.common.collect.Lists; import static org.junit.Assert.*; /** * Test cases for {@link Cluster}. */ public class ClusterTest extends CarrotTestCase { @Test public void testAllDocumentsEmptyFlat() { final Cluster flatCluster = new Cluster(); assertEquals(0, flatCluster.size()); Assertions.assertThat(flatCluster.getAllDocuments()).isEmpty(); } @Test public void testAllDocumentsEmptyHierarchical() { final Cluster hierarchicalCluster = new Cluster(); final Cluster subcluster = new Cluster(); hierarchicalCluster.addSubclusters(subcluster); subcluster.addSubclusters(new Cluster()); assertEquals(0, hierarchicalCluster.size()); Assertions.assertThat(hierarchicalCluster.getAllDocuments()).isEmpty(); } @Test public void testSizeNonEmptyFlat() { final Cluster flatCluster = new Cluster(); final List<Document> documents = Lists.newArrayList(new Document(), new Document()); flatCluster.addDocuments(documents); assertEquals(2, flatCluster.size()); assertEquals(documents, flatCluster.getAllDocuments()); } @Test public void testSizeNonEmptyHierarchicalWithoutOverlap() { final Cluster hierarchicalCluster = new Cluster(); final Cluster subcluster = new Cluster(); hierarchicalCluster.addSubclusters(subcluster); final Document documentA = new Document(); hierarchicalCluster.addDocuments(documentA); final Document documentB = new Document(); subcluster.addDocuments(documentB); final List<Document> expectedAllDocuments = Lists.newArrayList(documentA, documentB); assertEquals(2, hierarchicalCluster.size()); assertEquals(expectedAllDocuments, hierarchicalCluster.getAllDocuments()); } @Test public void testSizeNonEmptyHierarchicalWithOverlap() { final Cluster hierarchicalCluster = new Cluster(); final Cluster subcluster = new Cluster(); hierarchicalCluster.addSubclusters(subcluster); final Document document1 = new Document(); hierarchicalCluster.addDocuments(document1); final Document documentB = new Document(); hierarchicalCluster.addDocuments(documentB); subcluster.addDocuments(document1); final Document documentC = new Document(); subcluster.addDocuments(documentC); final List<Document> expectedAllDocuments = Lists.newArrayList(document1, documentB, documentC); assertEquals(3, hierarchicalCluster.size()); assertEquals(expectedAllDocuments, hierarchicalCluster.getAllDocuments()); } @Test public void testByLabelComparator() { final Cluster clusterA = new Cluster(); clusterA.addPhrases("A"); final Cluster clusterB = new Cluster(); clusterB.addPhrases("b"); final Cluster clusterNull = new Cluster(); checkOrder(Lists.newArrayList(clusterNull, clusterA, clusterB), Cluster.BY_SIZE_COMPARATOR); } @Test public void testBySizeComparator() { final Cluster clusterA = new Cluster(); clusterA.addDocuments(new Document(), new Document()); final Cluster clusterB = new Cluster(); checkOrder(Lists.newArrayList(clusterB, clusterA), Cluster.BY_SIZE_COMPARATOR); } @Test public void testByReversedSizeAndLabelComparator() { final Cluster clusterA = new Cluster(); clusterA.addPhrases("A"); clusterA.addDocuments(new Document(), new Document()); final Cluster clusterB = new Cluster(); clusterB.addPhrases("B"); clusterB.addDocuments(new Document(), new Document()); final Cluster clusterC = new Cluster(); clusterC.addPhrases("C"); clusterC.addDocuments(new Document(), new Document(), new Document()); checkOrder(Lists.newArrayList(clusterC, clusterA, clusterB), Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR); } @Test public void testByReversedWeightedScoreAndSizeComparatorOnlySize() { checkOrder(createSizeAndScoreClusters(1, 2, 0), Cluster .byReversedWeightedScoreAndSizeComparator(0)); } @Test public void testByReversedWeightedScoreAndSizeComparatorOnlyScore() { checkOrder(createSizeAndScoreClusters(1, 0, 2), Cluster .byReversedWeightedScoreAndSizeComparator(1)); } private List<Cluster> createSizeAndScoreClusters(int a, int b, int c) { Cluster [] clusters = new Cluster [3]; final Cluster clusterA = new Cluster(); clusterA.addPhrases("A"); clusterA.setAttribute(Cluster.SCORE, 1.0); clusterA.addDocuments(new Document(), new Document()); clusters[a] = clusterA; final Cluster clusterB = new Cluster(); clusterB.addPhrases("B"); clusterB.setAttribute(Cluster.SCORE, 2.0); clusterB.addDocuments(new Document(), new Document()); clusters[b] = clusterB; final Cluster clusterC = new Cluster(); clusterC.addPhrases("C"); clusterC.setAttribute(Cluster.SCORE, 0.1); clusterC.addDocuments(new Document(), new Document(), new Document()); clusters[c] = clusterC; return Arrays.asList(clusters); } private void checkOrder(List<Cluster> expected, Comparator<Cluster> comparator) { List<Cluster> toSort = Lists.newArrayList(expected); Collections.sort(toSort, comparator); Assertions.assertThat(toSort).isEqualTo(expected); } @Test() public void testNoIdentifiers() { final Cluster d1 = new Cluster(); final Cluster d2 = new Cluster(); final Cluster d3 = new Cluster(); Cluster.assignClusterIds(Lists.newArrayList(d1, d2, d3)); assertThat(d1.id).isEqualTo(0); assertThat(d2.id).isEqualTo(1); assertThat(d3.id).isEqualTo(2); } @Test() public void testSubclusterIdentifiers() { final Cluster d1 = new Cluster(); final Cluster d2 = new Cluster(); final Cluster d3 = new Cluster(); final Cluster d4 = new Cluster(); d1.addSubclusters(d2); d2.addSubclusters(d4); Cluster.assignClusterIds(Lists.newArrayList(d1, d3)); assertThat(d1.id).isEqualTo(0); assertThat(d2.id).isEqualTo(1); assertThat(d4.id).isEqualTo(2); assertThat(d3.id).isEqualTo(3); } @Test(expected = IllegalArgumentException.class) public void testSomeIdentifiers() { final Cluster d1 = new Cluster(); d1.id = 2; final Cluster d2 = new Cluster(); final Cluster d3 = new Cluster(); final Cluster d4 = new Cluster(); d4.id = 5; final Cluster d5 = new Cluster(); Cluster.assignClusterIds(Lists.newArrayList(d1, d2, d3, d4, d5)); } @Test(expected = IllegalArgumentException.class) public void testNonUniqueIdentifiers() { final Cluster d1 = new Cluster(); d1.id = 0; final Cluster d2 = new Cluster(); d2.id = 0; Cluster.assignClusterIds(Lists.newArrayList(d1, d2)); } @Test(expected = IllegalArgumentException.class) public void testSingleNullIdentifier() { final Cluster d1 = new Cluster(); d1.id = 0; final Cluster d2 = new Cluster(); Cluster.assignClusterIds(Lists.newArrayList(d1, d2)); } @Test public void testFindRootCluster() { final Cluster c1 = new Cluster(); c1.id = 0; final Cluster c2 = new Cluster(); c2.id = 1; Assertions.assertThat(Cluster.find(1, Lists.newArrayList(c1, c2))).isSameAs(c2); } @Test public void testFindSubcluster() { final Cluster c1 = new Cluster(); c1.id = 0; final Cluster c2 = new Cluster(); c2.id = 1; c1.addSubclusters(c2); final Cluster c3 = new Cluster(); c3.id = 2; c2.addSubclusters(c3); Assertions.assertThat(Cluster.find(2, Lists.newArrayList(c1))).isSameAs(c3); } @Test public void testFindNotFound() { final Cluster c1 = new Cluster(); c1.id = 0; final Cluster c2 = new Cluster(); c2.id = 1; c1.addSubclusters(c2); final Cluster c3 = new Cluster(); c3.id = 2; c2.addSubclusters(c3); Assertions.assertThat(Cluster.find(3, Lists.newArrayList(c1))).isNull(); } @Test public void testBuildOtherTopicsNonAssigned() { final Document d1 = new Document(); final Document d2 = new Document(); final Document d3 = new Document(); final List<Document> allDocuments = Lists.newArrayList(d1, d2, d3); final Cluster c1 = new Cluster(); final Cluster c2 = new Cluster(); final Cluster c3 = new Cluster(); c2.addSubclusters(c3); final List<Cluster> clusters = Lists.newArrayList(c1, c2); final Cluster otherTopics = Cluster.buildOtherTopics(allDocuments, clusters); assertThatDocuments(otherTopics.getDocuments()).isEquivalentTo(allDocuments); } @Test public void testBuildOtherTopicsSomeAssigned() { final Document d1 = new Document(); final Document d2 = new Document(); final Document d3 = new Document(); final List<Document> allDocuments = Lists.newArrayList(d1, d2, d3); final Cluster c1 = new Cluster(); final Cluster c2 = new Cluster(); final Cluster c3 = new Cluster(); c2.addSubclusters(c3); c3.addDocuments(d2); final List<Cluster> clusters = Lists.newArrayList(c1, c2); final Cluster otherTopics = Cluster.buildOtherTopics(allDocuments, clusters); assertThatDocuments(otherTopics.getDocuments()).isEquivalentTo(Lists.newArrayList(d1, d3)); } @Test public void testBuildOtherTopicsAllAssigned() { final Document d1 = new Document(); final Document d2 = new Document(); final Document d3 = new Document(); final List<Document> allDocuments = Lists.newArrayList(d1, d2, d3); final Cluster c1 = new Cluster(); final Cluster c2 = new Cluster(); final Cluster c3 = new Cluster(); c2.addSubclusters(c3); c3.addDocuments(d2); c1.addDocuments(d1); c2.addDocuments(d3); final List<Cluster> clusters = Lists.newArrayList(c1, c2); final Cluster otherTopics = Cluster.buildOtherTopics(allDocuments, clusters); assertThatDocuments(otherTopics.getDocuments()).isEquivalentTo( Lists.<Document> newArrayList()); } }