/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.clustering.kmeans;
import java.util.List;
import java.util.Set;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.ProcessingResult;
import org.carrot2.core.test.ClusteringAlgorithmTestBase;
import org.carrot2.core.test.SampleDocumentData;
import org.carrot2.core.test.assertions.Carrot2CoreAssertions;
import org.carrot2.text.clustering.MultilingualClustering.LanguageAggregationStrategy;
import org.carrot2.text.clustering.MultilingualClusteringDescriptor;
import org.junit.Test;
import org.carrot2.shaded.guava.common.collect.Lists;
import org.carrot2.shaded.guava.common.collect.Sets;
import static org.junit.Assert.*;
public class BisectingKMeansClusteringAlgorithmTest extends
ClusteringAlgorithmTestBase<BisectingKMeansClusteringAlgorithm>
{
@Override
public Class<BisectingKMeansClusteringAlgorithm> getComponentClass()
{
return BisectingKMeansClusteringAlgorithm.class;
}
@Test
public void smokeTest()
{
final List<Document> documents = Lists.newArrayList();
documents.add(new Document("WordA . WordA"));
documents.add(new Document("WordB . WordB"));
documents.add(new Document("WordC . WordC"));
documents.add(new Document("WordA . WordA"));
documents.add(new Document("WordB . WordB"));
documents.add(new Document("WordC . WordC"));
BisectingKMeansClusteringAlgorithmDescriptor.attributeBuilder(
processingAttributes).labelCount(1).partitionCount(3);
final List<Cluster> clusters = cluster(documents).getClusters();
assertNotNull(clusters);
assertEquals(3, clusters.size());
Carrot2CoreAssertions.assertThat(clusters.get(0)).hasLabel("WordA");
Carrot2CoreAssertions.assertThat(clusters.get(1)).hasLabel("WordB");
Carrot2CoreAssertions.assertThat(clusters.get(2)).hasLabel("WordC");
}
@Test
public void testMultilingualSplit() throws Exception
{
BisectingKMeansClusteringAlgorithmDescriptor.attributeBuilder(processingAttributes)
.labelCount(1).partitionCount(3);
MultilingualClusteringDescriptor.attributeBuilder(processingAttributes)
.languageAggregationStrategy(LanguageAggregationStrategy.FLATTEN_NONE);
final ProcessingResult pr = cluster(SampleDocumentData.DOCUMENTS_SALSA_MULTILINGUAL);
final List<Cluster> clusters = pr.getClusters();
final Set<String> clusterNames = Sets.newHashSet();
for (Cluster c : clusters) {
clusterNames.add(c.getLabel());
}
assertThat(clusterNames).contains("English", "Italian", "French", "Spanish", "German");
}
}