/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.clustering.lingo;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.core.test.ClusteringAlgorithmTestBase;
import org.carrot2.core.test.SampleDocumentData;
import org.carrot2.text.preprocessing.CaseNormalizer;
import org.carrot2.util.attribute.AttributeUtils;
import org.junit.Test;
import org.carrot2.shaded.guava.common.collect.ImmutableList;
import org.carrot2.shaded.guava.common.collect.Lists;
import static org.junit.Assert.*;
/**
 * Test cases for the {@link LingoClusteringAlgorithm}.
 */
public class LingoClusteringAlgorithmTest extends
    ClusteringAlgorithmTestBase<LingoClusteringAlgorithm>
{
    /**
     * Returns the class of the clustering component exercised by this test.
     */
    @Override
    public Class<LingoClusteringAlgorithm> getComponentClass()
    {
        return LingoClusteringAlgorithm.class;
    }

    /**
     * Runs the algorithm through a simple controller without this test adding
     * any documents to the processing attributes. Processing is expected to
     * throw a {@link ProcessingException} whose message mentions the missing
     * required attribute.
     */
    @Test
    public void testNoRequiredDocuments()
    {
        try
        {
            getSimpleController(initAttributes).process(
                processingAttributes, getComponentClass());
            // Reaching this line means no exception was thrown at all.
            fail("Should fail with an exception.");
        }
        catch (ProcessingException e)
        {
            assertThat(e.getMessage()).contains("No value for required attribute");
        }
    }

    /**
     * Sets the {@link CaseNormalizer} <code>dfThreshold</code> attribute to 20
     * and clusters the data mining sample documents. At least one cluster
     * must be produced.
     */
    @Test
    public void testClusteringWithDfThreshold()
    {
        processingAttributes.put(
            AttributeUtils.getKey(CaseNormalizer.class, "dfThreshold"), 20);

        final Collection<Cluster> clustersWithThreshold = cluster(
            SampleDocumentData.DOCUMENTS_DATA_MINING).getClusters();

        // Clustering with df threshold must not fail
        assertThat(clustersWithThreshold.size()).isGreaterThan(0);
    }

    /**
     * Clusters three documents, each built from the same string
     * <code>"test"</code>, with the query also set to <code>"test"</code>.
     * Exactly one cluster is expected, and its size must equal the number of
     * input documents.
     */
    @Test
    public void testNoLabelCandidates()
    {
        final List<Document> documents = Lists.newArrayList();
        documents.add(new Document("test"));
        documents.add(new Document("test"));
        documents.add(new Document("test"));

        processingAttributes.put(AttributeNames.QUERY, "test");

        final List<Cluster> clusters = cluster(documents).getClusters();
        assertNotNull(clusters);
        assertEquals(1, clusters.size());
        assertThat(clusters.get(0).size()).isEqualTo(documents.size());
    }

    /**
     * Clusters documents built from inflected variants of "program" plus one
     * unrelated document, leaving all attributes at their defaults. Two
     * clusters are expected, the first of which has a label starting with
     * "program" (compared case-insensitively).
     */
    @Test
    public void testStemmingUsedWithDefaultAttributes()
    {
        final List<Document> documents = ImmutableList.of(
            new Document("program"),
            new Document("programs"),
            new Document("programming"),
            new Document("program"),
            new Document("programs"),
            new Document("programming"),
            new Document("other"));

        final List<Cluster> clusters = cluster(documents).getClusters();
        assertThat(clusters).hasSize(2);

        // Lower-case with Locale.ROOT so the comparison does not depend on the
        // JVM's default locale (e.g. Turkish dotless-i case mapping rules).
        final String firstLabel = clusters.get(0).getLabel().toLowerCase(Locale.ROOT);
        assertThat(firstLabel).startsWith("program");
    }
}