/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.clustering.synthetic;
import static org.carrot2.core.test.assertions.Carrot2CoreAssertions.*;
import java.util.*;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.test.ClusteringAlgorithmTestBase;
import org.junit.Test;
import org.carrot2.shaded.guava.common.collect.Lists;
import static org.junit.Assert.*;
/**
 * Unit tests for {@link ByUrlClusteringAlgorithm}. The cases below check how
 * document URLs are split into parts and which clusters are produced for small,
 * hand-written sets of URLs.
 */
public class ByUrlClusteringAlgorithmTest
    extends ClusteringAlgorithmTestBase<ByUrlClusteringAlgorithm> {

    /**
     * @return the class of the clustering component exercised by this test
     */
    @Override
    public Class<ByUrlClusteringAlgorithm> getComponentClass() {
        return ByUrlClusteringAlgorithm.class;
    }

    /**
     * Builds test documents for the given URLs by delegating to
     * {@link DocumentWithUrlsFactory}.
     *
     * @param urls the URLs to hand to the factory, in order; individual entries
     *        may be <code>null</code>
     * @return the documents generated by the factory
     */
    private static List<Document> docsWithUrls(String... urls) {
        return DocumentWithUrlsFactory.INSTANCE.generate(urls);
    }

    /**
     * Expects the three <code>cos.pl</code> URL variants to yield the parts
     * <code>{"pl", "cos"}</code>, and expects <code>null</code> parts for the bare
     * <code>"http://"</code> URL and for the <code>null</code> URL.
     */
    @Test
    public void testUrlParsing() {
        final List<Document> docs = docsWithUrls(
            "cos.pl", "http://cos.pl/cos", "cos.pl/cos", "http://", null);
        final Document[] documentArray = docs.toArray(new Document[docs.size()]);

        final String[] plCos = {"pl", "cos"};
        final String[][] expectedUrlParts = {
            plCos.clone(), plCos.clone(), plCos.clone(), null, null
        };

        final ByUrlClusteringAlgorithm algorithm = new ByUrlClusteringAlgorithm();
        final String[][] actualUrlParts = algorithm.buildUrlParts(documentArray);

        assertArrayEquals("Url parts equality", expectedUrlParts, actualUrlParts);
    }

    /**
     * Expects all documents to land in a single <code>cos.pl</code> cluster when
     * every URL points at that host.
     */
    @Test
    public void testOneUrl() {
        final List<Document> docs = docsWithUrls(
            "cos.pl", "http://cos.pl/cos", "cos.pl/cos");

        final Cluster cosPl = new Cluster("cos.pl",
            docs.get(0), docs.get(1), docs.get(2));
        final List<Cluster> expectedFacets = Lists.newArrayList(cosPl);

        // The clustering result is copied into a fresh list before the comparison.
        final List<Cluster> actual = Lists.newArrayList(cluster(docs).getClusters());

        assertThatClusters(actual).isEquivalentTo(expectedFacets);
    }

    /**
     * Expects the <code>www.cos.pl</code> document to be grouped together with the
     * plain <code>cos.pl</code> documents under a cluster labelled
     * <code>cos.pl</code>.
     */
    @Test
    public void testStopPartsStripping() {
        final List<Document> docs = docsWithUrls(
            "www.cos.pl", "http://cos.pl/cos", "cos.pl/cos");

        final Cluster cosPl = new Cluster("cos.pl",
            docs.get(0), docs.get(1), docs.get(2));
        final List<Cluster> expectedFacets = Lists.newArrayList(cosPl);

        assertThatClusters(cluster(docs).getClusters()).isEquivalentTo(expectedFacets);
    }

    /**
     * Expects one top-level <code>cos.pl</code> cluster with two subclusters:
     * <code>mail.cos.pl</code> holding the two mail documents, and an
     * "Other Sites" subcluster (flagged as other topics) holding the rest.
     */
    @Test
    public void testOneUrlWithTwoSuburls() {
        final List<Document> docs = docsWithUrls(
            "mail.cos.pl", "http://cos.pl/cos", "cos.pl/cos", "mail.cos.pl");

        final Cluster mailSubcluster = new Cluster("mail.cos.pl",
            docs.get(0), docs.get(3));

        final Cluster otherSubcluster = new Cluster("Other Sites",
            docs.get(1), docs.get(2));
        otherSubcluster.setOtherTopics(true);

        final Cluster topCluster = new Cluster("cos.pl");
        topCluster.addSubclusters(mailSubcluster, otherSubcluster);

        final List<Cluster> expectedFacets = Lists.newArrayList();
        expectedFacets.add(topCluster);

        assertThatClusters(cluster(docs).getClusters()).isEquivalentTo(expectedFacets);
    }

    /**
     * Expects two clusters, listed as <code>cos.pl</code> (three documents)
     * followed by <code>cos.com</code> (two documents).
     */
    @Test
    public void testSorting() {
        final List<Document> docs = docsWithUrls(
            "cos.pl", "http://cos.pl/cos", "cos.com/cos", "cos.com", "cos.pl");

        final Cluster cosPl = new Cluster("cos.pl",
            docs.get(0), docs.get(1), docs.get(4));
        final Cluster cosCom = new Cluster("cos.com",
            docs.get(2), docs.get(3));
        final List<Cluster> expectedFacets = Lists.newArrayList(cosPl, cosCom);

        assertThatClusters(cluster(docs).getClusters()).isEquivalentTo(expectedFacets);
    }
}