package focusedCrawler.link.frontier.selector;
import static java.util.Arrays.asList;
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;
import java.util.ArrayList;
import java.util.List;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import focusedCrawler.link.frontier.LinkRelevance;
import focusedCrawler.link.frontier.selector.MaximizeWebsitesLinkSelector;
/**
 * Unit tests for {@link MaximizeWebsitesLinkSelector}.
 *
 * <p>Each test builds a small in-memory frontier of {@link LinkRelevance}
 * entries, runs one or more selection rounds over it, and checks how many
 * links come back and in which relevance order.
 */
public class MaximizeWebsitesLinkSelectorTest {

    // NOTE(review): no test in this class references this rule — confirm
    // whether it is still needed.
    @Rule
    public TemporaryFolder tempFolder = new TemporaryFolder();

    /**
     * With two links on each of two hosts and a budget of two links per
     * round, every round is expected to return the best remaining link of
     * each host, highest relevance first, until nothing is left.
     */
    @Test
    public void shouldSelectLinksOfEachDomain() throws Exception {
        // given
        MaximizeWebsitesLinkSelector linkSelector = new MaximizeWebsitesLinkSelector();
        List<LinkRelevance> pending = new ArrayList<>(asList(
            new LinkRelevance("http://example1.com/1", 1d),
            new LinkRelevance("http://example1.com/2", 2d),
            new LinkRelevance("http://example2.com/1", 3d),
            new LinkRelevance("http://example2.com/2", 4d)
        ));

        // when
        List<LinkRelevance> picked = runSelection(linkSelector, pending, 2);

        // then
        assertRelevancesInOrder(picked, 4d, 2d);

        // given
        pending.removeAll(picked);

        // when
        picked = runSelection(linkSelector, pending, 2);

        // then
        assertRelevancesInOrder(picked, 3d, 1d);

        // given
        pending.removeAll(picked);

        // when (no link is evaluated in this round)
        linkSelector.startSelection(2);
        picked = linkSelector.getSelectedLinks();

        // then
        assertThat(picked.size(), is(0));
    }

    /**
     * With nine links spread over three hosts and a budget (15) larger than
     * the frontier, all nine links are expected back, ordered by descending
     * relevance.
     */
    @Test
    public void shouldSelectTopkLinksOfHigherRelevance() throws Exception {
        // given
        MaximizeWebsitesLinkSelector linkSelector = new MaximizeWebsitesLinkSelector();
        List<LinkRelevance> pending = asList(
            new LinkRelevance("http://example1.com/1", 1),
            new LinkRelevance("http://example1.com/2", 2),
            new LinkRelevance("http://example1.com/3", 3),
            new LinkRelevance("http://example2.com/1", 1),
            new LinkRelevance("http://example2.com/2", 2),
            new LinkRelevance("http://example2.com/3", 3),
            new LinkRelevance("http://example3.com/1", 1),
            new LinkRelevance("http://example3.com/2", 2),
            new LinkRelevance("http://example3.com/3", 3)
        );

        // when
        List<LinkRelevance> picked = runSelection(linkSelector, pending, 15);

        // then
        assertRelevancesInOrder(picked, 3d, 3d, 3d, 2d, 2d, 2d, 1d, 1d, 1d);
    }

    /**
     * Runs one complete selection round: starts the selection, offers every
     * candidate to the selector in list order, and returns what it selected.
     *
     * @param linkSelector  selector under test
     * @param candidates    links offered to the selector, in iteration order
     * @param numberOfLinks value passed to {@code startSelection}
     * @return the list returned by {@code getSelectedLinks()}
     */
    private static List<LinkRelevance> runSelection(MaximizeWebsitesLinkSelector linkSelector,
                                                    List<LinkRelevance> candidates,
                                                    int numberOfLinks) throws Exception {
        linkSelector.startSelection(numberOfLinks);
        for (LinkRelevance candidate : candidates) {
            linkSelector.evaluateLink(candidate);
        }
        return linkSelector.getSelectedLinks();
    }

    /**
     * Asserts that {@code links} has exactly as many entries as
     * {@code expectedRelevances} and that the relevance at each position
     * matches, in order. The size is checked first.
     */
    private static void assertRelevancesInOrder(List<LinkRelevance> links,
                                                double... expectedRelevances) {
        assertThat(links.size(), is(expectedRelevances.length));
        for (int position = 0; position < expectedRelevances.length; position++) {
            assertThat(links.get(position).getRelevance(), is(expectedRelevances[position]));
        }
    }
}