package focusedCrawler.crawler.async;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.Matchers.notNullValue;
import static org.junit.Assert.assertThat;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.tika.metadata.Metadata;
import org.junit.Test;
import focusedCrawler.crawler.crawlercommons.fetcher.FetchedResult;
import focusedCrawler.link.frontier.LinkRelevance;
import focusedCrawler.util.CommunicationException;
import focusedCrawler.util.storage.StorageDefault;
import focusedCrawler.util.storage.StorageException;
/**
 * Tests for {@link SitemapXmlHandler}: each test feeds a sitemap fixture from the classpath to
 * {@code completed(...)} and inspects the {@code SitemapData} the handler inserts into its storage.
 */
public class SitemapXmlHandlerTest {

    /** URL used as the address of every sitemap fixture handed to the handler. */
    private static final String SITEMAP_URL = "http://www.example.com/sitemap.xml";

    /**
     * Storage stub that remembers the most recent {@code SitemapData} passed to
     * {@link #insert(Object)} so that tests can inspect it. Objects of any other type are ignored.
     */
    static class LinkStorageMock extends StorageDefault {

        /** Last {@code SitemapData} received by {@link #insert(Object)}; {@code null} until then. */
        public SitemapXmlHandler.SitemapData sitemapData = null;

        /**
         * Records {@code obj} when it is a {@code SitemapData} instance.
         *
         * @param obj the object being inserted
         * @return always {@code null}
         */
        @Override
        public synchronized Object insert(Object obj) throws StorageException, CommunicationException {
            if (obj instanceof SitemapXmlHandler.SitemapData) {
                this.sitemapData = (SitemapXmlHandler.SitemapData) obj;
            }
            return null;
        }
    }

    /** The {@code sample-sitemap.xml} fixture must yield four links and no child sitemaps. */
    @Test
    public void shouldParseLinksFromSitemapXml() throws Exception {
        // given / when
        SitemapXmlHandler.SitemapData sitemapData = handleSitemapResource("sample-sitemap.xml");
        // then
        assertThat(sitemapData, is(notNullValue()));
        assertThat(sitemapData.sitemaps.size(), is(0));
        assertThat(sitemapData.links.size(), is(4));
    }

    /** The {@code sitemap-index.xml} fixture must yield three child sitemaps and no links. */
    @Test
    public void shouldParseChildSitemapsFromSitemapIndexes() throws Exception {
        // given / when
        SitemapXmlHandler.SitemapData sitemapData = handleSitemapResource("sitemap-index.xml");
        // then
        assertThat(sitemapData, is(notNullValue()));
        assertThat(sitemapData.sitemaps.size(), is(3));
        assertThat(sitemapData.links.size(), is(0));
    }

    /**
     * Loads a fixture located next to {@link SitemapXmlHandler} on the classpath, passes it to a
     * fresh handler as a fetched {@code text/xml} response with status 200, and returns whatever
     * the handler stored.
     *
     * @param resourceName fixture file name, resolved relative to {@link SitemapXmlHandler}
     * @return the {@code SitemapData} captured by the storage stub, or {@code null} if the handler
     *         inserted none
     * @throws Exception if the fixture cannot be read or the handler fails
     */
    private static SitemapXmlHandler.SitemapData handleSitemapResource(String resourceName) throws Exception {
        LinkStorageMock linkStorageMock = new LinkStorageMock();
        SitemapXmlHandler handler = new SitemapXmlHandler(linkStorageMock);

        // getResource returns null for a missing file; fail with a readable message instead of
        // an anonymous NullPointerException on toURI().
        URL resource = SitemapXmlHandler.class.getResource(resourceName);
        assertThat("Test resource not found on classpath: " + resourceName, resource, is(notNullValue()));

        Path sitemapFilePath = Paths.get(resource.toURI());
        byte[] sitemapContent = Files.readAllBytes(sitemapFilePath);
        // Constructor arguments are unchanged from the original fixture; see FetchedResult for their meaning.
        FetchedResult response = new FetchedResult(SITEMAP_URL, SITEMAP_URL, 1, new Metadata(), sitemapContent, "text/xml", 1, null, SITEMAP_URL, 0, "127.0.0.1", 200, "OK");
        LinkRelevance link = new LinkRelevance(new URL(SITEMAP_URL), 1, LinkRelevance.Type.SITEMAP);

        handler.completed(link, response);
        return linkStorageMock.sitemapData;
    }
}