package focusedCrawler.crawler.async;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.nullValue;
import static org.junit.Assert.assertThat;

import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;

import org.junit.Before;
import org.junit.Test;

import com.sun.net.httpserver.HttpServer;

import focusedCrawler.crawler.crawlercommons.fetcher.FetchedResult;
import focusedCrawler.link.frontier.LinkRelevance;
/**
 * Tests for {@link HttpDownloader} that run against a local {@link HttpServer}
 * started through {@link TestWebServerBuilder}.
 *
 * <p>Every test starts its own server and stops it in a {@code finally} block, so a
 * failing assertion does not leave a server running while the remaining tests execute.
 */
public class HttpDownloaderTest {

    /** Upper bound on how long a test waits for pending downloads before failing. */
    private static final long PENDING_DOWNLOADS_TIMEOUT_MS = 30000L;

    /** Time slept between two checks for pending downloads. */
    private static final long POLL_INTERVAL_MS = 5L;

    /** URL of the page requested by every test. */
    private static final String INDEX_URL = TestWebServerBuilder.address + "/index.html";

    /** Body served by the test server for successful responses. */
    private static final String HELLO_WORLD = "Hello world!";

    private HttpDownloader downloader;

    /** Creates a fresh downloader for each test. */
    @Before
    public void setUp() {
        this.downloader = new HttpDownloader();
    }

    /**
     * A request to a redirecting URL must report exactly one redirect, keep the
     * original URL as base URL, and expose the redirect target as both the fetched
     * URL and the new base URL, together with the target's status and content.
     */
    @Test
    public void shouldFollowRedirections() throws Exception {
        // given
        HttpServer httpServer = new TestWebServerBuilder()
            .withRedirect("/index.html", "/new/location.html")
            .with200OK("/new/location.html", HELLO_WORLD)
            .start();
        try {
            String expectedRedirectedUrl = TestWebServerBuilder.address + "/new/location.html";

            // when
            FetchedResult result = downloader.dipatchDownload(INDEX_URL).get();

            // then
            assertThat(result.getNumRedirects(), is(1));
            assertThat(result.getBaseUrl(), is(INDEX_URL));
            assertThat(result.getFetchedUrl(), is(expectedRedirectedUrl));
            assertThat(result.getNewBaseUrl(), is(expectedRedirectedUrl));
            assertThat(result.getStatusCode(), is(200));
            assertThat(result.getReasonPhrase(), is("OK"));
            assertThat(result.getContentType(), is("text/html; charset=utf-8"));
            // Explicit charset: the expected bytes must not depend on the platform default.
            assertThat(result.getContent(), is(HELLO_WORLD.getBytes(StandardCharsets.UTF_8)));
        } finally {
            httpServer.stop(0);
        }
    }

    /**
     * A plain 200 response must be returned without redirects, with no new base URL,
     * and with the status, reason phrase, content type and body sent by the server.
     */
    @Test
    public void shouldDownloadPageContentAndMetadata() throws Exception {
        // given
        HttpServer httpServer = new TestWebServerBuilder()
            .with200OK("/index.html", HELLO_WORLD)
            .start();
        try {
            // when
            FetchedResult result = downloader.dipatchDownload(INDEX_URL).get();

            // then
            assertThat(result.getNumRedirects(), is(0));
            assertThat(result.getBaseUrl(), is(INDEX_URL));
            assertThat(result.getFetchedUrl(), is(INDEX_URL));
            assertThat(result.getNewBaseUrl(), is(nullValue()));
            assertThat(result.getStatusCode(), is(200));
            assertThat(result.getReasonPhrase(), is("OK"));
            assertThat(result.getContentType(), is("text/html; charset=utf-8"));
            assertThat(result.getContent(), is(HELLO_WORLD.getBytes(StandardCharsets.UTF_8)));
        } finally {
            httpServer.stop(0);
        }
    }

    /**
     * Dispatches 100 downloads of the same URL before reading any result and checks
     * that every one of them completes with status 200.
     */
    @Test
    public void shouldDownloadMultipleUrlsInParallel() throws Exception {
        // given
        HttpServer httpServer = new TestWebServerBuilder()
            .with200OK("/index.html", HELLO_WORLD)
            .start();
        try {
            // when
            List<Future<FetchedResult>> results = new ArrayList<>();
            for (int i = 0; i < 100; i++) {
                results.add(downloader.dipatchDownload(INDEX_URL));
            }

            // then
            for (Future<FetchedResult> future : results) {
                assertThat(future.get().getStatusCode(), is(200));
            }
        } finally {
            httpServer.stop(0);
        }
    }

    /**
     * The {@code completed} callback must be invoked once per dispatched download.
     * A call to {@code failed} is recorded and reported as the cause of the test
     * failure instead of being silently dropped.
     */
    @Test
    public void shouldCallCompletedCallbackAfterDownloadFinishes() throws Exception {
        // given
        HttpServer httpServer = new TestWebServerBuilder()
            .with200OK("/index.html", HELLO_WORLD)
            .start();
        try {
            final int numberOfRequests = 5;
            final AtomicInteger requestsFinished = new AtomicInteger(0);
            final AtomicReference<Exception> firstFailure = new AtomicReference<>();

            // when
            for (int i = 0; i < numberOfRequests; i++) {
                downloader.dipatchDownload(new URL(INDEX_URL), new HttpDownloader.Callback() {
                    @Override
                    public void failed(LinkRelevance link, Exception e) {
                        // keep only the first failure so it can be attached to the assertion error
                        firstFailure.compareAndSet(null, e);
                    }
                    @Override
                    public void completed(LinkRelevance link, FetchedResult result) {
                        // increment counter when download finishes
                        requestsFinished.incrementAndGet();
                    }
                });
            }
            waitForPendingDownloads();

            // then
            Exception failure = firstFailure.get();
            if (failure != null) {
                throw new AssertionError("A download failed unexpectedly", failure);
            }
            assertThat(requestsFinished.get(), is(numberOfRequests));
        } finally {
            httpServer.stop(0);
        }
    }

    /**
     * Polls the downloader until it reports no pending downloads.
     *
     * @throws AssertionError if downloads are still pending after
     *         {@link #PENDING_DOWNLOADS_TIMEOUT_MS} milliseconds
     * @throws InterruptedException if the thread is interrupted while sleeping
     */
    private void waitForPendingDownloads() throws InterruptedException {
        final long timeoutNanos = PENDING_DOWNLOADS_TIMEOUT_MS * 1000000L;
        final long start = System.nanoTime();
        while (downloader.hasPendingDownloads()) {
            // Bounded wait: a stuck download fails the test instead of hanging the build.
            if (System.nanoTime() - start > timeoutNanos) {
                throw new AssertionError("Timed out after " + PENDING_DOWNLOADS_TIMEOUT_MS
                        + " ms waiting for pending downloads to finish");
            }
            Thread.sleep(POLL_INTERVAL_MS);
        }
    }
}