package io.monokkel.actors;
import akka.actor.Props;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import io.monokkel.actors.domain.ThrottlerConfig;
import io.monokkel.core.*;
import io.monokkel.domain.PageData;
import io.monokkel.domain.UrlVisitResponse;
import io.monokkel.exceptions.IndexDocumentException;
import io.monokkel.exceptions.ValidationError;
import io.monokkel.messages.ParserDone;
import io.monokkel.messages.SeedUrl;
import org.junit.Before;
import org.junit.Test;
import java.util.Map;
import static java.lang.Thread.sleep;
import static org.mockito.Mockito.*;
/**
 * Tests for {@code CrawlerActor} in which every collaborator (URL visitor, transformers,
 * data cleaner, validator and indexer) is replaced by a Mockito mock.
 *
 * <p>Each test sends a message to {@code subject}, waits a fixed amount of time so the
 * actor can process it, and then verifies the interactions with the mocks.
 */
public class CrawlerActorTest extends ActorTest {

    /** How long each test waits after sending a message before verifying the mocks. */
    private static final long ACTOR_PROCESSING_WAIT_MILLIS = 1000L;

    private UrlVisitor mockedUrlVisitor;
    private HtmlTransformer mockedHtmlTransformer;
    private Indexer mockedIndexer;
    private DataCleaner mockedDataCleaner;
    private ThrottlerConfig throttlerConfig;
    private DataValidator mockedValidator;
    private JsonTransformer mockedJsonTransformer;

    /**
     * Creates fresh mocks and the throttler configuration, then delegates to
     * {@code ActorTest.before()}.
     *
     * <p>NOTE(review): the mocks are created before {@code super.before()} is called,
     * presumably because the base class builds the actor through {@link #getActor()} -
     * confirm against {@code ActorTest}.
     */
    @Before
    public void setUp() throws Exception {
        mockedUrlVisitor = mock(UrlVisitor.class);
        mockedHtmlTransformer = mock(HtmlTransformer.class);
        mockedIndexer = mock(Indexer.class);
        mockedDataCleaner = mock(DataCleaner.class);
        mockedValidator = mock(DataValidator.class);
        mockedJsonTransformer = mock(JsonTransformer.class);
        throttlerConfig = new ThrottlerConfig(1, 2, true);
        super.before();
    }

    /** A {@code SeedUrl} message must result in exactly one visit of the seeded URL. */
    @Test
    public void onReceive_withOneSeedUrl_expectUrlToBeVisited() throws Exception {
        String url = "http://url";
        when(mockedUrlVisitor.visitUrlAndRespond(url)).thenReturn(createUrlVisitResponse(url));

        subject.tell(new SeedUrl(1, url), javaTestKit.getRef());
        awaitActorProcessing();

        verify(mockedUrlVisitor, times(1)).visitUrlAndRespond(url);
    }

    /** When the HTML transformer accepts the response, its {@code parse} must be called once. */
    @Test
    public void onReceive_withBasicResponse_expectParserToBeCalled() throws Exception {
        final String url = "http://url";
        final String response = "<html></html>";
        final UrlVisitResponse urlVisitResponse = createUrlVisitResponse(url, response);
        when(mockedUrlVisitor.visitUrlAndRespond(url)).thenReturn(urlVisitResponse);
        when(mockedHtmlTransformer.shouldParse(url, response, urlVisitResponse.getTypesFromTheResponseHeader())).thenReturn(true);

        subject.tell(new SeedUrl(1, url), javaTestKit.getRef());
        awaitActorProcessing();

        verify(mockedHtmlTransformer, times(1)).parse(url, response, 1L);
    }

    /** When the JSON transformer accepts the response, its {@code parse} must be called once. */
    @Test
    public void onReceive_withJSONBasicResponse_expectJSONParserToBeCalled() throws Exception {
        final String url = "http://url";
        final String response = "{\"data\":\"dataContent\"}";
        final UrlVisitResponse urlVisitResponse = createUrlVisitResponse(url, response);
        when(mockedUrlVisitor.visitUrlAndRespond(url)).thenReturn(urlVisitResponse);
        when(mockedJsonTransformer.shouldParse(url, response, urlVisitResponse.getTypesFromTheResponseHeader())).thenReturn(true);

        subject.tell(new SeedUrl(1, url), javaTestKit.getRef());
        awaitActorProcessing();

        verify(mockedJsonTransformer, times(1)).parse(url, response, 1L);
    }

    /** When the JSON transformer rejects the response, its {@code parse} must never be called. */
    @Test
    public void onReceive_withJSONBasicResponseAndNotParsables_expectJSONToNotBeCalled() throws Exception {
        final String url = "http://url";
        final String response = "{\"data\":\"dataContent\"}";
        final UrlVisitResponse urlVisitResponse = createUrlVisitResponse(url, response);
        when(mockedUrlVisitor.visitUrlAndRespond(url)).thenReturn(urlVisitResponse);
        when(mockedJsonTransformer.shouldParse(url, response, urlVisitResponse.getTypesFromTheResponseHeader())).thenReturn(false);

        subject.tell(new SeedUrl(1, url), javaTestKit.getRef());
        awaitActorProcessing();

        verify(mockedJsonTransformer, never()).parse(url, response, 1L);
    }

    /**
     * Full pipeline: a seeded URL whose response is accepted and parsed by the HTML
     * transformer and returned unchanged by the cleaner must be handed to the indexer
     * exactly once. (Despite the method name, the assertion is on the indexer.)
     */
    @Test
    public void onReceive_withWithSeedUrl_expectUrlToBeVisited() throws Exception {
        final String url = "http://url";
        final String response = "<html></html>";
        final UrlVisitResponse urlVisitResponse = createUrlVisitResponse(url, response);
        when(mockedUrlVisitor.visitUrlAndRespond(url)).thenReturn(urlVisitResponse);
        Map<String, String> transformedMap = Maps.newHashMap();
        PageData pageData = new PageData(Sets.newHashSet(""), url, 1L, response, response, "title", transformedMap);
        when(mockedHtmlTransformer.shouldParse(url, response, urlVisitResponse.getTypesFromTheResponseHeader())).thenReturn(true);
        when(mockedHtmlTransformer.parse(url, response, 1L)).thenReturn(pageData);
        when(mockedDataCleaner.clean(pageData)).thenReturn(pageData);

        subject.tell(new SeedUrl(1, url), javaTestKit.getRef());
        awaitActorProcessing();

        verify(mockedIndexer, times(1)).indexParserOutput(pageData);
    }

    /**
     * Same pipeline as the indexing test above, but the validator throws a
     * {@link ValidationError}; the indexer must then never receive the page.
     */
    @Test
    public void onReceive_withWithSeedUrlButFailedValidation_expectIndexerNeverToBeCalled() throws Exception {
        final String url = "http://url";
        final String response = "<html></html>";
        final UrlVisitResponse urlVisitResponse = createUrlVisitResponse(url, response);
        when(mockedUrlVisitor.visitUrlAndRespond(url)).thenReturn(urlVisitResponse);
        Map<String, String> transformedMap = Maps.newHashMap();
        PageData pageData = new PageData(Sets.newHashSet(""), url, 1L, response, response, "title", transformedMap);
        // Without this stub the mock answers false from shouldParse, the parse stub below is
        // never used, and the indexer stays untouched for a reason unrelated to validation,
        // so the test would pass without exercising the validator at all.
        when(mockedHtmlTransformer.shouldParse(url, response, urlVisitResponse.getTypesFromTheResponseHeader())).thenReturn(true);
        when(mockedHtmlTransformer.parse(url, response, 1L)).thenReturn(pageData);
        when(mockedDataCleaner.clean(pageData)).thenReturn(pageData);
        when(mockedValidator.validate(pageData)).thenThrow(new ValidationError("field", "broke", "expression"));

        subject.tell(new SeedUrl(1, url), javaTestKit.getRef());
        awaitActorProcessing();

        verify(mockedIndexer, never()).indexParserOutput(pageData);
    }

    /**
     * A {@code ParserDone} message carrying a page with one extracted URL must cause that
     * URL to be visited once. (Despite the method name, the test sends {@code ParserDone}
     * and asserts on the URL visitor.)
     */
    @Test
    public void onReceive_withWithSeedUrl_expectParseDoneToBeSent() throws Exception, IndexDocumentException {
        final String urlRetrievedFromHtml = "http://htmlurl";
        // NOTE(review): the 4th-6th constructor arguments are ordered differently here than in
        // the other tests of this class - confirm against the PageData constructor.
        PageData pageData = new PageData(Sets.newHashSet(urlRetrievedFromHtml), "http://seedurl5", 1L, "<html></html>", "title", "extractedContent", null);

        subject.tell(new ParserDone(pageData, 0, 1), javaTestKit.getRef());
        awaitActorProcessing();

        verify(mockedUrlVisitor, times(1)).visitUrlAndRespond(urlRetrievedFromHtml);
    }

    /**
     * Blocks the test thread for {@link #ACTOR_PROCESSING_WAIT_MILLIS} so the actor has
     * time to handle the message before the mocks are verified.
     *
     * @throws InterruptedException if the test thread is interrupted while waiting
     */
    private static void awaitActorProcessing() throws InterruptedException {
        sleep(ACTOR_PROCESSING_WAIT_MILLIS);
    }

    /**
     * Builds a {@link UrlVisitResponse} with status 200 and a single {@code text/html}
     * entry in its type list.
     *
     * @param url      the URL passed as the last constructor argument
     * @param response the response body
     * @return the response fixture
     */
    private UrlVisitResponse createUrlVisitResponse(String url, String response) {
        return new UrlVisitResponse(response, 200, ImmutableList.of("text/html"), 1L, url);
    }

    /**
     * Builds a {@link UrlVisitResponse} for {@code url} with a minimal HTML document as body.
     *
     * @param url the URL passed as the last constructor argument
     * @return the response fixture
     */
    private UrlVisitResponse createUrlVisitResponse(String url) {
        return createUrlVisitResponse(url, "<html><body></body></html>");
    }

    /** @return the name given to the actor under test */
    @Override
    protected String getActorName() {
        return "crawlerActor";
    }

    /**
     * @return {@code Props} for a {@code CrawlerActor} wired with the mocks of this test;
     *         the HTML transformer is listed before the JSON transformer
     */
    @Override
    protected Props getActor() {
        return CrawlerActor.props(mockedUrlVisitor, mockedIndexer, mockedDataCleaner, throttlerConfig, Lists.newArrayList(mockedHtmlTransformer, mockedJsonTransformer), mockedValidator);
    }
}