package focusedCrawler.target; import static java.util.Arrays.asList; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.CoreMatchers.notNullValue; import static org.hamcrest.CoreMatchers.nullValue; import static org.hamcrest.Matchers.lessThan; import static org.junit.Assert.assertThat; import java.io.File; import java.io.IOException; import java.net.URL; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import focusedCrawler.target.classifier.TargetRelevance; import focusedCrawler.target.model.Page; import focusedCrawler.target.model.TargetModelJson; import focusedCrawler.target.repository.FilesTargetRepository; import focusedCrawler.target.repository.FilesTargetRepository.RepositoryIterator; public class FilesTargetRepositoryTest { @Rule public TemporaryFolder tempFolder = new TemporaryFolder(); static String html; static String url; static Map<String, List<String>> responseHeaders; @BeforeClass static public void setUp() { url = "http://example.com"; html = "<html><body>Hello World! Hello World! Hello World!</body></html>"; responseHeaders = new HashMap<>(); responseHeaders.put("content-type", asList("text/html")); } @Test public void shouldStoreAndIterageOverData() throws IOException { // given String folder = tempFolder.newFolder().toString(); Page target = new Page(new URL(url), html, responseHeaders); target.setTargetRelevance(TargetRelevance.RELEVANT); FilesTargetRepository repository = new FilesTargetRepository(folder); // when repository.insert(target); repository.close(); // then RepositoryIterator it = repository.iterator(); assertThat(it.hasNext(), is(true)); TargetModelJson page = it.next(); assertThat(page.getContentAsString(), is(html)); assertThat(page.getUrl(), is(url)); assertThat(page.getResponseHeaders().get("content-type").get(0), is("text/html")); assertThat(page.getRelevance().isRelevant(), is(TargetRelevance.RELEVANT.isRelevant())); assertThat(page.getRelevance().getRelevance(), is(TargetRelevance.RELEVANT.getRelevance())); } @Test public void shoudNotCreateFilesLargerThanMaximumSize() throws IOException { // given String folder = tempFolder.newFolder().toString(); String url1 = "http://a.com"; String url2 = "http://b.com"; Page target1 = new Page(new URL(url1), html); Page target2 = new Page(new URL(url2), html); long maxFileSize = 250; FilesTargetRepository repository = new FilesTargetRepository(folder, maxFileSize); // when repository.insert(target1); repository.insert(target2); repository.close(); Iterator<TargetModelJson> it = repository.iterator(); // then TargetModelJson page; assertThat(it.hasNext(), is(true)); page = it.next(); assertThat(page, is(notNullValue())); assertThat(page.getContentAsString(), is(html)); assertThat(it.hasNext(), is(true)); page = it.next(); assertThat(page, is(notNullValue())); assertThat(page.getContentAsString(), is(html)); assertThat(it.hasNext(), is(false)); assertThat(it.next(), is(nullValue())); assertThat(it.hasNext(), is(false)); assertThat(it.next(), is(nullValue())); File[] files = new File(folder).listFiles(); assertThat(files.length, is(2)); assertThat(files[0].length(), is(lessThan(maxFileSize))); assertThat(files[1].length(), is(lessThan(maxFileSize))); } @Test public void sholdIterateOverEmptyFolder() throws IOException { // given String folder = tempFolder.newFolder().toString(); FilesTargetRepository repository = new FilesTargetRepository(folder); // when Iterator<TargetModelJson> it = repository.iterator(); // then assertThat(it.hasNext(), is(false)); assertThat(it.next(), is(nullValue())); } }