package focusedCrawler.config;

import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.notNullValue;
import static org.junit.Assert.assertThat;

import java.io.IOException;
import java.net.URL;
import java.nio.file.Paths;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import focusedCrawler.crawler.async.AsyncCrawlerConfig;
import focusedCrawler.link.LinkStorageConfig;
import focusedCrawler.target.TargetStorageConfig;

/**
 * Tests that {@link ConfigService} exposes the values of the {@code ache.yml}
 * test fixture through its target-storage, link-storage and crawler
 * configuration objects.
 *
 * <p>The fixture is looked up as a classpath resource relative to this class,
 * and every test builds a fresh {@link ConfigService} from its file-system path.
 */
public class ConfigServiceTest {

    /** File-system path of the {@code ache.yml} fixture; assigned in {@link #setUp()}. */
    String configFilePath;

    /**
     * Resolves the {@code ache.yml} fixture to a file-system path before each test.
     *
     * <p>The resource URL is converted through {@link URL#toURI()} and
     * {@link Paths#get(java.net.URI)} rather than {@link URL#getPath()}, because
     * {@code getPath()} returns the path still percent-encoded (a space stays
     * {@code %20}), which yields a non-existent file name when the build
     * directory contains spaces or other escaped characters.
     *
     * @throws Exception if the resource URL cannot be converted to a URI or path
     */
    @Before
    public void setUp() throws Exception {
        URL resource = ConfigServiceTest.class.getResource("ache.yml");
        // Fail with a readable message instead of a bare NullPointerException.
        assertThat("Test resource ache.yml was not found on the classpath next to ConfigServiceTest",
                resource, is(notNullValue()));
        configFilePath = Paths.get(resource.toURI()).toString();
    }

    /**
     * Nothing to clean up: the tests in this class only construct a
     * {@link ConfigService} and call getters on the returned config objects.
     */
    @After
    public void tearDown() throws Exception {
    }

    /**
     * Checks every target-storage setting from the fixture, including the
     * nested Elasticsearch and storage-server sections.
     */
    @Test
    public void shouldReadTargeStorageConfig() throws IOException {
        // given
        ConfigService configService = new ConfigService(configFilePath);

        // when
        TargetStorageConfig config = configService.getTargetStorageConfig();

        // then
        assertThat(config, is(notNullValue()));

        assertThat(config.isUseClassifier(), is(false));
        assertThat(config.isSaveNegativePages(), is(false));
        assertThat(config.getVisitedPageLimit(), is(12345));
        assertThat(config.isEnglishLanguageDetectionEnabled(), is(false));
        assertThat(config.isHardFocus(), is(false));
        assertThat(config.isBipartite(), is(true));

        assertThat(config.getDataFormat(), is("ELASTICSEARCH"));

        assertThat(config.getElasticSearchConfig(), is(notNullValue()));
        assertThat(config.getElasticSearchConfig().getHost(), is("elasticsearch.localhost"));
        assertThat(config.getElasticSearchConfig().getPort(), is(9999));
        assertThat(config.getElasticSearchConfig().getClusterName(), is("elasticsearch-test"));

        assertThat(config.getStorageServerConfig(), is(notNullValue()));
        assertThat(config.getStorageServerConfig().getHost(), is("targetstorage.localhost"));
        assertThat(config.getStorageServerConfig().getPort(), is(19876));
    }

    /**
     * Checks every link-storage setting from the fixture, including the
     * nested storage-server section and the scheduler limits.
     */
    @Test
    public void shouldReadLinkStorageConfig() throws IOException {
        // given
        ConfigService configService = new ConfigService(configFilePath);

        // when
        LinkStorageConfig config = configService.getLinkStorageConfig();

        // then
        assertThat(config, is(notNullValue()));

        assertThat(config.getMaxPagesPerDomain(), is(222));
        assertThat(config.getOutlinks(), is(false));
        assertThat(config.getBacklinks(), is(true));
        assertThat(config.isUseScope(), is(true));

        assertThat(config.getTypeOfClassifier(), is("LinkClassifierImpl"));
        // TODO: add parameters for link classifier

        assertThat(config.isUseOnlineLearning(), is(false));
        assertThat(config.getOnlineMethod(), is("FORWARD_CLASSIFIER_BINARY"));
        assertThat(config.getLearningLimit(), is(555));

        assertThat(config.getLinkSelector(), is("TopkLinkSelector"));

        assertThat(config.getMaxCacheUrlsSize(), is(222222));

        assertThat(config.getStorageServerConfig(), is(notNullValue()));
        assertThat(config.getStorageServerConfig().getHost(), is("linkstorage.localhost"));
        assertThat(config.getStorageServerConfig().getPort(), is(19888));

        assertThat(config.getSchedulerHostMinAccessInterval(), is(123));
        assertThat(config.getSchedulerMaxLinks(), is(234));
    }

    /**
     * Checks the downloader settings exposed through the crawler configuration.
     * Only the first entry of the valid MIME types array is verified.
     */
    @Test
    public void shouldReadCrawlerConfig() throws IOException {
        // given
        ConfigService configService = new ConfigService(configFilePath);

        // when
        AsyncCrawlerConfig config = configService.getCrawlerConfig();

        // then
        assertThat(config, is(notNullValue()));

        assertThat(config.getDownloaderConfig().getDownloadThreadPoolSize(), is(333));
        assertThat(config.getDownloaderConfig().getMaxRetryCount(), is(444));
        assertThat(config.getDownloaderConfig().getUserAgentName(), is("TestAgent"));
        assertThat(config.getDownloaderConfig().getUserAgentUrl(), is("http://www.test-agent-crawler-example.com/robot"));
        assertThat(config.getDownloaderConfig().getValidMimeTypes()[0], is("test/mimetype"));
    }

}