package focusedCrawler.crawler.async.fetcher;
import java.net.MalformedURLException;
import java.net.URL;
import focusedCrawler.crawler.async.HttpDownloaderConfig;
import focusedCrawler.crawler.crawlercommons.fetcher.BaseFetcher;
import focusedCrawler.crawler.crawlercommons.fetcher.http.SimpleHttpFetcher;
import focusedCrawler.crawler.crawlercommons.fetcher.http.UserAgent;
public class FetcherFactory {
public static BaseFetcher createFetcher(HttpDownloaderConfig config) {
if(config.getTorProxy() != null) {
return createTorProxyFetcher(config);
} else {
return createSimpleHttpFetcher(config);
}
}
public static SimpleHttpFetcher createSimpleHttpFetcher(HttpDownloaderConfig config){
UserAgent userAgent = new UserAgent(config.getUserAgentName(), "", config.getUserAgentUrl());
int connectionPoolSize = config.getConnectionPoolSize();
SimpleHttpFetcher httpFetcher = new SimpleHttpFetcher(connectionPoolSize, userAgent);
// timeout for inactivity between two consecutive data packets
httpFetcher.setSocketTimeout(30*1000);
// timeout for establishing a new connection
httpFetcher.setConnectionTimeout(30*1000);
// timeout for requesting a connection from httpclient's connection manager
httpFetcher.setConnectionRequestTimeout(1*60*1000);
httpFetcher.setMaxConnectionsPerHost(1);
httpFetcher.setMaxRetryCount(config.getMaxRetryCount());
httpFetcher.setDefaultMaxContentSize(10*1024*1024);
if(config.getValidMimeTypes() != null) {
for (String mimeTypes : config.getValidMimeTypes()) {
httpFetcher.addValidMimeType(mimeTypes);
}
}
return httpFetcher;
}
public static TorProxyFetcher createTorProxyFetcher(HttpDownloaderConfig config) {
SimpleHttpFetcher httpFetcher = FetcherFactory.createSimpleHttpFetcher(config);
// TOR fetcher is just a simple HTTP fetcher through a proxy and different parameters
SimpleHttpFetcher torFetcher = FetcherFactory.createSimpleHttpFetcher(config);
URL torProxy;
try {
torProxy = new URL(config.getTorProxy());
} catch (MalformedURLException e) {
throw new IllegalArgumentException("Invalid URL provide for TOR proxy: "+config.getTorProxy());
}
torFetcher.setProxy(torProxy.getProtocol(), torProxy.getHost(), torProxy.getPort());
torFetcher.setSocketTimeout(1000*1000);
return new TorProxyFetcher(torFetcher, httpFetcher);
}
}