package focusedCrawler.crawler.async.fetcher;
import java.net.MalformedURLException;
import java.net.URL;
import focusedCrawler.crawler.crawlercommons.fetcher.BaseFetchException;
import focusedCrawler.crawler.crawlercommons.fetcher.BaseFetcher;
import focusedCrawler.crawler.crawlercommons.fetcher.FetchedResult;
import focusedCrawler.crawler.crawlercommons.fetcher.Payload;
import focusedCrawler.crawler.crawlercommons.fetcher.http.SimpleHttpFetcher;
/**
 * Downloads .onion links through a TOR proxy. The proxy must be set up
 * independently, and its configuration details must then be provided to ACHE
 * via the config file.
 * <p>
 * This class only routes requests: URLs whose host ends with the
 * {@code onion} top-level label are delegated to the TOR-backed fetcher, and
 * every other URL is delegated to the regular HTTP fetcher.
 *
 * @author aeciosantos
 *
 */
@SuppressWarnings("serial")
public class TorProxyFetcher extends BaseFetcher {

    /** Top-level label that identifies hosts to be fetched through TOR. */
    private static final String ONION_TLD = "onion";

    /** Fetcher used for hosts under the {@code onion} top-level label. */
    private final SimpleHttpFetcher torFetcher;

    /** Fetcher used for every other host. */
    private final SimpleHttpFetcher httpFetcher;

    /**
     * Creates a fetcher that dispatches between the two given fetchers.
     *
     * @param torFetcher  fetcher used for onion URLs
     * @param httpFetcher fetcher used for all other URLs
     */
    public TorProxyFetcher(SimpleHttpFetcher torFetcher, SimpleHttpFetcher httpFetcher) {
        this.torFetcher = torFetcher;
        this.httpFetcher = httpFetcher;
    }

    /**
     * Downloads the URL using the TOR fetcher when it is an onion URL, or
     * using the regular HTTP fetcher otherwise.
     *
     * @param url     the URL to download
     * @param payload the payload associated with the request; forwarded
     *                unchanged to the selected fetcher
     * @return the result produced by the selected fetcher
     * @throws BaseFetchException       if the selected fetcher fails
     * @throws IllegalArgumentException if {@code url} cannot be parsed as a URL
     */
    @Override
    public FetchedResult get(String url, Payload payload) throws BaseFetchException {
        URL realUrl;
        try {
            realUrl = new URL(url);
        } catch (MalformedURLException e) {
            // Keep the original exception as the cause so the parsing failure
            // is not lost from the stack trace.
            throw new IllegalArgumentException("Invalid URL provided: " + url, e);
        }
        // Forward the payload instead of silently dropping it.
        if (isOnionHost(realUrl.getHost())) {
            return torFetcher.get(url, payload);
        }
        return httpFetcher.get(url, payload);
    }

    /**
     * Tells whether the last label of the given host is {@code onion}.
     * The comparison ignores case and a single trailing dot, because
     * {@link URL#getHost()} returns the host as written in the URL and an
     * onion host must never be sent through the regular fetcher.
     */
    private static boolean isOnionHost(String host) {
        if (host == null || host.isEmpty()) {
            return false;
        }
        String normalized = host.endsWith(".")
                ? host.substring(0, host.length() - 1)
                : host;
        // lastIndexOf returns -1 when there is no dot, so the whole host is
        // treated as the top-level label in that case.
        String topLevelLabel = normalized.substring(normalized.lastIndexOf('.') + 1);
        return ONION_TLD.equalsIgnoreCase(topLevelLabel);
    }

    /**
     * Aborts both underlying fetchers: the regular HTTP fetcher first, then
     * the TOR fetcher.
     */
    @Override
    public void abort() {
        httpFetcher.abort();
        torFetcher.abort();
    }
}