package focusedCrawler.crawler.async;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import focusedCrawler.crawler.crawlercommons.fetcher.AbortedFetchException;
import focusedCrawler.crawler.crawlercommons.fetcher.FetchedResult;
import focusedCrawler.link.frontier.LinkRelevance;
import focusedCrawler.target.model.Page;
import focusedCrawler.target.model.ParsedData;
import focusedCrawler.util.parser.PaginaURL;
import focusedCrawler.util.storage.Storage;
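
/**
 * Callback invoked by {@link HttpDownloader} when an asynchronous download
 * finishes: responses with a 2xx status code are parsed and inserted into the
 * target storage, while failures are logged.
 */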
public class FetchedResultHandler implements HttpDownloader.Callback {

    private static final Logger logger = LoggerFactory.getLogger(FetchedResultHandler.class);

    private final Storage targetStorage;

    public FetchedResultHandler(Storage targetStorage) {
        this.targetStorage = targetStorage;
    }

    @Override
    public void completed(LinkRelevance link, FetchedResult response) {
        int statusCode = response.getStatusCode();
        if (statusCode >= 200 && statusCode < 300) {
            processData(link, response);
        }
        // TODO: Update metadata about page visits in link storage
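        // A possible shape for that update (a sketch only: updateVisitStatus and
        // the linkStorage collaborator are hypothetical names, and this class
        // currently holds no reference to the link storage):
        //
        //     linkStorage.updateVisitStatus(link, statusCode);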
    }

    @Override
    public void failed(LinkRelevance link, Exception e) {
        if (e instanceof AbortedFetchException) {
            AbortedFetchException afe = (AbortedFetchException) e;
            logger.info("Download aborted: \n>URL: {}\n>Reason: {}", link.getURL(), afe.getAbortReason());
        } else {
            logger.info("Failed to download URL: {}\n>Reason: {}", link.getURL(), e.getMessage());
        }
    }

    private void processData(LinkRelevance link, FetchedResult response) {
        try {
            Page page = new Page(response);

            // TODO: Check whether the page is HTML before trying to parse it
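            // One way to satisfy the TODO above (a sketch, not a confirmed fix):
            // skip parsing when the Content-Type header does not look like HTML.
            // This assumes FetchedResult#getContentType() returns the raw
            // Content-Type value, possibly including a charset suffix.
            String contentType = response.getContentType();
            if (contentType != null && !contentType.toLowerCase().contains("html")) {
                // Storing unparsed non-HTML content is a separate design decision;
                // this sketch simply skips it.
                return;
            }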
            PaginaURL pageParser = new PaginaURL(page);
            page.setParsedData(new ParsedData(pageParser));
            page.setLinkRelevance(link);
            targetStorage.insert(page);
        } catch (Exception e) {
            logger.error("Problem while processing data.", e);
        }
    }
}