package com.constellio.app.modules.es.connectors.http.fetcher;

import java.io.IOException;
import java.net.MalformedURLException;
import java.security.NoSuchAlgorithmException;
import java.util.concurrent.Callable;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.constellio.app.modules.es.connectors.http.fetcher.config.FetcherConfig;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.WebClient;

/**
 * Task that fetches a single URL with its own HtmlUnit {@link WebClient} and hands the
 * resulting {@link FetchedDoc} both to the caller (as the return value of {@link #call()})
 * and to an optional {@link FetchTaskCompletedHandler}.
 *
 * <p>Fetch failures caught by {@link #call()} are reported as documents rather than thrown;
 * see {@link #call()} for the exact mapping.
 */
public class FetchTask implements Callable<FetchedDoc> {

	private static final Logger LOG = Logger.getLogger(FetchTask.class.getName());

	private final FetchTaskCompletedHandler handler;
	// NOTE(review): this client is never closed. Closing it here might invalidate the Page
	// handed to FetchedHttpDoc, so confirm how the page is consumed before adding a close().
	private final WebClient webClient = new WebClient();
	private final FetcherConfig config;
	private final String url;

	/**
	 * Creates a task for one URL and configures its web client from the given config.
	 *
	 * @param config fetcher configuration; its request timeout is applied to the web client
	 * @param url address to fetch
	 * @param handler notified with the resulting document when {@link #call()} finishes;
	 *        may be {@code null}, in which case no notification is sent
	 */
	FetchTask(FetcherConfig config, String url, FetchTaskCompletedHandler handler) {
		this.config = config;
		initWebClient(config);
		this.url = url;
		this.handler = handler;
	}

	/**
	 * Applies the client options used for fetching: JavaScript and CSS processing are
	 * disabled, script errors do not raise exceptions, and the timeout comes from the config.
	 *
	 * @param config source of the request timeout
	 */
	private void initWebClient(FetcherConfig config) {
		// FIXME TEMPORARY
		webClient.getOptions().setJavaScriptEnabled(false);
		webClient.getOptions().setCssEnabled(false);
		webClient.getOptions().setTimeout(config.getRequestTimeout());
		webClient.getOptions().setThrowExceptionOnScriptError(false);
	}

	/**
	 * Fetches the configured URL and converts the outcome into a {@link FetchedDoc}.
	 *
	 * <ul>
	 * <li>Success: a {@link FetchedHttpDoc} wrapping the fetched page.</li>
	 * <li>HTTP error status, malformed URL or other I/O failure: a {@link FailedFetchedDoc}
	 * carrying the exception.</li>
	 * <li>{@link NoSuchAlgorithmException}: the empty placeholder document.</li>
	 * </ul>
	 *
	 * <p>The handler, when present, is always notified with whatever document is current,
	 * including when an exception not listed above propagates out of this method.
	 *
	 * @return the document describing the fetch result, never {@code null}
	 * @throws Exception if an exception other than the ones handled above is raised
	 */
	@Override
	public FetchedDoc call()
			throws Exception {
		// Return an empty doc in the worst case
		FetchedDoc fetchedDoc = new FetchedDoc() {
		};
		try {
			Page fetchedPage = this.webClient.getPage(url);
			LOG.finer("Successfully fetched: " + url);
			fetchedDoc = new FetchedHttpDoc(config, url, fetchedPage);
		} catch (FailingHttpStatusCodeException e) {
			LOG.info("Http error " + e.getStatusCode() + " for: " + url);
			fetchedDoc = new FailedFetchedDoc(url, e);
		} catch (MalformedURLException e) {
			LOG.warning("Malformed url : " + this.url);
			fetchedDoc = new FailedFetchedDoc(url, e);
		} catch (IOException e) {
			// Previously silent: leave a trace in the log like the other failure branches.
			// The exception itself travels with the FailedFetchedDoc.
			LOG.info("I/O error (" + e.getMessage() + ") for: " + url);
			fetchedDoc = new FailedFetchedDoc(url, e);
		} catch (NoSuchAlgorithmException e) {
			// The exception is not stored in any returned document, so log it with its
			// stack trace; otherwise the cause is lost.
			// NOTE(review): the empty placeholder doc is returned here rather than a
			// FailedFetchedDoc - confirm whether that is intended.
			LOG.log(Level.SEVERE, "Invalid Algorith : " + e.getMessage(), e);
		} finally {
			if (this.handler != null) {
				this.handler.taskCompleted(fetchedDoc);
			}
		}
		return fetchedDoc;
	}
}