/******************************************************
* Web crawler
*
*
* Copyright (C) 2012 by Peter Hedenskog (http://peterhedenskog.com)
*
******************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*
*******************************************************
*/
package com.soulgalore.crawler.core;
import java.util.Map;
import java.util.concurrent.Callable;
/**
* A callable that fetches an HTTP response and returns the response to the caller.
*
*/
public class HTMLPageResponseCallable implements Callable<HTMLPageResponse> {

  /** The fetcher that the request is delegated to. */
  private final HTMLPageResponseFetcher fetcher;

  /** The url that will be passed to the fetcher. */
  private final CrawlerURL url;

  /** Passed to the fetcher; if true, the response body is fetched, else not. */
  private final boolean fetchPage;

  /** Passed to the fetcher; if true, redirects that lead to a different domain are followed. */
  private final boolean followRedirectsToNewDomain;

  /** Request headers passed to the fetcher. The map is stored as given, it is not copied. */
  private final Map<String, String> requestHeaders;

  /**
   * Create a new callable. All arguments are stored as is and handed to the fetcher when
   * {@link #call()} is invoked.
   *
   * @param theUrl the url to call.
   * @param theFetcher the fetcher to use
   * @param fetchTheBody if true, the response body is fetched, else not.
   * @param theRequestHeaders request headers to add
   * @param followRedirectsToNewDomain if true, follow redirects that lead to a different domain.
   */
  public HTMLPageResponseCallable(CrawlerURL theUrl, HTMLPageResponseFetcher theFetcher,
      boolean fetchTheBody, Map<String, String> theRequestHeaders,
      boolean followRedirectsToNewDomain) {
    this.url = theUrl;
    this.fetcher = theFetcher;
    this.fetchPage = fetchTheBody;
    this.requestHeaders = theRequestHeaders;
    this.followRedirectsToNewDomain = followRedirectsToNewDomain;
  }

  /**
   * Fetch the actual response by delegating to the fetcher with the values given at construction.
   *
   * @return the response
   * @throws InterruptedException if it takes longer time than the configured max time to fetch the
   *         response
   */
  @Override
  public HTMLPageResponse call() throws InterruptedException {
    return fetcher.get(url, fetchPage, requestHeaders, followRedirectsToNewDomain);
  }

  /**
   * Describe this callable as the simple name of its runtime class followed by the url.
   *
   * @return the simple class name and the url
   */
  @Override
  public String toString() {
    // TODO add request headers
    return this.getClass().getSimpleName() + " url:" + url;
  }
}