/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package focusedCrawler.crawler.crawlercommons.fetcher.http; import focusedCrawler.crawler.crawlercommons.fetcher.BaseFetcher; @SuppressWarnings("serial") public abstract class BaseHttpFetcher extends BaseFetcher { public enum RedirectMode { FOLLOW_ALL, // Fetcher will try to follow all redirects FOLLOW_TEMP, // Temp redirects are automatically followed, but not // permanent. FOLLOW_NONE // No redirects are followed. } public static final int NO_MIN_RESPONSE_RATE = Integer.MIN_VALUE; public static final int NO_REDIRECTS = 0; public static final int DEFAULT_MIN_RESPONSE_RATE = NO_MIN_RESPONSE_RATE; public static final int DEFAULT_MAX_CONNECTIONS_PER_HOST = 2; public static final int DEFAULT_MAX_REDIRECTS = 20; public static final String DEFAULT_ACCEPT_LANGUAGE = "en-us,en-gb,en;q=0.7,*;q=0.3"; public static final RedirectMode DEFAULT_REDIRECT_MODE = RedirectMode.FOLLOW_ALL; protected int _maxThreads; protected UserAgent _userAgent; protected String _userAgentString; protected int _maxRedirects = DEFAULT_MAX_REDIRECTS; protected int _maxConnectionsPerHost = DEFAULT_MAX_CONNECTIONS_PER_HOST; protected int _minResponseRate = DEFAULT_MIN_RESPONSE_RATE; protected String _acceptLanguage = DEFAULT_ACCEPT_LANGUAGE; protected RedirectMode _redirectMode = DEFAULT_REDIRECT_MODE; public BaseHttpFetcher(int maxThreads, UserAgent userAgent) { super(); _maxThreads = maxThreads; _userAgent = userAgent; _userAgentString = userAgent.getUserAgentString(); } public int getMaxThreads() { return _maxThreads; } public UserAgent getUserAgent() { return _userAgent; } public void setMaxConnectionsPerHost(int maxConnectionsPerHost) { _maxConnectionsPerHost = maxConnectionsPerHost; } public int getMaxConnectionsPerHost() { return _maxConnectionsPerHost; } public void setMinResponseRate(int minResponseRate) { _minResponseRate = minResponseRate; } /** * Return the minimum response rate. If the speed at which bytes are being * returned from the server drops below this, the fetch of that page will be * aborted. * * @return bytes/second */ public int getMinResponseRate() { return _minResponseRate; } public void setAcceptLanguage(String acceptLanguage) { _acceptLanguage = acceptLanguage; } public String getAcceptLanguage() { return _acceptLanguage; } public void setMaxRedirects(int maxRedirects) { _maxRedirects = maxRedirects; } public int getMaxRedirects() { return _maxRedirects; } public void setRedirectMode(RedirectMode mode) { _redirectMode = mode; } public RedirectMode getRedirectMode() { return _redirectMode; } }