/* $Id: IThrottledConnection.java 988245 2010-08-23 18:39:35Z kwright $ */ /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.manifoldcf.crawler.connectors.webcrawler; import org.apache.manifoldcf.core.interfaces.*; import org.apache.manifoldcf.agents.interfaces.*; import org.apache.manifoldcf.crawler.interfaces.*; import java.io.*; import java.util.*; /** This interface represents an established connection to a URL. */ public interface IThrottledConnection { public static final String _rcsid = "@(#)$Id: IThrottledConnection.java 988245 2010-08-23 18:39:35Z kwright $"; // Issue codes. public static final int FETCH_NOT_TRIED = -1; public static final int FETCH_CIRCULAR_REDIRECT = -100; public static final int FETCH_BAD_URI = -101; public static final int FETCH_SEQUENCE_ERROR = -102; public static final int FETCH_IO_ERROR = -103; public static final int FETCH_INTERRUPTED = -104; public static final int FETCH_UNKNOWN_ERROR = -999; /** Set the abort checker. This must be done before the connection is actually used. */ public void setAbortChecker(AbortChecker abortCheck); /** Check whether the connection has expired. *@param currentTime is the current time to use to judge if a connection has expired. *@return true if the connection has expired, and should be closed. */ public boolean hasExpired(long currentTime); /** Begin the fetch process. * @param fetchType is a short descriptive string describing the kind of fetch being requested. This * is used solely for logging purposes. */ public void beginFetch(String fetchType) throws ManifoldCFException, ServiceInterruption; /** Execute the fetch and get the return code. This method uses the * standard logging mechanism to keep track of the fetch attempt. It also * signals the following conditions: ServiceInterruption (if a dynamic * error occurs), or ManifoldCFException if a fatal error occurs, or nothing if * a standard protocol error occurs. * Note that, for proxies etc, the idea is for this fetch request to handle whatever * redirections are needed to support proxies. * @param urlPath is the path part of the url, e.g. "/robots.txt" * @param userAgent is the value of the userAgent header to use. * @param from is the value of the from header to use. * @param redirectOK should be set to true if you want redirects to be automatically followed. * @param host is the value to use as the "Host" header, or null to use the default. * @param formData describes additional form arguments and how to fetch the page. * @param loginCookies describes the cookies that should be in effect for this page fetch. */ public void executeFetch(String urlPath, String userAgent, String from, boolean redirectOK, String host, FormData formData, LoginCookies loginCookies) throws ManifoldCFException, ServiceInterruption; /** Get the http response code. *@return the response code. This is either an HTTP response code, or one of the codes above. */ public int getResponseCode() throws ManifoldCFException, ServiceInterruption; /** Get the last fetch cookies. *@return the cookies now in effect from the last fetch. */ public LoginCookies getLastFetchCookies() throws ManifoldCFException, ServiceInterruption; /** Get response headers *@return a map keyed by header name containing a list of values. */ public Map<String,List<String>> getResponseHeaders() throws ManifoldCFException, ServiceInterruption; /** Get a specified response header, if it exists. *@param headerName is the name of the header. *@return the header value, or null if it doesn't exist. */ public String getResponseHeader(String headerName) throws ManifoldCFException, ServiceInterruption; /** Get the response input stream. It is the responsibility of the caller * to close this stream when done. */ public InputStream getResponseBodyStream() throws ManifoldCFException, ServiceInterruption; /** Get limited response as a string. */ public String getLimitedResponseBody(int maxSize, String encoding) throws ManifoldCFException, ServiceInterruption; /** Note that the connection fetch was interrupted by something. */ public void noteInterrupted(Throwable e); /** Done with the fetch. Call this when the fetch has been completed. A log entry will be generated * describing what was done. */ public void doneFetch(IProcessActivity activities) throws ManifoldCFException; /** Close the connection. Call this to return the connection to * its pool. */ public void close(); /** Destroy the connection. Call this to close the connection. */ public void destroy(); }