/*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is the Kowari Metadata Store.
*
* The Initial Developer of the Original Code is Plugged In Software Pty
* Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
* created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
* Plugged In Software Pty Ltd. All Rights Reserved.
*
* Contributor(s): N/A.
*
* [NOTE: The text of this Exhibit A may differ slightly from the text
* of the notices in the Source Code files of the Original Code. You
* should use the text of this Exhibit A rather than the text found in the
* Original Code Source Code for Your Modifications.]
*
*/
package org.mulgara.resolver.http;
// Java 2 standard packages
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.Enumeration;
import javax.activation.MimeType;
import javax.activation.MimeTypeParameterList;
import javax.activation.MimeTypeParseException;
import org.apache.commons.httpclient.ConnectionPoolTimeoutException;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HostConfiguration;
import org.apache.commons.httpclient.HttpConnection;
import org.apache.commons.httpclient.HttpConnectionManager;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpState;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.SimpleHttpConnectionManager;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.HeadMethod;
import org.apache.commons.httpclient.protocol.Protocol;
import org.apache.log4j.Logger;
import org.mulgara.content.Content;
import org.mulgara.content.NotModifiedException;
/**
 * Wrapper around a {@link URL} to make it satisfy the {@link Content}
 * interface.
 *
 * The content is fetched over HTTP using Commons HttpClient. Redirects are
 * followed manually (see {@link #MAX_NO_REDIRECTS}); following a redirect
 * updates the URI reported by {@link #getURI()} to the redirect target.
 *
 * @created 2004-09-23
 * @author Mark Ludlow
 * @version $Revision: 1.8 $
 * @modified $Date: 2005/01/05 04:58:45 $
 * @maintenanceAuthor $Author: newmana $
 * @company <a href="mailto:info@tucanatech.com">Tucana Technology </a>
 * @copyright © 2004 <a href="http://www.tucanatech.com/">Tucana Technology
 * Inc </a>
 * @licence <a href=" {@docRoot}/../../LICENCE">Mozilla Public License v1.1
 * </a>
 */
public class HttpContent implements Content {

  /** Logger. */
  private static final Logger logger = Logger.getLogger(HttpContent.class.getName());

  /** The URI version of the URL. Replaced when a redirect is followed. */
  private URI httpUri;

  /** The MIME type of this data, or <code>null</code> if not yet known. */
  private MimeType contentType = null;

  /** Connection host <code>host</code> */
  private String host;

  /** <code>port</code> to make connection to */
  private int port;

  /** Schema for connection <code>schema</code> */
  private String schema;

  /** A container for HTTP attributes that may persist from request to request */
  private HttpState state = new HttpState();

  /** Http connection currently in use, or <code>null</code> if none was created yet. */
  private HttpConnection connection = null;

  /** Http connection manager. For setting up and cleaning after connections. */
  HttpConnectionManager connectionManager = new SimpleHttpConnectionManager();

  /** To obtain the http headers only */
  private static final int HEAD = 1;

  /** To obtain the response body */
  private static final int GET = 2;

  /** Max. number of redirects */
  private static final int MAX_NO_REDIRECTS = 10;

  /**
   * Constructor.
   *
   * @param uri The URI this object will be representing the content of
   * @throws IllegalArgumentException if <var>uri</var> is <code>null</code>
   *         or is not absolute
   * @throws URISyntaxException if the URL form of <var>uri</var> cannot be
   *         converted back into a URI
   * @throws MalformedURLException if <var>uri</var> cannot be converted to a URL
   */
  public HttpContent(URI uri) throws URISyntaxException, MalformedURLException {
    this(toUrl(uri));
  }

  /**
   * Constructor.
   *
   * @param url The URL this object will be representing
   * the content of
   * @throws IllegalArgumentException if <var>url</var> is <code>null</code>
   * @throws URISyntaxException if <var>url</var> is not a valid URI
   */
  public HttpContent(URL url) throws URISyntaxException {
    // Validate "url" parameter
    if (url == null) throw new IllegalArgumentException("Null \"url\" parameter");
    initialiseSettings(url);
  }

  /**
   * Validate a URI and convert it to a URL, so that both constructors reject
   * a <code>null</code> argument in the same way.
   *
   * @param uri the URI to convert
   * @return the URL form of <var>uri</var>
   * @throws IllegalArgumentException if <var>uri</var> is <code>null</code>
   * @throws MalformedURLException if no URL can be built from <var>uri</var>
   */
  private static URL toUrl(URI uri) throws MalformedURLException {
    if (uri == null) throw new IllegalArgumentException("Null \"uri\" parameter");
    return uri.toURL();
  }

  /**
   * Initialise the basic settings for a connection
   *
   * @param url location of source
   * @throws URISyntaxException invalid URI
   */
  private void initialiseSettings(URL url) throws URISyntaxException {
    // Convert the URL to a Uri
    httpUri = new URI(url.toExternalForm());

    // obtain basic details for connections
    host = httpUri.getHost();
    port = httpUri.getPort();
    schema = httpUri.getScheme();
  }

  /**
   * Obtain the appropriate connection method. As a side effect any previous
   * connection is released and closed, and a fresh connection for the current
   * host settings is stored in {@link #connection}, configured from the
   * <code>mulgara.httpcontent.*</code> system properties.
   *
   * @param methodType can be HEAD or GET
   * @return HttpMethodBase method
   * @throws IllegalArgumentException if <var>methodType</var> is neither HEAD nor GET
   * @throws IOException if the connection manager cannot supply a connection
   */
  private HttpMethod getConnectionMethod(int methodType) throws IOException {
    if (methodType != GET && methodType != HEAD) {
      throw new IllegalArgumentException("Invalid method base supplied for connection");
    }

    HostConfiguration config = new HostConfiguration();
    config.setHost(host, port, Protocol.getProtocol(schema));

    // drop whatever is left over from a previous request
    if (connection != null) {
      connection.releaseConnection();
      connection.close();
      connection = null;
    }

    // A pool timeout is an IOException; let it propagate rather than carrying
    // on with a null connection.
    connection = connectionManager.getConnectionWithTimeout(config, 0L);

    String proxyHost = System.getProperty("mulgara.httpcontent.proxyHost");
    if (proxyHost != null && proxyHost.length() > 0) {
      connection.setProxyHost(proxyHost);
    }

    String proxyPort = System.getProperty("mulgara.httpcontent.proxyPort");
    if (proxyPort != null && proxyPort.length() > 0) {
      connection.setProxyPort(Integer.parseInt(proxyPort));
    }

    // default timeout to 30 seconds
    connection.getParams().setConnectionTimeout(Integer.parseInt(System.getProperty(
        "mulgara.httpcontent.timeout", "30000")));

    String proxyUserName = System.getProperty("mulgara.httpcontent.proxyUserName");
    if (proxyUserName != null) {
      state.setCredentials(
          new AuthScope(
              System.getProperty("mulgara.httpcontent.proxyRealmHost"), AuthScope.ANY_PORT,
              System.getProperty("mulgara.httpcontent.proxyRealm"), AuthScope.ANY_SCHEME
          ),
          new UsernamePasswordCredentials(proxyUserName, System.getProperty("mulgara.httpcontent.proxyPassword"))
      );
    }

    HttpMethod method = (methodType == HEAD)
        ? (HttpMethod) new HeadMethod(httpUri.toString())
        : (HttpMethod) new GetMethod(httpUri.toString());

    // manually follow redirects due to the
    // strictness of http client implementation
    method.setFollowRedirects(false);

    return method;
  }

  /**
   * Obtain a valid connection and follow redirects if necessary.
   *
   * No cache validation headers (<code>If-Modified-Since</code>,
   * <code>If-None-Match</code>) are added to the request by this method.
   *
   * On failure the connection is left for the caller to clean up.
   *
   * @param methodType request the headers (HEAD) or body (GET)
   * @return the executed method whose response has a 200 status
   * @throws NotModifiedException if the content validates against the cache
   * @throws IOException if there's difficulty communicating with the web site,
   *         an unexpected status code is returned, a redirect cannot be
   *         followed, or more than {@link #MAX_NO_REDIRECTS} redirects occur
   */
  private HttpMethod establishConnection(int methodType) throws IOException, NotModifiedException {
    if (logger.isDebugEnabled()) logger.debug("Establishing connection");

    HttpMethod method = getConnectionMethod(methodType);
    assert method != null;

    // Make the request
    if (logger.isDebugEnabled()) logger.debug("Executing HTTP request");
    connection.open();
    method.execute(state, connection);
    int status = method.getStatusCode();
    if (logger.isDebugEnabled()) {
      logger.debug("Executed HTTP request, response code " + status);
    }

    // Interpret the response header
    if (status == HttpStatus.SC_NOT_MODIFIED) {
      // cache has been validated
      throw new NotModifiedException(httpUri);
    }
    if (!isValidStatusCode(status)) {
      throw new UnknownHostException("Unable to obtain connection to "
          + httpUri + ". Returned status code " + status);
    }

    // Follow at most MAX_NO_REDIRECTS redirects; never hand a 3xx response
    // back to the caller as if it were the content.
    int redirects = 0;
    while (isRedirected(method.getStatusCode())) {
      if (redirects >= MAX_NO_REDIRECTS) {
        method.releaseConnection();
        throw new IOException("Too many redirects (more than " + MAX_NO_REDIRECTS
            + ") while retrieving " + httpUri);
      }
      redirects++;
      method = followRedirect(method, methodType);
    }

    // Report the cache-related metadata of the document; it is not stored.
    logResponseHeader(method, "Last-Modified");
    logResponseHeader(method, "Etag");

    return method;
  }

  /**
   * Follow a single redirect: re-target this object at the location named by
   * the redirect response and execute a new request against it.
   *
   * @param redirected the executed method whose response is a redirect
   * @param methodType HEAD or GET, as used for the original request
   * @return the executed method for the redirect target
   * @throws IOException if the response carries no location, the location is
   *         not a usable URL/URI, or the target answers with an unexpected status
   */
  private HttpMethod followRedirect(HttpMethod redirected, int methodType) throws IOException {
    // read the target before giving up the connection that delivered it
    Header location = redirected.getResponseHeader("location");
    redirected.releaseConnection();

    if (location == null) {
      throw new IOException("Unable to obtain redirecting detaild from " + httpUri);
    }

    String target = location.getValue();
    try {
      // The location may be relative, so resolve it against the current URI.
      initialiseSettings(new URL(httpUri.toURL(), target));
    } catch (URISyntaxException ex) {
      IOException ioe = new IOException("Unable to follow redirection to "
          + target + " Not a valid URI");
      ioe.initCause(ex);
      throw ioe;
    }

    if (logger.isInfoEnabled()) {
      logger.info("Redirecting to " + target);
    }

    // attempt a new connection to this location
    HttpMethod method = getConnectionMethod(methodType);
    connection.open();
    method.execute(state, connection);
    if (!isValidStatusCode(method.getStatusCode())) {
      throw new UnknownHostException(
          "Unable to obtain connection to " + " the redirected site "
          + httpUri + ". Returned status code "
          + method.getStatusCode());
    }
    return method;
  }

  /**
   * Write a response header to the debug log, if it is present.
   *
   * @param method the executed method to read the header from
   * @param name the name of the response header
   */
  private void logResponseHeader(HttpMethod method, String name) {
    Header header = method.getResponseHeader(name);
    if (header != null && logger.isDebugEnabled()) {
      logger.debug(header.toString());
    }
  }

  /**
   * {@inheritDoc}
   *
   * This particular implementation tries to read the content type directly
   * from the HTTP <code>Content-Type</code> header, using a HEAD request the
   * first time it is needed. If the request fails the failure is logged and
   * <code>null</code> is returned.
   */
  public MimeType getContentType() throws NotModifiedException {
    // if we don't have the type from the connection already, then establish one
    if (contentType == null) {
      HttpMethod method = null;
      try {
        method = establishConnection(HEAD);
        contentType = readMimeType(method);
      } catch (IOException e) {
        logger.info("Unable to obtain content type for " + httpUri, e);
      } finally {
        // we're the only one to have needed this connection, so drop it
        if (method != null) method.releaseConnection();
        if (connection != null) connection.close();
      }
    }
    return contentType;
  }

  /**
   * Retrieves the URI for the actual content.
   *
   * @return The URI for the actual content
   */
  public URI getURI() {
    return httpUri;
  }

  /** The stream can be re-opened, so return false. */
  public boolean isStreaming() {
    return false;
  }

  /**
   * Creates an input stream to the resource whose content we are representing.
   * The content type is refreshed from the response as a side effect.
   *
   * @return An input stream to the resource whose content we are representing
   * @throws IOException if the resource cannot be retrieved or has no body
   * @throws NotModifiedException if the content validates against the cache
   */
  public InputStream newInputStream() throws IOException, NotModifiedException {
    if (logger.isDebugEnabled()) logger.debug("Getting new input stream for " + httpUri);

    HttpMethod method = null;
    boolean succeeded = false;
    try {
      // obtain connection and retrieve the headers
      method = establishConnection(GET);
      contentType = readMimeType(method);

      InputStream inputStream = method.getResponseBodyAsStream();
      if (inputStream == null) throw new IOException("Unable to obtain inputstream from " + httpUri);

      if (logger.isDebugEnabled()) logger.debug("Got new input stream for " + httpUri);
      succeeded = true;
      return inputStream;
    } finally {
      // Only the success path hands the open connection on to the caller.
      if (!succeeded) {
        if (method != null) method.releaseConnection();
        if (connection != null) connection.close();
      }
    }
  }

  /**
   * @throws IOException always (not implemented)
   */
  public OutputStream newOutputStream() throws IOException {
    throw new IOException("Output of HTTP content not implemented");
  }

  /**
   * Tests if a status code is one this class can work with.
   *
   * @param status the HTTP status code of a response
   * @return <code>true</code> for 200 (OK) and for the handled redirects
   */
  private boolean isValidStatusCode(int status) {
    return (status == HttpStatus.SC_OK || isRedirected(status));
  }

  /**
   * Tests if a status code is a redirect this class follows.
   *
   * @param status the HTTP status code of a response
   * @return <code>true</code> for 301, 302, 303 and 307
   */
  private boolean isRedirected(int status) {
    return (status == HttpStatus.SC_TEMPORARY_REDIRECT
        || status == HttpStatus.SC_MOVED_TEMPORARILY
        || status == HttpStatus.SC_MOVED_PERMANENTLY
        || status == HttpStatus.SC_SEE_OTHER);
  }

  /** @see org.mulgara.content.Content#getURIString() */
  public String getURIString() {
    return httpUri.toString();
  }

  /**
   * Read the mime type from the <code>Content-Type</code> header of an
   * executed method. Only the response headers are read; the connection is
   * not touched. Parameters that cannot be parsed are dropped with a warning,
   * keeping the type/subtype.
   *
   * @param method the executed method to read the header from
   * @return The MimeType for the URL, or <code>null</code> if the header is
   *         missing or its type/subtype cannot be parsed
   */
  private MimeType readMimeType(HttpMethod method) {
    MimeType result = null;
    String rawType = null;
    try {
      // retrieve the header
      Header header = method.getResponseHeader("Content-Type");
      if (header != null && header.getValue() != null) {
        rawType = header.getValue();
        // find the parameter separator so we can protect against bad params
        int sep = rawType.indexOf(';');
        if (sep < 0) {
          // no params, just create the MimeType
          result = new MimeType(rawType);
        } else {
          // create the MimeType from the type/subtype
          result = new MimeType(rawType.substring(0, sep));
          // parse parameters separately and set the result accordingly
          try {
            // The parameter list parser expects every parameter, including the
            // first, to be introduced by its ';' so the separator is kept.
            MimeTypeParameterList params = new MimeTypeParameterList(rawType.substring(sep));
            @SuppressWarnings("unchecked")
            Enumeration<String> names = (Enumeration<String>) params.getNames();
            while (names.hasMoreElements()) {
              String name = names.nextElement();
              result.setParameter(name, params.get(name));
            }
          } catch (MimeTypeParseException e) {
            logger.warn("Ignoring bad parameters in '" + rawType.substring(sep + 1) + "' from the content type for " + httpUri);
          }
        }
        if (logger.isInfoEnabled()) {
          logger.info("Obtain content type " + result + " from " + httpUri);
        }
      }
    } catch (java.lang.IllegalStateException e) {
      logger.info("Unable to obtain content type for " + httpUri);
    } catch (MimeTypeParseException e) {
      logger.warn("Unexpected parameters before ; in '" + rawType + "' as a content type for " + httpUri);
    }
    return result;
  }
}