/* Copyright (2006-2012) Schibsted ASA
* This file is part of Possom.
*
* Possom is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Possom is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Possom. If not, see <http://www.gnu.org/licenses/>.
*/
package no.sesat.search.http;
import java.util.jar.JarFile;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.net.ConnectException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLStreamHandler;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.security.AccessController;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
import java.text.DecimalFormat;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import no.sesat.search.http.protocol.jar.JarURLConnection;
import no.sesat.search.http.protocol.jar.URLJarFile;
import no.sesat.search.http.protocol.jar.URLJarFile.URLJarFileCloseController;
import no.sesat.search.http.protocol.jar.URLJarFileCallBack;
/**
* Utility class to fetch URLs and return them as either BufferedReaders or XML documents.
* Keeps statistics on connection times and failures.
* XXX redesign into multiple classes with less static methods.
* <p/>
* Supports protocols http, https, ftp, jar, and file.
* If no protocol is specified in the host it defaults to http.
* Provides support for URL Jars loaded with request properties as Sun's JVM does not.
* http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6270774
*
*
*
* @version <tt>$Id$</tt>
*/
public final class HTTPClient {
// Constants -----------------------------------------------------
private static final int CONNECT_TIMEOUT = 1000; // milliseconds
private static final int READ_TIMEOUT = 3000; // millisceonds
private static final Logger LOG = Logger.getLogger(HTTPClient.class);
private static final String DEBUG_USING_URL = "Using url {0} and Host-header {1} ";
// Attributes ----------------------------------------------------
private final String id;
private URLConnection urlConn;
private final URL u;
private final PhysicalHostStreamHandler handler;
// Static --------------------------------------------------------
/**
* Returns client for specified host and port for HTTP protocol.
*
* @param host The host to use. If no protocol is given then http is assumed.
* @param port The port to use.
*
* @return a client.
*/
public static HTTPClient instance(final String host, final int port) {
assert !host.contains("://") : "Not allowed to specify protocol, use another instance method.";
return instance(host, port, host);
}
/**
* Returns client for specified host, port and physical host (if the host is virtual).
* Useful if you need to use a virtual host different
* from the physical host.
* Defaults to the http protocol if the host argument doesn't specify it.
*
* @param host the physical host to use.
* @param port the port to use.
* @param physicalHost virtual host to use.
*
* @return a client.
*/
public static HTTPClient instance(final String host, final int port, final String physicalHost) {
try {
return new HTTPClient(new URL(ensureProtocol(host) + ':' + port), physicalHost);
} catch (MalformedURLException e) {
throw new RuntimeException(e);
}
}
/**
* Returns client instance for the specified URL. The URL can either be complete or just contain the host.
*
* Note that only the host and port and used since the url must be supplied again against the HTTPClient instance.
*
* The path can be supplied later when using the querying methods like
* {@link HTTPClient#getBufferedStream(String path)}.
*
* @param url The URL.
* @return a client.
*/
public static HTTPClient instance(final URL url) {
return new HTTPClient(url, "file".equals(url.getProtocol()) ? "localhost" : url.getHost());
}
/**
* Returns client instance for the specified URL and physical host. Use this if the virtual host is different from
* the physcical host. The original host in the URL will be replaced by the supplied physical host and and the
* original host will instead be used as a host header.
*
* @param url The url.
* @param physicalHost The physical host.
*
* @return a client.
*/
public static HTTPClient instance(final URL url, final String physicalHost) {
return new HTTPClient(url, physicalHost);
}
// Constructors --------------------------------------------------
private HTTPClient(final URL url, final String physicalHost) {
try {
handler = new PhysicalHostStreamHandler(physicalHost);
u = new URL(url, "", handler);
id = u.getHost() + ':' + u.getPort();
} catch (final MalformedURLException e) {
throw new RuntimeException(e);
}
}
// Public --------------------------------------------------------
/**
* @param path
* @return
* @throws java.io.IOException
* @throws org.xml.sax.SAXException
*/
public Document getXmlDocument(final String path) throws IOException, SAXException {
loadUrlConnection(path);
final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
final DocumentBuilder builder;
try {
builder = factory.newDocumentBuilder();
final long start = System.nanoTime();
final Document result = builder.parse(urlConn.getInputStream());
Statistic.getStatistic(this.id).addInvocation(System.nanoTime() - start);
return result;
} catch (ParserConfigurationException e) {
throw new IOException(e.getMessage());
} catch (IOException e) {
throw interceptIOException(e);
} finally {
if (null != urlConn && null != urlConn.getInputStream()) {
urlConn.getInputStream().close();
}
if (null != urlConn) {
// definitely done with connection now
urlConn = null;
}
}
}
/**
* @param path
* @return
* @throws java.io.IOException
*/
public BufferedInputStream getBufferedStream(final String path) throws IOException {
loadUrlConnection(path);
try {
final long start = System.nanoTime();
final BufferedInputStream result = new BufferedInputStream(urlConn.getInputStream());
Statistic.getStatistic(this.id).addInvocation(System.nanoTime() - start);
return result;
} catch (IOException e) {
throw interceptIOException(e);
}
}
/**
* @param path
* @return
* @throws java.io.IOException
*/
public BufferedReader getBufferedReader(final String path) throws IOException {
loadUrlConnection(path);
try {
final long start = System.nanoTime();
final BufferedReader result = new BufferedReader(new InputStreamReader(urlConn.getInputStream()));
Statistic.getStatistic(this.id).addInvocation(System.nanoTime() - start);
return result;
} catch (IOException e) {
throw interceptIOException(e);
}
}
/**
* @param path
* @param encoding
* @return
* @throws java.io.IOException
*/
public BufferedReader getBufferedReader(final String path, final String encoding) throws IOException {
loadUrlConnection(path);
try {
final long start = System.nanoTime();
final BufferedReader result = new BufferedReader(new InputStreamReader(urlConn.getInputStream(), encoding));
Statistic.getStatistic(this.id).addInvocation(System.nanoTime() - start);
return result;
} catch (IOException e) {
throw interceptIOException(e);
}
}
/**
* @param path
* @return
* @throws java.io.IOException
*/
public long getLastModified(final String path) throws IOException {
try {
return loadUrlConnection(path).getLastModified();
} catch (IOException e) {
throw interceptIOException(e);
} finally {
urlConn = null;
}
}
/**
* @param path
* @return
* @throws java.io.IOException
*/
public boolean exists(final String path) throws IOException {
boolean success = false;
loadUrlConnection(path);
if (urlConn instanceof HttpURLConnection || urlConn instanceof java.net.JarURLConnection) {
try {
if (urlConn instanceof HttpURLConnection) {
((HttpURLConnection)urlConn).setInstanceFollowRedirects(false);
((HttpURLConnection)urlConn).setRequestMethod("HEAD");
success = HttpURLConnection.HTTP_OK == ((HttpURLConnection)urlConn).getResponseCode();
} else {
success = urlConn.getContentLength() > 0;
}
} catch (IOException e) {
throw interceptIOException(e);
} finally {
urlConn = null;
}
} else {
final File file = new File(path);
success = file.exists();
}
return success;
}
/**
* @param ioe
* @return
*/
public IOException interceptIOException(final IOException ioe) {
final IOException e = interceptIOException(id, urlConn, ioe);
// definitely done with connection now
urlConn = null;
return e;
}
/**
* @param conn
* @param ioe
* @return
*/
public static IOException interceptIOException(
final URLConnection conn,
final IOException ioe) {
final String id = conn.getURL().getHost() + ':'
+ (-1 != conn.getURL().getPort() ? conn.getURL().getPort() : 80);
return interceptIOException(id, conn, ioe);
}
/**
* @param conn
* @param time
*/
public static void addConnectionStatistic(final URLConnection conn, final long time) {
final String id = conn.getURL().getHost() + ':'
+ (-1 != conn.getURL().getPort() ? conn.getURL().getPort() : 80);
Statistic.getStatistic(id).addInvocation(time);
}
// Package protected ---------------------------------------------
// Protected -----------------------------------------------------
// Private -------------------------------------------------------
private static IOException interceptIOException(
final String id,
final URLConnection urlConn,
final IOException ioe) {
if (ioe instanceof SocketTimeoutException) {
Statistic.getStatistic(id).addReadTimeout();
} else if (ioe instanceof ConnectException) {
Statistic.getStatistic(id).addConnectTimeout();
} else {
Statistic.getStatistic(id).addFailure();
}
// Clean out the error stream. See
if (urlConn instanceof HttpURLConnection) {
cleanErrorStream((HttpURLConnection) urlConn);
}
LOG.error("IOException occured for server at: " + id + " ("
+ urlConn.getURL() + ") ["
+ ioe.getMessage() + ']');
return ioe;
}
private URLConnection loadUrlConnection(final String path) throws IOException {
if (null == urlConn) {
urlConn = new URL(u, path, handler).openConnection();
}
return urlConn;
}
private static String ensureProtocol(final String host) {
return host.contains("://") ? host : "http://" + host;
}
private static void cleanErrorStream(final HttpURLConnection con) {
if (null != con.getErrorStream()) {
final BufferedReader errReader = new BufferedReader(new InputStreamReader(con.getErrorStream()));
final StringBuilder err = new StringBuilder();
try {
for (String line = errReader.readLine(); null != line; line = errReader.readLine()) {
err.append(line);
}
con.getErrorStream().close();
} catch (IOException ioe) {
LOG.warn(ioe.getMessage(), ioe);
}
LOG.info(err.toString());
}
}
// Inner classes -------------------------------------------------
private static class PhysicalHostStreamHandler extends URLStreamHandler {
private final String physicalHost;
public PhysicalHostStreamHandler(final String physicalHost) {
this.physicalHost = physicalHost;
}
String getPhysicalHost(){
return physicalHost;
}
protected URLConnection openConnection(final URL u) throws IOException {
URL url;
final URLConnection connection;
final String host;
if ("jar".equals(u.getProtocol())) {
// Doesn't work with jar urls?
// url = new URL(u.getProtocol(), physicalHost, u.getPort(), u.getFile());
final URL containedURL = new URL(u.getFile());
final String innerPath = containedURL.toString()
.replace("://" + containedURL.getHost(), "://" + physicalHost);
url = new URL("jar:" + innerPath);
host = containedURL.getHost();
// HACK around http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6270774
// XXX !!Danger!! Not at all synchronized!
// Makes a new callback only applicable to this url,
// required that callbacks are not overlapped or repeated!!
URLJarFile.setCallBack(new URLJarFileCallBackImpl(host));
// EndOfHACK
// HACK Third solution. Use own URLStreamHandler
connection = new JarURLConnection(url, null);
// EndOfHACK
} else {
url = new URL(u.getProtocol(), physicalHost, u.getPort(), u.getFile());
host = u.getHost();
connection = url.openConnection();
}
connection.addRequestProperty("host", host);
connection.setConnectTimeout(CONNECT_TIMEOUT);
connection.setReadTimeout(READ_TIMEOUT);
if (LOG.isTraceEnabled()) {
LOG.trace(MessageFormat.format(DEBUG_USING_URL, url, host));
}
return connection;
}
/**
* HACK around http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6270774
* XXX !!Danger!! Not at all synchronized!
* Makes a new callback only applicable to this url, required that callbacks are not overlapped or repeated!!
**/
private class URLJarFileCallBackImpl implements URLJarFileCallBack {
private final String host;
private URLJarFileCallBackImpl(final String host) {
this.host = host;
}
private int BUF_SIZE = 2048;
@SuppressWarnings(value = "unchecked")
public JarFile retrieve(final URL url, final URLJarFileCloseController closeController) throws IOException {
// next to verbose copy from URLJarFile
JarFile result = null;
/* get the stream before asserting privileges */
final URLConnection connection = url.openConnection();
connection.addRequestProperty("host", host);
connection.setConnectTimeout(CONNECT_TIMEOUT);
connection.setReadTimeout(READ_TIMEOUT);
final InputStream in = connection.getInputStream();
try {
result = (JarFile)
AccessController.doPrivileged(new PrivilegedExceptionAction() {
public Object run() throws IOException {
OutputStream out = null;
File tmpFile = null;
try {
tmpFile = File.createTempFile("jar_sesat_cache", null);
tmpFile.deleteOnExit();
out = new FileOutputStream(tmpFile);
int read = 0;
byte[] buf = new byte[BUF_SIZE];
while ((read = in.read(buf)) != -1) {
out.write(buf, 0, read);
}
out.close();
out = null;
return new URLJarFile(tmpFile, closeController);
} catch (IOException e) {
if (tmpFile != null) {
tmpFile.delete();
}
throw e;
} catch(RuntimeException rte){
if (tmpFile != null) {
tmpFile.delete();
}
LOG.error("failed writing jar_sesat_cache file", rte);
throw rte;
} finally {
if (in != null) {
in.close();
}
if (out != null) {
out.close();
}
}
}
});
}catch (PrivilegedActionException pae) {
throw (IOException) pae.getException();
}
//URLJarFile.setCallBack(null);
return result;
}
}
}
private static final class Statistic implements Comparable<Statistic> {
private static final Map<String, Statistic> STATISTICS = new ConcurrentHashMap<String, Statistic>();
private static final Logger STATISTICS_LOG = Logger.getLogger(Statistic.class);
private final String id;
private long totalTime = 0;
private long longest = 0;
private long invocations = 0;
private volatile long connectTimeouts = 0;
private volatile long readTimeouts = 0;
private volatile long failures = 0;
private static volatile long lastPrint = System.currentTimeMillis() / 60000;
static{
Runtime.getRuntime().addShutdownHook(new Thread(){
@Override
public void run(){
printStatistics();
}
});
}
static Statistic getStatistic(final String id) {
if (null == STATISTICS.get(id)) {
STATISTICS.put(id, new Statistic(id));
// log STATISTICS size
LOG.info("STATISTICS.size is " + STATISTICS.size());
}
return STATISTICS.get(id);
}
private Statistic(final String id) {
this.id = id;
}
synchronized void addInvocation(final long time) {
final long timeMs = (time / 1000000);
totalTime += timeMs;
if (timeMs > longest) {
longest = timeMs;
}
++invocations;
if (STATISTICS_LOG.isDebugEnabled() && System.currentTimeMillis() / 60000 != lastPrint) {
printStatistics();
lastPrint = System.currentTimeMillis() / 60000;
}
}
void addFailure() {
++failures;
}
void addConnectTimeout() {
++connectTimeouts;
}
void addReadTimeout() {
++readTimeouts;
}
private long getAverageInvocationTime() {
return 0 < invocations ? (totalTime * (long) 1000 / invocations) : 0;
}
@Override
public String toString() {
return ": " + new DecimalFormat("000,000,000").format(invocations)
+ " : " + new DecimalFormat("00,000").format(longest)
+ "ms : " + new DecimalFormat("0,000,000").format(getAverageInvocationTime())
+ "µs : " + new DecimalFormat("00,000").format(failures)
+ " : " + new DecimalFormat("00,000").format(connectTimeouts)
+ " : " + new DecimalFormat("00,000").format(readTimeouts)
+ " <-- " + id;
}
public int compareTo(Statistic o) {
return (int) (o.getAverageInvocationTime() - getAverageInvocationTime());
}
private static void printStatistics() {
final List<Statistic> list = new ArrayList<Statistic>(STATISTICS.values());
Collections.sort(list);
final StringBuilder msg = new StringBuilder();
msg.append("\n------ Printing HTTPClient statistics ------\n"
+ ": invocations : longest : average "
+ ": failures : connect errors : read timeouts <- client\n");
for (Statistic stat : list) {
msg.append(stat.toString() + '\n');
}
msg.append("------ ------------------------------ ------");
STATISTICS_LOG.debug(msg.toString());
}
}
}