// BlogBridge -- RSS feed reader, manager, and web based service
// Copyright (C) 2002-2006 by R. Pito Salas
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free Software Foundation;
// either version 2 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along with this program;
// if not, write to the Free Software Foundation, Inc., 59 Temple Place,
// Suite 330, Boston, MA 02111-1307 USA
//
// Contact: R. Pito Salas
// mailto:pitosalas@users.sourceforge.net
// More information: about BlogBridge
// http://www.blogbridge.com
// http://sourceforge.net/projects/blogbridge
//
// $Id: URLInputStream.java,v 1.22 2007/04/13 12:56:43 spyromus Exp $
//

package com.salas.bb.utils.net;

import com.salas.bb.utils.i18n.Strings;
import com.salas.bb.utils.net.auth.AuthCancelException;

import java.io.FileNotFoundException;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Input stream which is created from URL's. It provides several missing features transparently:
 * <ul>
 *  <li>Bandwidth controlling</li>
 *  <li>Resuming</li>
 *  <li>Telling progress of operation</li>
 *  <li>Ensured connections</li>
 * </ul>
 *
 * <p>This input stream takes the URL of a resource as construction parameter and provides all of
 * the above mentioned services to the readers. The connection to a given URL is performed lazily,
 * meaning that there's no actual connection established before any of the operations over the
 * stream is required. However, the bandwidth limit and other settings can be changed at any moment
 * before or after initialization and will take effect instantly.</p>
 *
 * <h1>Bandwidth controlling</h1>
 *
 * <p>This functionality allows users of this class to limit the usage of available bandwidth in a
 * relatively precise way (on average +/- 700 bytes/sec). The precision is the price for low
 * overhead. As it was said before, the bandwidth limit value can be changed at any moment and will
 * take effect instantly. It's possible to set <i>unlimited</i> usage mode (default) by giving a
 * limit of zero or less. This implementation utilizes the <code>BandwidthInputStream</code> input
 * stream implementation.</p>
 *
 * <h1>Resuming</h1>
 *
 * <p>Resuming support is something we all needed for so long. If some error occurs during the
 * connection phase the engine provides all available info about the failed attempt to
 * <code>IRetriesPolicy</code> (which is part of the <i>Ensured connections</i> module) and
 * receives the time period to wait before the next attempt, or the order to terminate the
 * attempts and report the error.</p>
 *
 * <p>The resuming itself works closely with sources of resources to position the pointer in the
 * stream after reconnection. For HTTP-protocol URL's it tries to use the Range-query parameter of
 * HTTP servers defined in <a href="http://www.faqs.org/ftp/rfc/rfc2616.html">
 * RFC2616 "Hypertext Transfer Protocol - HTTP1/1"</a>. If the direct positioning isn't supported
 * (old HTTP servers, local resources or anything else) then the same effect is established by
 * skipping the first N bytes.</p>
 *
 * <p>The resuming functionality is <u>completely transparent</u> to the class using this enhanced
 * stream. There will be no time-out's reported during the emulated resuming and so on.</p>
 *
 * <h1>Telling progress of operation</h1>
 *
 * <p>This one is very simple, but can be used for making extremely user-friendly interfaces, which
 * are showing where we are with loading data at any given moment. Currently, the following list
 * of events is supported:</p>
 *
 * <ul>
 *  <li><b>Connecting</b> - indicates that the (re)connection to resource has started.</li>
 *  <li><b>Connected</b> - indicates that the connection to resource has been (re)established. Also
 *      the size of the resource is told.</li>
 *  <li><b>Read</b> - tells the number of bytes read/skipped (only delta). Note that bytes skipped
 *      when emulating the resuming aren't counted.</li>
 *  <li><b>Finished</b> - indicates that the resource has been fully fetched.</li>
 *  <li><b>Errored</b> - indicates that connecting has failed with an I/O error.</li>
 * </ul>
 *
 * <p>Any number of listeners can be registered with {@link #addListener}. Note that the listeners
 * may throw any exception and it will not affect the reading procedure in any way. The exceptions
 * will be recorded in the log with WARNING level.</p>
 *
 * <h1>Ensured connections</h1>
 *
 * <p>This functionality is completely built on top of the Retries Policy concept. You are free to
 * define your own policy which will be analyzing the failed attempts to decide whether to make a
 * pause before the next attempt and what the length of the pause will be, or simply order to
 * terminate any further attempts and abort reading.</p>
 *
 * <h1>Redirections handling</h1>
 *
 * <p>Standard HTTP(S) protocol implementations by SUN support seamless following of redirections,
 * but it isn't possible to learn when we are permanently redirected somewhere. The current
 * implementation addresses this issue by allowing a client to register a permanent redirection
 * listener which will be notified once the connection to the resource is established and it's
 * detected that we were redirected somewhere during the connection phase.</p>
 */
public class URLInputStream extends InputStream {
    private static final Logger LOG = Logger.getLogger(URLInputStream.class.getName());

    private static final IRetriesPolicy DEFAULT_RETRIES_POLICY = new DirectRetriesPolicy();

    private static final String MSG_EXCEPTION_IN_THE_HANDLER = Strings.error("failed.to.handle");
    private static final String MSG_IO_ERROR = Strings.error("net.there.was.an.error.during.io");

    /** Unlimited bandwidth. */
    public static final int BANDWIDTH_UNLIMITED = 0;

    // URL we use to read data (replaced when a permanent redirection is reported).
    private URL sourceUrl;

    // Stream with controlled bandwidth; NULL while there is no live connection.
    private BandwidthInputStream bis;

    // Number of bytes which could be read per second (<=0 means unlimited).
    private int bandwidth;

    // Bytes read so far (starts from the resume position given at construction).
    private int read;

    // Length of the content as reported by the last connection.
    private int contentLength;

    // Set to TRUE once the stream gets closed.
    private boolean closed;

    // TRUE once the "finished" event has been fired (end of stream, content length reached
    // or the stream was closed).
    private boolean finished;

    // Policy consulted after every failed connection attempt.
    private IRetriesPolicy retriesPolicy;

    // Listeners of the progress events (synchronized list, never NULL).
    private final List<IStreamProgressListener> listeners;

    // Date of last successful fetching of data from source URL.
    private long lastFetchingTime;

    // Listener for redirection events (can be NULL).
    private IPermanentRedirectionListener redirectionListener;

    // TRUE to pause stream reading/skipping operation.
    private boolean paused;

    // The code returned by source of stream data in response to connection attempt.
    // It's (-1) if there was no connection attempt yet, valid HTTP response code
    // for HTTP-series connection or 200 for successful connection of other type.
    private int responseCode = -1;

    // A time of the last update taken from a response during a connection (-1 if unknown).
    private long lastModifiedTime;

    // A time of a connection attempt (server time-zone) (-1 if unknown).
    private long serverTime;

    // A user agent to use for HTTP connections.
    private String userAgent;

    // Basic HTTP Authentication info.
    private String username;
    private String password;

    /**
     * Creates stream out of URL.
     *
     * @param aSourceUrl non-NULL URL.
     */
    public URLInputStream(URL aSourceUrl) {
        this(aSourceUrl, -1L);
    }

    /**
     * Creates stream out of URL.
     *
     * @param aSourceUrl non-NULL URL.
     * @param aUserAgent user agent for HTTP connections.
     */
    public URLInputStream(URL aSourceUrl, String aUserAgent) {
        this(aSourceUrl, -1L);
        userAgent = aUserAgent;
    }

    /**
     * Creates stream out of URL.
     *
     * @param aSourceUrl        non-NULL URL.
     * @param aLastFetchingTime when this URL was fetched for the last time (-1 if never).
     */
    public URLInputStream(URL aSourceUrl, long aLastFetchingTime) {
        this(aSourceUrl, 0, aLastFetchingTime);
    }

    /**
     * Creates stream out of URL.
     *
     * @param aSourceUrl        non-NULL URL.
     * @param aResumeFrom       position to resume from.
     * @param aLastFetchingTime when this URL was fetched for the last time (-1 if never).
     *
     * @throws NullPointerException if the URL is not specified.
     */
    public URLInputStream(URL aSourceUrl, int aResumeFrom, long aLastFetchingTime) {
        if (aSourceUrl == null) {
            throw new NullPointerException(Strings.error("unspecified.url"));
        }

        retriesPolicy = DEFAULT_RETRIES_POLICY;
        listeners = Collections.synchronizedList(new ArrayList<IStreamProgressListener>());

        sourceUrl = aSourceUrl;
        lastFetchingTime = aLastFetchingTime;

        bis = null;
        closed = false;
        // Assigned directly: there is no wrapped stream to update yet, and calling an
        // overridable method from a constructor would run subclass code too early.
        bandwidth = BANDWIDTH_UNLIMITED;
        read = aResumeFrom;
        finished = false;
        paused = false;

        lastModifiedTime = -1;
        serverTime = -1;
        userAgent = null;
    }

    /**
     * Sets basic authentication info.
     *
     * @param username user name.
     * @param password password.
     */
    public void setBasicAuthenticationInfo(String username, String password) {
        this.username = username;
        this.password = password;
    }

    /**
     * Registers new redirection listener.
     *
     * @param listener new listener (NULL to stop the notifications).
     */
    public void setRedirectionListener(IPermanentRedirectionListener listener) {
        redirectionListener = listener;
    }

    /**
     * Sets alternative retries policy.
     *
     * @param aRetriesPolicy policy; NULL restores the default policy.
     */
    public void setRetriesPolicy(IRetriesPolicy aRetriesPolicy) {
        // A NULL policy would only surface later as a NullPointerException in the middle of
        // failure handling, so fall back to the default instead.
        retriesPolicy = aRetriesPolicy == null ? DEFAULT_RETRIES_POLICY : aRetriesPolicy;
    }

    /**
     * Adds new listener for progress events capturing.
     *
     * @param aListener listener.
     */
    public void addListener(IStreamProgressListener aListener) {
        listeners.add(aListener);
    }

    /**
     * Removes listener from list.
     *
     * @param aListener listener.
     */
    public void removeListener(IStreamProgressListener aListener) {
        listeners.remove(aListener);
    }

    /**
     * Returns current allowed bandwidth (bytes per second).
     *
     * @return bandwidth (byte/sec) (&lt;=0 means unlimited).
     */
    public int getBandwidth() {
        return bandwidth;
    }

    /**
     * Returns URL of the source.
     *
     * @return URL.
     */
    public URL getSourceURL() {
        return sourceUrl;
    }

    /**
     * Sets new allowed bandwidth (bytes per second). If a connection is already established
     * the new limit is passed to the wrapped stream right away.
     *
     * @param aBandwidth new bandwidth (bytes/sec) (&lt;=0 means unlimited).
     *
     * @see #BANDWIDTH_UNLIMITED
     */
    public void setBandwidth(int aBandwidth) {
        bandwidth = aBandwidth;
        if (bis != null) {
            bis.setBandwidth(aBandwidth);
        }
    }

    /**
     * Increments number of read bytes, fires the "read" event and, when the known content
     * length is reached, the "finished" event. Does nothing once the stream is finished.
     *
     * @param delta delta.
     */
    private synchronized void incRead(int delta) {
        if (finished) {
            return;
        }

        read += delta;
        fireRead(delta);

        if (contentLength != -1 && read >= contentLength) {
            finished();
        }
    }

    /**
     * Sets the finish flag and fires the event (only once per stream).
     */
    private synchronized void finished() {
        if (!finished) {
            // Set the flag BEFORE notifying: the monitor is reentrant, so a listener that
            // calls back into this stream (for example close()) must not trigger the
            // event for the second time.
            finished = true;
            fireFinished();
        }
    }

    /**
     * Logs the I/O failure and drops the current connection so that the next
     * {@link #connect()} call establishes a new one. The broken stream is closed quietly
     * in order not to leak the underlying connection.
     *
     * @param cause failure which made the stream unusable.
     */
    private void discardBrokenStream(IOException cause) {
        LOG.log(Level.WARNING, MSG_IO_ERROR, cause);

        InputStream broken = bis;
        bis = null;

        if (broken != null) {
            try {
                broken.close();
            } catch (IOException ignored) {
                // The stream has already failed; nothing useful can be done about it.
            }
        }
    }

    /**
     * Reads the next byte of data from the input stream. Blocks while the stream is paused.
     * If reading fails the connection is re-established and the reading is repeated.
     *
     * @return the next byte of data, or <code>-1</code> if the end of the stream is reached.
     *
     * @throws IOException if connection can't be (re)established or the stream is closed.
     */
    @Override
    public int read() throws IOException {
        blockOnPause();

        int ch;
        while (true) {
            connect();
            try {
                ch = bis.read();
                break;
            } catch (IOException e) {
                // We need to reconnect
                discardBrokenStream(e);
            }
        }

        if (ch == -1) {
            finished();
        }

        return ch;
    }

    /**
     * Returns the number of bytes that can be read (or skipped over) from this input stream
     * without blocking by the next caller of a method for this input stream. Establishes the
     * connection if there's none yet.
     *
     * @return the number of bytes that can be read from this input stream without blocking.
     *
     * @throws IOException if connection can't be (re)established or the stream is closed.
     */
    @Override
    public int available() throws IOException {
        while (true) {
            connect();
            try {
                return bis.available();
            } catch (IOException e) {
                // We need to reconnect
                discardBrokenStream(e);
            }
        }
    }

    /**
     * Closes this input stream and releases the connection associated with it. If there was
     * a live connection the "finished" event is fired. Closing an already closed stream has
     * no effect.
     *
     * @throws java.io.IOException if closing of the wrapped stream fails; the stream is
     *                             considered closed even in this case.
     */
    @Override
    public void close() throws IOException {
        if (closed) {
            return;
        }

        // Update the state first so that a failure below can't leave the stream half-closed.
        closed = true;
        BandwidthInputStream stream = bis;
        bis = null;

        if (stream != null) {
            try {
                stream.close();
            } finally {
                finished();
            }
        }
    }

    /**
     * Returns TRUE if stream is already closed.
     *
     * @return TRUE if stream is already closed.
     */
    public boolean isClosed() {
        return closed;
    }

    /**
     * Marks the current position in this input stream. Establishes the connection if there's
     * none yet; a connection failure is logged and the mark is not set.
     *
     * @param readlimit the maximum limit of bytes that can be read before the mark position
     *                  becomes invalid.
     *
     * @see java.io.InputStream#reset()
     */
    @Override
    public synchronized void mark(int readlimit) {
        try {
            connect();
            bis.mark(readlimit);
        } catch (IOException e) {
            LOG.log(Level.SEVERE, Strings.error("net.failed.to.establish.connection"), e);
        }
    }

    /**
     * Tests if this input stream supports the <code>mark</code> and <code>reset</code> methods.
     * Establishes the connection if there's none yet; a connection failure is logged and
     * reported as "not supported".
     *
     * @return <code>true</code> if this stream instance supports the mark and reset methods;
     *         <code>false</code> otherwise.
     *
     * @see java.io.InputStream#mark(int)
     * @see java.io.InputStream#reset()
     */
    @Override
    public boolean markSupported() {
        boolean supported = false;

        try {
            connect();
            supported = bis.markSupported();
        } catch (IOException e) {
            LOG.log(Level.SEVERE, Strings.error("net.failed.to.establish.connection"), e);
        }

        return supported;
    }

    /**
     * Reads up to <code>len</code> bytes of data from the input stream into an array of bytes.
     * Blocks while the stream is paused. If reading fails the connection is re-established
     * and the reading is repeated.
     *
     * @param b   the buffer into which the data is read.
     * @param off the start offset in array <code>b</code> at which the data is written.
     * @param len the maximum number of bytes to read.
     *
     * @return the total number of bytes read into the buffer, or <code>-1</code> if there is no
     *         more data because the end of the stream has been reached.
     *
     * @throws java.io.IOException  if connection can't be (re)established or the stream
     *                              is closed.
     * @throws NullPointerException if <code>b</code> is <code>null</code>.
     * @see java.io.InputStream#read()
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        blockOnPause();

        while (true) {
            connect();
            try {
                int justRead = bis.read(b, off, len);
                if (justRead == -1) {
                    finished();
                }
                return justRead;
            } catch (IOException e) {
                // We need to reconnect
                discardBrokenStream(e);
            }
        }
    }

    /**
     * Repositions this stream to the position at the time the <code>mark</code> method was last
     * called on this input stream. If resetting fails the connection is re-established and
     * the operation is repeated.
     *
     * @throws java.io.IOException if connection can't be (re)established or the stream
     *                             is closed.
     * @see java.io.InputStream#mark(int)
     * @see java.io.IOException
     */
    @Override
    public synchronized void reset() throws IOException {
        // NOTE(review): a freshly re-established stream has presumably no mark set, so once
        // reset() fails this loop may keep reconnecting -- confirm the intended behavior.
        while (true) {
            connect();
            try {
                bis.reset();
                return;
            } catch (IOException e) {
                // We need to reconnect
                discardBrokenStream(e);
            }
        }
    }

    /**
     * Skips over and discards <code>n</code> bytes of data from this input stream. Blocks while
     * the stream is paused. If skipping fails the connection is re-established and the
     * operation is repeated.
     *
     * @param n the number of bytes to be skipped.
     *
     * @return the actual number of bytes skipped.
     *
     * @throws java.io.IOException if connection can't be (re)established or the stream
     *                             is closed.
     */
    @Override
    public long skip(long n) throws IOException {
        blockOnPause();

        while (true) {
            connect();
            try {
                return bis.skip(n);
            } catch (IOException e) {
                // We need to reconnect
                discardBrokenStream(e);
            }
        }
    }

    /**
     * Checks for connection to exist. If there's no connection present then it will be
     * established, making as many attempts as the retries policy allows. Fires "connecting"
     * before the first attempt, "connected" after the successful one and "errored" if an
     * I/O exception is about to be thrown.
     *
     * @throws IOException in case of any I/O exception or if the stream is already closed.
     */
    public synchronized void connect() throws IOException {
        try {
            if (bis == null && !closed) {
                boolean connected = false;
                int attempt = 0;

                fireConnecting();
                while (!connected) {
                    IRetriesPolicy.Failure failure;
                    try {
                        failure = connectionAttempt(attempt++);
                    } catch (RuntimeException e) {
                        // Cancelled authentication arrives wrapped in a run-time exception
                        if (e.getCause() instanceof AuthCancelException) {
                            throw new NotAuthenticatedException();
                        }
                        throw e;
                    }

                    // If there was a failure then ask the retries policy for how long we should
                    // wait and whether we should do it at all.
                    if (failure != null) {
                        handleFailure(failure);
                    } else {
                        connected = true;
                    }
                }

                fireConnected(contentLength);
            } else if (closed) {
                throw new IOException(Strings.error("net.stream.is.already.closed"));
            }
        } catch (IOException e) {
            fireErrored(e);
            throw e;
        }
    }

    /**
     * Makes a single attempt to connect to the source URL. On success the wrapped stream is
     * initialized with the current bandwidth limit.
     *
     * @param attempt attempt sequence number.
     *
     * @return initialized failure object or NULL if successful.
     */
    IRetriesPolicy.Failure connectionAttempt(int attempt) {
        IRetriesPolicy.Failure failure = null;
        long start = System.currentTimeMillis();

        try {
            bis = new BandwidthInputStream(makeConnection(read));
            bis.setBandwidth(bandwidth);
        } catch (IOException e) {
            failure = new IRetriesPolicy.Failure(attempt, start, System.currentTimeMillis(),
                false, 0, e);
        }

        return failure;
    }

    /**
     * Handles connection failure. Missing resources, cyclic redirections and HTTP errors other
     * than "client timeout" and "unauthorized" are rethrown right away. For the rest the retries
     * policy is asked about the time to wait before retry; if the policy says that retrying
     * isn't necessary (-1) the original cause of the failure is thrown, otherwise the method
     * waits for the told period.
     *
     * @param aFailure failure description.
     *
     * @throws IOException the cause of the failure when no retry should be made.
     */
    void handleFailure(IRetriesPolicy.Failure aFailure) throws IOException {
        IOException cause = aFailure.getCause();

        if (cause instanceof FileNotFoundException
            || cause instanceof CyclicRedirectionException) {
            throw cause;
        } else if (cause instanceof UISException) {
            int code = ((UISException)cause).getCode();
            if (code != HttpURLConnection.HTTP_CLIENT_TIMEOUT
                && code != HttpURLConnection.HTTP_UNAUTHORIZED) {
                throw cause;
            }
        }

        // Ask retries policy what to do after this failed attempt
        long timeToWait = retriesPolicy.getTimeBeforeRetry(aFailure);
        if (timeToWait == -1) {
            throw cause;
        } else if (timeToWait > 0) {
            try {
                Thread.sleep(timeToWait);
            } catch (InterruptedException e) {
                // Continue with another attempt right away. The interrupt flag is deliberately
                // not restored here: with the flag set every following sleep would return
                // immediately and the retries would turn into a busy loop.
            }
        }
    }

    /**
     * Performs connection: resumes the resource from the given position, reports permanent
     * redirection (if any), records response code, content length, modification and server
     * times and wraps the resource stream with bytes counter (and GZIP decoder if the
     * content is compressed).
     *
     * @param read number of bytes already read.
     *
     * @return input stream of resource we are connected to.
     *
     * @throws IOException in case of any I/O exception.
     */
    protected InputStream makeConnection(long read) throws IOException {
        URLConnectionHolder holder = ResumingSupport.resume(sourceUrl, read, lastFetchingTime,
            userAgent, username, password);
        URLConnection con = holder.getConnection();

        URL permRedirURL = holder.getPermanentRedirectionURL();
        if (permRedirURL != null) {
            sourceUrl = permRedirURL;
            firePermanentRedirection(sourceUrl);
        }

        contentLength = 0;
        responseCode = 200;
        boolean isCompressed = false;

        if (con instanceof HttpURLConnection) {
            HttpURLConnection httpCon = (HttpURLConnection)con;
            responseCode = httpCon.getResponseCode();
            isCompressed = "gzip".equalsIgnoreCase(httpCon.getContentEncoding());
        }

        if (responseCode != HttpURLConnection.HTTP_UNAUTHORIZED) {
            analyzeResponseCodes(con);

            contentLength = resolveContentLength(con);
            lastModifiedTime = resolveLastModifiedTime(con);
            serverTime = resolveServerTime(con);
        }

        // Bytes are counted below the GZIP layer, i.e. as they come from the connection.
        InputStream is = new CountingFilterInputStream(con.getInputStream());
        if (isCompressed) {
            is = new CorrectedGZIPInputStream(is);
        }

        return is;
    }

    /**
     * Analyzes response codes after connecting to resource and converts them into exceptions:
     * 5xx codes into {@link ServerErrorException} and 4xx codes (except for 401) into
     * {@link ClientErrorException}. Non-HTTP connections are ignored.
     *
     * @param con connection.
     *
     * @throws IOException in case if something goes wrong or we throw exceptions.
     */
    private void analyzeResponseCodes(URLConnection con) throws IOException {
        if (!(con instanceof HttpURLConnection)) {
            return;
        }

        HttpURLConnection hcon = (HttpURLConnection)con;
        int series = responseCode / 100;

        if (series == 5) {
            throw new ServerErrorException(responseCode, hcon.getResponseMessage());
        }

        if (series == 4 && responseCode != HttpURLConnection.HTTP_UNAUTHORIZED) {
            throw new ClientErrorException(responseCode, hcon.getResponseMessage());
        }
    }

    /**
     * Returns response code.
     *
     * @return response code: (-1) if there was no connection attempt yet, HTTP response code
     *         for HTTP connections or 200 for connections of other types.
     */
    public int getResponseCode() {
        return responseCode;
    }

    /**
     * Resolves content length for a given connection.
     *
     * @param aCon connection.
     *
     * @return length of content.
     */
    protected int resolveContentLength(URLConnection aCon) {
        return aCon.getContentLength();
    }

    /**
     * Resolves a time of the last resource modification (server time-zone).
     *
     * @param aCon connection.
     *
     * @return time or (-1) if the connection reports no time (zero).
     */
    protected long resolveLastModifiedTime(URLConnection aCon) {
        long time = aCon.getLastModified();
        return time == 0 ? -1 : time;
    }

    /**
     * Resolves a time of response generation (server time-zone).
     *
     * @param aCon connection.
     *
     * @return time or (-1) if the connection reports no time (zero).
     */
    protected long resolveServerTime(URLConnection aCon) {
        long time = aCon.getDate();
        return time == 0 ? -1 : time;
    }

    /**
     * Returns wrapped stream.
     *
     * @return stream or NULL if there's no live connection.
     */
    BandwidthInputStream getStream() {
        return bis;
    }

    /**
     * Returns a snapshot of the registered listeners, so that the events can be delivered
     * without holding the lock of the list.
     *
     * @return listeners.
     */
    protected IStreamProgressListener[] getListeners() {
        synchronized (listeners) {
            return listeners.toArray(new IStreamProgressListener[listeners.size()]);
        }
    }

    /**
     * Fires event about connecting has started. Exceptions thrown by listeners are logged.
     */
    protected void fireConnecting() {
        for (IStreamProgressListener listener : getListeners()) {
            try {
                listener.connecting(this);
            } catch (Exception e) {
                LOG.log(Level.WARNING, MSG_EXCEPTION_IN_THE_HANDLER, e);
            }
        }
    }

    /**
     * Fires event about connection established. Exceptions thrown by listeners are logged.
     *
     * @param size size of the stream.
     */
    protected void fireConnected(long size) {
        for (IStreamProgressListener listener : getListeners()) {
            try {
                listener.connected(this, size);
            } catch (Exception e) {
                LOG.log(Level.WARNING, MSG_EXCEPTION_IN_THE_HANDLER, e);
            }
        }
    }

    /**
     * Fires event about another portion of bytes read. Exceptions thrown by listeners
     * are logged.
     *
     * @param bytes bytes.
     */
    protected void fireRead(int bytes) {
        for (IStreamProgressListener listener : getListeners()) {
            try {
                listener.read(this, bytes);
            } catch (Exception e) {
                LOG.log(Level.WARNING, MSG_EXCEPTION_IN_THE_HANDLER, e);
            }
        }
    }

    /**
     * Fires event about the end of stream reached. Exceptions thrown by listeners are logged.
     */
    protected void fireFinished() {
        for (IStreamProgressListener listener : getListeners()) {
            try {
                listener.finished(this);
            } catch (Exception e) {
                LOG.log(Level.WARNING, MSG_EXCEPTION_IN_THE_HANDLER, e);
            }
        }
    }

    /**
     * Fires event about the error appeared. Exceptions thrown by listeners are logged.
     *
     * @param ex error to report.
     */
    protected void fireErrored(IOException ex) {
        for (IStreamProgressListener listener : getListeners()) {
            try {
                listener.errored(this, ex);
            } catch (Exception e) {
                LOG.log(Level.WARNING, MSG_EXCEPTION_IN_THE_HANDLER, e);
            }
        }
    }

    /**
     * Notifies registered listener about permanent redirection. Exceptions thrown by the
     * listener are logged.
     *
     * @param newURL new URL.
     */
    private void firePermanentRedirection(URL newURL) {
        try {
            if (redirectionListener != null) {
                redirectionListener.redirectedTo(newURL);
            }
        } catch (Exception e) {
            LOG.log(Level.SEVERE, Strings.error("failed.to.notify.a.listener"), e);
        }
    }

    /**
     * Pauses/unpauses the stream reading/skipping operations and wakes up the readers
     * waiting for the state change.
     *
     * @param aPaused TRUE to pause.
     */
    public synchronized void setPaused(boolean aPaused) {
        paused = aPaused;
        notifyAll();
    }

    /**
     * Returns TRUE if currently paused.
     *
     * @return TRUE if currently paused.
     */
    public boolean isPaused() {
        return paused;
    }

    /**
     * Blocks the execution until unpaused (if currently paused). If the waiting thread gets
     * interrupted the method returns immediately, leaving the interrupt flag set.
     */
    private synchronized void blockOnPause() {
        try {
            while (paused) {
                wait();
            }
        } catch (InterruptedException e) {
            LOG.log(Level.WARNING, Strings.error("interrupted"), e);
            // Preserve the interruption request for the code up the stack.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Returns a time of the last modification taken from a server response.
     *
     * @return time or (-1) if unknown.
     */
    public long getLastModifiedTime() {
        return lastModifiedTime;
    }

    /**
     * Returns a time of the response generation taken from a server response.
     *
     * @return time or (-1) if unknown.
     */
    public long getServerTime() {
        return serverTime;
    }

    /**
     * Filter input stream that increments read bytes count as new bytes are read or skipped.
     * We need it because the number of bytes read from GZIP stream and
     * the number of bytes read from the URLInputStream using GZIP'ed stream
     * are two different things.
     */
    private class CountingFilterInputStream extends FilterInputStream {
        /**
         * Creates a counting stream.
         *
         * @param in stream to wrap.
         */
        public CountingFilterInputStream(InputStream in) {
            super(in);
        }

        /** Reads a single byte and counts it unless the end of stream is reached. */
        @Override
        public int read() throws IOException {
            int ch = super.read();
            if (ch != -1) {
                incRead(1);
            }
            return ch;
        }

        /** Reads a block of bytes and counts them unless the end of stream is reached. */
        @Override
        public int read(byte[] b, int off, int len) throws IOException {
            int count = super.read(b, off, len);
            if (count != -1) {
                incRead(count);
            }
            return count;
        }

        /** Skips bytes and counts the number of bytes actually skipped. */
        @Override
        public long skip(long n) throws IOException {
            long skipped = super.skip(n);
            // The counter is an int: clamp instead of letting a plain cast wrap around.
            incRead((int)Math.min(skipped, Integer.MAX_VALUE));
            return skipped;
        }
    }
}