// HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Stream.java,v $
// $Author: derrickoswald $
// $Date: 2005/06/20 01:56:32 $
// $Revision: 1.15 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
package org.htmlparser.lexer;
import java.io.IOException;
import java.io.InputStream;
/**
 * Provides for asynchronous fetching from a stream.
 * <p>
 * Bytes pulled from the underlying stream are accumulated in one buffer
 * that is only released by <code>close()</code>; <code>mark()</code> and
 * <code>reset()</code> simply move the read offset within that buffer.
 * The class implements <code>Runnable</code>: <code>run()</code> keeps
 * calling <code>fill(true)</code> until the underlying stream is drained.
 * <p>
 * <code>fill()</code> and <code>close()</code> are synchronized;
 * <code>read()</code>, <code>available()</code>, <code>mark()</code> and
 * <code>reset()</code> are not (see the comment in <code>read()</code>).
 */
public class Stream extends InputStream implements Runnable
{
    /**
     * The number of calls to fill that obtained bytes.
     * Note: to be removed.
     */
    public int fills = 0;

    /**
     * The number of reallocations.
     * Note: to be removed.
     */
    public int reallocations = 0;

    /**
     * The number of synchronous (blocking) fills.
     * Note: to be removed.
     */
    public int synchronous = 0;

    /**
     * An initial buffer size.
     */
    protected static final int BUFFER_SIZE = 4096;

    /**
     * Return value when no more characters are left.
     */
    protected static final int EOF = -1;

    /**
     * The largest buffer that fill will try to allocate.
     * Kept a little below <code>Integer.MAX_VALUE</code> because virtual
     * machines may refuse array sizes right at the limit.
     */
    private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;

    /**
     * The underlying stream.
     * Set to <code>null</code> once it has been drained or closed.
     */
    protected volatile InputStream mIn;

    /**
     * The bytes read so far.
     */
    public volatile byte[] mBuffer;

    /**
     * The number of valid bytes in the buffer.
     */
    public volatile int mLevel;

    /**
     * The offset of the next byte returned by read().
     */
    protected int mOffset;

    /**
     * The content length from the HTTP header.
     * Zero means the length is unknown.
     */
    protected int mContentLength;

    /**
     * The bookmark, or -1 if <code>mark()</code> has not been called.
     */
    protected int mMark;

    /**
     * Construct a stream with no assumptions about the number of bytes available.
     * @param in The input stream to use.
     */
    public Stream (InputStream in)
    {
        this (in, 0);
    }

    /**
     * Construct a stream to read the given number of bytes.
     * @param in The input stream to use.
     * @param bytes The maximum number of bytes to read.
     * This should be set to the ContentLength from the HTTP header.
     * A negative or zero value indicates an unknown number of bytes.
     */
    public Stream (InputStream in, int bytes)
    {
        mIn = in;
        mBuffer = null;
        mLevel = 0;
        mOffset = 0;
        mContentLength = bytes < 0 ? 0 : bytes;
        mMark = -1;
    }

    /**
     * Fetch more bytes from the underlying stream.
     * Has no effect if the underlying stream has been drained.
     * When the content length is unknown the buffer starts at
     * <code>BUFFER_SIZE</code> (or the underlying stream's available()
     * count if larger) and is at least doubled whenever less than half of
     * <code>BUFFER_SIZE</code> is free. When the content length is known
     * the buffer is allocated once at that size and the underlying stream
     * is closed as soon as that many bytes have been read.
     * @param force If <code>true</code>, an attempt is made to read from the
     * underlying stream, even if bytes are available. If <code>false</code>,
     * a read of the underlying stream will not occur if there are already
     * bytes available.
     * @return <code>true</code> if unread bytes were already available
     * (non-forced call only) or this call obtained bytes,
     * <code>false</code> if the underlying stream was already drained or
     * this call hit its end.
     * @exception IOException If the underlying stream read(), available()
     * or close() throws one, or the buffer cannot grow any further.
     */
    protected synchronized boolean fill (boolean force)
        throws
            IOException
    {
        int size;
        byte[] buffer;
        int read;
        boolean ret;

        ret = false;
        if (null != mIn) // mIn goes null when it's been sucked dry
        {
            if (!force)
            {   // check for change of state while waiting on the monitor in a synchronous call
                if (0 != available ())
                    return (true);
                synchronous++;
            }
            // get some buffer space
            if (0 == mContentLength)
            {   // unknown content length... keep doubling
                if (null == mBuffer)
                {
                    mBuffer = new byte[Math.max (BUFFER_SIZE, mIn.available ())];
                    buffer = mBuffer;
                }
                else
                {
                    if (mBuffer.length - mLevel < BUFFER_SIZE / 2)
                        buffer = new byte[grownCapacity (mBuffer.length, mIn.available ())];
                    else
                        buffer = mBuffer;
                }
                size = buffer.length - mLevel;
            }
            else
            {   // known content length... allocate once
                size = mContentLength - mLevel;
                if (null == mBuffer)
                    mBuffer = new byte[size];
                buffer = mBuffer;
            }
            // read into the end of the 'new' buffer
            read = mIn.read (buffer, mLevel, size);
            if (-1 == read)
                releaseSource ();
            else
            {
                if (mBuffer != buffer)
                {   // copy the bytes previously read
                    System.arraycopy (mBuffer, 0, buffer, 0, mLevel);
                    mBuffer = buffer;
                    reallocations++;
                }
                // publish the new bytes only after mBuffer points at the array holding them
                mLevel += read;
                if ((0 != mContentLength) && (mLevel == mContentLength))
                    releaseSource ();
                ret = true;
                fills++;
            }
        }

        return (ret);
    }

    /**
     * Compute the size of the replacement buffer when the current one is nearly full.
     * The arithmetic is done in <code>long</code> so that doubling a buffer
     * of 2^30 bytes or more cannot wrap around to a negative or too-small size.
     * @param current The length of the current buffer.
     * @param available The number of bytes the underlying stream reports as available.
     * @return The larger of twice <code>current</code> and
     * <code>current + available</code>, capped at <code>MAX_BUFFER_SIZE</code>.
     * @exception IOException If the current buffer is already at the cap.
     */
    private static int grownCapacity (int current, int available)
        throws
            IOException
    {
        long wanted;

        if (current >= MAX_BUFFER_SIZE)
            throw new IOException ("stream too large to buffer: " + current + " bytes already held");
        wanted = Math.max (2L * current, (long)current + available);

        return ((int)Math.min (wanted, (long)MAX_BUFFER_SIZE));
    }

    /**
     * Detach and close the underlying stream.
     * <code>mIn</code> is cleared <em>before</em> close() is attempted, so a
     * failing close() cannot leave this object holding a dead stream that
     * every later fill would try to read and close again.
     * Only called from the synchronized methods fill() and close().
     * @exception IOException If the underlying stream close() throws one.
     */
    private void releaseSource ()
        throws
            IOException
    {
        InputStream in;

        in = mIn;
        mIn = null;
        if (null != in)
            in.close ();
    }

    //
    // Runnable interface
    //

    /**
     * Continually read the underlying stream until exhausted.
     * An <code>IOException</code> from fill is printed and ends the loop.
     * @see java.lang.Thread#run()
     */
    public void run ()
    {
        boolean filled;

        do
        {   // keep hammering the socket with no delay, it's metered upstream
            try
            {
                filled = fill (true);
            }
            catch (IOException ioe)
            {
                ioe.printStackTrace ();
                // exit the thread if there is a problem,
                // let the synchronous reader find out about it
                filled = false;
            }
        }
        while (filled);
    }

    //
    // InputStream overrides
    //

    /**
     * Reads the next byte of data from the input stream. The value byte is
     * returned as an <code>int</code> in the range <code>0</code> to
     * <code>255</code>. If no byte is available because the end of the stream
     * has been reached, the value <code>-1</code> is returned. If no unread
     * byte is buffered, <code>fill(false)</code> is called first.
     * @return The next byte of data, or <code>-1</code> if the end of the
     * stream is reached.
     * @exception IOException If an I/O error occurs.
     */
    public int read () throws IOException
    {
        int ret;

        // The following is unsynchronized code.
        // Some would argue that unsynchronized access isn't thread safe
        // but I think I can rationalize it in this case...
        // The two volatile members are mLevel and mBuffer (besides mIn).
        // If (mOffset >= mLevel) turns false after the test, fill is
        // superfluously called, but it's synchronized and figures it out.
        // (mOffset < mLevel) only goes more true by the operation of the
        // background thread, it increases the value of mLevel
        // and volatile int access is atomic.
        // If mBuffer changes by the operation of the background thread,
        // the array pointed to can only be bigger than the previous buffer,
        // and hence no array bounds exception can be raised.
        if (0 == (mLevel - mOffset)) // (0 == available ())
            fill (false);
        if (0 != (mLevel - mOffset)) // (0 != available ())
            ret = mBuffer[mOffset++] & 0xff;
        else
            ret = EOF;

        return (ret);
    }

    /**
     * Returns the number of bytes that can be read from this input stream
     * without touching the underlying stream, i.e. the number of buffered
     * bytes that have not yet been returned by read().
     * @return The number of buffered, unread bytes.
     * @exception IOException <em>Never thrown. Just for subclassers.</em>
     */
    public int available () throws IOException
    {
        return (mLevel - mOffset);
    }

    /**
     * Closes this input stream and releases any system resources associated
     * with the stream.
     * The buffer, level, offset, content length and mark are reset even if
     * closing the underlying stream fails, so the buffer is never retained
     * by a stream the caller has asked to close; the failure is still
     * reported to the caller.
     * @exception IOException If closing the underlying stream fails.
     */
    public synchronized void close () throws IOException
    {
        try
        {
            releaseSource ();
        }
        finally
        {
            mBuffer = null;
            mLevel = 0;
            mOffset = 0;
            mContentLength = 0;
            mMark = -1;
        }
    }

    /**
     * Repositions this stream to the position at the time the
     * <code>mark</code> method was last called on this input stream.
     * If <code>mark</code> has not been called since the stream was created
     * (or since it was closed), the stream is repositioned to its start.
     * Bytes from that position onward are resupplied to subsequent callers
     * of the <code>read</code> method.
     *
     * @exception IOException <em>Never thrown. Just for subclassers.</em>
     * @see java.io.InputStream#mark(int)
     * @see java.io.IOException
     */
    public void reset () throws IOException
    {
        if (-1 != mMark)
            mOffset = mMark;
        else
            mOffset = 0;
    }

    /**
     * Tests if this input stream supports the <code>mark</code> and
     * <code>reset</code> methods.
     *
     * @return <code>true</code>.
     * @see java.io.InputStream#mark(int)
     * @see java.io.InputStream#reset()
     */
    public boolean markSupported ()
    {
        return (true);
    }

    /**
     * Marks the current position in this input stream. A subsequent call to
     * the <code>reset</code> method repositions this stream at the last marked
     * position so that subsequent reads re-read the same bytes.
     * The mark is just the current read offset; it is only cleared by
     * <code>close()</code>.
     *
     * @param readlimit <em>Not used.</em>
     * @see java.io.InputStream#reset()
     */
    public void mark (int readlimit)
    {
        mMark = mOffset;
    }
}