// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.2
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.
package net.htmlparser.jericho;
import java.io.*;
import java.nio.*;
/**
 * Implements a buffered window into a stream of characters.
 * <p>
 * Unless the buffer is explicitly {@linkplain #setBuffer(char[]) set}, it expands automatically as further characters are fetched from the stream.
 * <p>
 * The {@link #setMinRequiredBufferBegin(int)} method can be used to inform the <code>StreamedText</code> object that characters up to a specified
 * position are no longer required, allowing more characters to be fetched without the need to increase the buffer size.
 * <p>
 * All positions accepted and returned by this class are absolute positions in the stream, not indexes into the internal buffer.
 */
final class StreamedText implements CharSequence {
    private final Reader reader;
    private char[] buffer;
    private boolean expandableBuffer;
    // Stream position of the first character held in the buffer. All text before it has been discarded.
    private int bufferBegin=0;
    // The next stream position into which text will be loaded from the reader.
    // Must be >=bufferBegin and <=bufferBegin+buffer.length, except if one of the "text" argument
    // constructors was used, in which case it is Integer.MAX_VALUE so that no read is ever attempted.
    private int readerPos=0;
    // The minimum position that must be kept in the buffer. Always >=bufferBegin.
    private int minRequiredBufferBegin=0;
    // Stream position just past the last character, or Integer.MAX_VALUE while the end has not been detected.
    private int end=Integer.MAX_VALUE;

    /** Size of the buffer allocated when no explicit buffer is supplied (same default as StAX). */
    public static int INITIAL_EXPANDABLE_BUFFER_SIZE=8192;

    /**
     * Constructs a streamed text over the specified reader using the specified buffer.
     * @param reader the source of characters.
     * @param buffer the fixed buffer to use, or <code>null</code> to use an automatically expanding buffer.
     */
    public StreamedText(final Reader reader, final char[] buffer) {
        this.reader=reader;
        setBuffer(buffer);
    }

    /**
     * Constructs a streamed text over the specified reader using an automatically expanding buffer.
     * @param reader the source of characters.
     */
    public StreamedText(final Reader reader) {
        this(reader,null);
    }

    // Wraps text that is already entirely in memory. The array must hold the text starting at index 0.
    private StreamedText(final char[] text, final int length) {
        reader=null;
        buffer=text;
        expandableBuffer=false;
        end=length;
        readerPos=Integer.MAX_VALUE;
    }

    /**
     * Constructs a streamed text directly over the specified character array (the array is not copied).
     * @param text the complete text.
     */
    public StreamedText(final char[] text) {
        this(text,text.length);
    }

    /**
     * Constructs a streamed text over the remaining content of the specified character buffer.
     * <p>
     * The backing array is used directly only if the buffer is array-backed and its remaining content starts at index 0 of that array.
     * Otherwise the remaining content is copied, because positions in this class are resolved as indexes from the start of the internal array.
     *
     * @param text the complete text.
     */
    public StreamedText(final CharBuffer text) {
        this(toZeroBasedCharArray(text),text.length());
    }

    /**
     * Constructs a streamed text over a copy of the specified character sequence.
     * @param text the complete text.
     */
    public StreamedText(final CharSequence text) {
        this(toCharArray(text));
    }

    /**
     * Sets the buffer to use.
     * @param buffer the fixed buffer to use, or <code>null</code> to allocate an automatically expanding buffer of {@link #INITIAL_EXPANDABLE_BUFFER_SIZE} characters.
     * @return this <code>StreamedText</code> object.
     */
    public StreamedText setBuffer(char[] buffer) {
        if (buffer!=null) {
            this.buffer=buffer;
            expandableBuffer=false;
        } else {
            this.buffer=new char[INITIAL_EXPANDABLE_BUFFER_SIZE];
            expandableBuffer=true;
        }
        return this;
    }

    /**
     * Indicates whether the buffer expands automatically.
     * @return <code>true</code> if the buffer expands automatically, otherwise <code>false</code>.
     */
    public boolean hasExpandableBuffer() {
        return expandableBuffer;
    }

    /**
     * Returns the character at the specified position, fetching further text from the stream if necessary.
     * @param pos the position of the character.
     * @return the character at the specified position.
     * @throws IllegalStateException if the position has already been discarded from the buffer.
     * @throws IndexOutOfBoundsException if the position is at or past the end of the stream.
     * @throws BufferOverflowException if the buffer is not expandable and too small to hold the required range.
     */
    public char charAt(final int pos) {
        if (pos>=readerPos) readToPosition(pos);
        checkPos(pos);
        return buffer[pos-bufferBegin];
    }

    /**
     * Declares that all text before the specified position is no longer required and may be discarded.
     * @param minRequiredBufferBegin the lowest position that must remain available.
     * @throws IllegalArgumentException if the specified position has already been discarded.
     */
    public void setMinRequiredBufferBegin(final int minRequiredBufferBegin) {
        if (minRequiredBufferBegin<bufferBegin) throw new IllegalArgumentException("Cannot set minimum required buffer begin to already discarded position "+minRequiredBufferBegin);
        this.minRequiredBufferBegin=minRequiredBufferBegin;
    }

    /**
     * Returns the lowest position that must remain available in the buffer.
     * @return the lowest position that must remain available in the buffer.
     */
    public int getMinRequiredBufferBegin() {
        return minRequiredBufferBegin;
    }

    /**
     * Returns the length of the text stream.
     * <p>
     * The length is not known until the end of the stream has been detected.
     * Use {@link #getEnd()} to query the end position without risking an exception.
     *
     * @return the length of the text stream.
     * @throws IllegalStateException if the end of the stream has not yet been reached.
     */
    public int length() {
        if (end==Integer.MAX_VALUE) throw new IllegalStateException("Length of streamed text cannot be determined until end of file has been reached");
        return end;
    }

    /**
     * Returns the end position of the text stream, or <code>Integer.MAX_VALUE</code> if the end has not yet been detected.
     * @return the end position of the text stream, or <code>Integer.MAX_VALUE</code> if not yet known.
     */
    public int getEnd() {
        return end;
    }

    // Ensures that every position in [begin,end) is loaded and valid before the buffer is accessed directly.
    private void prepareBufferRange(final int begin, final int end) {
        final int lastRequiredPos=end-1;
        // readerPos is the first position NOT yet loaded, so equality also requires a read.
        if (lastRequiredPos>=readerPos) readToPosition(lastRequiredPos);
        checkPos(begin);
        if (end>this.end) throw new IndexOutOfBoundsException();
    }

    /**
     * Writes the text between the specified positions to the specified writer.
     * @param writer the destination.
     * @param begin the begin position, inclusive.
     * @param end the end position, exclusive.
     * @throws IOException if the writer fails.
     */
    public void writeTo(final Writer writer, final int begin, final int end) throws IOException {
        prepareBufferRange(begin,end);
        writer.write(buffer,begin-bufferBegin,end-begin);
    }

    /**
     * Returns a new string that is a substring of this text.
     * <p>
     * The substring begins at the specified <code>begin</code> position and extends to the character at position <code>end</code> - 1.
     * Thus the length of the substring is <code>end-begin</code>.
     *
     * @param begin the begin position, inclusive.
     * @param end the end position, exclusive.
     * @return a new string that is a substring of this text.
     */
    public String substring(final int begin, final int end) {
        prepareBufferRange(begin,end);
        return new String(buffer,begin-bufferBegin,end-begin);
    }

    /**
     * Returns a new character sequence that is a subsequence of this sequence.
     * <p>
     * The returned <code>CharSequence</code> is only guaranteed to be valid as long as no further operations are performed on this <code>StreamedText</code> object.
     * Any subsequent method call could invalidate the underlying buffer used by the <code>CharSequence</code>.
     *
     * @param begin the begin position, inclusive.
     * @param end the end position, exclusive.
     * @return a new character sequence that is a subsequence of this sequence.
     */
    public CharSequence subSequence(final int begin, final int end) {
        // This has not been benchmarked. It is possible that returning substring(begin,end) results in faster code even though it requires more memory allocation.
        return getCharBuffer(begin,end);
    }

    /**
     * Returns a <code>CharBuffer</code> view of the internal buffer covering the specified range.
     * <p>
     * The view wraps the internal buffer without copying, so it shares the validity restrictions described in {@link #subSequence(int,int)}.
     *
     * @param begin the begin position, inclusive.
     * @param end the end position, exclusive.
     * @return a <code>CharBuffer</code> wrapping the requested range of the internal buffer.
     */
    public CharBuffer getCharBuffer(final int begin, final int end) {
        prepareBufferRange(begin,end);
        return CharBuffer.wrap(buffer,begin-bufferBegin,end-begin);
    }

    /**
     * Not supported.
     * @throws UnsupportedOperationException always.
     */
    public String toString() {
        throw new UnsupportedOperationException("Streamed text can not be converted to a string");
    }

    /**
     * Returns a summary of the internal buffer state for debugging purposes.
     * @return a summary of the internal buffer state.
     */
    public String getDebugInfo() {
        return "Buffer size: \""+buffer.length+"\", bufferBegin="+bufferBegin+", minRequiredBufferBegin="+minRequiredBufferBegin+", readerPos="+readerPos;
    }

    /**
     * Returns the internal buffer itself (not a copy).
     * @return the internal buffer.
     */
    public char[] getBuffer() {
        return buffer;
    }

    /**
     * Returns the stream position of the first character currently held in the buffer.
     * @return the stream position of the first character currently held in the buffer.
     */
    public int getBufferBegin() {
        return bufferBegin;
    }

    // Rejects positions that have been discarded or that lie at or past the known end of the stream.
    private void checkPos(final int pos) {
        // hopefully inlined by the compiler
        if (pos<bufferBegin) throw new IllegalStateException("StreamedText position "+pos+" has been discarded");
        if (pos>=end) throw new IndexOutOfBoundsException();
    }

    /**
     * Returns the first position that can not be held in the current buffer together with the minimum required buffer begin position.
     * @return the sum of the minimum required buffer begin position and the current buffer length.
     */
    public int getBufferOverflowPosition() {
        return minRequiredBufferBegin+buffer.length;
    }

    // Loads text from the reader until the specified position is in the buffer or the end of the stream is hit,
    // first making room by discarding text that is no longer required or by expanding the buffer.
    private void readToPosition(final int pos) {
        try {
            if (pos>=bufferBegin+buffer.length) {
                // pos does not fit in the buffer as it is currently positioned
                if (pos>=minRequiredBufferBegin+buffer.length) {
                    // it would not fit even after discarding everything that is allowed to be discarded
                    if (!expandableBuffer) throw new BufferOverflowException(); // unfortunately BufferOverflowException doesn't accept a message argument, otherwise it would include the message "StreamedText buffer too small to keep positions "+minRequiredBufferBegin+" and "+pos+" simultaneously"
                    expandBuffer(pos-minRequiredBufferBegin+1);
                }
                // no-op if expandBuffer already moved bufferBegin up to minRequiredBufferBegin
                discardUsedText();
            }
            while (readerPos<=pos) {
                final int charCount=reader.read(buffer,readerPos-bufferBegin,bufferBegin+buffer.length-readerPos);
                if (charCount==-1) {
                    end=readerPos;
                    break;
                }
                readerPos+=charCount;
            }
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
    }

    // Replaces the buffer with one of at least minSize characters (at least double the current size),
    // carrying over only the text from minRequiredBufferBegin onwards.
    private void expandBuffer(final int minSize) throws IOException {
        int newSize=buffer.length*2;
        if (newSize<minSize) newSize=minSize;
        final char[] newBuffer=new char[newSize];
        shiftBuffer(buffer,newBuffer);
        buffer=newBuffer;
    }

    // Moves the still required text to the start of the buffer, discarding everything before minRequiredBufferBegin.
    private void discardUsedText() throws IOException {
        if (minRequiredBufferBegin==bufferBegin) return;
        shiftBuffer(buffer,buffer);
    }

    // Copies the loaded text from minRequiredBufferBegin onwards to the start of toBuffer and makes
    // minRequiredBufferBegin the new bufferBegin. If the new bufferBegin lies beyond what has been loaded,
    // the gap is skipped in the reader.
    private void shiftBuffer(final char[] fromBuffer, final char[] toBuffer) throws IOException {
        final int shift=minRequiredBufferBegin-bufferBegin;
        final int retainedLength=readerPos-minRequiredBufferBegin;
        // retainedLength is <=0 when nothing at or after minRequiredBufferBegin has been loaded yet
        if (retainedLength>0) System.arraycopy(fromBuffer,shift,toBuffer,0,retainedLength);
        bufferBegin=minRequiredBufferBegin;
        while (readerPos<bufferBegin) {
            final long charCount=reader.skip(bufferBegin-readerPos);
            if (charCount==0) {
                // nothing could be skipped: treated as end of stream
                end=readerPos;
                break;
            }
            readerPos+=charCount;
        }
    }

    // Returns everything currently loaded in the buffer as a string.
    String getCurrentBufferContent() {
        return substring(bufferBegin,Math.min(end,readerPos));
    }

    // Returns an array holding the remaining content of the CharBuffer starting at index 0,
    // sharing the backing array when that is already the case and copying otherwise.
    private static char[] toZeroBasedCharArray(final CharBuffer text) {
        // hasArray() is false for read-only buffers, so array() can not throw here
        if (text.hasArray() && text.arrayOffset()+text.position()==0) return text.array();
        return toCharArray(text);
    }

    // Copies the content of the character sequence into a new array.
    private static char[] toCharArray(final CharSequence text) {
        if (text instanceof String) return ((String)text).toCharArray();
        final char[] charArray=new char[text.length()];
        for (int i=0; i<charArray.length; i++) charArray[i]=text.charAt(i);
        return charArray;
    }
}