/*** ** @(#) TradeCard.com 1.0 ** ** Copyright (c) 1999 TradeCard, Inc. All Rights Reserved. ** ** ** THIS COMPUTER SOFTWARE IS THE PROPERTY OF TradeCard, Inc. ** ** Permission is granted to use this software as specified by the TradeCard ** COMMERCIAL LICENSE AGREEMENT. You may use this software only for ** commercial purposes, as specified in the details of the license. ** TRADECARD SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY ** THE LICENSEE AS A RESULT OF USING OR MODIFYING THIS SOFTWARE IN ANY WAY. ** ** YOU MAY NOT DISTRIBUTE ANY SOURCE CODE OR OBJECT CODE FROM THE TradeCard.com ** TOOLKIT AT ANY TIME. VIOLATORS WILL BE PROSECUTED TO THE FULLEST EXTENT ** OF UNITED STATES LAW. ** ** @version 1.0 ** @author Copyright (c) 1999 TradeCard, Inc. All Rights Reserved. ** **/ package com.partydj.util; import java.io.*; import java.nio.*; import java.nio.charset.*; import java.util.*; import java.util.regex.*; /** * A ChunkedCharBuffer operates similarly to a java.lang.StringBuffer. However, * it requires less contiguous memory to manage large datasets by aligning * chunks of contiguous memory to maintain the whole buffer. 
ChunkedCharBuffers * are NOT THREAD SAFE, for performance reasons the buffer assumes that it is * being operated on with a single thread */ public class ChunkedCharBuffer implements CharSequence, Appendable { public static final int DEFAULT_CHUNK_SIZE = 4096; public static final int DEFAULT_NUMBER_OF_CHUNKS = 16; public static final int MIN_CHUNK_SIZE = 64; public static final int MIN_NUMBER_OF_CHUNKS = 1; public static final int MAX_RECOMMENDED_CHUNK_SIZE = DEFAULT_CHUNK_SIZE * 16; int chunkSize; char chunks[][]; int lastChunk; int firstFree; int hash = 0; boolean copyOnWrite = false; //default byte encoding - a string because charset is not serializable String charsetName = CharsetConstants.UTF8.name(); /** * Static Builders offer optimizations based on initial input */ public static ChunkedCharBuffer of(String startsWith) { ChunkedCharBuffer buffer; if (startsWith == null) { buffer = new ChunkedCharBuffer(); } else { int chunkSize = Math.min(startsWith.length(), DEFAULT_CHUNK_SIZE * 8); buffer = new ChunkedCharBuffer(chunkSize, Math.max((startsWith.length() / chunkSize) + 1, DEFAULT_NUMBER_OF_CHUNKS)); buffer.append(startsWith); } return buffer; } /** * Static Builders offer optimizations based on initial input */ public static ChunkedCharBuffer immutableOf(String startsWith) { ChunkedCharBuffer buffer; if (startsWith == null) { buffer = new ChunkedCharBuffer(); } else { int chunkSize = Math.min(startsWith.length(), DEFAULT_CHUNK_SIZE * 8); buffer = new ChunkedCharBuffer(chunkSize, Math.max((startsWith.length() / chunkSize) + 1, DEFAULT_NUMBER_OF_CHUNKS)); buffer.append(startsWith); } return buffer.getImmutableHandle(); } /** * Create a new ChunkedCharBuffer with the passed incremental chunkSize and * number of starting chunks. The number of starting chunks is mearly the * size of the chunk holder array - which will have to grow whenever * chunkSize * startingChunks of data is presented. 
At that time the growth * is simply of the main array * @see init each constructor must call init once */ public ChunkedCharBuffer(int chunkSize, int startingChunks) { init(chunkSize, startingChunks); } public ChunkedCharBuffer(int chunkSize) { init(chunkSize, DEFAULT_NUMBER_OF_CHUNKS); } public ChunkedCharBuffer() { init(DEFAULT_CHUNK_SIZE, DEFAULT_NUMBER_OF_CHUNKS); } public ChunkedCharBuffer(String startsWith) { if (startsWith == null) { init(DEFAULT_CHUNK_SIZE, DEFAULT_NUMBER_OF_CHUNKS); } else { int chunkSize = Math.min(startsWith.length(), DEFAULT_CHUNK_SIZE * 8); init(chunkSize, Math.max((startsWith.length() / chunkSize) + 1, DEFAULT_NUMBER_OF_CHUNKS)); append(startsWith); } } private ChunkedCharBuffer(boolean uninitialized) { //inaccessible non-initializing constructor for access from immutable subclass } /** * Init must be called by each constructor to initialize the structures */ private void init(int chunkSize, int startingChunks) { //rather than assertions just fix bad arguments startingChunks = Math.max(startingChunks, MIN_NUMBER_OF_CHUNKS); chunkSize = Math.max(chunkSize, MIN_CHUNK_SIZE); lastChunk = 0; firstFree = 0; chunks = new char[startingChunks][]; this.chunkSize = chunkSize; this.chunks[0] = new char[chunkSize]; copyOnWrite = false; } public final int size() { return length(); } @Override public final int length() { return (lastChunk * chunkSize) + firstFree; } /** * @return true if the current length is zero */ public final boolean isEmpty() { return firstFree == 0 && lastChunk == 0; } public final int getChunkSize() { return chunkSize; } public final Charset getCharset() { return Charset.forName(charsetName); } //@note this method modifies the buffer public void setCharset(Charset charset) { beforeMod(false); if (charset != null) { this.charsetName = charset.name(); } } /** * Append a single char to the buffer - if there is space on the current * chunk then whoopee this is easy - else regrow to make room */ @Override public ChunkedCharBuffer 
append(char value) { beforeMod(true); chunks[lastChunk][firstFree++] = value; return this; } public ChunkedCharBuffer append(int i) { return append(String.valueOf(i)); } /** * Convenience method to append a StringBuilder to the buffer */ public ChunkedCharBuffer append(StringBuilder buf) { int remaining = buf.length(); if (buf != null && remaining > 0) { int available = 0; int copyfrom = 0; while (remaining > 0) { available = beforeMod(true); if (available > remaining) { available = remaining; } buf.getChars(copyfrom, copyfrom + available, chunks[lastChunk], firstFree); remaining -= available; copyfrom += available; firstFree += available; } } return this; } /** * Convenience method to append a StringBuffer to the buffer */ public ChunkedCharBuffer append(StringBuffer buf) { int remaining = buf.length(); if (buf != null && remaining > 0) { int available = 0; int copyfrom = 0; while (remaining > 0) { available = beforeMod(true); if (available > remaining) { available = remaining; } buf.getChars(copyfrom, copyfrom + available, chunks[lastChunk], firstFree); remaining -= available; copyfrom += available; firstFree += available; } } return this; } /** * Convenience method to append a java.nio CharBuffer to the buffer */ public ChunkedCharBuffer append(CharBuffer buf) { if (buf != null) { int remaining = buf.length(); int available = 0; while (remaining > 0) { available = beforeMod(true); if (available > remaining) { available = remaining; } buf.get(chunks[lastChunk], firstFree, available); remaining -= available; firstFree += available; } } return this; } /** * Append a string to the buffer - try to disassemble the string in the most * optimal size (==chunkSize) and reassemble onto the buffer */ public ChunkedCharBuffer append(String value) { if (value != null && value.length() > 0) { return append(value, 0, value.length()); } else { return this; } } /** * Append a string to the buffer - try to disassemble the string in the most * optimal size (==chunkSize) and 
reassemble onto the buffer */ public ChunkedCharBuffer append(String value, int copyfrom, int strlen) { if (value != null && value.length() > 0) { int available = 0; while (strlen > 0) { available = beforeMod(true); if (available > strlen) { available = strlen; } value.getChars(copyfrom, copyfrom + available, chunks[lastChunk], firstFree); strlen -= available; copyfrom += available; firstFree += available; } } return this; } /** * Appends the specified character sequence to this buffer */ @Override public ChunkedCharBuffer append(CharSequence csq){ return append(csq, 0, csq.length()); } /** * Appends a subsequence of the specified character sequence to this buffer */ @Override public ChunkedCharBuffer append(CharSequence csq, int start, int end) { int seqlen = csq.length(); //assertions as defined in java.lang.Appendable if (start < 0 || end < 0 || start > end || end > seqlen) { throw new IndexOutOfBoundsException("Assertions for start and end failed. Start:" + start + ", End:" + end); } int inputlen = end - start; if (inputlen > 0) { //chars remaining in current chunk int available = chunkSize - firstFree; //chars which will not fit in the current chunk int overflow = inputlen - available; //the number of additional chunks that will be required for the overflow int addlchunks = overflow > 0 ? 
(chunkSize % overflow) + 1 : 0; //the index into the sequence int seqidx = start; //prepare for mod beforeMod(false); //first pass fill the remainder of the lastChunk for (; seqidx < available && seqidx < end; seqidx++) { chunks[lastChunk][firstFree++] = csq.charAt(seqidx); } //second, work over the even sized chunks to complete the overflow for (int i = 0; i < addlchunks; i++) { beforeMod(true); for (int j = 0; j < chunkSize && overflow > 0; j++) { overflow--; chunks[lastChunk][firstFree++] = csq.charAt(seqidx++); } } } return this; } /** * Convenience method to append an entire char[] area to the buffer */ public ChunkedCharBuffer append(char chars[]) { if (chars != null && chars.length > 0) { return append(chars, 0, chars.length); } else { return this; } } /** * Append a char[] area to the buffer - using System.arrayCopy we can move * the array into the chunks directly */ public ChunkedCharBuffer append(char chars[], int start, int length) { if (chars != null && chars.length > 0) { int copyfrom = start; int available = 0; while (length > 0) { available = beforeMod(true); if (available > length) { available = length; } System.arraycopy(chars, copyfrom, chunks[lastChunk], firstFree, available); length -= available; copyfrom += available; firstFree += available; } } return this; } /** * Read the contents of the reader into the buffer */ public ChunkedCharBuffer append(Reader reader) throws IOException { if (reader != null) { int charsRead = 0; int available = 0; while (charsRead != -1) { available = beforeMod(true); charsRead = reader.read(chunks[lastChunk], firstFree, available); if (charsRead > 0) { firstFree += charsRead; } } } return this; } /** * Read the contents of the passed ChunkedCharBuffer into this buffer */ public ChunkedCharBuffer append(ChunkedCharBuffer buffer) { beforeMod(false); buffer.fillChunkedCharBuffer(this, 0, 0, buffer.length()); return this; } /** * Internal method which must be called before any modification is made to the buffer * * 
@param expand, grow the capacity of the buffer as necessary to hold at least one more char * @return the number of chars available for writing on the lastChunk, i.e. the current capacity */ private int beforeMod(boolean expand) { //set the cached hash to zero hash = 0; //if copyOnWrite has been set, issue copy, then unset the indicator if (copyOnWrite) { int chunkcount = lastChunk + 1; char[][] chunksCopy = new char[chunkcount][]; for (int i = 0; i < chunkcount; i++) { char[] copy = new char[chunkSize]; System.arraycopy(chunks[i], 0, copy, 0, chunkSize); chunksCopy[i] = copy; } //reassign the chunks variable chunks = chunksCopy; copyOnWrite = false; } //expand the capacity of the buffer if requested if (expand) { //the amount of space available in the last chunk int available = chunkSize - firstFree; if (available == 0) { if (lastChunk + 1 == chunks.length) { //reconsider main array growth strategy int len = chunks.length; char newarray[][] = new char[(len + 1) * 2][]; System.arraycopy(chunks, 0, newarray, 0, len); chunks = newarray; } //add a data chunk chunks[++lastChunk] = new char[chunkSize]; available = chunkSize; firstFree = 0; } return available; } else { return chunkSize - firstFree; } } /** * Return an immutable handle to the data in this buffer. This call is not inherently expensive, * as it does not require a copy of the data. However, after this call, this buffer is put into * CopyOnWrite mode. In this mode, any future modification to the buffer will first trigger a copy * and then apply the update. 
* @see beforeModification */ public ChunkedCharBuffer getImmutableHandle() { copyOnWrite = true; return new ImmutableChunkedCharBuffer(this); } /** * Allocate a substring from the chunked buffer to the end * @param start - the starting index */ public String substring(int start) { return substring(start, length()); } /** * Allocate a substring from the chunked buffer * @param start - the starting index * @param end - the end index */ public String substring(int start, int end) { if (start < 0) { throw new StringIndexOutOfBoundsException(start); } if (end > length()) { throw new StringIndexOutOfBoundsException(end); } if (start > end) { throw new StringIndexOutOfBoundsException(end - start); } return fillStringBuilder(new StringBuilder(end - start), start / chunkSize, start % chunkSize, end - start).toString(); } /** * Allocate a ChunkedCharBuffer from the chunked buffer to the end * @param start - the starting index */ public ChunkedCharBuffer subbuffer(int start) { return subbuffer(start, length()); } /** * Allocate a ChunkedCharBuffer from the chunked buffer * @param start - the starting index * @param end - the end index */ public ChunkedCharBuffer subbuffer(int start, int end) { if (start < 0) { throw new StringIndexOutOfBoundsException(start); } if (end > length()) { throw new StringIndexOutOfBoundsException(end); } if (start > end) { throw new StringIndexOutOfBoundsException(end - start); } ChunkedCharBuffer subBuffer = new ChunkedCharBuffer(chunkSize, (end - start) / chunkSize); subBuffer.setCharset(getCharset()); return fillChunkedCharBuffer(subBuffer, start / chunkSize, start % chunkSize, end - start); } /** * This sequence is a read only view into this ChunkedCharBuffer changes to the buffer are * reflected in the sequence, and may damage the integrity of the sequence, for a copy of * the buffer use substring * @see java.lang.CharSequence#subSequence(int, int) */ @Override public CharSequence subSequence(final int start, final int end) { final 
ChunkedCharBuffer buffer = this; if (start < 0) { throw new StringIndexOutOfBoundsException(start); } if (end > length()) { throw new StringIndexOutOfBoundsException(end); } if (start > end) { throw new StringIndexOutOfBoundsException(end - start); } //return a read only view into the chunked char buffer return new CharSequence() { @Override public int length() { return end - start; } @Override public char charAt(int index) { return buffer.charAt(index + start); } @Override public CharSequence subSequence(int subStart, int subEnd) { return buffer.subSequence(start + subStart, start + subEnd); } @Override public String toString() { return buffer.substring(start, end); } @Override public int hashCode() { return buffer.hashCode(); } @Override public boolean equals(Object obj) { if (this == obj) { return true; } if (obj == null) { return false; } try { CharSequence other = (CharSequence)obj; int length = length(); if (other.length() != length) { return false; } for (int i = 0; i < length; i++) { if (charAt(i) != other.charAt(i)) { return false; } } } catch (ClassCastException e) { return false; } return true; } }; } /** * Create a new string from the entire chunked buffer */ @Override public String toString() { int length = length(); return fillStringBuilder(new StringBuilder(length), 0, 0, length).toString(); } /** * Write the contents of the buffer to the writer, defensively protect the buffer * when copyOnWrite semantics are enabled */ public void writeTo(Writer writer) throws IOException { int length = length(); int stopChunk = length / chunkSize; int stopColumn = length % chunkSize; if (copyOnWrite) { //making a working buffer to protect the current data state char[] workbuffer = new char[chunkSize]; for (int i = 0; i < stopChunk; i++) { System.arraycopy(chunks[i], 0, workbuffer, 0, chunkSize); writer.write(workbuffer, 0, chunkSize); } if (stopColumn > 0) { System.arraycopy(chunks[stopChunk], 0, workbuffer, 0, stopColumn); writer.write(workbuffer, 0, stopColumn); 
} } else { //risky approach, give away our data arrays for (int i = 0; i < stopChunk; i++) { writer.write(chunks[i], 0, chunkSize); } if (stopColumn > 0) { writer.write(chunks[stopChunk], 0, stopColumn); } } } @Override public char charAt(int pos) { if ((pos < 0) || (pos >= length())) { throw new StringIndexOutOfBoundsException(pos); } int startChunk = pos / chunkSize; return chunks[startChunk][pos % chunkSize]; } /** * Returns this after resulting replacing all occurrences of oldChar in this * buffer with newChar. */ public ChunkedCharBuffer replace(char oldChar, char newChar) { beforeMod(false); int length = length(); for (int pos = 0; pos < length; pos++) { int startChunk = pos / chunkSize; if (chunks[startChunk][pos % chunkSize] == oldChar) { chunks[startChunk][pos % chunkSize] = newChar; } } return this; } /** * Replaces the character at the specific location with the given newChar. * @note this method modifies the buffer */ public ChunkedCharBuffer replaceCharAt(int pos, char newChar) { beforeMod(false); if (pos < 0 || pos > length()) { if ((pos < 0) || (pos >= length())) { throw new StringIndexOutOfBoundsException(pos); } } chunks[pos / chunkSize][pos % chunkSize] = newChar; return this; } /** * Return a new ChunkedCharBuffer which contains the same data as this, but reversed */ public ChunkedCharBuffer reverse() { ChunkedCharBuffer reversedBuffer = new ChunkedCharBuffer(this.chunkSize, this.chunks.length); for (int i = length() - 1; i >= 0; i--) { reversedBuffer.append(charAt(i)); } return reversedBuffer; } /** * Create a reader with access to the data in the buffer, this allows classes * to read directly out of the buffer. This reader will only read within the * bounds specified. 
* @param beginIndex inclusive * @param endIndex exclusive */ public Reader toRangeReader(final int beginIndex, final int endIndex) { if (beginIndex < 0) { throw new StringIndexOutOfBoundsException(beginIndex); } if (endIndex > length()) { throw new StringIndexOutOfBoundsException("end index is greater than length: " + endIndex + " > " + length()); } if (beginIndex > endIndex) { throw new StringIndexOutOfBoundsException("begin index is greater than endindex: " + beginIndex + " > " + endIndex); } return new Reader() { int index = beginIndex; int marked = 0; boolean closed = false; boolean eos = false; private void ensureOpen() throws IOException { if (closed) { throw new IOException("ChunkedCharBuffer Reader Closed by request"); } } @Override public int read() throws IOException { ensureOpen(); //already reached end-of-stream if (eos) { return -1; } int end = endIndex; //indicate the end-of-stream if (index >= end) { eos = true; return -1; } char cbuf[] = new char[1]; getChars(index / chunkSize, index % chunkSize, 1, cbuf, 0); index++; return cbuf[0]; } @Override public int read(char cbuf[]) throws IOException { return read(cbuf, 0, cbuf.length); } @Override public int read(char cbuf[], int off, int len) throws IOException { ensureOpen(); //already reached end-of-stream if (eos) { return -1; } int end = endIndex; //indicate the end-of-stream if (index >= end) { eos = true; return -1; } len = Math.min(len, end - index); getChars(index / chunkSize, index % chunkSize, len, cbuf, off); index += len; return len; } @Override public boolean ready() throws IOException { ensureOpen(); return true; } /** * Closing a previously closed stream has no effect. 
* @see java.io.Reader */ @Override public void close() throws IOException { //ensureOpen(); closed = true; } @Override public void reset() throws IOException { ensureOpen(); index = marked; eos = false; } @Override public void mark(int readAheadLimit) throws IOException { ensureOpen(); marked = index; } @Override public boolean markSupported() { return true; } public long skip(int ns) { if (index >= endIndex) { return 0; } long n = Math.min(endIndex - index, ns); index += n; return n; } @Override public String toString() { return String.format("ChunkedCharBuffer.RangeReader[%d,%d]", Integer.valueOf(beginIndex), Integer.valueOf(endIndex)); } }; } /** * Create a reader with access to the data in the buffer, this allows classes * to read directly out of the buffer */ public Reader toReader() { return toRangeReader(0, length()); } /** * Create a Writer with direct write access to the data in the buffer, this allows classes * to write directly into the buffer via the Writer interface */ public Writer toWriter() { return new Writer(this) { boolean closed = false; @Override public void write(int c) throws IOException { ensureOpen(); super.write(c); } @Override public void write(char cbuf[], int off, int len) throws IOException { ensureOpen(); ChunkedCharBuffer.this.append(cbuf, off, len); } @Override public void write(String str, int off, int len) throws IOException { ensureOpen(); ChunkedCharBuffer.this.append(str, off, len); } @Override public void close() throws IOException { closed = true; } @Override public void flush() throws IOException { //no-op } private void ensureOpen() throws IOException { if (closed) { throw new IOException("ChunkedCharBuffer Writer closed by request"); } } }; } /** * Create an InputStream with access to the data in the buffer, this allows * classes to read directly out of the buffer */ public InputStream toInputStream() throws IOException { return toInputStream(Charset.forName(charsetName)); } /** * Create an InputStream with access to the 
data in the buffer, this allows * classes to read directly out of the buffer */ public InputStream toInputStream(Charset charset) throws IOException { return toChunkedByteBuffer(Charset.forName(charsetName)).toInputStream(); } /** * Create a copy of the current data converted to a ChunkedByteBuffer */ public ChunkedByteBuffer toChunkedByteBuffer(Charset charset) throws CharacterCodingException, UnsupportedEncodingException { //configure the encoder the way that string encoder does CharsetEncoder encoder = charset.newEncoder().onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE); float avgBytesPerChar = encoder.averageBytesPerChar(); ChunkedByteBuffer cbb = new ChunkedByteBuffer((int)(chunkSize * avgBytesPerChar), chunks.length); cbb.setCharset(charset); int length = length(); int stopChunk = length / chunkSize; int stopColumn = length % chunkSize; //encoding per char array chunk is much faster than the CharSequence interface to CharBuffer for (int i = 0; i < stopChunk; i++) { ByteBuffer bb = encoder.encode(CharBuffer.wrap(chunks[i], 0, chunkSize)); cbb.append(bb); } if (stopColumn > 0) { ByteBuffer bb = encoder.encode(CharBuffer.wrap(chunks[stopChunk], 0, stopColumn)); cbb.append(bb); } return cbb; } /** * Tests if this buffer starts with the specified prefix beginning * a specified index. 
* @param prefix the prefix * @param toffset where to begin looking in the string */ public boolean startsWith(String prefix, int index) { int plen = prefix.length(); int count = 0; if ((index < 0) || (index > (length() - plen))) { return false; } while (--plen >= 0) { if (unsafeCharAt(index++) != prefix.charAt(count++)) { return false; } } return true; } /** * Tests if this buffer starts with the specified prefix */ public boolean startsWith(String prefix) { return startsWith(prefix, 0); } /** * Returns the index within this buffer of the first occurrence of the * specified substring, starting at the begining of this buffer * @param queryStr the substring to search for. */ public int indexOf(String queryStr) { return indexOf(queryStr, 0); } /** * This version of the indexOf method is based on an algorithm by Thomas Wang, * details can be found at http://www.concentric.net/~Ttwang/tech/stringscan.htm * The algorithm should be faster than a simple left to right scanning approach. * * The key to this faster scanning algorithm is to look at the last character of the pattern string first. * If the last pattern character matches, then we continue to search for pattern characters from left to right, * until the remaining pattern characters are matched. The algorithm skips parts of the string when it determines * that no match could possibly be found over that stretch. * * Two variables help speed the algorithm: * @var cache, which is a number representing a logical OR of all the queryStr characters * @var md2, is equal to the length of the queryStr except when the last char of the queryStr exists elsewhere in the * queryStr, in which case the value is the number of characters from the end of the string to that char * * The cache is used for checking whether a character could possibly be in the queryStr. 
By logical ANDing on any
     * char you can confirm if that character is possible/impossible as a component of a matching substring
     *
     * The md2 is used as the base number for how many characters can be skipped by the algorithm moving past a last char
     * match
     *
     * @param queryStr the substring to search for
     * @param fromIndex the index to start the search from, negative values are treated as zero
     * @return the index of the first match found, or -1 when there is none; an empty
     *         queryStr yields fromIndex, or length() when fromIndex is not less than length()
     */
    public int indexOf(String queryStr, int fromIndex) {
        //reuseable lens
        int querylen = queryStr.length();
        int length = length();
        //simple assertions
        if (fromIndex >= length) {
            return (querylen == 0 ? length : -1);
        }
        if (fromIndex < 0) {
            fromIndex = 0;
        }
        if (querylen == 0) {
            return fromIndex;
        }
        //working vars
        long cache = 0;
        int md2 = querylen;
        char lastQueryChar = queryStr.charAt(querylen - 1);
        for (int i = 0; i < querylen; i++) {
            char c = queryStr.charAt(i);
            //initialize the cache, one bit per value of the low six bits of the char
            cache |= 1L << (c & 63);
            //calculate the md2, ignoring the last char
            if (lastQueryChar == c && i < (querylen - 1)) {
                md2 = querylen - (i + 1);
            }
        }
        //whether the md2 is less than the query len
        boolean shortmd2 = md2 < querylen;
        //the current chunk, the scan starts where the last char of the first candidate match would be
        int chunk = ((querylen - 1) + fromIndex) / chunkSize;
        //index into the current chunk
        int i = ((querylen - 1) + fromIndex) % chunkSize;
        //overall index in the buffer
        int index = i + (chunk * chunkSize);
        //character used for checking
        char c = Character.MIN_VALUE;
        //how much to skip each iteration
        int skip = md2;
        //chunk loop
        //NOTE(review): the scan is bounded by chunkSize and lastChunk, not by length(), so
        //slots of the last chunk beyond firstFree are read as well - confirm this is harmless
        while (chunk <= lastChunk) {
            //scan loop
            scan_loop: while (i < chunkSize) {
                c = chunks[chunk][i];
                if (lastQueryChar == c) {
                    //last character matched, try to match the rest
                    for (int j = 0; j < (querylen - 1); j++) {
                        //$TS optimize me, it isn't necessary to do the division to find the chunk each time
                        c = unsafeCharAt((index - querylen) + j + 1);
                        if (queryStr.charAt(j) != c) {
                            skip = md2;
                            if (shortmd2) {
                                int altskip = 1;
                                //see if the char is "impossible"
                                if (((cache & (1L << (c & 63))) == 0L)) {
                                    altskip = j + 1;
                                }
                                //skip the max of md2 and impossible calc
                                skip = Math.max(md2, altskip);
                            }
                            //skip the indicated chars
                            i += skip;
                            index += skip;
                            continue scan_loop;
                        }
                    }
                    //full match, index points at the last char of the match
                    return index - querylen + 1;
                } else if ((cache & (1L << (c & 63))) == 0L) {
                    //the char is "impossible"
                    i += querylen;
                    index += querylen;
                } else {
                    i += 1;
                    index += 1;
                }
            }
            //advance the chunk, i may have been pushed past the end of the chunk by a skip
            chunk = (index / chunkSize);
            i = i % chunkSize;
        }
        //return not found
        return -1;
    }

    /**
     * Returns the index within this string of the first occurrence of the
     * specified character, starting the search at the beginning of the buffer
     */
    public final int indexOf(char ch) {
        return indexOf(ch, 0);
    }

    /**
     * Returns the index within this string of the first occurrence of the
     * specified character, starting the search at the specified index. There is
     * no restriction on the value of fromIndex. If it is negative, it has the
     * same effect as if it were zero: this entire buffer may be searched. If it
     * is equal to or greater than the length of this buffer, -1 is returned.
     */
    public final int indexOf(char ch, int fromIndex) {
        int length = length();
        if (fromIndex < 0) {
            fromIndex = 0;
        } else if (fromIndex >= length) {
            return -1;
        }
        int chunk = fromIndex / chunkSize;
        int i = fromIndex % chunkSize;
        int index = i + (chunk * chunkSize);
        for (; chunk <= lastChunk; chunk++) {
            //index is checked against length so the unused tail of the last chunk is never inspected
            for (; index < length && i < chunkSize; i++, index++) {
                if (chunks[chunk][i] == ch) {
                    return index;
                }
            }
            //following chunks are scanned from their first slot
            i = 0;
        }
        return -1;
    }

    /**
     * Returns the index within this string of the last occurrence of the
     * specified substring.
     */
    public final int lastIndexOf(String queryStr) {
        return lastIndexOf(queryStr, length());
    }

    /**
     * Returns the index within this string of the last occurrence of the
     * specified substring, searching backwards from fromIndex. If fromIndex
     * is negative, -1 is returned. If it is greater than the last index at
     * which the substring could start, the search begins at that last index.
     * If the substring is not found -1 is returned.
When there is time to do so, further research should be
     * made into enhancing the performance of this method, current seek time is
     * nearly 4x times the equivalent call on String. The time is entirely spent
     * dealing with additional bounds checking required by the simple
     * implementation, perhaps an enhanced version could be more aggressive about
     * bounds checking, or could use a more advanced search algorithm such as
     * Boyer-Moore or Knuth-Morris-Pratt
     * @param queryStr the substring to search for
     * @param fromIndex the index to start the search from
     */
    public final int lastIndexOf(String queryStr, int fromIndex) {
        int querylen = queryStr.length();
        int length = length();
        if (fromIndex < 0) {
            return -1;
        }
        //a match cannot start later than length - querylen
        if (fromIndex > (length - querylen)) {
            fromIndex = length - querylen;
        }
        if (querylen == 0) {
            return fromIndex;
        }
        //min is the smallest index at which the last char of a match can sit
        int min = querylen - 1;
        char lastQueryChar = queryStr.charAt(min);
        //from here on the scan position refers to the last char of the candidate match
        fromIndex += min;
        int chunk = fromIndex / chunkSize;
        int i = fromIndex % chunkSize;
        int index = i + (chunk * chunkSize);
        //iterating chunks
        while (chunk >= 0) {
            //look for first character
            while (index >= min && i >= 0 && chunks[chunk][i] != lastQueryChar) {
                i--;
                index--;
            }
            if (index < min) {
                return -1;
            }
            if (i == -1) {
                //wrap to the next chunk
                i = chunkSize - 1;
                chunk--;
            } else {
                //we found a first char match
                //j walks backwards over the chars preceding the matched char
                int j = i - 1;
                if (i == 0) {
                    j = chunkSize - 1;
                    i = chunkSize - 1;
                    chunk--;
                } else {
                    i--;
                }
                int followIndex = index - 1;
                int endIndex = followIndex - (querylen - 1);
                //k walks backwards over the query, starting before its last char
                int k = querylen - 2;
                int followChunk = followIndex / chunkSize;
                while (followIndex >= endIndex) {
                    while (j >= 0 && k >= 0 && followIndex >= 0) {
                        followIndex--;
                        if (chunks[followChunk][j--] != queryStr.charAt(k)) {
                            //look for first char again
                            break;
                        } else {
                            k--;
                        }
                    }
                    if (k == -1) {
                        //full match
                        return endIndex + 1;
                    }
                    if (j != -1) {
                        //a mismatch ended the comparison before the start of the chunk
                        break;
                    }
                    //continue to the next chunk
                    j = chunkSize - 1;
                    followChunk--;
                }
                //we are pointing at a good i
                index--;
            }
        }
        return -1;
    }

    /**
     * Returns a java.util.regex.Matcher for this buffer.
* @param regex the delimiting regular expression * @return Matcher for the matches against the buffer * @throws PatternSyntaxException if the regular expression's syntax is * invalid * @see java.util.regex.Pattern * @see java.util.regex.Matcher */ public final Matcher matcher(String regex) { return Pattern.compile(regex).matcher(this); } /** * The array returned by this method contains each substring of this buffer * that is terminated by another substring that matches the given expression * or is terminated by the end of the buffer. The substrings in the array are * in the order in which they occur in this buffer. If the expression does * not match any part of the input then the resulting array has just one * element, namely this string. * @param regex the delimiting regular expression * @return the array of strings computed by splitting this string around * matches of the given regular expression * @throws PatternSyntaxException if the regular expression's syntax is * invalid * @see java.util.regex.Pattern */ public final String[] split(String regex) { return Pattern.compile(regex).split(this, 0); } /** * The array returned by this method contains each substring of this buffer * that is terminated by another substring that matches the given expression * or is terminated by the end of the buffer. The substrings in the array are * in the order in which they occur in this buffer. If the expression does * not match any part of the input then the resulting array has just one * element, namely this string. 
* @param regex the delimiting regular expression * @param limit the result threshold, as described above * @return the array of strings computed by splitting this string around * matches of the given regular expression * @throws PatternSyntaxException if the regular expression's syntax is * invalid * @see java.util.regex.Pattern */ public final String[] split(String regex, int limit) { return Pattern.compile(regex).split(this, limit); } /** * Valid implmentation of the .equals method for ChunkedCharBuffer */ @Override public final boolean equals(Object obj) { if (this == obj) { return true; } if (obj == null) { return false; } try { ChunkedCharBuffer other = (ChunkedCharBuffer)obj; int length = length(); if (other.length() != length) { return false; } int slowCheckFrom = 0; //we can optmize a lot if our chunksize is the same if (chunkSize == other.chunkSize) { for (int i = 0; i < lastChunk; i++) { if (!Arrays.equals(chunks[i], other.chunks[i])) { return false; } } slowCheckFrom = lastChunk * chunkSize; } //this method of checking is much slower for (int i = slowCheckFrom; i < length; i++) { if (unsafeCharAt(i) != other.unsafeCharAt(i)) { return false; } } } catch (ClassCastException e) { return false; } return true; } /** * Fill the dest array with content from the buffer * @param srcBegin start index (inclusive) in the buffer * @param srcEnd end index (exclusive) in the buffer * @param dest the array to fill * @param destBegin start index in the destination array * @throws ArrayIndexOutOfBounds exception if the dest array cannot hold the requested data */ public final void getChars(int srcBegin, int srcEnd, char[] dest, int destBegin) throws ArrayIndexOutOfBoundsException { if (srcBegin < 0) { throw new StringIndexOutOfBoundsException(srcBegin); } if (srcEnd > length()) { throw new StringIndexOutOfBoundsException(srcEnd); } if (srcBegin > srcEnd) { throw new StringIndexOutOfBoundsException(srcEnd - srcBegin); } getChars(srcBegin / chunkSize, srcBegin % chunkSize, 
srcEnd - srcBegin, dest, destBegin); } @Override public final int hashCode() { int h = hash; if (h == 0) { for (int i = 0; i < lastChunk - 1; i++) { for (char c : chunks[i]) { h = 31 * h + c; } } for (int i = 0; i < firstFree; i++) { char c = chunks[lastChunk][i]; h = 31 * h + c; } hash = h; } return h; } /** * CharAt call which does no bounds checking, for internal calls that ensure safe boundary processing */ private char unsafeCharAt(int pos) { return chunks[pos / chunkSize][pos % chunkSize]; } private void getChars(int startChunk, int startColumn, int length, char[] dest, int destBegin) throws ArrayIndexOutOfBoundsException { int stop = (startChunk * chunkSize) + startColumn + length; int stopChunk = stop / chunkSize; int stopColumn = stop % chunkSize; for (int i = startChunk; i < stopChunk; i++) { int size = chunkSize - startColumn; System.arraycopy(chunks[i], startColumn, dest, destBegin, size); destBegin += size; startColumn = 0; } if (stopColumn > 0) { System.arraycopy(chunks[stopChunk], startColumn, dest, destBegin, stopColumn - startColumn); } } /** * Write the chunk data into the string bufffer, assume the buffer is well * allocated */ private StringBuilder fillStringBuilder(StringBuilder sb, int startChunk, int startColumn, int length) { int stop = (startChunk * chunkSize) + startColumn + length; int stopChunk = stop / chunkSize; int stopColumn = stop % chunkSize; for (int i = startChunk; i < stopChunk; i++) { sb.append(chunks[i], startColumn, chunkSize - startColumn); startColumn = 0; } if (stopColumn > 0) { sb.append(chunks[stopChunk], startColumn, stopColumn - startColumn); } return sb; } /** * Write the chunk data into the ChunkedCharBuffer, assume the buffer is well * allocated */ private ChunkedCharBuffer fillChunkedCharBuffer(ChunkedCharBuffer ccb, int startChunk, int startColumn, int length) { int stop = (startChunk * chunkSize) + startColumn + length; int stopChunk = stop / chunkSize; int stopColumn = stop % chunkSize; for (int i = startChunk; 
i < stopChunk; i++) { ccb.append(chunks[i], startColumn, chunkSize - startColumn); startColumn = 0; } if (stopColumn > 0) { ccb.append(chunks[stopChunk], startColumn, stopColumn - startColumn); } return ccb; } /** * All modifier methods of ChunkedCharBuffer MUST be implemented here to protect immutability */ private class ImmutableChunkedCharBuffer extends ChunkedCharBuffer { public ImmutableChunkedCharBuffer(ChunkedCharBuffer source) { super(true); //copy initialization chunkSize = source.chunkSize; chunks = source.chunks; lastChunk = source.lastChunk; firstFree = source.firstFree; hash = source.hash; copyOnWrite = true; charsetName = source.charsetName; } public ImmutableChunkedCharBuffer() { //for serialization only (must be public) } public ChunkedCharBuffer unmodifiable() { throw new UnsupportedOperationException("Cannot change immutable chunked char buffer"); } @Override public void setCharset(Charset charset) { unmodifiable();} @Override public ChunkedCharBuffer append(char value) { return unmodifiable();} @Override public ChunkedCharBuffer append(char[] value) { return unmodifiable();} @Override public ChunkedCharBuffer append(char[] chars, int start, int length) { return unmodifiable();} @Override public ChunkedCharBuffer append(CharBuffer buf) { return unmodifiable();} @Override public ChunkedCharBuffer append(CharSequence csq) { return unmodifiable();} @Override public ChunkedCharBuffer append(CharSequence csq, int start, int end) { return unmodifiable();} @Override public ChunkedCharBuffer append(ChunkedCharBuffer buffer) { return unmodifiable();} @Override public ChunkedCharBuffer append(int i) { return unmodifiable();} @Override public ChunkedCharBuffer append(Reader reader) throws IOException { return unmodifiable();} @Override public ChunkedCharBuffer append(String value) { return unmodifiable();} @Override public ChunkedCharBuffer append(String value, int copyfrom, int strlen) { return unmodifiable();} @Override public ChunkedCharBuffer 
append(StringBuilder buf) { return unmodifiable();} @Override public ChunkedCharBuffer append(StringBuffer buf) { return unmodifiable();} @Override public ChunkedCharBuffer replace(char oldChar, char newChar) { return unmodifiable();} @Override public ChunkedCharBuffer replaceCharAt(int pos, char newChar) { return unmodifiable();} @Override public ChunkedCharBuffer getImmutableHandle() { return this; } } }