UCSReader.java example

Explorer
geoserver-master
- doc
  - en
    - developer
      - source
        programming-guide
        ows-services
        hello
        src
        main
        java
        HelloWorld.java
        web-ui
        ComponentInfo.java
    - user
      - source
        security
        tutorials
        ldap
        acme-ldap
        src
        main
        java
        org
        acme
        Ldap.java
- src
/*
 * The Apache Software License, Version 1.1
 *
 *
 * Copyright (c) 2000-2002 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.apache.org.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

//package org.apache.xerces.impl.io;
package org.geoserver.ows.util;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;


/**
 * Reader for UCS-2 and UCS-4 encodings
 * (more precisely ISO-10646-UCS-(2|4) encodings).
 *
 * <p>This variant is modified to handle supplementary Unicode code points
 * correctly: a UCS-4 code point above <code>0xFFFF</code> is delivered as a
 * UTF-16 surrogate pair (RFC 2781). This required a lot of new code and
 * reduced the performance compared to the original Xerces version. Existing
 * code and comments were preserved whenever it was possible. -AK</p>
 *
 * <p>UCS-4 values outside the Unicode range
 * (<code>0x000000 - 0x10FFFF</code>) cannot be represented in UTF-16; they are
 * delivered as U+FFFD (REPLACEMENT CHARACTER).</p>
 *
 * <p>Instances keep mutable decoding state and perform no synchronization of
 * their own.</p>
 *
 * @author Neil Graham, IBM
 *
 * @version $Id$
 */
public class UCSReader extends Reader {
    //
    // Constants
    //

    /**
     * Default byte buffer size (8192, larger than that of ASCIIReader
     * since it's reasonable to surmise that the average UCS-4-encoded
     * file should be 4 times as large as the average ASCII-encoded file).
     */
    public static final int DEFAULT_BUFFER_SIZE = 8192;

    /**
     * Starting size of the internal char buffer. Internal char buffer is
     * maintained to hold excess chars that may be left from a previous read
     * operation when working with UCS-4 data (never used for UCS-2).
     */
    public static final int CHAR_BUFFER_INITIAL_SIZE = 1024;

    /** Encoding selector: UCS-2, little endian. */
    public static final short UCS2LE = 1;

    /** Encoding selector: UCS-2, big endian. */
    public static final short UCS2BE = 2;

    /** Encoding selector: UCS-4, little endian. */
    public static final short UCS4LE = 4;

    /** Encoding selector: UCS-4, big endian. */
    public static final short UCS4BE = 8;

    /**
     * The minimum value of a supplementary code point.
     */
    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;

    /**
     * The minimum value of a Unicode code point.
     */
    public static final int MIN_CODE_POINT = 0x000000;

    /**
     * The maximum value of a Unicode code point.
     */
    public static final int MAX_CODE_POINT = 0x10ffff;

    /** Substituted for UCS-4 values that are not Unicode code points. */
    private static final char REPLACEMENT_CHAR = '\uFFFD';

    //
    // Data
    //

    /** Input stream; <code>null</code> once this reader has been closed. */
    protected InputStream fInputStream;

    /** Byte buffer used by the bulk read methods. */
    protected byte[] fBuffer;

    /** What kind of data we're dealing with (one of the UCS* constants). */
    protected short fEncoding;

    /**
     * Stores "excess" characters that were decoded but could not be returned
     * yet. One UCS-4 supplementary character results in two Java
     * <code>char</code>s (high and low surrogate), so <code>read()</code> may
     * have to keep a low surrogate for the next call, and
     * <code>read(char[], int, int)</code> may decode more chars than fit into
     * the destination array.
     * Note that the char buffer functions like a stack, i.e. chars are stored
     * in reverse order: the next char to deliver is at index
     * <code>fCharCount - 1</code>.
     */
    protected char[] fCharBuf;

    /**
     * Count of Java chars currently being stored in the
     * <code>fCharBuf</code> array.
     */
    protected int fCharCount;

    /**
     * Copy of the pending chars (first <code>fCharCount</code> entries of
     * <code>fCharBuf</code>) taken by the last <code>mark</code> call, or
     * <code>null</code> if <code>mark</code> was never called.
     */
    private char[] fMarkedChars;

    //
    // Constructors
    //

    /**
     * Constructs an <code>ISO-10646-UCS-(2|4)</code> reader from the specified
     * input stream using default buffer size. The endianness and exact input
     * encoding (<code>UCS-2</code> or <code>UCS-4</code>) also should be known
     * in advance.
     *
     * @param inputStream input stream with UCS-2|4 encoded data
     * @param encoding    One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
     */
    public UCSReader(InputStream inputStream, short encoding) {
        this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
    } // <init>(InputStream, short)

    /**
     * Constructs an <code>ISO-10646-UCS-(2|4)</code> reader from the source
     * input stream using explicitly specified initial buffer size. Endianness
     * and exact input encoding (<code>UCS-2</code> or <code>UCS-4</code>) also
     * should be known in advance.
     *
     * @param inputStream input stream with UCS-2|4 encoded data
     * @param size        The initial byte buffer size. Only the largest whole
     *                    number of 2- or 4-byte units that fits is used by
     *                    the bulk read methods; a buffer smaller than one
     *                    unit is replaced by a one-unit buffer on first use.
     * @param encoding    One of UCS2LE, UCS2BE, UCS4LE or UCS4BE
     */
    public UCSReader(InputStream inputStream, int size, short encoding) {
        fInputStream = inputStream;
        fBuffer = new byte[size];
        fEncoding = encoding;

        fCharBuf = new char[CHAR_BUFFER_INITIAL_SIZE];
        fCharCount = 0;
    } // <init>(InputStream, int, short)

    //
    // Reader methods
    //

    /**
     * Read a single character.  This method will block until a character is
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * If a supplementary Unicode character is encountered in
     * <code>UCS-4</code> input, it will be encoded into a <code>UTF-16</code>
     * surrogate pair according to RFC 2781. The high surrogate code unit is
     * returned immediately, and the low surrogate is saved in the internal
     * buffer to be delivered by the next <code>read()</code> or
     * <code>read(char[], int, int)</code> invocation. -AK
     *
     * @return     UTF-16 code unit in the range <code>0 - 0xFFFF</code>
     *             (a BMP code point or one half of a surrogate pair), or
     *             <code>-1</code> if the end of the stream has been reached,
     *             including the case where the stream ends in the middle of
     *             a character
     *
     * @exception  IOException  when I/O error occurs or the reader is closed
     */
    public int read() throws IOException {
        ensureOpen();

        // If we got something in the char buffer, let's use it.
        if (0 != fCharCount) {
            return fCharBuf[--fCharCount] & 0xFFFF;
        }

        // End of stream must be detected BEFORE masking: a data byte 0xFF is
        // perfectly legal and must not be mistaken for -1.
        int b0 = fInputStream.read(); // 1st byte

        if (-1 == b0) {
            return -1;
        }

        int b1 = fInputStream.read(); // 2nd byte

        if (-1 == b1) {
            return -1;
        }

        b0 &= 0xff;
        b1 &= 0xff;

        if (fEncoding < 4) { // UCS-2
            return (fEncoding == UCS2BE) ? ((b0 << 8) + b1) : ((b1 << 8) + b0);
        }

        // UCS-4
        int b2 = fInputStream.read(); // 3rd byte

        if (-1 == b2) {
            return -1;
        }

        int b3 = fInputStream.read(); // 4th byte

        if (-1 == b3) {
            return -1;
        }

        b2 &= 0xff;
        b3 &= 0xff;

        int codepoint;

        if (UCS4BE == fEncoding) {
            codepoint = ((b0 << 24) + (b1 << 16) + (b2 << 8) + b3);
        } else {
            codepoint = ((b3 << 24) + (b2 << 16) + (b1 << 8) + b0);
        }

        // Without this, 0xFFFFFFFF would be returned as -1 (a false EOF).
        codepoint = sanitizeCodePoint(codepoint);

        if (!isSupplementaryCodePoint(codepoint)) {
            return codepoint;
        }

        // Encoding from UCS-4 to UTF-16 as described in RFC 2781.
        int cp1 = (codepoint - 0x10000) & 0xFFFFF;

        // Low surrogate code unit will be returned during next call.
        pushChar((char) (0xDC00 + (cp1 & 0x3FF)));

        return 0xD800 + (cp1 >>> 10);
    } // read():int

    /**
     * Read characters into a portion of an array.  This method will block
     * until some input is available, an I/O error occurs, or the end of the
     * stream is reached.
     *
     * <p>UCS-2 input is delegated to <code>readUCS2</code>. For UCS-4 input,
     * chars pending in the internal char buffer are delivered first; then as
     * many bytes as could be needed (4 per requested char, limited by the byte
     * buffer) are read from the stream and decoded to UTF-16. Because
     * supplementary characters take two chars each, more chars may be decoded
     * than fit into <code>ch</code>; the excess is kept in the internal char
     * buffer for subsequent reads. If only one slot is left and the next
     * character is supplementary, its high surrogate is delivered and the low
     * surrogate is kept, so that a call with a non-zero <code>length</code>
     * makes progress whenever the stream delivers at least one byte.</p>
     *
     * @param      ch     Destination buffer
     * @param      offset Offset at which to start storing characters
     * @param      length Maximum number of characters to read
     *
     * @return     The number of characters read, or <code>-1</code> if the
     *             end of the stream has been reached. Note that this is not
     *             a number of <code>UCS-4</code> characters read, but
     *             instead number of <code>UTF-16</code> code units. These
     *             two are equal only if there were no supplementary Unicode
     *             code points among read chars.
     *
     * @exception  IOException  If an I/O error occurs or the reader is closed
     * @exception  IndexOutOfBoundsException  If <code>offset</code> and
     *             <code>length</code> do not describe a valid range of
     *             <code>ch</code>
     */
    public int read(char[] ch, int offset, int length)
        throws IOException {
        if ((0 > offset) || (offset > ch.length) || (0 > length) || ((offset + length) > ch.length)
                || (0 > (offset + length))) {
            throw new IndexOutOfBoundsException();
        } else if (0 == length) {
            return 0;
        }

        ensureOpen();

        if (fEncoding < 4) {
            return readUCS2(ch, offset, length);
        }

        // First using chars from internal char buffer (if any).
        int charsRead = 0;

        while ((charsRead < length) && (0 != fCharCount)) {
            ch[offset + charsRead] = fCharBuf[--fCharCount];
            charsRead++;
        }

        if (charsRead == length) {
            return charsRead;
        }

        /*
         * Each output char (two for supplementary characters) will require
         * us to read 4 input bytes. As we cannot predict how many
         * supplementary chars we will encounter, we read the maximum number
         * of bytes that could be needed. The char buffer is empty here.
         */
        int byteLength = usableByteLength(length - charsRead, 2);
        int count = fInputStream.read(fBuffer, 0, byteLength);

        if (-1 == count) {
            return (0 == charsRead) ? (-1) : charsRead;
        }

        // Make sure the last code point is complete.
        count = completeLastUnit(count, 4);

        int numChars = count >> 2;

        /*
         * `i` is index of currently processed code point in `fBuffer`.
         * `charsCount` also counts chars that were taken from the internal
         * char buffer above.
         */
        int charsCount = charsRead;
        boolean lowPending = false;
        char pendingLow = 0;
        int i;

        for (i = 0; (i < numChars) && (charsCount < length); i++) {
            int codepoint = decodeUCS4(i << 2);

            if (!isSupplementaryCodePoint(codepoint)) {
                ch[offset + charsCount] = (char) codepoint;
                charsCount++;
            } else {
                int cp1 = (codepoint - 0x10000) & 0xFFFFF;

                ch[offset + charsCount] = (char) (0xD800 + (cp1 >>> 10));
                charsCount++;

                char low = (char) (0xDC00 + (cp1 & 0x3FF));

                if (charsCount < length) {
                    ch[offset + charsCount] = low;
                    charsCount++;
                } else {
                    // No room for the second half: keep it for the next read.
                    // The loop condition ends the loop right after `i++`.
                    pendingLow = low;
                    lowPending = true;
                }
            }
        } // END for

        // Storing code points that remain in `fBuffer` into the internal char
        // buffer. It is a stack, so they are pushed last-to-first, and the
        // low surrogate of a pair goes in before its high surrogate.
        for (int k = numChars - 1; k >= i; k--) {
            int codepoint = decodeUCS4(k << 2);

            if (!isSupplementaryCodePoint(codepoint)) {
                pushChar((char) codepoint);
            } else {
                int cp1 = (codepoint - 0x10000) & 0xFFFFF;

                pushChar((char) (0xDC00 + (cp1 & 0x3FF)));
                pushChar((char) (0xD800 + (cp1 >>> 10)));
            }
        } // END for

        // The split pair's low surrogate must be the very next char delivered,
        // so it goes on top of the stack.
        if (lowPending) {
            pushChar(pendingLow);
        }

        return charsCount;
    } // read(char[],int,int)

    /**
     * Read <code>UCS-2</code> characters into a portion of an array.
     * This method will block until some input is available, an I/O
     * error occurs, or the end of the stream is reached.
     * <p>
     * In original <code>UCSReader</code> this code was part of
     * <code>read(char[], int, int)</code> method, but it was removed
     * from there to reduce complexity of the latter.
     * </p>
     *
     * @param      ch      destination buffer
     * @param      offset  offset at which to start storing characters
     * @param      length  maximum number of characters to read
     *
     * @return     The number of characters read, or <code>-1</code>
     *             if the end of the stream has been reached
     *
     * @exception  IOException  If an I/O error occurs or the reader is closed
     */
    protected int readUCS2(char[] ch, int offset, int length)
        throws IOException {
        ensureOpen();

        int byteLength = usableByteLength(length, 1);
        int count = fInputStream.read(fBuffer, 0, byteLength);

        if (count == -1) {
            return -1;
        }

        // Make sure the last code unit is complete (the stream may have
        // delivered an odd number of bytes).
        count = completeLastUnit(count, 2);

        // now count is a multiple of the right number of bytes
        int numChars = count >> 1;
        int curPos = 0;

        for (int i = 0; i < numChars; i++) {
            int b0 = fBuffer[curPos++] & 0xff;
            int b1 = fBuffer[curPos++] & 0xff;

            if (fEncoding == UCS2BE) {
                ch[offset + i] = (char) ((b0 << 8) + b1);
            } else {
                ch[offset + i] = (char) ((b1 << 8) + b0);
            }
        }

        return numChars;
    } // END readUCS2(char[], int, int)

    /**
     * Skip characters.  This method will block until some characters are
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * <p>Chars pending in the internal char buffer are skipped first, one
     * UTF-16 code unit each. The remainder is skipped directly in the byte
     * stream, where each skipped 2- or 4-byte unit counts as one character
     * (so a UCS-4 supplementary character skipped in the stream counts once,
     * not twice). If the stream stops in the middle of a unit, the rest of
     * that unit is consumed so that subsequent reads stay aligned, and the
     * unit is counted as skipped.</p>
     *
     * @param  n  The number of characters to skip; non-positive values skip
     *            nothing
     *
     * @return    The number of characters actually skipped
     *
     * @exception  IOException  If an I/O error occurs or the reader is closed
     */
    public long skip(long n) throws IOException {
        ensureOpen();

        if (n <= 0) {
            return 0;
        }

        long skipped = 0;

        while ((skipped < n) && (0 != fCharCount)) {
            fCharCount--;
            skipped++;
        }

        if (skipped == n) {
            return skipped;
        }

        // Number of bits to shift a char count to get a byte count.
        int charWidth = (fEncoding >= 4) ? 2 : 1;
        int unit = 1 << charWidth;
        long remaining = n - skipped;
        long maxChars = Long.MAX_VALUE >> charWidth;

        if (remaining > maxChars) {
            remaining = maxChars; // avoid overflow in the shift below
        }

        long bytesSkipped = fInputStream.skip(remaining << charWidth);
        long charsSkipped = bytesSkipped >>> charWidth;
        int partial = (int) (bytesSkipped & (unit - 1));

        if (0 != partial) {
            for (int i = partial; i < unit; i++) {
                if (-1 == fInputStream.read()) {
                    break;
                }
            }

            charsSkipped++;
        }

        return skipped + charsSkipped;
    } // skip(long):long

    /**
     * Tell whether this stream is ready to be read.
     *
     * @return This implementation always returns false. Note that returning
     * false does not guarantee that the next read will block.
     *
     * @exception  IOException  If the reader is closed
     */
    public boolean ready() throws IOException {
        ensureOpen();

        return false;
    } // ready()

    /**
     * Tell whether this stream supports the mark() operation.
     *
     * @return whatever the underlying byte stream reports;
     *         <code>false</code> once this reader has been closed
     */
    public boolean markSupported() {
        return (null != fInputStream) && fInputStream.markSupported();
    } // markSupported()

    /**
     * Mark the present position in the stream.  Subsequent calls to
     * <code>reset</code> will attempt to reposition the stream to this point.
     * This implementation relies on marking facilities of the underlying
     * byte stream, and additionally remembers the chars pending in the
     * internal char buffer so that <code>reset</code> can restore them.
     *
     * @param  readAheadLimit  Limit on the number of characters that may be
     *                         read while still preserving the mark.  A
     *                         positive limit is converted to bytes (2 or 4
     *                         per character, capped at
     *                         <code>Integer.MAX_VALUE</code>) before it is
     *                         passed to the byte stream; other values are
     *                         passed through unchanged.
     *
     * @exception  IOException  If the reader is closed or some other I/O
     *                          error occurs
     */
    public void mark(int readAheadLimit) throws IOException {
        ensureOpen();

        int charWidth = (fEncoding >= 4) ? 2 : 1;
        int byteLimit = readAheadLimit;

        if (readAheadLimit > 0) {
            byteLimit = (readAheadLimit > (Integer.MAX_VALUE >> charWidth))
                ? Integer.MAX_VALUE : (readAheadLimit << charWidth);
        }

        fInputStream.mark(byteLimit);

        fMarkedChars = new char[fCharCount];
        System.arraycopy(fCharBuf, 0, fMarkedChars, 0, fCharCount);
    } // mark(int)

    /**
     * Reset the stream.  If the stream has been marked, then attempt to
     * reposition it at the mark.  If the stream has not been marked, then
     * attempt to reset it in some way appropriate to the underlying byte
     * stream. This implementation relies on the underlying byte stream for
     * repositioning; after that succeeds, the pending chars are set back to
     * what they were at the time of <code>mark</code> (or discarded if
     * <code>mark</code> was never called).
     *
     * @exception  IOException  If the stream has not been marked,
     *                          or if the mark has been invalidated,
     *                          or if the stream does not support reset(),
     *                          or if the reader is closed,
     *                          or if some other I/O error occurs
     */
    public void reset() throws IOException {
        ensureOpen();

        fInputStream.reset();

        // Chars decoded after the mark are stale now.
        fCharCount = 0;

        if (null != fMarkedChars) {
            for (int i = 0; i < fMarkedChars.length; i++) {
                pushChar(fMarkedChars[i]);
            }
        }
    } // reset()

    /**
     * Close the stream.  Once a stream has been closed, further
     * <code>read</code>, <code>ready</code>, <code>mark</code>,
     * or <code>reset</code> invocations will throw an IOException.
     * Closing a previously-closed stream, however, has no effect.
     *
     * @exception  IOException  If an I/O error occurs
     */
    public void close() throws IOException {
        if (null == fInputStream) {
            return;
        }

        try {
            fInputStream.close();
        } finally {
            fInputStream = null;
            fCharBuf = null;
            fBuffer = null;
            fMarkedChars = null;
            fCharCount = 0;
        }
    } // close()

    /**
     * Returns the encoding currently in use by this character stream.
     *
     * @return Encoding of this stream. Either ISO-10646-UCS-2 or
     *         ISO-10646-UCS-4. This string doesn't indicate the byte order
     *         of that encoding; use <code>getByteOrder</code> for that.
     */
    public String getEncoding() {
        return (fEncoding < 4) ? "ISO-10646-UCS-2" : "ISO-10646-UCS-4";
    }

    /**
     * Returns byte order ("endianness") of the encoding currently in use by
     * this character stream. This is a string with two possible values:
     * <code>LITTLE_ENDIAN</code> and <code>BIG_ENDIAN</code>.
     *
     * @return <code>LITTLE_ENDIAN</code> or <code>BIG_ENDIAN</code> depending
     *         on byte order of current encoding of this stream.
     */
    public String getByteOrder() {
        if ((UCS2LE == fEncoding) || (UCS4LE == fEncoding)) {
            return "LITTLE_ENDIAN";
        }

        return "BIG_ENDIAN";
    }

    /**
     * Determines whether the specified character (Unicode code point)
     * is in the supplementary character range. The method call is
     * equivalent to the expression:
     * <blockquote><pre>
     * codePoint &gt;= 0x10000 &amp;&amp; codePoint &lt;= 0x10ffff
     * </pre></blockquote>
     *
     * Mirrors JDK 1.5 <code>java.lang.Character</code> in
     * order to provide JDK 1.4 compatibility.
     *
     * @param  codePoint the character (Unicode code point) to be tested
     * @return <code>true</code> if the specified character is in the Unicode
     *         supplementary character range; <code>false</code> otherwise.
     */
    protected boolean isSupplementaryCodePoint(int codePoint) {
        return (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) && (codePoint <= MAX_CODE_POINT);
    }

    //
    // Private helpers
    //

    /** Throws an IOException if this reader has been closed. */
    private void ensureOpen() throws IOException {
        if (null == fInputStream) {
            throw new IOException("Stream closed");
        }
    }

    /** Maps values outside the Unicode code point range to U+FFFD. */
    private int sanitizeCodePoint(int codepoint) {
        if ((codepoint < MIN_CODE_POINT) || (codepoint > MAX_CODE_POINT)) {
            return REPLACEMENT_CHAR;
        }

        return codepoint;
    }

    /** Decodes the 4-byte unit starting at <code>pos</code> in the byte buffer. */
    private int decodeUCS4(int pos) {
        int b0 = fBuffer[pos] & 0xff;
        int b1 = fBuffer[pos + 1] & 0xff;
        int b2 = fBuffer[pos + 2] & 0xff;
        int b3 = fBuffer[pos + 3] & 0xff;
        int codepoint;

        if (UCS4BE == fEncoding) {
            codepoint = ((b0 << 24) + (b1 << 16) + (b2 << 8) + b3);
        } else {
            codepoint = ((b3 << 24) + (b2 << 16) + (b1 << 8) + b0);
        }

        return sanitizeCodePoint(codepoint);
    }

    /** Pushes one char onto the pending-char stack, growing it if it is full. */
    private void pushChar(char c) {
        if (fCharCount >= fCharBuf.length) {
            char[] newBuf = new char[Math.max(2, fCharBuf.length << 1)];
            System.arraycopy(fCharBuf, 0, newBuf, 0, fCharCount);
            fCharBuf = newBuf;
        }

        fCharBuf[fCharCount++] = c;
    }

    /**
     * Computes how many bytes to request from the stream for
     * <code>charsWanted</code> units of <code>1 &lt;&lt; shift</code> bytes:
     * always a whole number of units that fits into the byte buffer (which is
     * replaced by a one-unit buffer if it is too small to hold even one).
     */
    private int usableByteLength(int charsWanted, int shift) {
        int unit = 1 << shift;

        if (fBuffer.length < unit) {
            fBuffer = new byte[unit];
        }

        // Limiting the char count first keeps the shift from overflowing.
        return Math.min(charsWanted, fBuffer.length >> shift) << shift;
    }

    /**
     * Tops up the first <code>count</code> bytes of the byte buffer to a whole
     * number of <code>unit</code>-byte units by reading single bytes from the
     * stream; if the stream ends first, the missing bytes are set to zero.
     *
     * @return the new number of valid bytes, a multiple of <code>unit</code>
     */
    private int completeLastUnit(int count, int unit) throws IOException {
        int numToRead = (unit - (count & (unit - 1))) & (unit - 1);

        for (int i = 0; i < numToRead; i++) {
            int byteRead = fInputStream.read();

            if (-1 == byteRead) {
                // end of input; something likely went wrong! Pad buffer
                // with zeros.
                for (int j = i; j < numToRead; j++) {
                    fBuffer[count + j] = 0;
                }

                break;
            }

            fBuffer[count + i] = (byte) byteRead;
        }

        return count + numToRead;
    }
} // class UCSReader