Normalizer.java example

Explorer
phoneme-components-cdc-master
- src
- test
  - share
    - basis
      - gunit
        classes
        gunit
        framework
        TestCase.java
      - gunittests
        tests
        appcontext
        FocusMgmtTest.java
        FullScreenTest.java
        TestLet.java
        ixcpermission
        IxcPermissionTest.java
        volatileImage
        ImageTest.java
      - tests
        drawStringAttr
        DrawString.java
        fullScreenMode
        TestFull.java
        ixc
        AccessControlContext
        restricted
        RestrictedXlet.java
        unrestricted
        RestrictedXletContext.java
        UnrestrictedXlet.java
        Permission
        DummyRemoteObject.java
        DummyXletContext.java
        IxcRegisTest.java
        IxcRegisTest2.java
        Subclass
        A
        MyRemote.java
        XletA.java
        B
        MyRemote.java
        XletB.java
        volatileImage
        TestVolatileComponent.java
        TestVolatileGC.java
        xlets
        ComponentEvents
        TestRunner.java
        TestXlet.java
        ContentClassLoader
        ContextClsLoaderXlet.java
        RoundButton.java
        TestRunner.java
        Lifecycle1
        TestRunner.java
        TestXlet.java
        Lifecycle2
        TestRunner.java
        TestXlet.java
        Lifecycle3
        TestRunner.java
        TestXlet.java
        TopLevel
        TestXlet.java
        TwoXlets
        TestRunner.java
        TestXlet1Auto.java
        TestXlet2Auto.java
        XletArgs
        TestRunner.java
        TestXlet.java
    - cdc
      - java
        lang
        ClassLoader
        Assert.java
        package1
        Class1.java
        package3
        Class3.java
        package2
        Class2.java
        Encoding
        EncodingTest.java
        util
        Currency
        CurrencyTest.java
    - foundation
      - java
        net
        Inet6Address
        IPv6Test.java
        URI
        URITest.java
        URItoURLTest.java
    - gunit
      - classes
        gunit
        container
        AWTTestContainer.java
        framework
        BaseTestCase.java
        TestContainer.java
        TestContext.java
        TestFactory.java
        TestFilter.java
        TestResultDescription.java
        TestResultVerifier.java
        image
        RefImageNotFoundException.java
        lister
        BaseTestLister.java
        textui
        ResultVerifier.java
        TestLister.java
        TestRunner.java
        XMLTestLister.java
/*
 * 
 * @(#)Normalizer.java	1.12 06/10/10
 * 
 * Portions Copyright  2000-2008 Sun Microsystems, Inc. All Rights
 * Reserved.  Use is subject to license terms.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 only, as published by the Free Software Foundation.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is
 * included at /legal/license.txt).
 * 
 * You should have received a copy of the GNU General Public License
 * version 2 along with this work; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 * 
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 or visit www.sun.com if you need additional
 * information or have any questions.
 */


/*
 * (C) Copyright IBM Corp. 1996-2002 - All Rights Reserved
 *
 * The original version of this source code and documentation is
 * copyrighted and owned by IBM. These materials are provided
 * under terms of a License Agreement between IBM and Sun.
 * This technology is protected by multiple US and International
 * patents. This notice and attribution to IBM may not be removed.
 */

package sun.text;

import java.lang.Character;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;

/**
 * <tt>Normalizer</tt> transforms Unicode text into an equivalent composed or
 * decomposed form, allowing for easier sorting and searching of text.
 * <tt>Normalizer</tt> supports the standard normalization forms described in
 * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
 * Unicode Technical Report #15</a>.
 * <p>
 * Characters with accents or other adornments can be encoded in
 * several different ways in Unicode.  For example, take the character "&Aacute;"
 * (A-acute).   In Unicode, this can be encoded as a single character (the
 * "composed" form):
 * <pre>
 *      00C1    LATIN CAPITAL LETTER A WITH ACUTE</pre>
 * or as two separate characters (the "decomposed" form):
 * <pre>
 *      0041    LATIN CAPITAL LETTER A
 *      0301    COMBINING ACUTE ACCENT</pre>
 * <p>
 * To a user of your program, however, both of these sequences should be
 * treated as the same "user-level" character "&Aacute;".  When you are searching or
 * comparing text, you must ensure that these two sequences are treated
 * equivalently.  In addition, you must handle characters with more than one
 * accent.  Sometimes the order of a character's combining accents is
 * significant, while in other cases accent sequences in different orders are
 * really equivalent.
 * <p>
 * Similarly, the string "ffi" can be encoded as three separate letters:
 * <pre>
 *      0066    LATIN SMALL LETTER F
 *      0066    LATIN SMALL LETTER F
 *      0069    LATIN SMALL LETTER I</pre>
 * or as the single character
 * <pre>
 *      FB03    LATIN SMALL LIGATURE FFI</pre>
 * <p>
 * The ffi ligature is not a distinct semantic character, and strictly speaking
 * it shouldn't be in Unicode at all, but it was included for compatibility
 * with existing character sets that already provided it.  The Unicode standard
 * identifies such characters by giving them "compatibility" decompositions
 * into the corresponding semantic characters.  When sorting and searching, you
 * will often want to use these mappings.
 * <p>
 * <tt>Normalizer</tt> helps solve these problems by transforming text into the
 * canonical composed and decomposed forms as shown in the first example above.
 * In addition, you can have it perform compatibility decompositions so that
 * you can treat compatibility characters the same as their equivalents.
 * Finally, <tt>Normalizer</tt> rearranges accents into the proper canonical
 * order, so that you do not have to worry about accent rearrangement on your
 * own.
 * <p>
 * <tt>Normalizer</tt> adds one optional behavior, {@link #IGNORE_HANGUL},
 * that differs from
 * the standard Unicode Normalization Forms.  This option can be passed
 * to the {@link #Normalizer constructors} and to the static
 * {@link #compose compose} and {@link #decompose decompose} methods.  This
 * option, and any that are added in the future, will be turned off by default.
 * <p>
 * There are three common usage models for <tt>Normalizer</tt>.  In the first,
 * the static {@link #normalize normalize()} method is used to process an
 * entire input string at once.  Second, you can create a <tt>Normalizer</tt>
 * object and use it to iterate through the normalized form of a string by
 * calling {@link #first} and {@link #next}.  Finally, you can use the
 * {@link #setIndex setIndex()} and {@link #getIndex} methods to perform
 * random-access iteration, which is very useful for searching.
 * <p>
 * <b>Note:</b> <tt>Normalizer</tt> objects behave like iterators and have
 * methods such as <tt>setIndex</tt>, <tt>next</tt>, <tt>previous</tt>, etc.
 * You should note that while the <tt>setIndex</tt> and <tt>getIndex</tt> refer
 * to indices in the underlying <em>input</em> text being processed, the
 * <tt>next</tt> and <tt>previous</tt> methods iterate through characters
 * in the normalized <em>output</em>.  This means that there is not
 * necessarily a one-to-one correspondence between characters returned
 * by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
 * returned from <tt>setIndex</tt> and <tt>getIndex</tt>.  It is for this
 * reason that <tt>Normalizer</tt> does not implement the
 * {@link CharacterIterator} interface.
 * <p>
 * <b>Note:</b> <tt>Normalizer</tt> is currently based on version 3.0
 * of the <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>.
 * It will be updated as later versions of Unicode are released.  If you are
 * using this class on a JDK that supports an earlier version of Unicode, it
 * is possible that <tt>Normalizer</tt> may generate composed or decomposed
 * characters for which your JDK's {@link java.lang.Character} class does not
 * have any data.
 * <p>
 * @author Laura Werner, Mark Davis
 */
public final class Normalizer implements Cloneable {

    /**
     * Constant indicating that the end of the iteration has been reached.
     * This is guaranteed to have the same value as {@link CharacterIterator#DONE}.
     */
    public static final char DONE = CharacterIterator.DONE;

    // Bit flags that are OR-ed together to form the int stored in a Mode.
    // Each flag is tested by the matching Mode accessor: compat(), decomp()
    // and compose() respectively.
    private static final int COMPAT_BIT = 1;    // set by COMPOSE_COMPAT and DECOMP_COMPAT
    private static final int DECOMP_BIT = 2;    // set by DECOMP and DECOMP_COMPAT
    private static final int COMPOSE_BIT = 4;   // set by COMPOSE and COMPOSE_COMPAT

    /**
     * This class represents the mode of a {@link Normalizer}
     * object, <i>i.e.</i> the Unicode Normalization Form of the
     * text that the <tt>Normalizer</tt> produces.  <tt>Mode</tt> objects
     * are used as arguments to the {@link Normalizer#Normalizer constructors}
     * and {@link Normalizer#setMode setMode} method of <tt>Normalizer</tt>.
     * <p>
     * Clients cannot create <tt>Mode</tt> objects directly.
     * Instead, use the predefined constants {@link Normalizer#NO_OP},
     * {@link Normalizer#COMPOSE}, {@link Normalizer#COMPOSE_COMPAT},
     * {@link Normalizer#DECOMP}, and {@link Normalizer#DECOMP_COMPAT}.
     * <p>
     * @see Normalizer
     */
    public static final class Mode {
        /** Bitwise OR of the COMPAT_BIT, DECOMP_BIT and COMPOSE_BIT flags. */
        final int mode;

        /**
         * Builds a mode from its flag bits.  The constructor is not public,
         * so code outside this package has to use the predefined constants.
         *
         * @param m the flag bits describing this mode
         */
        Mode(int m) {
            this.mode = m;
        }

        /** @return <tt>true</tt> if the COMPAT_BIT flag is set in this mode */
        final boolean compat() {
            return 0 != (COMPAT_BIT & mode);
        }

        /** @return <tt>true</tt> if the COMPOSE_BIT flag is set in this mode */
        final boolean compose() {
            return 0 != (COMPOSE_BIT & mode);
        }

        /** @return <tt>true</tt> if the DECOMP_BIT flag is set in this mode */
        final boolean decomp() {
            return 0 != (DECOMP_BIT & mode);
        }
    }

    /**
     * Null operation for use with the {@link #Normalizer constructors}
     * and the static {@link #normalize normalize} method.  This value tells
     * the <tt>Normalizer</tt> to do nothing but return unprocessed characters
     * from the underlying String or CharacterIterator.  If you have code which
     * requires raw text at some times and normalized text at others, you can
     * use <tt>NO_OP</tt> for the cases where you want raw text, rather
     * than having a separate code path that bypasses <tt>Normalizer</tt>
     * altogether.
     * <p>
     * @see #setMode
     */
    public static final Mode NO_OP = new Mode(0);

    /**
     * Canonical decomposition followed by canonical composition.  Used with the
     * {@link #Normalizer constructors} and the static {@link #normalize normalize}
     * method to determine the operation to be performed.
     * <p>
     * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
     * off, this operation produces output that is in
     * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode Canonical Form</a>
     * <b>C</b>.
     * <p>
     * @see #setMode
     */
    public static final Mode COMPOSE = new Mode(COMPOSE_BIT);

    /**
     * Compatibility decomposition followed by canonical composition.
     * Used with the {@link #Normalizer constructors} and the static
     * {@link #normalize normalize} method to determine the operation to be performed.
     * <p>
     * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
     * off, this operation produces output that is in
     * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode Canonical Form</a>
     * <b>KC</b>.
     * <p>
     * @see #setMode
     */
    public static final Mode COMPOSE_COMPAT = new Mode(COMPOSE_BIT | COMPAT_BIT);

    /**
     * Canonical decomposition.  This value is passed to the
     * {@link #Normalizer constructors} and the static {@link #normalize normalize}
     * method to determine the operation to be performed.
     * <p>
     * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
     * off, this operation produces output that is in
     * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode Canonical Form</a>
     * <b>D</b>.
     * <p>
     * @see #setMode
     */
    public static final Mode DECOMP = new Mode(DECOMP_BIT);

    /**
     * Compatibility decomposition.  This value is passed to the
     * {@link #Normalizer constructors} and the static {@link #normalize normalize}
     * method to determine the operation to be performed.
     * <p>
     * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
     * off, this operation produces output that is in
     * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode Canonical Form</a>
     * <b>KD</b>.
     * <p>
     * @see #setMode
     */
    public static final Mode DECOMP_COMPAT = new Mode(DECOMP_BIT | COMPAT_BIT);

    /**
     * Option to disable Hangul/Jamo composition and decomposition.
     * This option applies to Korean text,
     * which can be represented either in the Jamo alphabet or in Hangul
     * characters, which are really just two or three Jamo combined
     * into one visual glyph.  Since Jamo takes up more storage space than
     * Hangul, applications that process only Hangul text may wish to turn
     * this option on when decomposing text.
     * <p>
     * The Unicode standard treats Hangul to Jamo conversion as a
     * canonical decomposition, so this option must be turned <b>off</b> if you
     * wish to transform strings into one of the standard
     * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
     * Unicode Normalization Forms</a>.
     * <p>
     * @see #setOption
     */
    public static final int IGNORE_HANGUL = 0x0001;

    //-------------------------------------------------------------------------
    // Constructors
    //-------------------------------------------------------------------------

    /**
     * Creates a new <tt>Normalizer</tt> object for iterating over the
     * normalized form of a given string.
     * <p>
     * @param str   The string to be normalized.  The normalization
     *              will start at the beginning of the string.
     *
     * @param mode  The normalization mode.
     */
    public Normalizer(String str, Mode mode) {
        // No optional features requested: forward to the String-based
        // overload with an options value of 0.
        this(str, mode, 0);
    }

    /**
     * Creates a new <tt>Normalizer</tt> object for iterating over the
     * normalized form of a given string.
     * <p>
     * The <tt>options</tt> parameter specifies which optional
     * <tt>Normalizer</tt> features are to be enabled for this object.
     * <p>
     * @param str   The string to be normalized.  The normalization
     *              will start at the beginning of the string.
     *
     * @param mode  The normalization mode.
     *
     * @param opt   Any optional features to be enabled.
     *              Currently the only available option is {@link #IGNORE_HANGUL}.
     *              If you want the default behavior corresponding to one of the
     *              standard Unicode Normalization Forms, use 0 for this argument.
     */
    public Normalizer(String str, Mode mode, int opt) {
        // Wrap the string in an iterator and hand it to the
        // CharacterIterator-based constructor, which does the real set-up.
        this((CharacterIterator) new StringCharacterIterator(str), mode, opt);
    }

    /**
     * Creates a new <tt>Normalizer</tt> object for iterating over the
     * normalized form of the given text.
     * <p>
     * @param iter  The input text to be normalized.  The normalization
     *              will start at the beginning of the string.
     *
     * @param mode  The normalization mode.
     *
     */
    public Normalizer(CharacterIterator iter, Mode mode) {
        // Delegates to the three-argument constructor with no options (0) enabled.
        this(iter, mode, 0);
    }

    /**
     * Creates a new <tt>Normalizer</tt> object for iterating over the
     * normalized form of the given text.
     * <p>
     * @param iter  The input text to be normalized.  The normalization
     *              will start at the beginning of the string.
     *
     * @param mode  The normalization mode.
     *
     * @param opt   Any optional features to be enabled.
     *              Currently the only available option is {@link #IGNORE_HANGUL}.
     *              If you want the default behavior corresponding to one of the
     *              standard Unicode Normalization Forms, use 0 for this argument.
     */
    public Normalizer(CharacterIterator iter, Mode mode, int opt) {
        // The iterator is stored as given; it is not copied here.
        this.text = iter;
        this.mode = mode;
        options = opt;

        // Compatibility explosions have the lower indices, so a mode without
        // the compatibility flag starts above them at DecompData.MAX_COMPAT.
        if (mode.compat()) {
            minDecomp = 0;
        } else {
            minDecomp = DecompData.MAX_COMPAT;
        }
    }

    /**
     * Clones this <tt>Normalizer</tt> object.  All properties of this
     * object are duplicated in the new object, including the cloning of any
     * {@link CharacterIterator} that was passed in to the constructor
     * or to {@link #setText(CharacterIterator) setText}.
     * However, the text storage underlying
     * the <tt>CharacterIterator</tt> is not duplicated unless the
     * iterator's <tt>clone</tt> method does so.
     */
    public Object clone() {
        Normalizer duplicate;
        try {
            duplicate = (Normalizer) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new InternalError(e.toString());
        }

        // Give the copy its own iterator so the two objects can be advanced
        // independently.
        duplicate.text = (CharacterIterator) text.clone();

        // The output buffer must be copied as well: an earlier version shared
        // it, which caused serious problems in the collation element iterator.
        if (buffer != null) {
            duplicate.buffer = new StringBuffer(buffer.toString());
        }
        return duplicate;
    }

    //-------------------------------------------------------------------------
    // Static utility methods
    //-------------------------------------------------------------------------

    /**
     * Normalizes a <tt>String</tt> using the given normalization operation.
     * <p>
     * The <tt>options</tt> parameter specifies which optional
     * <tt>Normalizer</tt> features are to be enabled for this operation.
     * Currently the only available option is {@link #IGNORE_HANGUL}.
     * If you want the default behavior corresponding to one of the standard
     * Unicode Normalization Forms, use 0 for this argument.
     * <p>
     * @param str       the input string to be normalized.
     *
     * @param mode      the normalization mode
     *
     * @param options   the optional features to be enabled.
     */
    public static String normalize(String str, Mode mode, int options) {
        // Same as the four-argument overload with addSingleQuotation switched off.
        final boolean addSingleQuotation = false;
        return normalize(str, mode, options, addSingleQuotation);
    }

    /**
     * Normalizes a <tt>String</tt> using the given normalization operation.
     * <p>
     * A composing mode is handled entirely by <tt>compose</tt>, which takes
     * care of decomposition and reordering itself.  Otherwise a decomposing
     * mode is handled by <tt>decompose</tt>.  For any other mode the input
     * string is returned as is.
     * <p>
     * @param str       the input string to be normalized.
     *
     * @param mode      the normalization mode
     *
     * @param options   the optional features to be enabled.
     *
     * @param addSingleQuotation    passed through unchanged to
     *                  <tt>decompose</tt>; it is not used when the mode
     *                  composes.
     *
     * @return          the normalized string, or <tt>str</tt> itself when the
     *                  mode neither composes nor decomposes.
     */
    public static String normalize(String str, Mode mode, int options, boolean addSingleQuotation) {
        if (!mode.compose()) {
            if (!mode.decomp()) {
                // Neither flag is set (e.g. NO_OP): nothing to do.
                return str;
            }
            return decompose(str, mode.compat(), options, addSingleQuotation);
        }
        // compose() decomposes and reorders on its own, so decompose() must
        // not be called first.
        return compose(str, mode.compat(), options);
    }

    //-------------------------------------------------------------------------
    // Compose methods
    //-------------------------------------------------------------------------

    /**
     * Compose a <tt>String</tt>.
     * <p>
     * The <tt>options</tt> parameter specifies which optional
     * <tt>Normalizer</tt> features are to be enabled for this operation.
     * Currently the only available option is {@link #IGNORE_HANGUL}.
     * If you want the default behavior corresponding
     * to Unicode Normalization Form <b>C</b> or <b>KC</b>,
     * use 0 for this argument.
     * <p>
     * @param source    the string to be composed.
     *
     * @param compat    Perform compatibility decomposition before composition.
     *                  If this argument is <tt>false</tt>, only canonical
     *                  decomposition will be performed.
     *
     * @param options   the optional features to be enabled.
     *
     * @return          the composed string.
     */
    public static String compose(String source, boolean compat, int options)
    {
        // NOTE(review): the options argument is never read anywhere in this
        // method body, so flags such as IGNORE_HANGUL appear to have no effect
        // here -- confirm whether that is intended.

        // Composed output built so far.
        StringBuffer result = new StringBuffer();
        // Pending characters (expansions, and marks queued for re-processing).
        // While explodePos != EMPTY these are consumed before any further
        // characters are taken from source.
        StringBuffer explodeBuf = new StringBuffer();

        int     explodePos = EMPTY;         // Next read position in explodeBuf, or EMPTY when it is not in use
        int     basePos = 0;                // Position of last base in output string
        int     baseIndex = 0;              // Index of last base in "actions" array
        int     classesSeenL = 0;           // Combining classes seen since last base: bits for classes 0..31
        int     classesSeenH = 0;           //  bits for classes 32..63; with classesSeenL this forms a 64-bit mask
        int     action;

        // Compatibility explosions have lower indices; skip them if necessary
        int minExplode = compat ? 0 : ComposeData.MAX_COMPAT;
        int minDecomp  = compat ? 0 : DecompData.MAX_COMPAT;

        if (DEBUG) System.out.println("minExplode = " + minExplode);

        // Keep going until the source is exhausted AND the pending buffer
        // has been drained.
        int i = 0;
        while (i < source.length() || explodePos != EMPTY) {
            // Get the next char from either the buffer or the source
            char ch;
            if (explodePos == EMPTY) {
                ch = source.charAt(i++);
            } else {
                ch = explodeBuf.charAt(explodePos++);
                // Once the last pending char has been read, mark the buffer
                // as unused and clear it for the next expansion.
                if (explodePos >= explodeBuf.length()) {
                    explodePos = EMPTY;
                    explodeBuf.setLength(0);
                }
            }

            // Get the basic info for the character
            // The lookup value packs two fields: the type in the bits selected
            // by TYPE_MASK and an index in the bits above INDEX_SHIFT.
            int charInfo = composeLookup(ch);
            int type = charInfo & ComposeData.TYPE_MASK;
            int index = charInfo >>> ComposeData.INDEX_SHIFT;

            if (DEBUG) System.out.println("Got char " + Utility.hex(ch) + 
					  ", type=" + type + ", index=" + index);

            // Examples of NON_COMPOSING_COMBINING with an index < minExplode:
            // 00A8 017F 03D2 1FBF 1FFE
            if (type == ComposeData.BASE || 
		(type == ComposeData.NON_COMPOSING_COMBINING && index < minExplode)) {

                // Start a new base: forget the combining classes seen so far
                // and remember where this base sits in the output.
                if (DEBUG) System.out.println("New base " + Utility.hex(ch) + 
					      ", type=" + type + ", index=" + index);
                classesSeenL = classesSeenH = 0;
                baseIndex = index;
                basePos = result.length();
                result.append(ch);
            }
            else if (type == ComposeData.COMBINING)
            {
                // assert(index > 0);
                int cclass = ComposeData.typeBit[index];
                // typeBit is a bit value from 0..63, indicating the class.
                // We use a bit mask of 2 32-bit ints.
                boolean seen = 0 != ((cclass < 32) ?
                    (classesSeenL & (1 << cclass)) :
                    (classesSeenH & (1 << (cclass & 31))));

                if (DEBUG) System.out.println("Class of " + Utility.hex(ch) + 
		    " = " + cclass +
                    " seen:" + seen +
                    " baseIndex:" + baseIndex +
                    " action:" + composeAction(baseIndex, index));

                // We can only combine a character with the base if we haven't
                // already seen a combining character with the same canonical class.
                // We only combine characters with an index from
                // 1..COMBINING_COUNT-1.  Indices >= COMBINING_COUNT are
                // also combining characters, but we know that they don't
                // compose with anything.
                // Note that action is only assigned when the first two
                // conditions hold, because && short-circuits.
                if (index < ComposeData.COMBINING_COUNT && !seen
                    && (action = composeAction(baseIndex, index)) > 0)
                {
                    if (action > ComposeData.MAX_COMPOSED) {
                        // Pairwise explosion.  Actions above this value are really
                        // indices into an array that in turn contains indices
                        // into the exploding string table
                        // TODO: What if there are unprocessed chars in the explode buffer?
                        if (DEBUG) System.out.println("Pairwise exploding");
                        char newBase = pairExplode(explodeBuf, action);
                        explodePos = 0;
                        result.setCharAt(basePos, newBase);

                        // Look the replacement up again so later marks are
                        // combined against the new base.
                        baseIndex = composeLookup(newBase) >>> ComposeData.INDEX_SHIFT;
                        if (DEBUG) System.out.println("New base " + Utility.hex(newBase));
                    } else {
                        // Normal pairwise combination.  Replace the base char
                        // (here the action value itself is the composed character).
                        if (DEBUG) System.out.println("Pairwise combining");
                        char newBase = (char) action;
                        result.setCharAt(basePos, newBase);

                        baseIndex = composeLookup(newBase) >>> ComposeData.INDEX_SHIFT;
                        if (DEBUG) System.out.println("New base " + Utility.hex(newBase));
                    }
                    //
                    // Since there are Unicode characters that cannot be combined in arbitrary
                    // order, we have to re-process any combining marks that go with this
                    // base character.  There are only four characters in Unicode that have
                    // this problem.  If they are fixed in Unicode 3.0, this code can go away.
                    //
                    // Everything already emitted after the base is moved back into the
                    // pending buffer and the output is truncated to end at the base.
                    int len = result.length();
                    if (len - basePos > 1) {
                        for (int j = basePos+1; j < len; j++) {
                            explodeBuf.append(result.charAt(j));
                        }
                        result.setLength(basePos+1);
                        classesSeenL = classesSeenH = 0;
                        if (explodePos == EMPTY) explodePos = 0;
                    }
                } else {
                    // No combination with this character
                    // Append it via bubbleAppend and record its class as seen
                    // in whichever half of the mask holds that bit.
                    if (DEBUG) System.out.println("No action");
                    bubbleAppend(result, ch, cclass);
                    if (cclass < 32) {
                        classesSeenL |= 1 << cclass;
                    } else {
                        classesSeenH |= 1 << (cclass & 31);
                    }
                }
            }
            else if (index > minExplode) {
                // Single exploding character
                // NOTE(review): explode() presumably appends the expansion to
                // explodeBuf, which the main loop then reads from position 0.
                explode(explodeBuf, index);
                explodePos = 0;
                if (DEBUG) System.out.println("explosion: " + Utility.hex(ch) + 
					      " --> " + Utility.hex(explodeBuf));
            }
            else if (type == ComposeData.HANGUL && minExplode == 0) {
                // If we're in compatibility mode we need to decompose Hangul to Jamo,
                // because some of the Jamo might have compatibility decompositions.
                // (minExplode == 0 is exactly the compat == true case set up above.)
                hangulToJamo(ch, explodeBuf, minDecomp);
                if (DEBUG) System.out.println("decomposed hangul " + Utility.hex(ch) + 
					      " to jamo " + Utility.hex(explodeBuf));
                explodePos = 0;
            }
            else if (type == ComposeData.INITIAL_JAMO) {
                // An initial jamo is treated as a new base, tagged with the
                // dedicated INITIAL_JAMO_INDEX so that a following medial jamo
                // can be recognized as combinable.
                classesSeenL = classesSeenH = 0;
                baseIndex = ComposeData.INITIAL_JAMO_INDEX;
                basePos = result.length();
                result.append(ch);
                if (DEBUG) System.out.println("got initial jamo " + Utility.hex(ch));
            }
            else if (type == ComposeData.MEDIAL_JAMO && classesSeenL == 0 && classesSeenH == 0
                        && baseIndex == ComposeData.INITIAL_JAMO_INDEX) {
                // If the last character was an initial jamo, we can combine it with this
                // one to create a Hangul character.
                // Only done when no combining class has been recorded since the base.
                int l = result.charAt(basePos) - JAMO_LBASE;
                int v = ch - JAMO_VBASE;
                char newCh = (char)(HANGUL_BASE + (l*JAMO_VCOUNT + v) * JAMO_TCOUNT);
                result.setCharAt(basePos, newCh);

                if (DEBUG) System.out.println("got medial jamo " + Utility.hex(ch) + 
					      ", replacing with Hangul " + Utility.hex(newCh));

                baseIndex = ComposeData.MEDIAL_JAMO_INDEX;
            }
            else if (type == ComposeData.FINAL_JAMO && classesSeenL == 0 && classesSeenH == 0
                        && baseIndex == ComposeData.MEDIAL_JAMO_INDEX) {
                // If the last character was a medial jamo that we turned into Hangul,
                // we can add this character too.
                char newCh = (char)(result.charAt(basePos) + (ch - JAMO_TBASE));
                result.setCharAt(basePos, newCh);

                if (DEBUG) System.out.println("got final jamo " + Utility.hex(ch) + 
					      ", replacing with Hangul " + Utility.hex(newCh));

                // The syllable is complete: clear the base state.
                baseIndex = 0;
                basePos = -1;
                classesSeenL = classesSeenH = 0;
            } else {
                // Anything else is appended unchanged and clears the base state.
                if (DEBUG) System.out.println("No base as of " + Utility.hex(ch));
                baseIndex = 0;
                basePos = -1;
                classesSeenL = classesSeenH = 0;
                result.append(ch);
            }
        }
        return result.toString();
    }

    /**
     * Compose starting with current input character and continuing
     * until just before the next base char.
     * <p>
     * <b>Input</b>:
     * <ul>
     *  <li>underlying char iter points to first character to compose
     * </ul>
     * <p>
     * <b>Output:</b>
     * <ul>
     *  <li>returns first char of composition or DONE if at end
     *  <li>Underlying char iter is pointing at next base char or past end
     * </ul>
     */
    private char nextCompose()
    {
        // Composes input starting at nextIndex into "buffer", stopping when a new
        // base character is read from the text (or the text is exhausted), and
        // returns the first buffered char, or DONE if nothing was buffered.
        if (DEBUG) System.out.println("--------------- top of nextCompose() ---------------");

        int     explodePos = EMPTY;         // Read position in explodeBuf; EMPTY when nothing is pending there
        int     basePos = 0;                // Position of last base in output string
        int     baseIndex = 0;              // Index of last base in "actions" array
        int     classesSeenL = 0;           // Combining classes seen since last base:
        int     classesSeenH = 0;           //  64-bit mask split in two ints (L: classes < 32, H: the rest)
        int     action;                     // Result of composeAction(), assigned inside the condition below
        char    lastBase = 0;               // Only read by the DEBUG output
        boolean chFromText = true;          // true if "ch" was read from text, false if from explodeBuf

        currentIndex =  nextIndex;
        text.setIndex(currentIndex);
        // Compatibility explosions have lower indices; skip them if necessary
        int minExplode = mode.compat() ? 0 : ComposeData.MAX_COMPAT;
        int minDecomp  = mode.compat() ? 0 : DecompData.MAX_COMPAT;

        initBuffer();
        // Reuse the explosion buffer between calls; it is created on first use.
        if (explodeBuf == null) {
            explodeBuf = new StringBuffer();
        } else {
            explodeBuf.setLength(0);
        }

        char ch = curForward();

        while (ch != DONE) {
            // Get the basic info for the character
            int charInfo = composeLookup(ch);
            int type = charInfo & ComposeData.TYPE_MASK;
            int index = charInfo >>> ComposeData.INDEX_SHIFT;

            if (type == ComposeData.BASE || 
		(type == ComposeData.NON_COMPOSING_COMBINING && index < minExplode)) {

                if (getBufferLength() > 0 && chFromText && explodePos == EMPTY) {
                    // When we hit a base char in the source text, we can return the text
                    // that's been composed so far.  We'll re-process this char next time through.
                    if (DEBUG) System.out.println("returning early because we hit a new base");
                    break;
                }
                // Start a new base: forget the combining classes seen for the previous one.
                classesSeenL = classesSeenH = 0;
                baseIndex = index;
                basePos = getBufferLength();
                buffer.append(ch);
                if (DEBUG) System.out.println("got BASE char " + Utility.hex(ch) + 
					      ", type=" + type + ", index=" + index);
                lastBase = ch;
            }
            else if (type == ComposeData.COMBINING)
            {
                // assert(index > 0);
                int cclass = ComposeData.typeBit[index];
                // Test the bit for this class in whichever half of the mask holds it.
                boolean seen = 0 != ((cclass < 32) ?
                    (classesSeenL & (1 << cclass)) :
                    (classesSeenH & (1 << (cclass & 31))));

                if (DEBUG) System.out.println("got COMBINING char " + Utility.hex(ch) + 
			  ", type=" + type + ", index=" + index
                        + ", class=" + cclass);

                // We can only combine a character with the base if we haven't
                // already seen a combining character with the same canonical class.
                // NOTE(review): after a final jamo or unknown char basePos is -1 and
                // baseIndex is 0; this relies on composeAction(0, ...) never being
                // positive, otherwise setCharAt(basePos, ...) below would throw -- confirm.
                if (index < ComposeData.COMBINING_COUNT && !seen
                    && (action = composeAction(baseIndex, index)) > 0)
                {
                    if (action > ComposeData.MAX_COMPOSED) {
                        // Pairwise explosion.  Actions above this value are really
                        // indices into an array that in turn contains indices
                        // into the exploding string table
                        // TODO: What if there are unprocessed chars in the explode buffer?
                        char newBase = pairExplode(explodeBuf, action);
                        explodePos = 0;
                        buffer.setCharAt(basePos, newBase);

                        baseIndex = composeLookup(newBase) >>> ComposeData.INDEX_SHIFT;

                        if (DEBUG) System.out.println("Pairwise explosion: " + Utility.hex(lastBase)
						      + "," + Utility.hex(ch)
						      + " --> " + Utility.hex(newBase) 
						      + "," + Utility.hex(explodeBuf));
                        lastBase = newBase;
                    } else {
                        // Normal pairwise combination.  Replace the base char
                        char newBase = (char) action;
                        buffer.setCharAt(basePos, newBase);

                        baseIndex = composeLookup(newBase) >>> ComposeData.INDEX_SHIFT;

                        if (DEBUG) System.out.println("Pairwise combination: " + Utility.hex(lastBase)
						      + "," + Utility.hex(ch)
						      + " --> " + Utility.hex(newBase));
                        lastBase = newBase;
                    }
                    //
                    // Since there are Unicode characters that cannot be combined in arbitrary
                    // order, we have to re-process any combining marks that go with this
                    // base character.  There are only four characters in Unicode that have
                    // this problem.  If they are fixed in Unicode 3.0, this code can go away.
                    //
                    int len = getBufferLength();
                    if (len - basePos > 1) {
                        if (DEBUG) System.out.println("Reprocessing combining marks");
                        // Move the marks that follow the base into explodeBuf so the main
                        // loop reads them again against the new base.
                        for (int j = basePos+1; j < len; j++) {
                            explodeBuf.append(buffer.charAt(j));
                        }
                        buffer.setLength(basePos+1);
                        classesSeenL = classesSeenH = 0;
                        if (explodePos == EMPTY) explodePos = 0;
                    }
                } else {
                    if (DEBUG) System.out.println("char doesn't combine");
                    // No combination with this character
                    bubbleAppend(buffer, ch, cclass);
                    // Record the class so later marks of the same class will not combine.
                    if (cclass < 32) {
                        classesSeenL |= 1 << cclass;
                    } else {
                        classesSeenH |= 1 << (cclass & 31);
                    }
                }
            }
            else if (index > minExplode) {
                // Single exploding character
                explode(explodeBuf, index);
                explodePos = 0;
                if (DEBUG) System.out.println("explosion: " + Utility.hex(ch) 
					      + " --> " + Utility.hex(explodeBuf));
            }
            else if (type == ComposeData.HANGUL && minExplode == 0) {
                // If we're in compatibility mode we need to decompose Hangul to Jamo,
                // because some of the Jamo might have compatibility decompositions.
                hangulToJamo(ch, explodeBuf, minDecomp);
                if (DEBUG) System.out.println("decomposed hangul " + Utility.hex(ch) 
					      + " to jamo " + Utility.hex(explodeBuf));
                explodePos = 0;
            }
            else if (type == ComposeData.INITIAL_JAMO) {
                if (getBufferLength() > 0 && chFromText && explodePos == EMPTY) {
                    // When we hit a base char in the source text, we can return the text
                    // that's been composed so far.  We'll re-process this char next time through.
                    if (DEBUG) System.out.println("returning early because we hit a new base");
                    break;
                }
                // An initial jamo acts as a base with a dedicated base index.
                classesSeenL = classesSeenH = 0;
                baseIndex = ComposeData.INITIAL_JAMO_INDEX;
                basePos = getBufferLength();
                buffer.append(ch);
                if (DEBUG) System.out.println("got initial jamo " + Utility.hex(ch));
            }
            else if (type == ComposeData.MEDIAL_JAMO && classesSeenL == 0 && classesSeenH == 0
                        && baseIndex == ComposeData.INITIAL_JAMO_INDEX) {
                // If the last character was an initial jamo, we can combine it with this
                // one to create a Hangul character.
                int l = buffer.charAt(basePos) - JAMO_LBASE;
                int v = ch - JAMO_VBASE;
                char newCh = (char)(HANGUL_BASE + (l*JAMO_VCOUNT + v) * JAMO_TCOUNT);
                buffer.setCharAt(basePos, newCh);

                if (DEBUG) System.out.println("got medial jamo " + Utility.hex(ch) 
					      + ", replacing with Hangul " + Utility.hex(newCh));

                baseIndex = ComposeData.MEDIAL_JAMO_INDEX;
            }
            else if (type == ComposeData.FINAL_JAMO && classesSeenL == 0 && classesSeenH == 0
                        && baseIndex == ComposeData.MEDIAL_JAMO_INDEX) {
                // If the last character was a medial jamo that we turned into Hangul,
                // we can add this character too.
                char newCh = (char)(buffer.charAt(basePos) + (ch - JAMO_TBASE));
                buffer.setCharAt(basePos, newCh);

                if (DEBUG) System.out.println("got final jamo " + Utility.hex(ch) 
					      + ", replacing with Hangul " + Utility.hex(newCh));

                // The syllable is complete: reset the base state.
                baseIndex = 0;
                basePos = -1;
                classesSeenL = classesSeenH = 0;
            } else {
                // TODO: deal with JAMO character types
                // Anything else is copied through unchanged and resets the base state.
                baseIndex = 0;
                basePos = -1;
                classesSeenL = classesSeenH = 0;
                buffer.append(ch);
                if (DEBUG) System.out.println("UNKNOWN char " + Utility.hex(ch));
            }

            // Fetch the next char: pending exploded chars take priority over the text.
            if (explodePos == EMPTY) {
                ch = text.next();
                chFromText = true;
            } else {
                ch = explodeBuf.charAt(explodePos++);
                if (explodePos >= explodeBuf.length()) {
                    // explodeBuf is fully consumed; go back to reading the text.
                    explodePos = EMPTY;
                    explodeBuf.setLength(0);
                }
                chFromText = false;
            }
        }
        // Report the first composed char, or DONE when the buffer is empty.
        if (getBufferLength() > 0) {
            ch = buffer.charAt(0);
        } else {
            ch = DONE;
        }
        // Remember where the next call should resume in the input text.
        nextIndex = text.getIndex();
        return ch;
    }

    /**
     * Compose the run of input text that ends just before the current
     * position, walking backward until (and including) the previous
     * base character, Hangul syllable or initial Jamo.
     * <p>
     * <b>Input</b>:
     * <ul>
     *  <li>underlying char iter points just after the last char to compose
     * </ul>
     * <p>
     * <b>Output:</b>
     * <ul>
     *  <li>returns the last char of the composed sequence, or DONE if no
     *      input char was available
     *  <li>underlying iter points to the lowest-index char that was consumed
     * </ul>
     */
    private char prevCompose() {
        if (DEBUG) System.out.println("--------------- top of prevCompose() ---------------");

        // In canonical mode the compatibility explosions (low indices) are skipped.
        final int minExplode;
        if (mode.compat()) {
            minExplode = 0;
        } else {
            minExplode = ComposeData.MAX_COMPAT;
        }

        nextIndex = currentIndex;

        initBuffer();

        // Gather chars in reverse, prepending each, until a segment starter is found.
        char ch = curBackward();
        while (ch != DONE) {
            buffer.insert(0, ch);

            final int info = composeLookup(ch);
            final int type = info & ComposeData.TYPE_MASK;
            final int index = info >>> ComposeData.INDEX_SHIFT;

            if (DEBUG) System.out.println("prevCompose got char " + Utility.hex(ch) +
                                          ", type=" + type + ", index=" + index +
                                          ", minExplode=" + minExplode);

            final boolean startsSegment =
                    type == ComposeData.BASE
                    || (type == ComposeData.NON_COMPOSING_COMBINING && index < minExplode)
                    || type == ComposeData.HANGUL
                    || type == ComposeData.INITIAL_JAMO;
            if (startsSegment) {
                break;
            }
            ch = curBackward();
        }

        if (getBufferLength() <= 0) {
            // Nothing was collected.
            ch = DONE;
        } else {
            // TODO: The performance of this is awful; add a way to compose
            // a StringBuffer in place.
            final String composed = compose(buffer.toString(), mode.compat(), options);
            if (DEBUG) System.out.println("prevCompose called compose(" + Utility.hex(buffer) +
                                          ")->" + Utility.hex(composed));

            buffer.setLength(0);
            buffer.append(composed);

            final int composedLen = getBufferLength();
            if (composedLen > 1) {
                // Backward iteration resumes from the tail of the composed text.
                bufferPos = composedLen - 1;
                ch = buffer.charAt(bufferPos);
            } else {
                ch = buffer.charAt(0);
            }
        }

        currentIndex = text.getIndex();
        if (DEBUG) System.out.println("prevCompose returning " + Utility.hex(ch));
        return ch;
    }

    /**
     * Insert <tt>ch</tt> into <tt>target</tt>, moving it backward past any
     * trailing characters whose compose class is greater than
     * <tt>cclass</tt>.  A <tt>cclass</tt> of 1 is appended at the end
     * without any bubbling.
     */
    private static void bubbleAppend(StringBuffer target, char ch, int cclass) {
        if (DEBUG) System.out.println(" bubbleAppend(" + Utility.hex(target) 
				      + ", " + Utility.hex(ch) + ", " + cclass + ")" );
        if (DEBUG) System.out.println(" getComposeClass(" + Utility.hex(ch) 
				      + ")=" + getComposeClass(ch) );
        if (DEBUG) System.out.println(" target before bubbling is : " + Utility.hex(target));

        // Slot where ch will be inserted; starts just past the end.
        int insertAt = target.length();
        if (cclass != 1) {      // 1 means combining class 0!!!
            while (insertAt > 0) {
                final char prior = target.charAt(insertAt - 1);
                final int priorClass = getComposeClass(prior);
                if (DEBUG) System.out.println("  getComposeClass(" + Utility.hex(prior)
					      + ")=" + getComposeClass(prior) );
                if (DEBUG) System.out.println(" bubbleAppend: target[" + (insertAt - 1) + "]=" 
					      + Utility.hex(prior) 
					      + " is iClass=" + priorClass);
                if (DEBUG) System.out.println(" bubbleAppend: for ch="+ Utility.hex(ch) 
					      + " class="+cclass);
                if (priorClass <= cclass) {
                    // Cannot move past this character, so the slot is final.
                    break;
                }
                insertAt--;
            }
        }

        if (DEBUG) System.out.println(" bubbleAppend inserting "+ Utility.hex(ch)
				      + " at index " + insertAt);

        target.insert(insertAt, ch);

        if (DEBUG) System.out.println(" target is : " + Utility.hex(target));
    }

    /**
     * Return the <tt>ComposeData.typeBit</tt> entry for a character whose
     * compose type is COMBINING, or 0 for every other character.
     */
    private static int getComposeClass(char ch) {
        final int info = composeLookup(ch);
        if ((info & ComposeData.TYPE_MASK) != ComposeData.COMBINING) {
            return 0;
        }
        return ComposeData.typeBit[info >>> ComposeData.INDEX_SHIFT];
    }

    /**
     * Fetch the packed compose info for <tt>ch</tt> from
     * <tt>ComposeData.lookup</tt>.  Callers split the value with
     * <tt>ComposeData.TYPE_MASK</tt> and <tt>ComposeData.INDEX_SHIFT</tt>.
     */
    static final int composeLookup(char ch) {
        final int info = ComposeData.lookup.elementAt(ch);
        return info;
    }

    /**
     * Look up the action stored in <tt>ComposeData.actions</tt> for the
     * given base index and combining index.
     */
    static final int composeAction(int baseIndex, int comIndex) {
        // The table is addressed by a single char-sized slot number.
        final char slot = (char)(baseIndex
                                 + ComposeData.MAX_BASES*comIndex);
        return ComposeData.actions.elementAt(slot);
    }

    /**
     * Append to <tt>target</tt> the characters returned by
     * <tt>ComposeData.replaceCharAt</tt> starting at <tt>index</tt>,
     * stopping at (and not appending) the first zero character.
     */
    static final void explode(StringBuffer target, int index) {
        for (int pos = index; ; pos++) {
            final char piece = ComposeData.replaceCharAt(pos);
            if (piece == 0) {
                return;
            }
            target.append(piece);
        }
    }

    /**
     * Handle a pairwise explosion: the entry selected by <tt>action</tt>
     * in <tt>ComposeData.actionIndex</tt> points at the new base character,
     * and the characters after it are appended to <tt>target</tt>.
     *
     * @return the new base character
     */
    static final char pairExplode(StringBuffer target, int action) {
        final int start = ComposeData.actionIndex[action - ComposeData.MAX_COMPOSED];
        // The tail goes to target first; the char at "start" is the new base.
        explode(target, start + 1);
        final char newBase = ComposeData.replaceCharAt(start);
        return newBase;
    }


    //-------------------------------------------------------------------------
    // Decompose methods
    //-------------------------------------------------------------------------

    /**
     * Static method to decompose a <tt>String</tt>.
     * <p>
     * The <tt>options</tt> parameter specifies which optional
     * <tt>Normalizer</tt> features are to be enabled for this operation.
     * Currently the only available option is {@link #IGNORE_HANGUL}.
     * The desired options should be OR'ed together to determine the value
     * of this argument.  If you want the default behavior corresponding
     * to Unicode Normalization Form <b>D</b> or <b>KD</b>,
     * use 0 for this argument.
     * <p>
     * This is a convenience overload that calls the four-argument
     * <tt>decompose</tt> with <tt>addSingleQuotation</tt> set to
     * <tt>false</tt>.
     * <p>
     * @param source    the string to be decomposed.
     *
     * @param compat    Perform compatibility decomposition.
     *                  If this argument is <tt>false</tt>, only canonical
     *                  decomposition will be performed.
     *
     * @param options   the optional features to be enabled, or 0.
     *
     * @return      the decomposed string.
     */
    public static String decompose(String source, boolean compat, int options)
    {
        return decompose(source, compat, options, false);
    }

    /**
     * Static method to decompose a <tt>String</tt>, optionally wrapping
     * certain decomposition results in single quotes.
     * <p>
     * Each input character is looked up in <tt>DecompData.offsets</tt>.
     * Characters with a recursive decomposition are expanded into a
     * temporary buffer whose contents are then fed back through the same
     * loop; other decompositions are appended directly to the result.
     * Hangul syllables are passed to <tt>hangulToJamo</tt> unless
     * {@link #IGNORE_HANGUL} is set, and everything else is copied as is.
     * The finished result is passed through <tt>fixCanonical</tt>.
     * <p>
     * @param source    the string to be decomposed.
     *
     * @param compat    Perform compatibility decomposition.
     *                  If this argument is <tt>false</tt>, only canonical
     *                  decomposition will be performed.
     *
     * @param options   the optional features to be enabled, or 0.
     *
     * @param addSingleQuotation  if <tt>true</tt>, a non-recursive
     *                  decomposition that is longer than one character, or
     *                  that comes from U+037E or U+1FEF, has each of its
     *                  characters in the ranges U+0009-000D, U+0020-002F,
     *                  U+003A-0040, U+005B-0060 and U+007B-007E wrapped
     *                  in single quotes.
     *
     * @return      the decomposed string.
     */
    public static String decompose(String source, boolean compat, int options, boolean addSingleQuotation)
    {
        if (DEBUG) System.out.println("--------------- top of decompose() ---------------");

        boolean hangul = (options & IGNORE_HANGUL) == 0;
        // Decomposition indices at or below minDecomp are not applied.
        int minDecomp = compat ? 0 : DecompData.MAX_COMPAT;

        StringBuffer result = new StringBuffer();
        StringBuffer buffer = null;     // Holds a recursive decomposition awaiting reprocessing
        StringBuffer tmpBuf = null;     // Scratch space, only used when addSingleQuotation is set

        // i indexes source; bufPtr indexes buffer and is -1 when buffer has nothing pending.
        int i = 0, bufPtr = -1;

        if (addSingleQuotation) {
            tmpBuf = new StringBuffer();
        }

        while (i < source.length() || bufPtr >= 0)
        {
            char ch;

            // Pending chars from a recursive decomposition are consumed before the source.
            if (bufPtr >= 0) {
                ch = buffer.charAt(bufPtr++);
                if (bufPtr == buffer.length()) {
                    bufPtr = -1;
                }
            } else {
                ch = source.charAt(i++);
            }

            int offset = DecompData.offsets.elementAt(ch);
            int index = offset & DecompData.DECOMP_MASK;

            if (DEBUG) System.out.println("decompose got " + Utility.hex(ch));

            if (index > minDecomp) {
                if ((offset & DecompData.DECOMP_RECURSE) != 0) {
                    if (DEBUG) System.out.println(" " + Utility.hex(ch) 
						  + " has RECURSIVE decomposition, index=" + index);
                    // NOTE(review): if ch itself came from buffer, the reset below
                    // discards any chars still unread in it -- presumably the data
                    // never nests recursive decompositions that way; confirm.
                    if (buffer == null) {
                        buffer = new StringBuffer();
                    } else {
                        buffer.setLength(0);
                    }
                    DecompData.doAppend(index, buffer);
                    bufPtr = 0;
                } else {
                    if (DEBUG) System.out.println(" " + Utility.hex(ch) 
						  + " has decomposition, index=" + index);
                    if (!addSingleQuotation) {
                        DecompData.doAppend(index, result);
                    } else {
                        // Decompose into scratch space first so the output can be inspected.
                        tmpBuf.setLength(0);
                        DecompData.doAppend(index, tmpBuf);
                        if ((tmpBuf.length() > 1) ||
                            (ch == 0x037e) ||	// normalized to ';'
                            (ch == 0x1fef)) {	// normalized to '`'
                            for (int j=0; j < tmpBuf.length(); j++) {
                                char c = tmpBuf.charAt(j);
                                // Quote chars in these ASCII ranges; copy the rest unchanged.
                                if ((c >= 0x0009 && c <= 0x000D) ||
                                    (c >= 0x0020 && c <= 0x002F) ||
                                    (c >= 0x003A && c <= 0x0040) ||
                                    (c >= 0x005B && c <= 0x0060) ||
                                    (c >= 0x007B && c <= 0x007E)) {
                                    result.append('\'');
                                    result.append(c);
                                    result.append('\'');
                                } else { 
                                    result.append(c);
                                }
                            }
                        } else {
                            result.append(tmpBuf);
                        }
                    }
                }
            } else if (ch >= HANGUL_BASE && ch < HANGUL_LIMIT && hangul) {
                hangulToJamo(ch, result, minDecomp);
            } else {
                result.append(ch);
            }
        }
        fixCanonical(result);
        return result.toString();
    }

    /**
     * Decompose starting with current input character and continuing
     * until just before the next base char.
     * <p>
     * <b>Input</b>:
     * <ul>
     *  <li>underlying char iter points to first character to decompose
     * </ul>
     * <p>
     * <b>Output:</b>
     * <ul>
     *  <li>returns first char of decomposition or DONE if at end
     *  <li>Underlying char iter is pointing at next base char or past end
     *  <li><tt>buffer</tt> holds the decomposed characters, and
     *      <tt>nextIndex</tt> is set to the iter's new position
     * </ul>
     */
    private char nextDecomp()
    {
        if (DEBUG) System.out.println("--------------- top of nextDecomp() ---------------");

        boolean hangul = (options & IGNORE_HANGUL) == 0;
        currentIndex =  nextIndex;
        char ch = curForward();

        int offset = DecompData.offsets.elementAt(ch);
        int index = offset & DecompData.DECOMP_MASK;

        initBuffer();

        // Slow path: the char decomposes, or it is not a base char (a combining
        // sequence may follow).
        if (index > minDecomp || DecompData.canonClass.elementAt(ch) != DecompData.BASE)
        {
            if (index > minDecomp) {
                if (DEBUG) System.out.println(" " + Utility.hex(ch) 
					      + " has decomposition, index=" + index);
                DecompData.doAppend(index, buffer);

                if ((offset & DecompData.DECOMP_RECURSE) != 0) {
                    // Need to decompose the output of this decomposition recursively.
                    for (int i = 0; i < getBufferLength(); i++) {
                        ch = buffer.charAt(i);
                        index = DecompData.offsets.elementAt(ch) & DecompData.DECOMP_MASK;

                        if (index > minDecomp) {
                            // Advance i by whatever doReplace reports, skipping the replacement.
                            i += DecompData.doReplace(index, buffer, i);
                        }
                    }
                }
            } else {
                buffer.append(ch);
            }
            boolean needToReorder = false;

            // Any other combining characters that immediately follow the decomposed
            // character must be included in the buffer too, because they're
            // conceptually part of the same logical character.
            while ((ch = text.next()) != DONE
                && DecompData.canonClass.elementAt(ch) != DecompData.BASE)
            {
                needToReorder = true;
                // Decompose any of these characters that need it - Liu
                index = DecompData.offsets.elementAt(ch) & DecompData.DECOMP_MASK;
                if (index > minDecomp) {
                    DecompData.doAppend(index, buffer);
                } else {
                    buffer.append(ch);
                }
            }

            if (getBufferLength() > 1 && needToReorder) {
                // If there is more than one combining character in the buffer,
                // put them into the canonical order.
                // But we don't need to sort if the only characters are the ones that
                // resulted from decomposing the base character.
                fixCanonical(buffer);
            }
            ch = buffer.charAt(0);
        } else {
            // Just use this character, but first advance to the next one
            // NOTE(review): if curForward() can return DONE here, DONE itself
            // is appended to the buffer below -- confirm that is intended.
            text.next();
            buffer.setLength(0);
            buffer.append(ch);
            // Do Hangul -> Jamo decomposition if necessary
            if (hangul && ch >= HANGUL_BASE && ch < HANGUL_LIMIT) {
                clearBuffer();
                hangulToJamo(ch, buffer, minDecomp);
                ch = buffer.charAt(0);
            }
        }
        // Remember where the next call should resume in the input text.
        nextIndex = text.getIndex();

        if (DEBUG) System.out.println("nextDecomp getBufferLength() "
				      + getBufferLength() + " buffer : " + buffer.toString());
        if (DEBUG) System.out.println("nextDecomp returning " + Utility.hex(ch) 
				      + ", text index=" + text.getIndex());
        return ch;
    }

    /**
     * Decompose starting with the input char just before the current position
     * and continuing backward until (and including) the previous base char.
     * <p>
     * <b>Input</b>:
     * <ul>
     *  <li>underlying char iter points just after last char to decompose
     * </ul>
     * <p>
     * <b>Output:</b>
     * <ul>
     *  <li>returns last char of resulting decomposition sequence
     *  <li>underlying iter points to lowest-index char we decomposed, i.e. the base char
     *  <li><tt>buffer</tt> holds the decomposed characters and
     *      <tt>bufferPos</tt> is the index of its last character
     * </ul>
     */
    private char prevDecomp() {
        if (DEBUG) System.out.println("--------------- top of prevDecomp() ---------------");

        boolean hangul = (options & IGNORE_HANGUL) == 0;

        nextIndex=currentIndex;

        char ch = curBackward();

        int offset = DecompData.offsets.elementAt(ch);
        int index = offset & DecompData.DECOMP_MASK;

        if (DEBUG) System.out.println("prevDecomp got input char " + Utility.hex(ch));

        initBuffer();

        // Slow path: the char decomposes, or it is not a base char and so
        // belongs to a sequence whose base lies further back.
        if (index > minDecomp || DecompData.canonClass.elementAt(ch) != DecompData.BASE)
        {
            // This method rewritten to pass conformance tests. - Liu
            // Collect all characters up to the previous base char
            while (ch != DONE) {
                buffer.insert(0, ch);
                if (DecompData.canonClass.elementAt(ch) == DecompData.BASE) break;
                ch = text.previous();
            }

            if (DEBUG) System.out.println("prevDecomp buffer: " + Utility.hex(buffer));

            // Decompose the buffer
            for (int i = 0; i < getBufferLength(); i++) {
                ch = buffer.charAt(i);
                offset = DecompData.offsets.elementAt(ch);
                index = offset & DecompData.DECOMP_MASK;

                if (index > minDecomp) {
                    // NOTE(review): the value returned by doReplace is used here as an
                    // absolute end index (loop bound and "i = j"), but as an increment
                    // in the inner loop -- confirm against DecompData.doReplace.
                    int j = DecompData.doReplace(index, buffer, i);
                    if ((offset & DecompData.DECOMP_RECURSE) != 0) {
                        // Need to decompose this recursively
                        for (; i < j; ++i) {
                            ch = buffer.charAt(i);
                            index = DecompData.offsets.elementAt(ch) & DecompData.DECOMP_MASK;
                            if (index > minDecomp) {
                                i += DecompData.doReplace(index, buffer, i);
                            }
                        }
                    }
                    i = j;
                }
            }

            if (DEBUG) System.out.println("prevDecomp buffer after decomp: " + Utility.hex(buffer));

            if (getBufferLength() > 1) {
                // If there is more than one combining character in the buffer,
                // put them into the canonical order.
                fixCanonical(buffer);
            }
            // Backward iteration hands out the buffer from its tail.
            bufferPos = getBufferLength()-1;
            ch = buffer.charAt(bufferPos);
        }
        else if (hangul && ch >= HANGUL_BASE && ch < HANGUL_LIMIT) {
            // Hangul syllable: expand to Jamo and return the last one.
            hangulToJamo(ch, buffer, minDecomp);
            bufferPos = getBufferLength()-1;
            ch = buffer.charAt(bufferPos);
        }
        else {
            // Plain character: it is its own decomposition.
            buffer.append(ch);
            bufferPos=getBufferLength()-1;
        }

        currentIndex = text.getIndex();

        if (DEBUG) System.out.println(" prevDecomp getBufferLength() "
				      + getBufferLength() + " buffer : " + buffer.toString());
        if (DEBUG) System.out.println(" prevDecomp returning '" + ch + "' " + Utility.hex(ch) 
				      + ", text index=" + text.getIndex());
        return ch;
    }

    /**
     * Return the entry of <tt>DecompData.canonClass</tt> for <tt>ch</tt>
     * as a non-negative number: negative table values have 256 added.
     */
    public static final int getClass(char ch) {
        final int raw = DecompData.canonClass.elementAt(ch);
        if (raw < 0) {
            return raw + 256;
        }
        return raw;
    }


    //-------------------------------------------------------------------------
    // CharacterIterator overrides
    //-------------------------------------------------------------------------

    /**
     * Return the current character in the normalized text.
     * <p>
     * If the output buffer still covers the current position the character
     * is read from it.  Otherwise the text at the current position is
     * normalized again, and the underlying iterator is put back to
     * <tt>currentIndex</tt> afterwards.
     */
    public char current() {
        final int available = getBufferLength();
        if (available != 0 && bufferPos < available) {
            currentChar = buffer.charAt(bufferPos);
            return currentChar;
        }

        bufferPos = 0;    // Buffer is now out of date
        // nextCompose()/nextDecomp() advance the underlying iterator, so it is
        // reset to currentIndex afterwards; admittedly not the best solution.
        if (mode.compose()) {
            currentChar = nextCompose();
            text.setIndex(currentIndex);
        } else if (mode.decomp()) {
            currentChar = nextDecomp();
            text.setIndex(currentIndex);
        } else {
            // No-op mode: the underlying iterator already holds the answer.
            currentChar = text.current();
        }
        return currentChar;
    }

    /**
     * Reset the <tt>Normalizer</tt> and return the first character of the
     * normalized text by calling {@link #next}.
     */
    public char first() {
        reset();
        final char head = next();
        return head;
    }

    /**
     * Return the last character in the normalized text.  This moves the
     * <tt>Normalizer</tt> to the end of the input text, clears the
     * output buffer, and then steps backward with {@link #previous}.
     */
    public char last() {
        // Setting the index to getEndIndex() fails in 1.1, so position on the
        // last char and flag that we are at the end instead.
        final int lastPos = text.getEndIndex() - 1;
        nextIndex = lastPos;
        currentIndex = lastPos;
        text.setIndex(lastPos);
        atEnd = true;
        currentChar = DONE;     // The current char hasn't been processed
        clearBuffer();          // The buffer is empty too
        return previous();
    }

    /**
     * Return the current character in the normalized text and advance
     * the iteration position by one.  If the end
     * of the text has already been reached, {@link #DONE} is returned.
     */
    public char next() {
        // Serve from the output buffer while it still has unread characters.
        if (buffer != null) {
            bufferPos++;
            if (bufferPos < buffer.length()) {
                currentChar = buffer.charAt(bufferPos);
                return currentChar;
            }
        }

        bufferPos = 0;    // Buffer is now out of date
        if (mode.compose()) {
            currentChar = nextCompose();
        } else if (mode.decomp()) {
            currentChar = nextDecomp();
        } else {
            // No-op mode: hand back the underlying character and step forward.
            currentChar = text.current();
            text.next();

            // Only move our own indices when a real character was consumed.
            if (currentChar != CharacterIterator.DONE) {
                nextIndex += 1;
                currentIndex = nextIndex;
            }
        }
        return currentChar;
    }

    /**
     * Return the previous character in the normalized text and decrement
     * the iteration position by one.  If the beginning
     * of the text has already been reached, {@link #DONE} is returned.
     */
    public char previous() {
        // Output characters are still available before the buffer position.
        if (bufferPos > 0) {
            bufferPos--;
            currentChar = buffer.charAt(bufferPos);
            return currentChar;
        }

        bufferPos = 0;    // Buffer is now out of date
        if (mode.compose()) {
            currentChar = prevCompose();
        } else if (mode.decomp()) {
            currentChar = prevDecomp();
        } else {
            // No-op mode: step the underlying iterator back from currentIndex.
            text.setIndex(currentIndex);
            currentChar = text.previous();
            // The indices are only moved when we are not already at index 0.
            if (currentIndex != 0) {
                nextIndex -= 1;
                currentIndex = nextIndex;
            }
        }
        return currentChar;
    }

    /**
     * Return the number of characters in the output buffer, treating a
     * buffer that has not been created yet as empty.
     */
    private int getBufferLength() {
        return (buffer == null) ? 0 : buffer.length();
    }

    /**
     * Move the iteration position to <tt>index</tt> in the input text and
     * return the normalized character found there.
     * <p>
     * @param index the desired index in the input text.
     *
     * @return      the first normalized character that is the result of iterating
     *              forward starting at the given index.
     *
     * @throws IllegalArgumentException if the given index is less than
     *          {@link #getBeginIndex} or greater than {@link #getEndIndex}.
     */
    public char setIndex(int index) {
        setIndexOnly(index);
        final char atIndex = current();
        return atIndex;
    }

    /**
     * Set the iteration position in the input text without normalizing
     * anything yet.  The output buffer is cleared and the current
     * character is marked as not processed.
     * <p>
     * @param index the desired index in the input text.
     *
     * @throws IllegalArgumentException if the underlying iterator rejects
     *          the index; in that case this object's position is unchanged.
     */
    public void setIndexOnly(int index) {
        // Let the underlying iterator range-check first, so that a rejected
        // index does not leave currentIndex/nextIndex at an invalid position.
        text.setIndex(index);
        currentIndex = nextIndex = index;
        currentChar = DONE;     // The current char hasn't been processed
        clearBuffer();          // The buffer is empty too
    }

    /**
     * Report the current position of the underlying iterator in the input
     * text.  This is useful in applications such as searching, where the
     * input position that corresponds to a given normalized output
     * character is needed.
     */
    public final int getIndex() {
        final int inputPos = text.getIndex();
        return inputPos;
    }

    /**
     * Report the index at which the input text starts, as given by the
     * underlying iterator's <tt>getBeginIndex()</tt>.
     */
    public final int getBeginIndex() {
        final int begin = text.getBeginIndex();
        return begin;
    }

    /**
     * Report the index at which the input text ends, as given by the
     * underlying iterator's <tt>getEndIndex()</tt>.
     */
    public final int getEndIndex() {
        final int end = text.getEndIndex();
        return end;
    }

    //-------------------------------------------------------------------------
    // Property access methods
    //-------------------------------------------------------------------------

    /**
     * Set the normalization mode for this object.
     * <p>
     * <b>Note:</b>If the normalization mode is changed while iterating
     * over a string, calls to {@link #next} and {@link #previous} may
     * return previously buffered characters in the old normalization mode
     * until the iteration is able to re-sync at the next base character.
     * It is safest to call {@link #setText setText()}, {@link #first},
     * {@link #last}, etc. after calling <tt>setMode</tt>.
     * <p>
     * @param newMode the new mode for this <tt>Normalizer</tt>.
     * The supported modes are:
     * <ul>
     *  <li>{@link #COMPOSE}        - Unicode canonical decomposition
     *                                  followed by canonical composition.
     *  <li>{@link #COMPOSE_COMPAT} - Unicode compatibility decomposition
     *                                  followed by canonical composition.
     *  <li>{@link #DECOMP}         - Unicode canonical decomposition
     *  <li>{@link #DECOMP_COMPAT}  - Unicode compatibility decomposition.
     *  <li>{@link #NO_OP}          - Do nothing but return characters
     *                                  from the underlying input text.
     * </ul>
     *
     * @see #getMode
     */
    public void setMode(Mode newMode) {
        mode = newMode;
        // The decomposition threshold depends only on whether the new mode
        // is a compatibility mode.
        if (newMode.compat()) {
            minDecomp = 0;
        } else {
            minDecomp = DecompData.MAX_COMPAT;
        }
    }

    /**
     * Return the basic operation performed by this <tt>Normalizer</tt>
     *
     * @see #setMode
     */
    public Mode getMode() {
        // The mode most recently installed by setMode (DECOMP by default).
        final Mode activeMode = this.mode;
        return activeMode;
    }

    /**
     * Set options that affect this <tt>Normalizer</tt>'s operation.
     * Options do not change the basic composition or decomposition operation
     * that is being performed, but they control whether
     * certain optional portions of the operation are done.
     * Currently the only available option is:
     * <p>
     * <ul>
     *   <li>{@link #IGNORE_HANGUL} - Do not decompose Hangul syllables into the Jamo alphabet
     *          and vice-versa.  This option is off by default (<i>i.e.</i> Hangul processing
     *          is enabled) since the Unicode standard specifies that Hangul to Jamo
     *          is a canonical decomposition.  For any of the standard Unicode Normalization
     *          Forms, you should leave this option off.
     * </ul>
     * <p>
     * @param   option  the option whose value is to be set.
     * @param   value   the new setting for the option.  Use <tt>true</tt> to
     *                  turn the option on and <tt>false</tt> to turn it off.
     *
     * @see #getOption
     */
    public void setOption(int option, boolean value) {
        // IGNORE_HANGUL is the only option this class understands.
        if (option != IGNORE_HANGUL) {
            throw new IllegalArgumentException("Illegal option");
        }
        // Set or clear the option's bit in the flag word.
        options = value ? (options | option) : (options & ~option);
    }

    /**
     * Determine whether an option is turned on or off.
     * <p>
     * @see #setOption
     */
    public boolean getOption(int option) {
        // An option is on when any of its bits is present in the flag word.
        final int matchingBits = options & option;
        return matchingBits != 0;
    }

    /**
     * Set the input text over which this <tt>Normalizer</tt> will iterate.
     * The iteration position will be reset to the beginning.
     * <p>
     * @param newText   The new string to be normalized.
     */
    public void setText(String newText) {
        // Wrap the string so the rest of the class can treat every input
        // uniformly as a CharacterIterator.
        final StringCharacterIterator wrapped = new StringCharacterIterator(newText);
        text = wrapped;
        reset();
    }

    /**
     * Set the input text over which this <tt>Normalizer</tt> will iterate.
     * The iteration position will be reset to the beginning.
     * <p>
     * @param newText   The new text to be normalized.
     */
    public void setText(CharacterIterator newText) {
        // The iterator is adopted as-is (no copy is made here); reset() then
        // rewinds it to its begin index.
        this.text = newText;
        this.reset();
    }


    //-------------------------------------------------------------------------
    // Private utility methods
    //-------------------------------------------------------------------------

    /**
     * Return the character at the input iterator's current position without
     * moving the iterator.  Prints a trace line when <tt>DEBUG</tt> is on.
     */
    private final char curForward() {
        final char here = text.current();
        if (DEBUG) {
            System.out.println(" curForward returning " + Utility.hex(here)
                               + ", text index=" + text.getIndex());
        }
        return here;
    }

    /**
     * Return the next character for backward iteration.  When the
     * <tt>atEnd</tt> flag is set the iterator's current character is returned
     * without moving; otherwise the iterator steps back one position first.
     * The flag is always cleared.  Prints a trace line when <tt>DEBUG</tt> is on.
     */
    private final char curBackward() {
        final char found;
        if (atEnd) {
            found = text.current();
        } else {
            found = text.previous();
        }
        atEnd = false;
        if (DEBUG) {
            System.out.println(" curBackward returning " + Utility.hex(found)
                               + ", text index=" + text.getIndex());
        }
        return found;
    }



    /**
     * Rewind the iteration to the begin index of the input text and discard
     * any buffered output.
     */
    public void reset() {
        final int begin = text.getBeginIndex();
        nextIndex = begin;
        currentIndex = begin;
        text.setIndex(begin);
        atEnd = false;
        clearBuffer();      // also rewinds bufferPos to 0
    }

    /**
     * Make sure the intermediate buffer exists and is empty, with the buffer
     * position rewound to 0.
     */
    private final void initBuffer() {
        if (buffer == null) {
            buffer = new StringBuffer(10);
        }
        // clearBuffer() empties an already-existing buffer and resets bufferPos.
        clearBuffer();
    }

    /**
     * Empty the intermediate buffer, if one has been allocated, and rewind
     * the buffer position to 0.
     */
    private final void clearBuffer() {
        if (buffer != null) {
            buffer.setLength(0);
        }
        bufferPos = 0;
    }

    /**
     * Fixes the sorting sequence of non-spacing characters according to
     * their combining class.  The algorithm is listed on p.3-11 in the
     * Unicode Standard 2.0.  The table of combining classes is on p.4-2
     * in the Unicode Standard 2.0.
     * @param result the string to fix.
     */
    private static void fixCanonical(StringBuffer result) {
        final int length = result.length();
        if (length == 0) {
            return;     // nothing to reorder in an empty string
        }

        // Walk from the end towards the front, comparing each character's
        // combining class with that of the character following it.
        int pos = length - 1;
        int classHere = getClass(result.charAt(pos));
        pos--;

        while (pos >= 0) {
            final int classAfter = classHere;
            classHere = getClass(result.charAt(pos));

            // Swaps are expected to be rare, so no effort is made to make
            // them cheap.
            final boolean outOfOrder =
                    classAfter != DecompData.BASE && classHere > classAfter;
            if (outOfOrder) {
                // Exchange the characters at pos and pos + 1.
                final char moved = result.charAt(pos);
                result.setCharAt(pos, result.charAt(pos + 1));
                result.setCharAt(pos + 1, moved);

                // Unless the pair was at the very end, step back towards the
                // end by two; the decrement below turns that into a net step
                // of one, so the moved character is re-checked against its
                // new follower.
                if (pos < length - 2) {
                    pos += 2;
                }
                // Positions changed, so re-read the class at the new pos.
                classHere = getClass(result.charAt(pos));
            }
            pos--;
        }
    }

    //-------------------------------------------------------------------------
    // Hangul / Jamo conversion utilities for internal use
    // See section 3.10 of The Unicode Standard, v 2.0.
    //

    // Package-accessible for use by ComposedCharIter
    // HANGUL_BASE is subtracted from a syllable to obtain its syllable index
    // (see hangulToJamo) and added back when composing (see jamoToHangul).
    static final char HANGUL_BASE   = 0xac00;
    // Equals HANGUL_BASE + JAMO_LCOUNT * JAMO_NCOUNT (0xac00 + 11172);
    // presumably an exclusive upper bound -- confirm against callers.
    static final char HANGUL_LIMIT  = 0xd7a4;

    // Base code points from which leading-consonant, vowel and
    // trailing-consonant offsets are measured.
    private static final char JAMO_LBASE    = 0x1100;
    private static final char JAMO_VBASE    = 0x1161;
    // A trailing value equal to JAMO_TBASE itself (offset 0) is treated by
    // hangulToJamo as "no trailing consonant".
    private static final char JAMO_TBASE    = 0x11a7;
    // Number of distinct offsets in each of the three positions.
    private static final int  JAMO_LCOUNT   = 19;
    private static final int  JAMO_VCOUNT   = 21;
    private static final int  JAMO_TCOUNT   = 28;
    // Number of syllable indices per leading consonant (vowels x trailing slots).
    private static final int  JAMO_NCOUNT   = JAMO_VCOUNT * JAMO_TCOUNT;

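    /**
     * Convert a single Hangul syllable into one or more Jamo characters.
     *
     * @param ch          the Hangul syllable to decompose.
     * @param result      the buffer to which the resulting characters are appended.
     * @param decompLimit the decomposition-offset threshold passed on to
     *                    <tt>jamoAppend</tt> for each Jamo.
     *
     * @return the sum of the lengths reported by <tt>jamoAppend</tt> for the
     *         Jamo that were appended.
     */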
    static int hangulToJamo(char ch, StringBuffer result, int decompLimit) {
        // Split the syllable index into its leading / vowel / trailing parts.
        final char syllableIndex = (char)(ch - HANGUL_BASE);
        final char lead  = (char)(JAMO_LBASE + syllableIndex / JAMO_NCOUNT);
        final char vowel = (char)(JAMO_VBASE
                                  + (syllableIndex % JAMO_NCOUNT) / JAMO_TCOUNT);
        final char trail = (char)(JAMO_TBASE + (syllableIndex % JAMO_TCOUNT));

        int appended = jamoAppend(lead, decompLimit, result);
        appended += jamoAppend(vowel, decompLimit, result);
        // A trailing part equal to the base means the syllable has no
        // trailing consonant, so nothing is appended for it.
        if (trail != JAMO_TBASE) {
            appended += jamoAppend(trail, decompLimit, result);
        }
        return appended;
    }

    /**
     * Append one Jamo to <tt>dest</tt>.  If the decomposition offset looked
     * up for <tt>ch</tt> is greater than <tt>limit</tt>, the appending is
     * delegated to <tt>DecompData.doAppend</tt> and its result is returned;
     * otherwise <tt>ch</tt> itself is appended and 1 is returned.
     */
    static final int jamoAppend(char ch, int limit, StringBuffer dest) {
        int offset = DecompData.offsets.elementAt(ch);
        if (offset <= limit) {
            // Offset is not above the threshold: keep the character as it is.
            dest.append(ch);
            return 1;
        }
        return DecompData.doAppend(offset, dest);
    }

    /**
     * Compose conjoining Jamo sequences in <tt>buffer</tt> into Hangul
     * syllables, in place.  Every leading-consonant + vowel pair, optionally
     * followed by a trailing consonant, is replaced by the single
     * corresponding syllable; all other characters are kept unchanged.  The
     * buffer is shortened to the composed length.
     *
     * @param buffer the text to compose; modified in place.
     * @param start  the index at which composition begins.  Characters before
     *               <tt>start</tt> are left untouched.  If <tt>start</tt> is
     *               at or beyond the end of the buffer, the buffer is not
     *               modified.
     */
    static private void jamoToHangul(StringBuffer buffer, int start) {
        // Nothing to compose; returning here also keeps setLength() below
        // from truncating or zero-padding the buffer.
        if (start >= buffer.length()) {
            return;
        }

        // The write cursor starts where reading starts, so that the prefix
        // [0, start) is preserved rather than overwritten.
        int out = start;
        // Last index that can still be the first character of a pair.
        int limit = buffer.length() - 1;

        int in, l, v, t;

        for (in = start; in < limit; in++) {
            char ch = buffer.charAt(in);

            if ((l = ch - JAMO_LBASE) >= 0 && l < JAMO_LCOUNT
                    && (v = buffer.charAt(in+1) - JAMO_VBASE) >= 0 && v < JAMO_VCOUNT) {
                //
                // We've found a leading consonant + vowel pair to compose.
                // Consume the vowel and see if there's also a trailing char.
                //
                in++;

                t = (in < limit) ? buffer.charAt(in+1) - JAMO_TBASE : 0;

                // Offset 0 (JAMO_TBASE itself) is not a trailing consonant:
                // only offsets strictly between 0 and JAMO_TCOUNT compose.
                // Accepting 0 would silently swallow that character.
                if (t > 0 && t < JAMO_TCOUNT) {
                    in++;   // Consume the trailing consonant too
                } else {
                    t = 0;  // No trailing consonant
                }
                buffer.setCharAt(out++, (char)((l*JAMO_VCOUNT + v) * JAMO_TCOUNT
                                               + t + HANGUL_BASE));
            } else {
                buffer.setCharAt(out++, ch);
            }
        }
        // Copy whatever is left (at most the final character) unchanged.
        while (in < buffer.length()) {
            buffer.setCharAt(out++, buffer.charAt(in++));
        }

        buffer.setLength(out);
    }


    //-------------------------------------------------------------------------
    // Private data
    //-------------------------------------------------------------------------

    // Compile-time switch for trace output (e.g. in curForward()/curBackward()).
    private static final boolean DEBUG = false;

    // Current normalization mode (see setMode) and option bit flags (see setOption).
    private Mode                mode = DECOMP;
    private int                 options = 0;
    // Derived from the mode in setMode(): 0 for compatibility modes,
    // DecompData.MAX_COMPAT otherwise.
    private transient int       minDecomp;
    // Both are set to the same input position by reset() and setIndexOnly().
    private int                 currentIndex = 0;
    private int                 nextIndex = 0;
    // The input text and our position in it
    private CharacterIterator   text;
    // When true, curBackward() returns text.current() instead of stepping
    // back first; curBackward() and reset() clear it.
    private boolean             atEnd = false;

    // A buffer for holding intermediate results
    // (allocated lazily by initBuffer(); bufferPos is rewound by clearBuffer())
    private StringBuffer        buffer = null;
    private int                 bufferPos = 0;

    // Set to DONE by setIndexOnly() to mark that the character at the new
    // position has not been processed yet.
    private char                currentChar;

    // Another buffer for use during iterative composition
    private static final int    EMPTY = -1;
    private StringBuffer        explodeBuf = null;

    // These must agree with the constants used in NormalizerBuilder
    static final int STR_INDEX_SHIFT = 2;
    static final int STR_LENGTH_MASK = 0x0003;
}