TestPNamePerf.java example

Explorer

aalto-xml-master
- src
  - main
    - java
      - com
        fasterxml
        aalto
        AsyncByteArrayFeeder.java
        AsyncByteBufferFeeder.java
        AsyncInputFeeder.java
        AsyncXMLInputFactory.java
        AsyncXMLStreamReader.java
        UncheckedStreamException.java
        ValidationException.java
        WFCException.java
        async
        AsyncByteArrayScanner.java
        AsyncByteBufferScanner.java
        AsyncByteScanner.java
        AsyncStreamReaderImpl.java
        package-info.java
        dom
        BijectiveNsMap.java
        DOMOutputElement.java
        DOMReaderImpl.java
        DOMWriterImpl.java
        OutputElementBase.java
        evt
        EventAllocatorImpl.java
        EventReaderImpl.java
        IncompleteEvent.java
        impl
        CommonConfig.java
        ErrorConsts.java
        IoStreamException.java
        LocationImpl.java
        StreamExceptionBase.java
        in
        AttributeCollector.java
        ByteBasedPName.java
        ByteBasedPNameFactory.java
        ByteBasedPNameTable.java
        ByteBasedScanner.java
        ByteSourceBootstrapper.java
        CharBasedPNameTable.java
        CharSourceBootstrapper.java
        ElementScope.java
        EntityNames.java
        FixedNsContext.java
        InputBootstrapper.java
        InputCharTypes.java
        MergedStream.java
        NsBinding.java
        NsDeclaration.java
        PName.java
        PName1.java
        PName2.java
        PName3.java
        PNameC.java
        PNameN.java
        ReaderConfig.java
        ReaderScanner.java
        StreamScanner.java
        Utf32Reader.java
        Utf8Scanner.java
        XmlScanner.java
        io
        UTF8Writer.java
        out
        AsciiXmlWriter.java
        ByteWName.java
        ByteXmlWriter.java
        CharWName.java
        CharXmlWriter.java
        Latin1XmlWriter.java
        NonRepairingStreamWriter.java
        NsBinder.java
        OutputCharTypes.java
        OutputElement.java
        RepairingStreamWriter.java
        SingleByteXmlWriter.java
        StreamWriterBase.java
        Utf8XmlWriter.java
        WName.java
        WNameFactory.java
        WNameTable.java
        WriterConfig.java
        XmlWriter.java
        package-info.java
        sax
        SAXFeature.java
        SAXParserFactoryImpl.java
        SAXParserImpl.java
        SAXProperty.java
        SAXUtil.java
        stax
        EventFactoryImpl.java
        InputFactoryImpl.java
        OutputFactoryImpl.java
        StreamReaderImpl.java
        util
        BufferRecycler.java
        CharsetNames.java
        DataUtil.java
        EmptyIterator.java
        IllegalCharHandler.java
        NameTable.java
        SingletonIterator.java
        TextAccumulator.java
        TextBuilder.java
        TextUtil.java
        URLUtil.java
        UriCanonicalizer.java
        XmlCharTypes.java
        XmlChars.java
        XmlConsts.java
        XmlNames.java
      - test
        BasePerfTest.java
        RunStreamWriter.java
        TestAsyncReader.java
        TestBase64Reader.java
        TestEventReader.java
        TestLineReader.java
        TestNameHashing.java
        TestNameTable.java
        TestPNamePerf.java
        TestRawStream.java
        TestSaxReader.java
        TestScannerPerf.java
        TestStreamCopier.java
        TestStreamReader.java
        TestTypedSpeed.java
        TestUTF8.java
  - test
    - java

package test;

import java.io.*;
import javax.xml.stream.*;


import com.fasterxml.aalto.in.*;
import com.fasterxml.aalto.util.*;

/**
 * Manual micro-benchmark that compares three ways of parsing names out of
 * an in-memory byte buffer and resolving them through a
 * {@link ByteBasedPNameTable}: the baseline loop ({@code parsePName}) and two
 * partially unrolled variants ({@code parsePNameNew}, {@code parsePNameNew2}).
 * <p>
 * Run via {@code main} with a single argument naming the input file.
 */
public final class TestPNamePerf
{
    /** Lowest byte value the scan loops treat as a possible first byte of a name. */
    final static int INT_A = 'A';

    /** Number of full passes over the input buffer made per timed round. */
    final int mRepCount;

    /** Not referenced by any code in this class. */
    int mTmpChar = 0;

    /** The input bytes being scanned (the array passed to the constructor). */
    final byte[] mInputBuffer;

    /** Symbol table used to look up and register parsed names. */
    final ByteBasedPNameTable mSymbols;

    /** Character type tables; {@code addPName} reads its {@code NAME_CHARS} array. */
    final XmlCharTypes mCharTypes;

    /** Index into {@link #mInputBuffer} of the next byte to read. */
    int mInputPtr;

    /** Number of valid bytes in {@link #mInputBuffer}; set to the array length. */
    int mInputLen;

    /**
     * Scratch buffer collecting the quads (4 name bytes packed into one int)
     * of the name being parsed; replaced by a larger array when it fills up.
     */
    protected int[] mQuadBuffer = new int[64];

    /** Scratch buffer for the decoded characters of a name; grown on demand. */
    protected char[] mNameBuffer = new char[100];

    /**
     * Prepares a benchmark run over the given in-memory input.
     *
     * @param data bytes to scan for names; the array is kept by reference
     * @param repCount number of full passes over {@code data} per timed round
     */
    public TestPNamePerf(byte[] data, int repCount)
    {
        // Input state first; nothing below depends on it
        mRepCount = repCount;
        mInputBuffer = data;
        mInputLen = data.length;

        // Symbol table and char-type tables come from a UTF-8 reader configuration
        final ReaderConfig config = new ReaderConfig();
        config.setActualEncoding(CharsetNames.CS_UTF8);
        mSymbols = config.getBBSymbols();
        mCharTypes = config.getCharTypes();
    }

    /**
     * Runs timing rounds until the current thread is interrupted (or the JVM
     * is terminated).
     * <p>
     * Each round times one full set of passes over the input, prints the
     * elapsed wall-clock milliseconds together with the accumulated total,
     * then sleeps, requests a garbage collection and sleeps again before the
     * next round (presumably so that garbage from one round does not distort
     * the next measurement).
     * <p>
     * Variant selection is currently pinned to the "[New/2]" code path.
     *
     * @throws IOException declared by the per-variant drivers
     * @throws XMLStreamException declared by the name parsing methods
     */
    public void test()
        throws IOException, XMLStreamException
    {
        final int TYPES = 3;

        for (int round = 0; true; ++round) {
            String msg = "[null]";
            int total = 0;

            long now = System.currentTimeMillis();
            // Pinned to variant 0; switch on (round % TYPES) to rotate through all three
            switch (0) {
            case 1:
                msg = "[Regular]";
                total = testRegularA();
                break;
            case 2:
                msg = "[New]";
                total = testNewA();
                break;
            case 0:
                msg = "[New/2]";
                total = testNew2A();
                break;
            default:
                throw new Error("Unexpected round, #"+round);
            }

            now = System.currentTimeMillis() - now;
            System.out.println(msg+" -> "+now+" msecs (total "+total+")");

            // Blank line after every full cycle of variants
            if ((round % TYPES) == 0) {
                System.out.println();
            }

            try {
                Thread.sleep(200L);
                System.gc();
                Thread.sleep(200L);
            } catch (InterruptedException e) {
                // Interruption is the only clean way to stop this endless loop:
                // restore the flag for the caller and stop measuring
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Makes {@code mRepCount} passes over the whole buffer using
     * {@link #testRegular()} and returns the sum of the per-pass results.
     */
    private int testRegularA()
        throws IOException, XMLStreamException
    {
        int sum = 0;
        int passesLeft = mRepCount;
        while (passesLeft-- > 0) {
            mInputPtr = 0; // rewind: every pass starts at the beginning of the buffer
            sum += testRegular();
        }
        return sum;
    }
    /**
     * Makes {@code mRepCount} passes over the whole buffer using
     * {@link #testNew()} and returns the sum of the per-pass results.
     */
    private int testNewA()
        throws IOException, XMLStreamException
    {
        int sum = 0;
        int passesLeft = mRepCount;
        while (passesLeft-- > 0) {
            mInputPtr = 0; // rewind: every pass starts at the beginning of the buffer
            sum += testNew();
        }
        return sum;
    }
    /**
     * Makes {@code mRepCount} passes over the whole buffer using
     * {@link #testNew2()} and returns the sum of the per-pass results.
     */
    private int testNew2A()
        throws IOException, XMLStreamException
    {
        int sum = 0;
        int passesLeft = mRepCount;
        while (passesLeft-- > 0) {
            mInputPtr = 0; // rewind: every pass starts at the beginning of the buffer
            sum += testNew2();
        }
        return sum;
    }

    /**
     * Makes one pass over the buffer from the current {@code mInputPtr},
     * parsing every name with {@link #parsePName(byte)}.
     *
     * @return sum of {@code sizeInQuads()} over all names found, with the size
     *   of the last name added once more (kept as-is so that printed totals
     *   stay comparable between runs); just the sum (zero) when the input
     *   contains no name at all
     */
    private int testRegular()
        throws IOException, XMLStreamException
    {
        ByteBasedPName name = null;
        int count = 0;

        while (mInputPtr < mInputLen) {
            byte b = mInputBuffer[mInputPtr++];
            int ch = (int) b & 0xFF;
            /* We'll skip all intervening chars that can't start a name,
             * including white space
             */
            if (ch >= INT_A) {
                name = parsePName(b);
                count += name.sizeInQuads();
            }
        }
        // Empty input, or input with no byte >= 'A': no name was ever parsed
        if (name == null) {
            return count;
        }
        return count + name.sizeInQuads();
    }

    /**
     * Makes one pass over the buffer from the current {@code mInputPtr},
     * parsing every name with {@link #parsePNameNew(byte)}.
     *
     * @return sum of {@code sizeInQuads()} over all names found, with the size
     *   of the last name added once more (kept as-is so that printed totals
     *   stay comparable between runs); just the sum (zero) when the input
     *   contains no name at all
     */
    private int testNew()
        throws IOException, XMLStreamException
    {
        ByteBasedPName name = null;
        int count = 0;

        while (mInputPtr < mInputLen) {
            byte b = mInputBuffer[mInputPtr++];
            int ch = (int) b & 0xFF;
            /* We'll skip all intervening chars that can't start a name,
             * including white space
             */
            if (ch >= INT_A) {
                name = parsePNameNew(b);
                count += name.sizeInQuads();
            }
        }
        // Empty input, or input with no byte >= 'A': no name was ever parsed
        if (name == null) {
            return count;
        }
        return count + name.sizeInQuads();
    }

    /**
     * Makes one pass over the buffer from the current {@code mInputPtr},
     * parsing every name with {@link #parsePNameNew2(byte)}.
     *
     * @return sum of {@code sizeInQuads()} over all names found, with the size
     *   of the last name added once more (kept as-is so that printed totals
     *   stay comparable between runs); just the sum (zero) when the input
     *   contains no name at all
     */
    private int testNew2()
        throws IOException, XMLStreamException
    {
        ByteBasedPName name = null;
        int count = 0;

        while (mInputPtr < mInputLen) {
            byte b = mInputBuffer[mInputPtr++];
            int ch = (int) b & 0xFF;
            /* We'll skip all intervening chars that can't start a name,
             * including white space
             */
            if (ch >= INT_A) {
                name = parsePNameNew2(b);
                count += name.sizeInQuads();
            }
        }
        // Empty input, or input with no byte >= 'A': no name was ever parsed
        if (name == null) {
            return count;
        }
        return count + name.sizeInQuads();
    }

    /**
     * Baseline name parser: packs name bytes four at a time into ints
     * ("quads") and hands them to the 5-argument {@code findPName} once the
     * name ends.
     * <p>
     * The name ends at the first byte below 65 ('A') that is not '-', '.',
     * a digit or ':'. Bytes of 65 and above are all accepted here; proper
     * validation happens in {@code addPName} when a name is first registered.
     * <p>
     * The first completed quad is kept in a local; the scratch array is only
     * written from the second completed quad on.
     * <p>
     * Note that in this benchmark {@code loadMoreGuaranteed()} and
     * {@code loadOne()} always throw, so a name that runs up to the very end
     * of the buffer ends the run with an {@code IllegalStateException}.
     *
     * @param b first byte of the name, already consumed from the buffer
     * @return the symbol for the parsed name
     * @throws XMLStreamException declared by the lookup/registration methods
     */
    protected ByteBasedPName parsePName(byte b)
        throws XMLStreamException
    {
        int q = b & 0xFF;

        if (q < INT_A) { // lowest acceptable start char, except for ':' that would be allowed in non-ns mode
            reportError("; expected a name start character");
        }

        int[] quads = mQuadBuffer;
        int qix = 0; // number of completed quads so far
        int firstQuad = 0;

        while (true) {
            // Second byte of the current quad
            if (mInputPtr >= mInputLen) {
                loadMoreGuaranteed();
            }
            int i2 = mInputBuffer[mInputPtr++] & 0xFF;
            /* For other bytes beyond first we have to do bit more complicated
             * check, to reliably find out where name ends. Still can do quite
             * simple checks though
             */
            if (i2 < 65) {
                // Ok; "-" (45), "." (46) and "0"-"9"/":" (48 - 57/58) still name chars; "/" (47) is not
                if (i2 < 45 || i2 > 58 || i2 == 47) {
                    // End of name; current quad holds a single byte
                    return findPName(q, 1, firstQuad, qix, quads);
                }
            }
            // 3rd byte:
            q = (q << 8) | i2;
            i2 = (int) ((mInputPtr < mInputLen) ? mInputBuffer[mInputPtr++] : loadOne()) & 0xFF;
            if (i2 < 65) {
                if (i2 < 45 || i2 > 58 || i2 == 47) { // current quad holds 2 bytes
                    return findPName(q, 2, firstQuad, qix, quads);
                }
            }
            // 4th byte:
            q = (q << 8) | i2;
            i2 = (int) ((mInputPtr < mInputLen) ? mInputBuffer[mInputPtr++] : loadOne()) & 0xFF;
            if (i2 < 65) {
                if (i2 < 45 || i2 > 58 || i2 == 47) { // current quad holds 3 bytes
                    return findPName(q, 3, firstQuad, qix, quads);
                }
            }
            // Quad is now full; next byte read is the first byte of the following quad
            q = (q << 8) | i2;
            i2 = (int) ((mInputPtr < mInputLen) ? mInputBuffer[mInputPtr++] : loadOne()) & 0xFF;
            if (i2 < 65) {
                if (i2 < 45 || i2 > 58 || i2 == 47) { // current quad holds all 4 bytes
                    return findPName(q, 4, firstQuad, qix, quads);
                }
            }
            if (qix == 0) { // not yet, was the first quad
                firstQuad = q;
            } else if (qix == 1) { // second quad, need to init buffer
                quads[0] = firstQuad;
                quads[1] = q;
            } else { // 3rd or after... need to make sure there's room
                if (qix >= quads.length) { // let's just double?
                    mQuadBuffer = quads = DataUtil.growArrayBy(quads, quads.length);
                }
                quads[qix] = q;
            }
            ++qix;
            q = i2; // start the next quad with the byte just read
        }
    }

    /**
     * Unrolled variant of {@link #parsePName(byte)}: when at least 8 more
     * bytes are available in the buffer, the first two quads are read with
     * straight-line code and no per-byte bounds checks. With fewer bytes
     * available it simply delegates to {@code parsePName}.
     * <p>
     * Names of up to 8 bytes are resolved here via the 1- and 2-quad
     * {@code findPName} overloads; longer names continue in
     * {@link #parsePNameNewLong(int, int[])}.
     *
     * @param b first byte of the name, already consumed from the buffer
     * @return the symbol for the parsed name
     * @throws XMLStreamException declared by the lookup/registration methods
     */
    protected ByteBasedPName parsePNameNew(byte b)
        throws XMLStreamException
    {
        // First: can we optimize out bounds checks?
        if ((mInputLen - mInputPtr) < 8) { // got 1 byte, but need 7, plus one trailing
            return parsePName(b);
        }

        int q1 = b & 0xFF;

        if (q1 < INT_A) { // lowest acceptable start char, except for ':' that would be allowed in non-ns mode
            reportError("; expected a name start character");
        }

        // If so, can also unroll loops nicely
        int i2 = mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            // Ok; "-" (45), "." (46) and "0"-"9"/":" (48 - 57/58) still name chars; "/" (47) is not
            if (i2 < 45 || i2 > 58 || i2 == 47) {
                return findPName(q1, 1);
            }
        }
        q1 = (q1 << 8) | i2;
        i2 = (int) mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            if (i2 < 45 || i2 > 58 || i2 == 47) { // 2 (ascii) char name?
                return findPName(q1, 2);
            }
        }
        q1 = (q1 << 8) | i2;
        i2 = (int) mInputBuffer[mInputPtr++] & 0xFF;
       if (i2 < 65) {
            if (i2 < 45 || i2 > 58 || i2 == 47) { // 3 (ascii) char name?
                return findPName(q1, 3);
            }
        }
        q1 = (q1 << 8) | i2;
        i2 = (int) mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            if (i2 < 45 || i2 > 58 || i2 == 47) { // 4 (ascii) char name?
                return findPName(q1, 4);
            }
        }

        // Ok, so far so good; one quad, one byte. Then the second
        int q2 = i2;
        i2 = mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            // Ok; "-" (45), "." (46) and "0"-"9"/":" (48 - 57/58) still name chars; "/" (47) is not
            if (i2 < 45 || i2 > 58 || i2 == 47) {
                return findPName(q1, q2, 1);
            }
        }

        q2 = (q2 << 8) | i2;
        i2 = (int) mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            if (i2 < 45 || i2 > 58 || i2 == 47) { // second quad holds 2 bytes
                return findPName(q1, q2, 2);
            }
        }
        q2 = (q2 << 8) | i2;
        i2 = (int) mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            if (i2 < 45 || i2 > 58 || i2 == 47) { // second quad holds 3 bytes
                return findPName(q1, q2, 3);
            }
        }
        q2 = (q2 << 8) | i2;
        i2 = (int) mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            if (i2 < 45 || i2 > 58 || i2 == 47) { // second quad holds all 4 bytes
                return findPName(q1, q2, 4);
            }
        }

        // Ok, no, longer loop. Let's offline
        // (i2 is the first byte of the third quad; the first two go into the scratch array)
        int[] quads = mQuadBuffer;
        quads[0] = q1;
        quads[1] = q2;
        return parsePNameNewLong(i2, quads);
    }

    /**
     * Second unrolled variant: like {@link #parsePNameNew(byte)}, but only
     * the first quad is handled inline; anything longer than 4 bytes is
     * moved out of line into {@link #parsePNameNewMedium(int, int)}.
     * <p>
     * Requires at least 8 more bytes in the buffer to take the unchecked
     * path; otherwise delegates to {@link #parsePName(byte)}.
     *
     * @param b first byte of the name, already consumed from the buffer
     * @return the symbol for the parsed name
     * @throws XMLStreamException declared by the lookup/registration methods
     */
    protected ByteBasedPName parsePNameNew2(byte b)
        throws XMLStreamException
    {
        // First: can we optimize out bounds checks?
        if ((mInputLen - mInputPtr) < 8) { // got 1 byte, but need 7, plus one trailing
            return parsePName(b);
        }

        int q1 = b & 0xFF;
        if (q1 < INT_A) { // lowest acceptable start char, except for ':' that would be allowed in non-ns mode
            reportError("; expected a name start character");
        }

        // If so, can also unroll loops nicely
        int i2 = mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            // Ok; "-" (45), "." (46) and "0"-"9"/":" (48 - 57/58) still name chars; "/" (47) is not
            if (i2 < 45 || i2 > 58 || i2 == 47) {
                return findPName(q1, 1);
            }
        }
        q1 = (q1 << 8) | i2;
        i2 = (int) mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            if (i2 < 45 || i2 > 58 || i2 == 47) { // 2 (ascii) char name?
                return findPName(q1, 2);
            }
        }
        q1 = (q1 << 8) | i2;
        i2 = (int) mInputBuffer[mInputPtr++] & 0xFF;
       if (i2 < 65) {
            if (i2 < 45 || i2 > 58 || i2 == 47) { // 3 (ascii) char name?
                return findPName(q1, 3);
            }
        }
        q1 = (q1 << 8) | i2;
        i2 = (int) mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            if (i2 < 45 || i2 > 58 || i2 == 47) { // 4 (ascii) char name?
                return findPName(q1, 4);
            }
        }

        // Longer, let's offline:
        // (i2 is the first byte of the second quad)
        return parsePNameNewMedium(i2, q1);
    }

    /**
     * Out-of-line continuation of {@link #parsePNameNew2(byte)} for names
     * longer than 4 bytes: reads the rest of the second quad and resolves
     * names of 5 to 8 bytes; longer names continue in
     * {@link #parsePNameNewLong(int, int[])}.
     * <p>
     * No bounds checks are done here: it reads up to 4 more bytes and relies
     * on the caller having verified that enough bytes are available.
     *
     * @param i2 first byte of the second quad (already consumed)
     * @param q1 the complete first quad
     * @return the symbol for the parsed name
     * @throws XMLStreamException declared by the lookup/registration methods
     */
    protected ByteBasedPName parsePNameNewMedium(int i2, int q1)
        throws XMLStreamException
    {
        // Ok, so far so good; one quad, one byte. Then the second
        int q2 = i2;
        i2 = mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            // Ok; "-" (45), "." (46) and "0"-"9"/":" (48 - 57/58) still name chars; "/" (47) is not
            if (i2 < 45 || i2 > 58 || i2 == 47) {
                return findPName(q1, q2, 1);
            }
        }

        q2 = (q2 << 8) | i2;
        i2 = (int) mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            if (i2 < 45 || i2 > 58 || i2 == 47) { // second quad holds 2 bytes
                return findPName(q1, q2, 2);
            }
        }
        q2 = (q2 << 8) | i2;
        i2 = (int) mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            if (i2 < 45 || i2 > 58 || i2 == 47) { // second quad holds 3 bytes
                return findPName(q1, q2, 3);
            }
        }
        q2 = (q2 << 8) | i2;
        i2 = (int) mInputBuffer[mInputPtr++] & 0xFF;
        if (i2 < 65) {
            if (i2 < 45 || i2 > 58 || i2 == 47) { // second quad holds all 4 bytes
                return findPName(q1, q2, 4);
            }
        }

        // Ok, no, longer loop. Let's offline
        // (i2 is the first byte of the third quad; the first two go into the scratch array)
        int[] quads = mQuadBuffer;
        quads[0] = q1;
        quads[1] = q2;
        return parsePNameNewLong(i2, quads);
    }

    /**
     * Generic loop for names longer than 8 bytes. The caller must already
     * have stored the first two quads in {@code quads[0]} and
     * {@code quads[1]}; parsing continues from the third quad with per-byte
     * bounds checks.
     * <p>
     * Note that in this benchmark {@code loadMoreGuaranteed()} and
     * {@code loadOne()} always throw, so a name that runs up to the very end
     * of the buffer ends the run with an {@code IllegalStateException}.
     *
     * @param q first byte of the third quad (already consumed)
     * @param quads scratch array holding the first two quads
     * @return the symbol for the parsed name
     * @throws XMLStreamException declared by the lookup/registration methods
     */
    protected ByteBasedPName parsePNameNewLong(int q, int[] quads)
        throws XMLStreamException
    {
        int qix = 2; // number of completed quads stored in the array
        while (true) {
            // Second byte of a new quad
            if (mInputPtr >= mInputLen) {
                loadMoreGuaranteed();
            }
            int i2 = mInputBuffer[mInputPtr++] & 0xFF;
            if (i2 < 65) {
                // "-" (45), "." (46), digits and ":" (48 - 58) continue the name; "/" (47) does not
                if (i2 < 45 || i2 > 58 || i2 == 47) {
                    // End of name; current quad holds a single byte
                    return findPName(q, quads, qix, 1);
                }
            }
            // 3rd byte:
            q = (q << 8) | i2;
            i2 = (int) ((mInputPtr < mInputLen) ? mInputBuffer[mInputPtr++] : loadOne()) & 0xFF;
            if (i2 < 65) {
                if (i2 < 45 || i2 > 58 || i2 == 47) { // current quad holds 2 bytes
                    return findPName(q, quads, qix, 2);
                }
            }
            // 4th byte:
            q = (q << 8) | i2;
            i2 = (int) ((mInputPtr < mInputLen) ? mInputBuffer[mInputPtr++] : loadOne()) & 0xFF;
            if (i2 < 65) {
                if (i2 < 45 || i2 > 58 || i2 == 47) { // current quad holds 3 bytes
                    return findPName(q, quads, qix, 3);
                }
            }
            // Quad is now full; next byte read is the first byte of the following quad
            q = (q << 8) | i2;
            i2 = (int) ((mInputPtr < mInputLen) ? mInputBuffer[mInputPtr++] : loadOne()) & 0xFF;
            if (i2 < 65) {
                if (i2 < 45 || i2 > 58 || i2 == 47) { // current quad holds all 4 bytes
                    return findPName(q, quads, qix, 4);
                }
            }
            if (qix >= quads.length) { // let's just double?
                mQuadBuffer = quads = DataUtil.growArrayBy(quads, quads.length);
            }
            quads[qix] = q;
            ++qix;
            q = i2; // start the next quad with the byte just read
        }
    }

    /**
     * Resolves a name of at most 4 bytes, registering it if it is not yet in
     * the symbol table. Also un-reads the terminating byte the caller consumed.
     *
     * @param onlyQuad the name's bytes packed into a single int
     * @param lastByteCount number of name bytes held in {@code onlyQuad}
     * @return the existing or newly added symbol
     */
    private final ByteBasedPName findPName(int onlyQuad, int lastByteCount)
        throws XMLStreamException
    {
        // Give back the byte that ended the name
        --mInputPtr;

        final int hash = ByteBasedPNameTable.calcHash(onlyQuad);
        final ByteBasedPName known = mSymbols.findSymbol(hash, onlyQuad, 0);
        if (known != null) {
            return known;
        }
        // Not seen before: registration works on the array form
        final int[] scratch = mQuadBuffer;
        scratch[0] = onlyQuad;
        return addPName(hash, scratch, 1, lastByteCount);
    }

    /**
     * Resolves a name of 5 to 8 bytes, registering it if it is not yet in the
     * symbol table. Also un-reads the terminating byte the caller consumed.
     *
     * @param firstQuad the first four name bytes
     * @param secondQuad the remaining name bytes
     * @param lastByteCount number of name bytes held in {@code secondQuad}
     * @return the existing or newly added symbol
     */
    private final ByteBasedPName findPName(int firstQuad, int secondQuad,
                                  int lastByteCount)
        throws XMLStreamException
    {
        // Give back the byte that ended the name
        --mInputPtr;

        final int hash = ByteBasedPNameTable.calcHash(firstQuad, secondQuad);
        final ByteBasedPName known = mSymbols.findSymbol(hash, firstQuad, secondQuad);
        if (known != null) {
            return known;
        }
        // Not seen before: registration works on the array form
        final int[] scratch = mQuadBuffer;
        scratch[0] = firstQuad;
        scratch[1] = secondQuad;
        return addPName(hash, scratch, 2, lastByteCount);
    }

    /**
     * Resolves a name of three or more quads, registering it if it is not yet
     * in the symbol table. Also un-reads the terminating byte the caller
     * consumed.
     *
     * @param lastQuad the final quad, not yet stored in {@code quads}
     * @param quads array holding the preceding {@code qlen} quads
     * @param qlen number of quads already stored in {@code quads}
     * @param lastByteCount number of name bytes held in {@code lastQuad}
     * @return the existing or newly added symbol
     */
    private final ByteBasedPName findPName(int lastQuad, int[] quads, int qlen, int lastByteCount)
        throws XMLStreamException
    {
        // Give back the byte that ended the name
        --mInputPtr;

        // Append the final quad, doubling the scratch array when it is full
        int[] all = quads;
        if (qlen >= all.length) {
            all = DataUtil.growArrayBy(all, all.length);
            mQuadBuffer = all;
        }
        all[qlen] = lastQuad;
        final int quadCount = qlen + 1;

        final int hash = ByteBasedPNameTable.calcHash(all, quadCount);
        final ByteBasedPName known = mSymbols.findSymbol(hash, all, quadCount);
        return (known != null) ? known : addPName(hash, all, quadCount, lastByteCount);
    }

    /**
     * General-purpose lookup used by {@code parsePName}: resolves a name of
     * any length, registering it if it is not yet in the symbol table. Also
     * un-reads the terminating byte the caller consumed.
     *
     * @param lastQuad the final (possibly partial) quad, not stored in the array
     * @param lastByteCount number of name bytes held in {@code lastQuad}
     * @param firstQuad the first quad; only used when {@code qlen} is 1
     * @param qlen number of complete quads that precede {@code lastQuad}
     * @param quads array holding the preceding quads when there are 2 or more
     * @return the existing or newly added symbol
     */
    private final ByteBasedPName findPName(int lastQuad, int lastByteCount, int firstQuad,
                                  int qlen, int[] quads)
        throws XMLStreamException
    {
        // Give back the byte that ended the name
        --mInputPtr;

        if (qlen == 0) {
            // Whole name fits in 'lastQuad'
            final int hash = ByteBasedPNameTable.calcHash(lastQuad, 0);
            final ByteBasedPName known = mSymbols.findSymbol(hash, lastQuad, 0);
            if (known != null) {
                return known;
            }
            final int[] scratch = mQuadBuffer;
            scratch[0] = lastQuad;
            return addPName(hash, scratch, 1, lastByteCount);
        }

        if (qlen <= 1) {
            // Exactly two quads: 'firstQuad' followed by 'lastQuad'
            final int hash = ByteBasedPNameTable.calcHash(firstQuad, lastQuad);
            final ByteBasedPName known = mSymbols.findSymbol(hash, firstQuad, lastQuad);
            if (known != null) {
                return known;
            }
            final int[] scratch = mQuadBuffer;
            scratch[0] = firstQuad;
            scratch[1] = lastQuad;
            return addPName(hash, scratch, 2, lastByteCount);
        }

        // Three quads or more: append the final quad, doubling the array when full
        int[] all = quads;
        if (qlen >= all.length) {
            all = DataUtil.growArrayBy(all, all.length);
            mQuadBuffer = all;
        }
        all[qlen] = lastQuad;
        final int quadCount = qlen + 1;

        final int hash = ByteBasedPNameTable.calcHash(all, quadCount);
        final ByteBasedPName known = mSymbols.findSymbol(hash, all, quadCount);
        return (known != null) ? known : addPName(hash, all, quadCount, lastByteCount);
    }

    /**
     * Registers a name that was not found in the symbol table: decodes the
     * UTF-8 bytes packed in {@code quads} into characters, checks each
     * character against the name character tables, remembers the position of
     * a colon, and adds the resulting String to {@code mSymbols}.
     * <p>
     * While decoding, the last quad is temporarily shifted so that its bytes
     * line up with those of a full quad; its original value is restored
     * before the quads are handed to the symbol table.
     * <p>
     * Any validation problem goes through {@code reportError}, which in this
     * benchmark always throws {@code IllegalStateException}.
     *
     * @param hash hash of the quads as computed by the caller; passed through
     *   to {@code addSymbol}
     * @param quads the name bytes, four per int
     * @param qlen number of entries of {@code quads} in use
     * @param lastQuadBytes number of name bytes in the last used quad
     * @return the symbol returned by {@code mSymbols.addSymbol}
     * @throws XMLStreamException declared; not thrown by the visible code itself
     */
    protected final ByteBasedPName addPName(int hash, int[] quads, int qlen, int lastQuadBytes)
        throws XMLStreamException
    {
        // 4 bytes per quad, except last one maybe less
        int byteLen = (qlen << 2) - 4 + lastQuadBytes;

        /* A partial last quad has its bytes in the low-order positions
         * (leading zero bytes), whereas decoding below expects the first byte
         * of each quad in the high-order position. So shift it for UTF-8
         * decoding, and revert afterwards for storage (since key will not
         * be aligned, to optimize lookup speed)
         */
        int lastQuad;

        if (lastQuadBytes < 4) {
            lastQuad = quads[qlen-1];
            // 8/16/24 bit left shift
            quads[qlen-1] = (lastQuad << ((4 - lastQuadBytes) << 3));
        } else {
            lastQuad = 0;
        }

        // Let's handle first char separately (different validation):
        int ch = (quads[0] >>> 24);
        boolean ok;
        int ix = 1; // index of the next name byte to decode
        char[] cbuf = mNameBuffer;
        int cix  = 0; // number of chars written to cbuf
        final int[] TYPES = mCharTypes.NAME_CHARS;

        switch (TYPES[ch]) {
        case XmlCharTypes.CT_NAME_NONE:
        case XmlCharTypes.CT_NAME_COLON: // not ok as first
        case XmlCharTypes.CT_NAME_NONFIRST:
        case InputCharTypes.CT_INPUT_NAME_MB_N:
            ok = false;
            break;
        case XmlCharTypes.CT_NAME_ANY:
            ok = true;
            break;
        default: // multi-byte (UTF-8) chars:
            {
                int needed; // number of continuation bytes that must follow
                
                if ((ch & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
                    ch &= 0x1F;
                    needed = 1;
                } else if ((ch & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
                    ch &= 0x0F;
                    needed = 2;
                } else if ((ch & 0xF8) == 0xF0) { // 4 bytes; double-char with surrogates and all...
                    ch &= 0x07;
                    needed = 3;
                } else { // 5- and 6-byte chars not valid xml chars
                    reportError(ch);
                    needed = ch = 1; // never really gets this far
                }
                // The whole sequence must lie within the name's bytes
                if ((ix + needed) > byteLen) {
                    reportError(ch);
                }
                ix += needed;
                
                // A first character of up to 4 bytes always sits entirely in the first quad
                int q = quads[0];
                // Always need at least one more right away:
                int ch2 = (q >> 16) & 0xFF;
                if ((ch2 & 0xC0) != 0x080) { // continuation bytes look like 10xxxxxx
                    reportError(ch2);
                }
                ch = (ch << 6) | (ch2 & 0x3F);
                
                /* And then may need more. Note: here we do not do all the
                 * checks that UTF-8 text decoder might do. Reason is that
                 * name validity checking methods handle most of such checks
                 */
                if (needed > 1) {
                    ch2 = (q >> 8) & 0xFF;
                    if ((ch2 & 0xC0) != 0x080) {
                        reportError(ch2);
                    }
                    ch = (ch << 6) | (ch2 & 0x3F);
                    if (needed > 2) { // 4 bytes? (need surrogates on output)
                        ch2 = q & 0xFF;
                        if ((ch2 & 0xC0) != 0x080) {
                            reportError(ch2 & 0xFF);
                        }
                        ch = (ch << 6) | (ch2 & 0x3F);
                    }
                }
                ok = XmlChars.is10NameStartChar(ch);
                if (needed > 2) { // outside of basic 16-bit range? need surrogates
                    /* so, let's first output first char (high surrogate),
                     * let second be output by later code
                     */
                    ch -= 0x10000; // to normalize it starting with 0x0
                    cbuf[cix++] = (char) (0xD800 + (ch >> 10));
                    ch = (0xDC00 | (ch & 0x03FF));
                }
            }
        }

        if (!ok) { // first character is not a valid name start character
            reportError(ch);
        }

        cbuf[cix++] = (char) ch; // the only char, or second (low) surrogate

        /* Whoa! Tons of code for just the start char. But now we get to
         * decode the name proper, at last!
         */
        int last_colon = -1; // char index of the colon in the decoded name, if any

        for (; ix < byteLen; ) {
            ch = quads[ix >> 2]; // current quad, need to shift+mask
            int byteIx = (ix & 3);
            ch = (ch >> ((3 - byteIx) << 3)) & 0xFF;
            ++ix;

            // Ascii?
            switch (TYPES[ch]) {
            case XmlCharTypes.CT_NAME_NONE:
            case XmlCharTypes.CT_MULTIBYTE_N:
                ok = false;
                break;
            case XmlCharTypes.CT_NAME_COLON: // allowed here, but at most once per name
                if (last_colon >= 0) {
                    reportError(0);
                }
                last_colon = cix;
                ok = true;
                break;
            case XmlCharTypes.CT_NAME_NONFIRST:
            case XmlCharTypes.CT_NAME_ANY:
                ok = true;
                break;
            default: // multi-byte (UTF-8) chars
                {
                    int needed; // number of continuation bytes that must follow
                    if ((ch & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
                        ch &= 0x1F;
                        needed = 1;
                    } else if ((ch & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
                        ch &= 0x0F;
                        needed = 2;
                    } else if ((ch & 0xF8) == 0xF0) { // 4 bytes; double-char with surrogates and all...
                        ch &= 0x07;
                        needed = 3;
                    } else { // 5- and 6-byte chars not valid xml chars
                        reportError(ch);
                        needed = ch = 1; // never really gets this far
                    }
                    // The whole sequence must lie within the name's bytes
                    if ((ix + needed) > byteLen) {
                        reportError(cix);
                    }
                    
                    // Ok, always need at least one more:
                    int ch2 = quads[ix >> 2]; // current quad, need to shift+mask
                    byteIx = (ix & 3);
                    // NOTE: not masked to 8 bits here; only the low 8 bits are inspected below
                    ch2 = (ch2 >> ((3 - byteIx) << 3));
                    ++ix;
                    
                    if ((ch2 & 0xC0) != 0x080) { // continuation bytes look like 10xxxxxx
                        reportError(ch2);
                    }
                    ch = (ch << 6) | (ch2 & 0x3F);
                    
                    // Once again, some of validation deferred to name char validator
                    if (needed > 1) {
                        ch2 = quads[ix >> 2];
                        byteIx = (ix & 3);
                        ch2 = (ch2 >> ((3 - byteIx) << 3));
                        ++ix;
                        
                        if ((ch2 & 0xC0) != 0x080) {
                            reportError(ch2);
                        }
                        ch = (ch << 6) | (ch2 & 0x3F);
                        if (needed > 2) { // 4 bytes? (need surrogates on output)
                            ch2 = quads[ix >> 2];
                            byteIx = (ix & 3);
                            ch2 = (ch2 >> ((3 - byteIx) << 3));
                            ++ix;
                            if ((ch2 & 0xC0) != 0x080) {
                                reportError(ch2 & 0xFF);
                            }
                            ch = (ch << 6) | (ch2 & 0x3F);
                        }
                    }
                    ok = XmlChars.is10NameChar(ch);
                    if (needed > 2) { // surrogate pair? once again, let's output one here, one later on
                        ch -= 0x10000; // to normalize it starting with 0x0
                        if (cix >= cbuf.length) {
                            mNameBuffer = cbuf = DataUtil.growArrayBy(cbuf, cbuf.length);
                        }
                        cbuf[cix++] = (char) (0xD800 + (ch >> 10));
                        ch = 0xDC00 | (ch & 0x03FF);
                    }
                }
            }
            if (!ok) {
                reportError(cix);
            }
            if (cix >= cbuf.length) { // double the char buffer when full
                mNameBuffer = cbuf = DataUtil.growArrayBy(cbuf, cbuf.length);
            }
            cbuf[cix++] = (char) ch;
        }

        /* Ok. Now we have the character array, and can construct the
         * String (the position of the colon, needed for ns-aware mode,
         * was recorded above)
         */
        String baseName = new String(cbuf, 0, cix);
        // And finally, unalign if necessary
        if (lastQuadBytes < 4) {
            quads[qlen-1] = lastQuad;
        }
        return mSymbols.addSymbol(hash, baseName, last_colon, quads, qlen);
    }

    /**
     * Stand-in for a "load more input" hook. The benchmark keeps its whole
     * input in one buffer, so there is never more to load: reaching this
     * means a name ran up to the very end of the input.
     *
     * @throws IllegalStateException always
     */
    private void loadMoreGuaranteed()
    {
        throw new IllegalStateException("Input ended in the middle of a name; the benchmark cannot load more data");
    }

    /**
     * Stand-in for a "load and return one more byte" hook. The benchmark
     * keeps its whole input in one buffer, so there is never more to load:
     * reaching this means a name ran up to the very end of the input.
     *
     * @return never returns normally
     * @throws IllegalStateException always
     */
    private int loadOne()
    {
        throw new IllegalStateException("Input ended in the middle of a name; the benchmark cannot load more data");
    }

    /**
     * Reports a name validation problem by throwing.
     *
     * @param arg value supplied by the failing check; depending on the call
     *   site this is the offending byte or character, an index into the
     *   decoded name, or 0. It is included in the exception message.
     * @throws IllegalStateException always
     */
    private void reportError(int arg)
    {
        throw new IllegalStateException("Name validation failed (argument " + arg + ")");
    }

    /**
     * Reports a parsing problem by throwing.
     *
     * @param msg text used as the exception message
     * @throws IllegalStateException always
     */
    private void reportError(String msg)
    {
        throw new IllegalStateException(msg);
    }

    /**
     * Reads the whole file into a byte array.
     *
     * @param f file to read
     * @return the file contents; an empty array for an empty file
     * @throws IOException if the file cannot be opened or read, is too large
     *   for a single array, or ends before the number of bytes reported by
     *   {@link File#length()} has been read
     */
    private static byte[] readData(File f)
        throws IOException
    {
        final long size = f.length();
        if (size > Integer.MAX_VALUE) {
            throw new IOException("File '" + f + "' is too large to read into memory (" + size + " bytes)");
        }
        final byte[] data = new byte[(int) size];
        final FileInputStream fis = new FileInputStream(f);
        try {
            int offset = 0;
            // read() may deliver fewer bytes than requested, so keep going until full
            while (offset < data.length) {
                int count = fis.read(data, offset, data.length - offset);
                if (count < 0) { // file got shorter after its length was taken
                    throw new EOFException("Unexpected end of file '" + f + "': got " + offset
                            + " bytes, expected " + data.length);
                }
                offset += count;
            }
        } finally {
            fis.close();
        }
        return data;
    }

    /**
     * Command-line entry point: reads the input file given as the only
     * argument and runs the benchmark on it. Inputs smaller than about 10
     * million bytes are scanned repeatedly so that each timed round covers
     * roughly that many bytes.
     * <p>
     * Exits with status 1 when the argument count is wrong or the input file
     * is empty.
     *
     * @param args exactly one element: path of the input file
     * @throws Exception if the file cannot be read or the benchmark fails
     */
    public static void main(String[] args)
        throws Exception
    {
        if (args.length != 1) {
            System.err.println("Usage: java ... [input file]");
            System.exit(1);
        }
        byte[] data = readData(new File(args[0]));
        int len = data.length;
        if (len == 0) {
            // Nothing to scan; would also make the repetition count below divide by zero
            System.err.println("Input file '" + args[0] + "' is empty; nothing to measure");
            System.exit(1);
        }
        int repCount = 1;

        final int THRESHOLD = 10 * 1000 * 1000;

        if (len < THRESHOLD) {
            repCount = (THRESHOLD / len);
        }

        System.out.println("Ok, read in test data, "+len+" bytes; using "+repCount+" repetitions");
        new TestPNamePerf(data, repCount).test();
    }
}