ByteSourceBootstrapper.java example

Explorer

aalto-xml-master
- src
  - main
    - java
      - com
        fasterxml
        aalto
        AsyncByteArrayFeeder.java
        AsyncByteBufferFeeder.java
        AsyncInputFeeder.java
        AsyncXMLInputFactory.java
        AsyncXMLStreamReader.java
        UncheckedStreamException.java
        ValidationException.java
        WFCException.java
        async
        AsyncByteArrayScanner.java
        AsyncByteBufferScanner.java
        AsyncByteScanner.java
        AsyncStreamReaderImpl.java
        package-info.java
        dom
        BijectiveNsMap.java
        DOMOutputElement.java
        DOMReaderImpl.java
        DOMWriterImpl.java
        OutputElementBase.java
        evt
        EventAllocatorImpl.java
        EventReaderImpl.java
        IncompleteEvent.java
        impl
        CommonConfig.java
        ErrorConsts.java
        IoStreamException.java
        LocationImpl.java
        StreamExceptionBase.java
        in
        AttributeCollector.java
        ByteBasedPName.java
        ByteBasedPNameFactory.java
        ByteBasedPNameTable.java
        ByteBasedScanner.java
        ByteSourceBootstrapper.java
        CharBasedPNameTable.java
        CharSourceBootstrapper.java
        ElementScope.java
        EntityNames.java
        FixedNsContext.java
        InputBootstrapper.java
        InputCharTypes.java
        MergedStream.java
        NsBinding.java
        NsDeclaration.java
        PName.java
        PName1.java
        PName2.java
        PName3.java
        PNameC.java
        PNameN.java
        ReaderConfig.java
        ReaderScanner.java
        StreamScanner.java
        Utf32Reader.java
        Utf8Scanner.java
        XmlScanner.java
        io
        UTF8Writer.java
        out
        AsciiXmlWriter.java
        ByteWName.java
        ByteXmlWriter.java
        CharWName.java
        CharXmlWriter.java
        Latin1XmlWriter.java
        NonRepairingStreamWriter.java
        NsBinder.java
        OutputCharTypes.java
        OutputElement.java
        RepairingStreamWriter.java
        SingleByteXmlWriter.java
        StreamWriterBase.java
        Utf8XmlWriter.java
        WName.java
        WNameFactory.java
        WNameTable.java
        WriterConfig.java
        XmlWriter.java
        package-info.java
        sax
        SAXFeature.java
        SAXParserFactoryImpl.java
        SAXParserImpl.java
        SAXProperty.java
        SAXUtil.java
        stax
        EventFactoryImpl.java
        InputFactoryImpl.java
        OutputFactoryImpl.java
        StreamReaderImpl.java
        util
        BufferRecycler.java
        CharsetNames.java
        DataUtil.java
        EmptyIterator.java
        IllegalCharHandler.java
        NameTable.java
        SingletonIterator.java
        TextAccumulator.java
        TextBuilder.java
        TextUtil.java
        URLUtil.java
        UriCanonicalizer.java
        XmlCharTypes.java
        XmlChars.java
        XmlConsts.java
        XmlNames.java
      - test
        BasePerfTest.java
        RunStreamWriter.java
        TestAsyncReader.java
        TestBase64Reader.java
        TestEventReader.java
        TestLineReader.java
        TestNameHashing.java
        TestNameTable.java
        TestPNamePerf.java
        TestRawStream.java
        TestSaxReader.java
        TestScannerPerf.java
        TestStreamCopier.java
        TestStreamReader.java
        TestTypedSpeed.java
        TestUTF8.java
  - test
    - java

/* Woodstox Lite ("wool") XML processor
 *
 * Copyright (c) 2006- Tatu Saloranta, tatu.saloranta@iki.fi
 *
 * Licensed under the License specified in the file LICENSE which is
 * included with the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.fasterxml.aalto.in;

import java.io.*;

import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;

import com.fasterxml.aalto.impl.IoStreamException;
import com.fasterxml.aalto.impl.LocationImpl;
import com.fasterxml.aalto.util.CharsetNames;

/**
 * Class that takes care of bootstrapping main document input from
 * a byte-oriented input source: usually either an <code>InputStream</code>,
 * or a block source like byte array.
 */
public final class ByteSourceBootstrapper
    extends InputBootstrapper
{
    private final static byte BYTE_NULL = (byte) 0;

    private final static byte BYTE_CR = (byte) '\r';

    private final static byte BYTE_LF = (byte) '\n';


    /*
    /**********************************************************************
    /* Configuration
    /**********************************************************************
     */

    /**
     * Underlying InputStream to use for reading content.
     */
    final InputStream _in;

    /*
    /**********************************************************************
    /* Input buffering
    /**********************************************************************
     */

    final byte[] _inputBuffer;

    private int _inputPtr;

    private int _inputLen;

    /*
    /**********************************************************************
    /* Data gathered
    /**********************************************************************
     */

    boolean mBigEndian = true;
    int mBytesPerChar = 0; // 0 means "dunno yet"

    boolean mHadBOM = false;
    boolean mByteSizeFound = false;

    /*
    /**********************************************************************
    /* Life-cycle
    /**********************************************************************
     */

    private ByteSourceBootstrapper(ReaderConfig cfg, InputStream in)
    {
        super(cfg);
        _in = in;
        _inputBuffer = cfg.allocFullBBuffer(4000);
        _inputLen = _inputPtr = 0;
    }

    private ByteSourceBootstrapper(ReaderConfig cfg, byte[] inputBuffer, int inputStart, int inputLen)
    {
        super(cfg);
        _in = null;
        _inputBuffer = inputBuffer;
        _inputPtr = inputStart;
        _inputLen = (inputStart + inputLen);
        // Need to offset this, to keep location correct
        _inputProcessed = -inputStart;
    }

    public static ByteSourceBootstrapper construct(ReaderConfig cfg, InputStream in)
        throws XMLStreamException
    {
        return new ByteSourceBootstrapper(cfg, in);
    }

    public static ByteSourceBootstrapper construct(ReaderConfig cfg, byte[] inputBuffer, int inputStart, int inputLen)
        throws XMLStreamException
    {
        return new ByteSourceBootstrapper(cfg, inputBuffer, inputStart, inputLen);
    }

    @Override
    public final XmlScanner bootstrap() throws XMLStreamException
    {
        try {
            return doBootstrap();
        } catch (IOException ioe) {
            throw new IoStreamException(ioe);
        } finally {
            _config.freeSmallCBuffer(mKeyword);
        }
    }

    public XmlScanner doBootstrap() throws IOException, XMLStreamException
    {
        String normEnc = null;

        determineStreamEncoding();
        if (hasXmlDeclaration()) { // yup, has xml decl:
            readXmlDeclaration();
            if (mFoundEncoding != null) {
                normEnc = verifyXmlEncoding(mFoundEncoding);
            }
        }

        // Now, have we figured out the encoding?

        if (normEnc == null) { // not via xml declaration
            if (mBytesPerChar == 2) { // UTF-16, BE/LE
                normEnc = mBigEndian ? CharsetNames.CS_UTF16BE : CharsetNames.CS_UTF16LE;
            } else if (mBytesPerChar == 4) { // UCS-4... ?
                /* 22-Mar-2005, TSa: JDK apparently has no way of dealing
                 *   with these encodings... not sure if and how it should
                 *   be dealt with, really. Name could be UCS-4xx... or
                 *   perhaps UTF-32xx
                 */
                normEnc = mBigEndian ? CharsetNames.CS_UTF32BE : CharsetNames.CS_UTF32LE;
            } else {
                // Ok, default has to be UTF-8, as per XML specs
                normEnc = CharsetNames.CS_UTF8;
            }
        }

        _config.setActualEncoding(normEnc);
        _config.setXmlDeclInfo(mDeclaredXmlVersion, mFoundEncoding, mStandalone);

        // Normalized, can thus use straight equality checks now
        // UTF-8 compatible (loosely speaking) ones can use same scanner
        if (normEnc == CharsetNames.CS_UTF8
            || normEnc == CharsetNames.CS_ISO_LATIN1
            || normEnc == CharsetNames.CS_US_ASCII) {
            return new Utf8Scanner(_config,
                                   _in, _inputBuffer, _inputPtr, _inputLen);
        } else if (normEnc.startsWith(CharsetNames.CS_UTF32)) {
            /* Since this is such a rare encoding, we'll just create
             * a Reader, and dispatch it to reader scanner?
             */
            // let's augment with actual endianness info
            if (normEnc == CharsetNames.CS_UTF32) {
                normEnc = mBigEndian ? CharsetNames.CS_UTF32BE : CharsetNames.CS_UTF32LE;
            }
            Reader r = new Utf32Reader(_config, _in, _inputBuffer, _inputPtr, _inputLen,
                                       mBigEndian);
            return new ReaderScanner(_config, r);
        }

        /* And finally, if all else fails, we'll also fall back to
         * using JDK-provided decoders and ReaderScanner:
         */
        InputStream in = _in;
        if (_inputPtr < _inputLen) {
            in = new MergedStream(_config, in, _inputBuffer, _inputPtr, _inputLen);
        }
        if (normEnc == CharsetNames.CS_UTF16) {
            normEnc = mBigEndian ? CharsetNames.CS_UTF16BE : CharsetNames.CS_UTF16LE;
        }
        try {
            Reader r = new InputStreamReader(in, normEnc);
            return new ReaderScanner(_config, r);
        } catch (UnsupportedEncodingException usex) {
            throw new IoStreamException("Unsupported encoding: "+usex.getMessage());
        }
    }

    /*
    /**********************************************************************
    // Internal methods, main xml decl processing
    /**********************************************************************
     */

    /**
     * Method called to figure out what the physical encoding of the
     * file appears to be (in case it can be determined from BOM, or
     * xml declaration, either of which may be present)
     */
    private void determineStreamEncoding()
        throws IOException
    {
        /* Ok; first just need 4 bytes for determining bytes-per-char from
         * BOM or first char(s) of likely xml declaration:
         */
        if (ensureLoaded(4)) {
            int origPtr = _inputPtr;

            bomblock:
            do { // BOM/auto-detection block
                int quartet = (_inputBuffer[_inputPtr] << 24)
                    | ((_inputBuffer[_inputPtr+1] & 0xFF) << 16)
                    | ((_inputBuffer[_inputPtr+2] & 0xFF) << 8)
                    | (_inputBuffer[_inputPtr+3] & 0xFF);

                /* Handling of (usually) optional BOM (required for
                 * multi-byte formats); first 32-bit charsets:
                 */
                switch (quartet) {
                case 0x0000FEFF:
                    mBigEndian = true;
                    _inputPtr += 4;
                    mBytesPerChar = 4;
                    break bomblock;
                case 0xFFFE0000: // UCS-4, LE?
                    mBigEndian = false;
                    _inputPtr += 4;
                    mBytesPerChar = 4;
                    break bomblock;
                case 0x0000FFFE: // UCS-4, in-order...
                    reportWeirdUCS4("2143");
                    break bomblock;
                case 0x0FEFF0000: // UCS-4, in-order...
                    reportWeirdUCS4("3412");
                    break bomblock;
                }

                // Ok, if not, how about 16-bit encoding BOMs?
                int msw = quartet >>> 16;
                if (msw == 0xFEFF) { // UTF-16, BE
                    _inputPtr += 2;
                    mBytesPerChar = 2;
                    mBigEndian = true;
                    break;
                }
                if (msw == 0xFFFE) { // UTF-16, LE
                    _inputPtr += 2;
                    mBytesPerChar = 2;
                    mBigEndian = false;
                    break;
                }

                // And if not, then UTF-8 BOM?
                if ((quartet >>> 8) == 0xEFBBBF) { // UTF-8
                    _inputPtr += 3;
                    mBytesPerChar = 1;
                    mBigEndian = true; // doesn't really matter
                    break;
                }

                /* And if that wasn't succesful, how about auto-detection
                 * for '<?xm' (or subset for multi-byte encodings) marker?
                 */
                // Note: none of these consume bytes... so ptr remains at 0

                switch (quartet) {
                case 0x0000003c: // UCS-4, BE?
                    mBigEndian = true;
                    mBytesPerChar = 4;
                    break bomblock;
                case 0x3c000000: // UCS-4, LE?
                    mBytesPerChar = 4;
                    mBigEndian = false;
                    break bomblock;
                case 0x00003c00: // UCS-4, in-order...
                    reportWeirdUCS4("2143");
                    break bomblock;
                case 0x003c0000: // UCS-4, in-order...
                    reportWeirdUCS4("3412");
                    break bomblock;
                case 0x003c003f: // UTF-16, BE
                    mBytesPerChar = 2;
                    mBigEndian = true;
                    break bomblock;
                case 0x3c003f00: // UTF-16, LE
                    mBytesPerChar = 2;
                    mBigEndian = false;
                    break bomblock;
                case 0x3c3f786d: // UTF-8, Ascii, ISO-Latin
                    mBytesPerChar = 1;
                    mBigEndian = true; // doesn't really matter
                    break bomblock;

                case 0x4c6fa794: // EBCDIC, not (yet?) supported...
                    reportEBCDIC();
                }
                
                /* Otherwise it's either single-byte doc without xml
                 * declaration, or corrupt input...
                 */
            } while (false); // BOM/auto-detection block
            
            mHadBOM = (_inputPtr > origPtr);

            /* Let's update location markers to ignore BOM when calculating
             * column positions (but not from raw byte offsets)
             */
            _inputRowStart = _inputPtr;
        }

        /* Hmmh. If we haven't figured it out, let's just assume
         * UTF-8 as per XML specs:
         */
        mByteSizeFound = (mBytesPerChar > 0);
        if (!mByteSizeFound) {
            mBytesPerChar = 1;
            mBigEndian = true; // doesn't matter
        }
    }

    protected boolean hasXmlDeclaration()
        throws IOException, XMLStreamException
    {
        // First the common case, 1-byte encoding (Ascii/ISO-Latin/UTF-8):
        if (mBytesPerChar == 1) {
            // Need 6 chars to determine for sure...
            if (ensureLoaded(6)) {
                if (_inputBuffer[_inputPtr] == '<'
                    && _inputBuffer[_inputPtr+1] == '?'
                    && _inputBuffer[_inputPtr+2] == 'x'
                    && _inputBuffer[_inputPtr+3] == 'm'
                    && _inputBuffer[_inputPtr+4] == 'l'
                    && ((_inputBuffer[_inputPtr+5] & 0xFF) <= CHAR_SPACE)) {

                    // Let's skip stuff so far:
                    _inputPtr += 6;
                    return true;
                }
            }
        } else { // ... and then for slower fixed-multibyte encodings:
            if (ensureLoaded (6 * mBytesPerChar)) { // 6 chars as well
                int start = _inputPtr; // if we have to 'unread' chars
                if (nextMultiByte() == '<'
                    && nextMultiByte() == '?'
                    && nextMultiByte() == 'x'
                    && nextMultiByte() == 'm'
                    && nextMultiByte() == 'l'
                    && nextMultiByte() <= CHAR_SPACE) {
                    return true;
                }
                _inputPtr = start; // push data back
            }
        }
        return false;
    }

    /**
     * @return Normalized encoding name
     */
    protected String verifyXmlEncoding(String enc)
        throws XMLStreamException
    {
        enc = CharsetNames.normalize(enc);

        // Let's actually verify we got matching information:
        if (enc == CharsetNames.CS_UTF8) {
            verifyEncoding(enc, 1);
        } else if (enc == CharsetNames.CS_ISO_LATIN1) {
            verifyEncoding(enc, 1);
        } else if (enc == CharsetNames.CS_US_ASCII) {
            verifyEncoding(enc, 1);
        } else if (enc == CharsetNames.CS_UTF16) {
            // BOM should be obligatory, to know the ordering?
            // For now, let's not enforce that though.
            //if (!mHadBOM) {
            //reportMissingBOM(enc);
            //}
            verifyEncoding(enc, 2);
        } else if (enc == CharsetNames.CS_UTF16LE) {
            verifyEncoding(enc, 2, false);
        } else if (enc == CharsetNames.CS_UTF16BE) {
            verifyEncoding(enc, 2, true);

        } else if (enc == CharsetNames.CS_UTF32) {
            // Do we require a BOM here? we can live without it...
            //if (!mHadBOM) {
            //    reportMissingBOM(enc);
            //}
            verifyEncoding(enc, 4);
        } else if (enc == CharsetNames.CS_UTF32LE) {
            verifyEncoding(enc, 4, false);
        } else if (enc == CharsetNames.CS_UTF32BE) {
            verifyEncoding(enc, 4, true);
        }
        return enc;
    }

    /*
    /**********************************************************************
    /* Internal methods, loading input data
    /**********************************************************************
     */
 
    protected boolean ensureLoaded(int minimum)
        throws IOException
    {
        /* Let's assume here buffer has enough room -- this will always
         * be true for the limited used this method gets
         */
        int gotten = (_inputLen - _inputPtr);
        while (gotten < minimum) {
            int count;

            if (_in == null) { // block source
                count = -1;
            } else {
                count = _in.read(_inputBuffer, _inputLen, _inputBuffer.length - _inputLen);
            }
            if (count < 1) {
                return false;
            }
            _inputLen += count;
            gotten += count;
        }
        return true;
    }

    protected void loadMore()
        throws IOException, XMLStreamException
    {
        _inputProcessed += _inputLen;
        _inputRowStart -= _inputLen;

        _inputPtr = 0;
        if (_in == null) { // block source
            _inputLen = -1;
        } else {
            _inputLen = _in.read(_inputBuffer, 0, _inputBuffer.length);
        }
        if (_inputLen < 1) {
            reportEof();
        }
    }

    /*
    /**********************************************************************
    /* Implementations of abstract parsing methods
    /**********************************************************************
     */

    @Override
    protected void pushback() {
        _inputPtr -= mBytesPerChar;
    }

    @Override
    protected int getNext()
        throws IOException, XMLStreamException
    {
        if (mBytesPerChar > 1) {
            return nextMultiByte();
        }
        byte b = (_inputPtr < _inputLen) ?
            _inputBuffer[_inputPtr++] : nextByte();
        return (b & 0xFF);
    }

    @Override
    protected int getNextAfterWs(boolean reqWs)
        throws IOException, XMLStreamException
    {
        int count;

        if (mBytesPerChar > 1) { // multi-byte
            count = skipMbWs();
        } else {
            count = skipSbWs();
        }

        if (reqWs && count == 0) {
            reportUnexpectedChar(getNext(), ERR_XMLDECL_EXP_SPACE);
        }

        // inlined getNext()
        if (mBytesPerChar > 1) {
            return nextMultiByte();
        }
        byte b = (_inputPtr < _inputLen) ?
            _inputBuffer[_inputPtr++] : nextByte();
        return (b & 0xFF);
    }

    /**
     * @return First character that does not match expected, if any;
     *    CHAR_NULL if match succeeded
     */
    @Override
    protected int checkKeyword(String exp)
        throws IOException, XMLStreamException
    {
        if (mBytesPerChar > 1) {
            return checkMbKeyword(exp);
        }
        return checkSbKeyword(exp);
    }

    @Override
    protected int readQuotedValue(char[] kw, int quoteChar)
        throws IOException, XMLStreamException
    {
        int i = 0;
        int len = kw.length;
        boolean mb = (mBytesPerChar > 1);

        while (i < len) {
            int c;

            if (mb) {
                c = nextMultiByte();
                if (c ==  CHAR_CR || c == CHAR_LF) {
                    skipMbLF(c);
                    c = CHAR_LF;
                }
            } else {
                byte b = (_inputPtr < _inputLen) ?
                    _inputBuffer[_inputPtr++] : nextByte();
                if (b == BYTE_NULL) {
                    reportNull();
                }
                if (b == BYTE_CR || b == BYTE_LF) {
                    skipSbLF(b);
                    b = BYTE_LF;
                }
                c = (b & 0xFF);
            }

            if (c == quoteChar) {
                return (i < len) ? i : -1;
            }

	    if (i < len) {
		kw[i++] = (char) c;
	    }
        }

        /* If we end up this far, we ran out of buffer space... let's let
         * caller figure that out, though
         */
        return -1;
    }

    @Override
    protected Location getLocation()
    {
        /* Ok; for fixed-size multi-byte encodings, need to divide numbers
         * to get character locations. For variable-length encodings the
         * good thing is that xml declaration only uses shortest codepoints,
         * ie. char count == byte count.
         */
        int total = _inputProcessed + _inputPtr;
        int col = _inputPtr - _inputRowStart;

        if (mBytesPerChar > 1) {
            total /= mBytesPerChar;
            col /= mBytesPerChar;
        }
        return LocationImpl.fromZeroBased
            (_config.getPublicId(), _config.getSystemId(),
             total, _inputRow, col);
    }

    /*
    /**********************************************************************
    /* Internal methods, single-byte access methods
    /**********************************************************************
     */

    protected byte nextByte()
        throws IOException, XMLStreamException
    {
        if (_inputPtr >= _inputLen) {
            loadMore();
        }
        return _inputBuffer[_inputPtr++];
    }

    protected int skipSbWs()
        throws IOException, XMLStreamException
    {
        int count = 0;

        while (true) {
            byte b = (_inputPtr < _inputLen) ?
                _inputBuffer[_inputPtr++] : nextByte();

            if ((b & 0xFF) > CHAR_SPACE) {
                --_inputPtr;
                break;
            }
            if (b == BYTE_CR || b == BYTE_LF) {
                skipSbLF(b);
            } else if (b == BYTE_NULL) {
                reportNull();
            }
            ++count;
        }
        return count;
    }

    protected void skipSbLF(byte lfByte)
        throws IOException, XMLStreamException
    {
        if (lfByte == BYTE_CR) {
            byte b = (_inputPtr < _inputLen) ?
                _inputBuffer[_inputPtr++] : nextByte();
            if (b != BYTE_LF) {
                --_inputPtr; // pushback if not 2-char/byte lf
            }
        }
        ++_inputRow;
        _inputRowStart = _inputPtr;
    }

    /**
     * @return First character that does not match expected, if any;
     *    CHAR_NULL if match succeeded
     */
    protected int checkSbKeyword(String expected)
        throws IOException, XMLStreamException
    {
        int len = expected.length();
        
        for (int ptr = 1; ptr < len; ++ptr) {
            byte b = (_inputPtr < _inputLen) ?
                _inputBuffer[_inputPtr++] : nextByte();
            
            if (b == BYTE_NULL) {
                reportNull();
            }
            if ((b & 0xFF) != expected.charAt(ptr)) {
                return (b & 0xFF);
            }
        }

        return CHAR_NULL;
    }

    /*
    /**********************************************************************
    /* Internal methods, multi-byte access/checks
    /**********************************************************************
     */

    protected int nextMultiByte()
        throws IOException, XMLStreamException
    {
        byte b = (_inputPtr < _inputLen) ?
            _inputBuffer[_inputPtr++] : nextByte();
        byte b2 = (_inputPtr < _inputLen) ?
            _inputBuffer[_inputPtr++] : nextByte();
        int c;

        if (mBytesPerChar == 2) {
            if (mBigEndian) {
                c = ((b & 0xFF) << 8) | (b2 & 0xFF);
            } else {
                c = (b & 0xFF) | ((b2 & 0xFF) << 8);
            }
        } else {
            // Has to be 4 bytes
            byte b3 = (_inputPtr < _inputLen) ?
                _inputBuffer[_inputPtr++] : nextByte();
            byte b4 = (_inputPtr < _inputLen) ?
                _inputBuffer[_inputPtr++] : nextByte();
            
            if (mBigEndian) {
                c = (b  << 24) | ((b2 & 0xFF) << 16)
                    | ((b3 & 0xFF) << 8) | (b4 & 0xFF);
            } else {
                c = (b4  << 24) | ((b3 & 0xFF) << 16)
                    | ((b2 & 0xFF) << 8) | (b & 0xFF);
            }
        }

        // Let's catch null chars early
        if (c == 0) {
            reportNull();
        }
        return c;
    }

    protected int skipMbWs()
        throws IOException, XMLStreamException
    {
        int count = 0;

        while (true) {
            int c = nextMultiByte();

            if (c > CHAR_SPACE) {
                _inputPtr -= mBytesPerChar;
                break;
            }
            if (c == CHAR_CR || c == CHAR_LF) {
                skipMbLF(c);
            } else if (c == CHAR_NULL) {
                reportNull();
            }
            ++count;
        }
        return count;
    }

    protected void skipMbLF(int lf)
        throws IOException, XMLStreamException
    {
        if (lf == CHAR_CR) {
            int c = nextMultiByte();
            if (c != CHAR_LF) {
                _inputPtr -= mBytesPerChar;
            }
        }
        ++_inputRow;
        _inputRowStart = _inputPtr;
    }

    /**
     * @return First character that does not match expected, if any;
     *    CHAR_NULL if match succeeded
     */
    protected int checkMbKeyword(String expected)
        throws IOException, XMLStreamException
    {
        int len = expected.length();
        
        for (int ptr = 1; ptr < len; ++ptr) {
            int c = nextMultiByte();
            if (c == BYTE_NULL) {
                reportNull();
            }
            if (c != expected.charAt(ptr)) {
              return c;
            }
        }

        return CHAR_NULL;
    }

    /*
    /**********************************************************************
    /* Other private methods:
    /**********************************************************************
     */

    private void verifyEncoding(String id, int bpc)
        throws XMLStreamException
    {
        if (mByteSizeFound) {
            /* Let's verify that if we matched an encoding, it's the same
             * as what was declared...
             */
            if (bpc != mBytesPerChar) {
                reportXmlProblem("Declared encoding '"+id+"' uses "+bpc
                                 +" bytes per character; but physical encoding appeared to use "+mBytesPerChar+"; cannot decode");
            }
        }
    }

    private void verifyEncoding(String id, int bpc, boolean bigEndian)
        throws XMLStreamException
    {
        if (mByteSizeFound) {
            verifyEncoding(id, bpc);

            if (bigEndian != mBigEndian) {
                String bigStr = bigEndian ? "big" : "little";
                reportXmlProblem
                    ("Declared encoding '"+id+"' has different endianness ("
                     +bigStr+" endian) than what physical ordering appeared to be; cannot decode");
            }
        }
    }

    private void reportWeirdUCS4(String type)
        throws IOException
    {
        throw new CharConversionException("Unsupported UCS-4 endianness ("+type+") detected");
    }

    private void reportEBCDIC()
        throws IOException
    {
        throw new CharConversionException("Unsupported encoding (EBCDIC)");
    }
}