DocTypeReader.java example

Explorer
openjpa-master
- openjpa-trunk
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.    
 */
package org.apache.openjpa.lib.xml;

import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;

/**
 * The DocTypeReader can be used to dynamically include a
 * <code>DOCTYPE</code> declaration in an XML stream. Often it is
 * inconvenient to specify a <code>DOCTYPE</code> in XML files -- you may
 * want the option of parsing the files without reading the DTD, the files
 * may move around, making placing a <code>DOCTYPE</code> path to the DTD in
 * them unattractive, and you may have many files, making an in-line include
 * of the DTD unattractive as well. This class makes
 * it possible to maintain XML files without any <code>DOCTYPE</code>
 * declaration, then dynamically include the <code>DOCTYPE</code> information
 * at runtime.
 * If the XML stream already contains a <code>DOCTYPE</code> declaration,
 * the reader will not add an additional one.
 * The <code>DOCTYPE</code> information given to the reader will be placed
 * in the XML stream it wraps just before the root element of the document.
 * Note that all methods other than the various forms of <code>read</code>
 * apply onto the underlying XML stream and should not be used until the
 * header and doc type have been read.
 *
 * @author Abe White
 */
public class DocTypeReader extends Reader {

    // stream that supplies everything after the buffered header and the
    // doc type declaration have been handed out
    private Reader _xml = null;

    // declaration still waiting to be emitted; null once it has been fully
    // read or when it must not be inserted at all
    private Reader _docType = null;

    // the readers exactly as given to the constructor; kept only so that
    // close() can release them even after _xml / _docType were replaced
    private final Reader _xmlSource;
    private final Reader _docTypeSource;

    // use to hold all header information until the doctype dec should be
    // inserted
    private char[] _header = null;
    private int _headerPos = 0;

    /**
     * Construct the reader with an XML stream, and set the
     * <code>DOCTYPE</code> information to be included. The given
     * reader should access an input source containing the exact declaration
     * to include, such as:<br />
     * <code>&lt;!DOCTYPE schedule SYSTEM "schedule.dtd"&gt;</code><br />
     * <code>&lt;!DOCTYPE html PUBLIC "-//W3C//DTD XHTML ...&gt;</code><br />
     * <code>&lt;!DOCTYPE stock-price [ &lt;!ELEMENT symb ... ]&gt;</code><br />
     * If the reader is null, no <code>DOCTYPE</code> information will be
     * included in the stream.
     *
     * @param xml the XML stream to wrap
     * @param docType reader over the declaration to insert, or null
     * @throws IOException if buffering the start of <code>xml</code> fails
     */
    public DocTypeReader(Reader xml, Reader docType) throws IOException {
        _xmlSource = xml;
        _docTypeSource = docType;
        _docType = docType;
        _xml = bufferHeader(xml);
    }

    /**
     * Read a single character: first from the buffered header, then from
     * the doc type declaration, then from the XML stream.
     *
     * @return the character read, or -1 at the end of the stream
     */
    @Override
    public int read() throws IOException {
        int ch = readHeader();
        if (ch != -1)
            return ch;

        ch = readDocType();
        if (ch != -1)
            return ch;

        return _xml.read();
    }

    /**
     * Equivalent to <code>read(buf, 0, buf.length)</code>.
     */
    @Override
    public int read(char[] buf) throws IOException {
        return read(buf, 0, buf.length);
    }

    /**
     * Fill the given array region from the buffered header, then the doc
     * type declaration, then the XML stream.
     *
     * @return the number of characters stored, or -1 if nothing was stored
     * because the XML stream reported its end
     */
    @Override
    public int read(char[] buf, int off, int len) throws IOException {
        int total = readHeader(buf, off, len);
        total += readDocType(buf, off + total, len - total);

        // never touch the XML stream while the region is already full or
        // part of the declaration is still pending; otherwise XML content
        // could be emitted in front of the rest of the declaration
        if (total == len || _docType != null)
            return total;

        int xmlRead = _xml.read(buf, off + total, len - total);
        if (xmlRead == -1)
            // end of XML must not be subtracted from what was already copied
            return (total > 0) ? total : -1;
        return total + xmlRead;
    }

    /**
     * Skip characters on the underlying XML stream; the buffered header and
     * the doc type declaration are not taken into account.
     */
    @Override
    public long skip(long len) throws IOException {
        return _xml.skip(len);
    }

    /**
     * Report whether the underlying XML stream is ready; the buffered header
     * and the doc type declaration are not taken into account.
     */
    @Override
    public boolean ready() throws IOException {
        return _xml.ready();
    }

    /**
     * Report whether the underlying XML stream supports marking.
     */
    @Override
    public boolean markSupported() {
        return _xml.markSupported();
    }

    /**
     * Mark the underlying XML stream; the buffered header and the doc type
     * declaration are not taken into account.
     */
    @Override
    public void mark(int readAheadLimit) throws IOException {
        _xml.mark(readAheadLimit);
    }

    /**
     * Reset the underlying XML stream; the buffered header and the doc type
     * declaration are not taken into account.
     */
    @Override
    public void reset() throws IOException {
        _xml.reset();
    }

    /**
     * Close the XML stream and the doc type reader given at construction.
     * Both are closed even if the declaration was already consumed or
     * skipped, and even if closing one of them fails.
     */
    @Override
    public void close() throws IOException {
        try {
            _xml.close();
        } finally {
            try {
                // _xml may be a replacement (see headerOnly) that does not
                // wrap the original stream, so close the original as well
                if (_xmlSource != null && _xmlSource != _xml)
                    _xmlSource.close();
            } finally {
                if (_docTypeSource != null)
                    _docTypeSource.close();
            }
        }
    }

    /**
     * Buffer all text until the doc type declaration should be inserted.
     * Leading whitespace, processing instructions, comments and other
     * <code>&lt;!...&gt;</code> declarations are collected into the header;
     * buffering stops at the first character that starts anything else.
     *
     * @return the reader to use for the remainder of the XML stream
     */
    private Reader bufferHeader(Reader origXML) throws IOException {
        // don't bother if no doc type declaration
        if (_docType == null) {
            _header = new char[0];
            return origXML;
        }

        // create buffer; up to three characters of lookahead are pushed back
        StringWriter writer = new StringWriter();
        PushbackReader xml = new PushbackReader(origXML, 3);
        int ch, ch2, ch3;
        boolean comment;

        while (true) {
            // read leading space
            for (ch = xml.read(); ch != -1
                && Character.isWhitespace((char) ch); ch = xml.read())
                writer.write(ch);
            if (ch == -1)
                return headerOnly(writer.toString());

            // if not XML, finish
            if (ch != '<') {
                xml.unread(ch);
                _header = writer.toString().toCharArray();
                return xml;
            }

            // stream ends right after '<': nothing to insert a doc type
            // before, and -1 must never be pushed back as a character
            ch = xml.read();
            if (ch == -1) {
                writer.write('<');
                return headerOnly(writer.toString());
            }

            // if the root element, finish
            if (ch != '?' && ch != '!') {
                xml.unread(ch);
                xml.unread('<');
                _header = writer.toString().toCharArray();
                return xml;
            }

            // if a doc type element, finish
            ch2 = xml.read();
            if (ch == '!' && ch2 == 'D') {
                xml.unread(ch2);
                xml.unread(ch);
                xml.unread('<');
                _header = writer.toString().toCharArray();
                _docType = null; // make sure doc type not included
                return xml;
            }

            // place everything read into the header material, stopping as
            // soon as the stream turns out to be truncated
            writer.write('<');
            writer.write(ch);
            if (ch2 == -1)
                return headerOnly(writer.toString());
            writer.write(ch2);

            ch3 = xml.read();
            if (ch3 == -1)
                return headerOnly(writer.toString());
            writer.write(ch3);

            // is this a comment?
            comment = ch == '!' && ch2 == '-' && ch3 == '-';

            // read until the next '>' or '-->' if a comment
            ch2 = 0;
            ch3 = 0;
            while ((ch = xml.read()) != -1) {
                writer.write(ch);

                if ((!comment && ch == '>')
                    || (comment && ch == '>' && ch2 == '-' && ch3 == '-'))
                    break;

                // track last two chars so we can tell if comment is ending
                ch3 = ch2;
                ch2 = ch;
            }
            if (ch == -1)
                return headerOnly(writer.toString());

            // read the space after the declaration
            for (ch = xml.read(); ch != -1
                && Character.isWhitespace((char) ch); ch = xml.read())
                writer.write(ch);
            if (ch == -1)
                return headerOnly(writer.toString());
            xml.unread(ch);
        }
    }

    /**
     * Used when the stream ended before any root element was seen: treat
     * the text read so far as the whole XML stream, with an empty header and
     * no doc type declaration.
     */
    private Reader headerOnly(String header) {
        _header = new char[0];
        _docType = null;
        return new StringReader(header);
    }

    /**
     * Return a single character from the buffered header, or -1 if none.
     */
    private int readHeader() {
        if (_headerPos == _header.length)
            return -1;
        return _header[_headerPos++];
    }

    /**
     * Read from the buffered header to the given array, returning the
     * number of characters read.
     */
    private int readHeader(char[] buf, int off, int len) {
        int read = 0;
        for (; len > 0 && _headerPos < _header.length; read++, off++, len--)
            buf[off] = _header[_headerPos++];

        return read;
    }

    /**
     * Return a single character from the doc type declaration, or -1 if none.
     */
    private int readDocType() throws IOException {
        if (_docType == null)
            return -1;

        int ch = _docType.read();
        if (ch == -1)
            _docType = null;

        return ch;
    }

    /**
     * Read from the doc type declaration to the given array, returning the
     * number of characters read. The declaration is only considered
     * finished once its reader reports end of stream; a short read alone
     * does not end it.
     */
    private int readDocType(char[] buf, int off, int len) throws IOException {
        int total = 0;
        while (_docType != null && total < len) {
            int n = _docType.read(buf, off + total, len - total);
            if (n == -1)
                _docType = null;
            else if (n == 0)
                break; // reader made no progress; leave the rest pending
            else
                total += n;
        }
        return total;
    }
}