XmlInputStream.java example

Explorer
argouml-master
/* $Id: XmlInputStream.java 17940 2010-01-30 16:15:11Z euluis $
 *****************************************************************************
 * Copyright (c) 2009-2010 Contributors - see below
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *    tfmorris
 *    euluis
 *****************************************************************************
 *
 * Some portions of this file was previously release using the BSD License:
 */

// Copyright (c) 1996-2006 The Regents of the University of California. All
// Rights Reserved. Permission to use, copy, modify, and distribute this
// software and its documentation without fee, and without a written
// agreement is hereby granted, provided that the above copyright notice
// and this paragraph appear in all copies.  This software program and
// documentation are copyrighted by The Regents of the University of
// California. The software program and documentation are supplied "AS
// IS", without any accompanying services from The Regents. The Regents
// does not warrant that the operation of the program will be
// uninterrupted or error-free. The end-user understands that the program
// was developed for research purposes and is advised not to rely
// exclusively on the program for any reason.  IN NO EVENT SHALL THE
// UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
// SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
// ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
// THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
// PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
// CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT,
// UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

package org.argouml.persistence;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

//import javax.swing.event.EventListenerList;

import org.apache.log4j.Logger;


/**
 * A BufferInputStream that is aware of XML structure.
 * It searches for the first occurrence of a named tag
 * and reads only the data (inclusively) from that tag
 * to the matching end tag or it can search for the first
 * occurrence of a named tag and read on the child tags.
 * The tag is not expected to be an empty tag.
 * <p>
 * TODO: This is hardwired to assume a fixed single byte
 * character encoding.  It needs to be updated to handle different
 * encodings, including multi-byte encodings. - tfm 20070607
 * 
 * @author Bob Tarling
 */
class XmlInputStream extends BufferedInputStream {

    private boolean xmlStarted;
    private boolean inTag;
    private StringBuffer currentTag = new StringBuffer();
    private boolean endStream;
    private String tagName;
    private String endTagName;
    private Map attributes;
    private boolean childOnly;
    private int instanceCount;
    //private EventListenerList listenerList = new EventListenerList();

    /**
     * Logger.
     */
    private static final Logger LOG =
        Logger.getLogger(XmlInputStream.class);

    /**
     * Construct a new XmlInputStream.
     *
     * @param inStream the input stream to wrap.
     * @param theTag the tag name from which to start reading
     * @param theLength the expected length of the input stream
     * @param theEventSpacing the number of characters to read before
     *        firing a progress event.
     */
    public XmlInputStream(
            InputStream inStream,
            String theTag,
            long theLength,
            long theEventSpacing) {
        super(inStream);
        tagName = theTag;
        endTagName = '/' + theTag;
        attributes = null;
        childOnly = false;
    }

    /**
     * Reopen a stream that has already reached the end
     * of an XML fragment.
     *
     * @param theTag the tag name
     * @param attribs the attributes
     * @param child child only
     */
    public synchronized void reopen(
                String theTag,
                Map attribs,
                boolean child) {
        endStream = false;
        xmlStarted = false;
        inTag = false;
        tagName = theTag;
        endTagName = '/' + theTag;
        attributes = attribs;
        childOnly = child;
    }

    /**
     * Reopen a stream that has already reached the end
     * of an XML fragment.
     *
     * @param theTag the tag name
     */
    public synchronized void reopen(String theTag) {
        endStream = false;
        xmlStarted = false;
        inTag = false;
        tagName = theTag;
        endTagName = '/' + theTag;
        attributes = null;
        childOnly = false;
    }

    /*
     * @see java.io.InputStream#read()
     */
    public synchronized int read() throws IOException {

        if (!xmlStarted) {
            skipToTag();
            xmlStarted = true;
        }
        if (endStream) {
            return -1;
        }
        int ch = super.read();
        endStream = isLastTag(ch);
        return ch;
    }

    /*
     * @see java.io.InputStream#read(byte[], int, int)
     */
    public synchronized int read(byte[] b, int off, int len)
        throws IOException {

        if (!xmlStarted) {
            skipToTag();
            xmlStarted = true;
        }
        if (endStream) {
            return -1;
        }

        int cnt;
        for (cnt = 0; cnt < len; ++cnt) {
            int read = read();
            if (read == -1) {
		break;
	    }
            b[cnt + off] = (byte) read;
        }

        if (cnt > 0) {
            return cnt;
        }
        return -1;
    }



    /**
     * Determines if the character is the last character of the last tag of
     * interest.
     * Every character read after the first tag of interest should be passed
     * through this method in order.
     *
     * @param ch the character to test.
     * @return true if this is the end of the last tag.
     */
    private boolean isLastTag(int ch) {
        if (ch == '<') {
            inTag = true;
            currentTag.setLength(0);
        } else if (ch == '>') {
            inTag = false;
            String tag = currentTag.toString();
            if (tag.equals(endTagName)
                    // TODO: The below is not strictly correct, but should
                    // cover the case we deal with.  Using a real XML parser
                    // would be better.
                    // Look for XML document has just a single root element
                    || (currentTag.charAt(currentTag.length() - 1) == '/' 
                        && tag.startsWith(tagName)
                        && tag.indexOf(' ') == tagName.indexOf(' '))) {
                return true;
            }
        } else if (inTag) {
            currentTag.append((char) ch);
        }
        return false;
    }

    /**
     * Keep on reading an input stream until a specific
     * sequence of characters has ben read.
     * This method assumes there is at least one match.
     *
     * @throws IOException
     */
    private void skipToTag() throws IOException {
        char[] searchChars = tagName.toCharArray();
        int i;
        boolean found;
        while (true) {
            if (!childOnly) {
		mark(1000);
	    }
            // Keep reading till we get the left bracket of an opening tag
            while (realRead() != '<') {
                if (!childOnly) {
		    mark(1000);
		}
            }
            found = true;
            // Compare each following character to see
            // that it matches the tag we want
            for (i = 0; i < tagName.length(); ++i) {
                int c = realRead();
                if (c != searchChars[i]) {
                    found = false;
                    break;
                }
            }
            int terminator = realRead();
            // We also want to match with the right bracket of the tag or
            // some other terminator
            if (found && !isNameTerminator((char) terminator)) {
                found = false;
            }

            if (found) {
                // We've found the matching tag but do we have
                // the correct instance with matching attributes?
                if (attributes != null) {
                    Map attributesFound = new HashMap();
                    if (terminator != '>') {
                        attributesFound = readAttributes();
                    }
                    // Search all attributes found to those expected.
                    // If any don't match then turn off the found flag
                    // so that we search for the next matching tag.
                    Iterator it = attributes.entrySet().iterator();
                    while (found && it.hasNext()) {
                        Map.Entry pair = (Map.Entry) it.next();
                        if (!pair.getValue().equals(
                                attributesFound.get(pair.getKey()))) {
                            found = false;
                        }
                    }
                }
            }

            if (found) {
                if (instanceCount < 0) {
                    found = false;
                    ++instanceCount;
                }
            }

            if (found) {
                if (childOnly) {
                    // Read the name of the child tag
                    // and then reset read position
                    // back to that child tag.
                    mark(1000);
                    while (realRead() != '<') {
                        /* do nothing */
		    }
                    tagName = "";
                    char ch = (char) realRead();
                    while (!isNameTerminator(ch)) {
                        tagName += ch;
                        ch = (char) realRead();
                    }
                    endTagName = "/" + tagName;
                    LOG.info("Start tag = " + tagName);
                    LOG.info("End tag = " + endTagName);
                }
                reset();
                return;
            }
        }
    }

    private boolean isNameTerminator(char ch) {
        return (ch == '>' || Character.isWhitespace(ch));
    }

    /**
     * Having read the inputstream up until the tag name.
     * This method continues to read the contents of the tag to
     * retrieve any attribute names and values.
     * @return a map of name value pairs.
     * @throws IOException
     */
    private Map readAttributes() throws IOException {
        Map attributesFound = new HashMap();
        int character;
        while ((character = realRead()) != '>') {
            if (!Character.isWhitespace((char) character)) {
                StringBuffer attributeName = new StringBuffer();
                attributeName.append((char) character);
                while ((character = realRead()) != '='
                        && !Character.isWhitespace((char) character)) {
                    attributeName.append((char) character);
                }
                // Skip any whitespace till we should be on an equals sign.
                while (Character.isWhitespace((char) character)) {
                    character = realRead();
                }
                if (character != '=') {
                    throw new IOException(
                            "Expected = sign after attribute "
                            + attributeName);
                }
                // Skip any whitespace till we should be on a quote symbol.
                int quoteSymbol = realRead();
                while (Character.isWhitespace((char) quoteSymbol)) {
                    quoteSymbol = realRead();
                }
                if (quoteSymbol != '"' && quoteSymbol != '\'') {
                    throw new IOException(
                            "Expected \" or ' around attribute value after "
                            + "attribute " + attributeName);
                }
                StringBuffer attributeValue = new StringBuffer();
                while ((character = realRead()) != quoteSymbol) {
                    attributeValue.append((char) character);
                }
                attributesFound.put(
                        attributeName.toString(),
                        attributeValue.toString());
            }
        }
        return attributesFound;
    }



    /**
     * The close method is overridden to prevent some class out of
     * our control from closing the stream (such as a SAX parser).
     * Use realClose() to finally close the stream for real.
     * @throws IOException to satisfy ancestor but will never happen.
     */
    public void close() throws IOException {
    }

    /**
     * Really close the input.
     *
     * @throws IOException if an I/O error occurs.
     */
    public void realClose() throws IOException {
        super.close();
    }

    private int realRead() throws IOException {
        int read = super.read();
        if (read == -1) {
            throw new IOException("Tag " + tagName + " not found");
        }
        return read;
    }

}