/* $Id: XmlInputStream.java 17940 2010-01-30 16:15:11Z euluis $ ***************************************************************************** * Copyright (c) 2009-2010 Contributors - see below * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * tfmorris * euluis ***************************************************************************** * * Some portions of this file was previously release using the BSD License: */ // Copyright (c) 1996-2006 The Regents of the University of California. All // Rights Reserved. Permission to use, copy, modify, and distribute this // software and its documentation without fee, and without a written // agreement is hereby granted, provided that the above copyright notice // and this paragraph appear in all copies. This software program and // documentation are copyrighted by The Regents of the University of // California. The software program and documentation are supplied "AS // IS", without any accompanying services from The Regents. The Regents // does not warrant that the operation of the program will be // uninterrupted or error-free. The end-user understands that the program // was developed for research purposes and is advised not to rely // exclusively on the program for any reason. IN NO EVENT SHALL THE // UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, // SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, // ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF // THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF // SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE // PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF // CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, // UPDATES, ENHANCEMENTS, OR MODIFICATIONS. package org.argouml.persistence; import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; import java.util.Iterator; import java.util.Map; //import javax.swing.event.EventListenerList; import org.apache.log4j.Logger; /** * A BufferInputStream that is aware of XML structure. * It searches for the first occurrence of a named tag * and reads only the data (inclusively) from that tag * to the matching end tag or it can search for the first * occurrence of a named tag and read on the child tags. * The tag is not expected to be an empty tag. * <p> * TODO: This is hardwired to assume a fixed single byte * character encoding. It needs to be updated to handle different * encodings, including multi-byte encodings. - tfm 20070607 * * @author Bob Tarling */ class XmlInputStream extends BufferedInputStream { private boolean xmlStarted; private boolean inTag; private StringBuffer currentTag = new StringBuffer(); private boolean endStream; private String tagName; private String endTagName; private Map attributes; private boolean childOnly; private int instanceCount; //private EventListenerList listenerList = new EventListenerList(); /** * Logger. */ private static final Logger LOG = Logger.getLogger(XmlInputStream.class); /** * Construct a new XmlInputStream. * * @param inStream the input stream to wrap. * @param theTag the tag name from which to start reading * @param theLength the expected length of the input stream * @param theEventSpacing the number of characters to read before * firing a progress event. */ public XmlInputStream( InputStream inStream, String theTag, long theLength, long theEventSpacing) { super(inStream); tagName = theTag; endTagName = '/' + theTag; attributes = null; childOnly = false; } /** * Reopen a stream that has already reached the end * of an XML fragment. * * @param theTag the tag name * @param attribs the attributes * @param child child only */ public synchronized void reopen( String theTag, Map attribs, boolean child) { endStream = false; xmlStarted = false; inTag = false; tagName = theTag; endTagName = '/' + theTag; attributes = attribs; childOnly = child; } /** * Reopen a stream that has already reached the end * of an XML fragment. * * @param theTag the tag name */ public synchronized void reopen(String theTag) { endStream = false; xmlStarted = false; inTag = false; tagName = theTag; endTagName = '/' + theTag; attributes = null; childOnly = false; } /* * @see java.io.InputStream#read() */ public synchronized int read() throws IOException { if (!xmlStarted) { skipToTag(); xmlStarted = true; } if (endStream) { return -1; } int ch = super.read(); endStream = isLastTag(ch); return ch; } /* * @see java.io.InputStream#read(byte[], int, int) */ public synchronized int read(byte[] b, int off, int len) throws IOException { if (!xmlStarted) { skipToTag(); xmlStarted = true; } if (endStream) { return -1; } int cnt; for (cnt = 0; cnt < len; ++cnt) { int read = read(); if (read == -1) { break; } b[cnt + off] = (byte) read; } if (cnt > 0) { return cnt; } return -1; } /** * Determines if the character is the last character of the last tag of * interest. * Every character read after the first tag of interest should be passed * through this method in order. * * @param ch the character to test. * @return true if this is the end of the last tag. */ private boolean isLastTag(int ch) { if (ch == '<') { inTag = true; currentTag.setLength(0); } else if (ch == '>') { inTag = false; String tag = currentTag.toString(); if (tag.equals(endTagName) // TODO: The below is not strictly correct, but should // cover the case we deal with. Using a real XML parser // would be better. // Look for XML document has just a single root element || (currentTag.charAt(currentTag.length() - 1) == '/' && tag.startsWith(tagName) && tag.indexOf(' ') == tagName.indexOf(' '))) { return true; } } else if (inTag) { currentTag.append((char) ch); } return false; } /** * Keep on reading an input stream until a specific * sequence of characters has ben read. * This method assumes there is at least one match. * * @throws IOException */ private void skipToTag() throws IOException { char[] searchChars = tagName.toCharArray(); int i; boolean found; while (true) { if (!childOnly) { mark(1000); } // Keep reading till we get the left bracket of an opening tag while (realRead() != '<') { if (!childOnly) { mark(1000); } } found = true; // Compare each following character to see // that it matches the tag we want for (i = 0; i < tagName.length(); ++i) { int c = realRead(); if (c != searchChars[i]) { found = false; break; } } int terminator = realRead(); // We also want to match with the right bracket of the tag or // some other terminator if (found && !isNameTerminator((char) terminator)) { found = false; } if (found) { // We've found the matching tag but do we have // the correct instance with matching attributes? if (attributes != null) { Map attributesFound = new HashMap(); if (terminator != '>') { attributesFound = readAttributes(); } // Search all attributes found to those expected. // If any don't match then turn off the found flag // so that we search for the next matching tag. Iterator it = attributes.entrySet().iterator(); while (found && it.hasNext()) { Map.Entry pair = (Map.Entry) it.next(); if (!pair.getValue().equals( attributesFound.get(pair.getKey()))) { found = false; } } } } if (found) { if (instanceCount < 0) { found = false; ++instanceCount; } } if (found) { if (childOnly) { // Read the name of the child tag // and then reset read position // back to that child tag. mark(1000); while (realRead() != '<') { /* do nothing */ } tagName = ""; char ch = (char) realRead(); while (!isNameTerminator(ch)) { tagName += ch; ch = (char) realRead(); } endTagName = "/" + tagName; LOG.info("Start tag = " + tagName); LOG.info("End tag = " + endTagName); } reset(); return; } } } private boolean isNameTerminator(char ch) { return (ch == '>' || Character.isWhitespace(ch)); } /** * Having read the inputstream up until the tag name. * This method continues to read the contents of the tag to * retrieve any attribute names and values. * @return a map of name value pairs. * @throws IOException */ private Map readAttributes() throws IOException { Map attributesFound = new HashMap(); int character; while ((character = realRead()) != '>') { if (!Character.isWhitespace((char) character)) { StringBuffer attributeName = new StringBuffer(); attributeName.append((char) character); while ((character = realRead()) != '=' && !Character.isWhitespace((char) character)) { attributeName.append((char) character); } // Skip any whitespace till we should be on an equals sign. while (Character.isWhitespace((char) character)) { character = realRead(); } if (character != '=') { throw new IOException( "Expected = sign after attribute " + attributeName); } // Skip any whitespace till we should be on a quote symbol. int quoteSymbol = realRead(); while (Character.isWhitespace((char) quoteSymbol)) { quoteSymbol = realRead(); } if (quoteSymbol != '"' && quoteSymbol != '\'') { throw new IOException( "Expected \" or ' around attribute value after " + "attribute " + attributeName); } StringBuffer attributeValue = new StringBuffer(); while ((character = realRead()) != quoteSymbol) { attributeValue.append((char) character); } attributesFound.put( attributeName.toString(), attributeValue.toString()); } } return attributesFound; } /** * The close method is overridden to prevent some class out of * our control from closing the stream (such as a SAX parser). * Use realClose() to finally close the stream for real. * @throws IOException to satisfy ancestor but will never happen. */ public void close() throws IOException { } /** * Really close the input. * * @throws IOException if an I/O error occurs. */ public void realClose() throws IOException { super.close(); } private int realRead() throws IOException { int read = super.read(); if (read == -1) { throw new IOException("Tag " + tagName + " not found"); } return read; } }