/* * Copyright 2003-2008 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package groovy.util; import groovy.xml.FactorySupport; import groovy.xml.QName; import org.xml.sax.*; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import java.io.*; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; /** * A helper class for parsing XML into a tree of Node instances for a * simple way of processing XML. This parser does not preserve the XML * InfoSet - if that's what you need try using W3C DOM, dom4j, JDOM, XOM etc. * This parser ignores comments and processing instructions and converts * the XML into a Node for each element in the XML with attributes * and child Nodes and Strings. This simple model is sufficient for * most simple use cases of processing XML. * <p/> * Example usage: * <pre> * def xml = '<root><one a1="uno!"/><two>Some text!</two></root>' * def rootNode = new XmlParser().parseText(xml) * assert rootNode.name() == 'root' * assert rootNode.one[0].@a1 == 'uno!' * assert rootNode.two.text() == 'Some text!' * rootNode.children().each { assert it.name() in ['one','two'] } * </pre> * * @author <a href="mailto:james@coredevelopers.net">James Strachan</a> * @author Paul King * @version $Revision$ */ public class XmlParser implements ContentHandler { private StringBuffer bodyText = new StringBuffer(); private List<Node> stack = new ArrayList<Node>(); private Locator locator; private XMLReader reader; private Node parent; private boolean trimWhitespace = true; private boolean namespaceAware; public XmlParser() throws ParserConfigurationException, SAXException { this(false, true); } public XmlParser(boolean validating, boolean namespaceAware) throws ParserConfigurationException, SAXException { SAXParserFactory factory = FactorySupport.createSaxParserFactory(); factory.setNamespaceAware(namespaceAware); this.namespaceAware = namespaceAware; factory.setValidating(validating); reader = factory.newSAXParser().getXMLReader(); } public XmlParser(XMLReader reader) { this.reader = reader; } public XmlParser(SAXParser parser) throws SAXException { reader = parser.getXMLReader(); } /** * Returns the current trim whitespace setting. * * @return true if whitespace will be trimmed */ public boolean isTrimWhitespace() { return trimWhitespace; } /** * Sets the trim whitespace setting value. * * @param trimWhitespace the desired setting value */ public void setTrimWhitespace(boolean trimWhitespace) { this.trimWhitespace = trimWhitespace; } /** * Parses the content of the given file as XML turning it into a tree * of Nodes. * * @param file the File containing the XML to be parsed * @return the root node of the parsed tree of Nodes * @throws SAXException Any SAX exception, possibly * wrapping another exception. * @throws IOException An IO exception from the parser, * possibly from a byte stream or character stream * supplied by the application. */ public Node parse(File file) throws IOException, SAXException { InputSource input = new InputSource(new FileInputStream(file)); input.setSystemId("file://" + file.getAbsolutePath()); getXMLReader().parse(input); return parent; } /** * Parse the content of the specified input source into a tree of Nodes. * * @param input the InputSource for the XML to parse * @return the root node of the parsed tree of Nodes * @throws SAXException Any SAX exception, possibly * wrapping another exception. * @throws IOException An IO exception from the parser, * possibly from a byte stream or character stream * supplied by the application. */ public Node parse(InputSource input) throws IOException, SAXException { getXMLReader().parse(input); return parent; } /** * Parse the content of the specified input stream into a tree of Nodes. * <p/> * Note that using this method will not provide the parser with any URI * for which to find DTDs etc * * @param input an InputStream containing the XML to be parsed * @return the root node of the parsed tree of Nodes * @throws SAXException Any SAX exception, possibly * wrapping another exception. * @throws IOException An IO exception from the parser, * possibly from a byte stream or character stream * supplied by the application. */ public Node parse(InputStream input) throws IOException, SAXException { InputSource is = new InputSource(input); getXMLReader().parse(is); return parent; } /** * Parse the content of the specified reader into a tree of Nodes. * <p/> * Note that using this method will not provide the parser with any URI * for which to find DTDs etc * * @param in a Reader to read the XML to be parsed * @return the root node of the parsed tree of Nodes * @throws SAXException Any SAX exception, possibly * wrapping another exception. * @throws IOException An IO exception from the parser, * possibly from a byte stream or character stream * supplied by the application. */ public Node parse(Reader in) throws IOException, SAXException { InputSource is = new InputSource(in); getXMLReader().parse(is); return parent; } /** * Parse the content of the specified URI into a tree of Nodes. * * @param uri a String containing a uri pointing to the XML to be parsed * @return the root node of the parsed tree of Nodes * @throws SAXException Any SAX exception, possibly * wrapping another exception. * @throws IOException An IO exception from the parser, * possibly from a byte stream or character stream * supplied by the application. */ public Node parse(String uri) throws IOException, SAXException { InputSource is = new InputSource(uri); getXMLReader().parse(is); return parent; } /** * A helper method to parse the given text as XML. * * @param text the XML text to parse * @return the root node of the parsed tree of Nodes * @throws SAXException Any SAX exception, possibly * wrapping another exception. * @throws IOException An IO exception from the parser, * possibly from a byte stream or character stream * supplied by the application. */ public Node parseText(String text) throws IOException, SAXException { return parse(new StringReader(text)); } /** * Determine if namspace handling is enabled. * * @return true if namespace handling is enabled */ public boolean isNamespaceAware() { return namespaceAware; } /** * Enable and/or disable namspace handling. * * @param namespaceAware the new desired value */ public void setNamespaceAware(boolean namespaceAware) { this.namespaceAware = namespaceAware; } // Delegated XMLReader methods //------------------------------------------------------------------------ /* (non-Javadoc) * @see org.xml.sax.XMLReader#getDTDHandler() */ public DTDHandler getDTDHandler() { return this.reader.getDTDHandler(); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#getEntityResolver() */ public EntityResolver getEntityResolver() { return this.reader.getEntityResolver(); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#getErrorHandler() */ public ErrorHandler getErrorHandler() { return this.reader.getErrorHandler(); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#getFeature(java.lang.String) */ public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException { return this.reader.getFeature(uri); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#getProperty(java.lang.String) */ public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException { return this.reader.getProperty(uri); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler) */ public void setDTDHandler(final DTDHandler dtdHandler) { this.reader.setDTDHandler(dtdHandler); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver) */ public void setEntityResolver(final EntityResolver entityResolver) { this.reader.setEntityResolver(entityResolver); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) */ public void setErrorHandler(final ErrorHandler errorHandler) { this.reader.setErrorHandler(errorHandler); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean) */ public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException { this.reader.setFeature(uri, value); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object) */ public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException { reader.setProperty(uri, value); } // ContentHandler interface //------------------------------------------------------------------------- public void startDocument() throws SAXException { parent = null; } public void endDocument() throws SAXException { stack.clear(); } public void startElement(String namespaceURI, String localName, String qName, Attributes list) throws SAXException { addTextToNode(); Object nodeName = getElementName(namespaceURI, localName, qName); int size = list.getLength(); Map<Object, String> attributes = new LinkedHashMap<Object, String>(size); for (int i = 0; i < size; i++) { Object attributeName = getElementName(list.getURI(i), list.getLocalName(i), list.getQName(i)); String value = list.getValue(i); attributes.put(attributeName, value); } parent = createNode(parent, nodeName, attributes); stack.add(parent); } public void endElement(String namespaceURI, String localName, String qName) throws SAXException { addTextToNode(); if (!stack.isEmpty()) { stack.remove(stack.size() - 1); if (!stack.isEmpty()) { parent = stack.get(stack.size() - 1); } } } public void characters(char buffer[], int start, int length) throws SAXException { bodyText.append(buffer, start, length); } public void startPrefixMapping(String prefix, String namespaceURI) throws SAXException { } public void endPrefixMapping(String prefix) throws SAXException { } public void ignorableWhitespace(char buffer[], int start, int len) throws SAXException { } public void processingInstruction(String target, String data) throws SAXException { } public Locator getDocumentLocator() { return locator; } public void setDocumentLocator(Locator locator) { this.locator = locator; } public void skippedEntity(String name) throws SAXException { } // Implementation methods //------------------------------------------------------------------------- protected XMLReader getXMLReader() { reader.setContentHandler(this); return reader; } protected void addTextToNode() { String text = bodyText.toString(); if (trimWhitespace) { text = text.trim(); } if (text.length() > 0) { parent.children().add(text); } bodyText = new StringBuffer(); } /** * Creates a new node with the given parent, name, and attributes. The * default implementation returns an instance of * <code>groovy.util.Node</code>. * * @param parent the parent node, or null if the node being created is the * root node * @param name an Object representing the name of the node (typically * an instance of {@link QName}) * @param attributes a Map of attribute names to attribute values * @return a new Node instance representing the current node */ protected Node createNode(Node parent, Object name, Map attributes) { return new Node(parent, name, attributes); } /** * Return a name given the namespaceURI, localName and qName. * * @param namespaceURI the namespace URI * @param localName the local name * @param qName the qualified name * @return the newly created representation of the name */ protected Object getElementName(String namespaceURI, String localName, String qName) { String name = localName; String prefix = ""; if ((name == null) || (name.length() < 1)) { name = qName; } if (namespaceURI == null || namespaceURI.length() <= 0) { return name; } if (qName != null && qName.length() > 0 && namespaceAware) { int index = qName.lastIndexOf(":"); if (index > 0) { prefix = qName.substring(0, index); } } return new QName(namespaceURI, name, prefix); } }