/* * Sun Public License Notice * * The contents of this file are subject to the Sun Public License * Version 1.0 (the "License"). You may not use this file except in * compliance with the License. A copy of the License is available at * http://www.sun.com/ * * The Original Code is NetBeans. The Initial Developer of the Original * Code is Sun Microsystems, Inc. Portions Copyright 1997-2002 Sun * Microsystems, Inc. All Rights Reserved. */ package org.openide.xml; import java.io.*; import javax.xml.parsers.*; import org.w3c.dom.*; import org.xml.sax.*; /** * Utility class collecting library methods related to XML processing. * * <div class="nonnormative"> * * <p>Remember that when parsing XML files you often want to set an explicit * entity resolver. For example, consider a file such as this:</p> * * <pre> * <?<font class="keyword">xml</font> <font class="variable-name">version</font>=<font class="string">"1.0"</font> <font class="variable-name">encoding</font>=<font class="string">"UTF-8"</font>?> * <!<font class="keyword">DOCTYPE</font> <font class="type">root</font> <font class="keyword">PUBLIC</font> <font class="string">"-//NetBeans//DTD Foo 1.0//EN"</font> <font class="string">"http://www.netbeans.org/dtds/foo-1_0.dtd"</font>> * <<font class="function-name">root</font>/> * </pre> * * <p>If you parse this with a null entity resolver, or you use the * default resolver ({@link EntityCatalog#getDefault}) but do not do * anything special with this DTD, you will probably find the parse * blocking to make a network connection <em>even when you are not * validating</em>. That is because DTDs can be used to define * entities and other XML oddities, and are not a pure constraint * language like Schema or RELAX-NG.</p> * * <p>There are three basic ways to avoid the network connection.</p> * * <ol> * * <li><p>Register the DTD. This is generally the best thing to do. See * {@link EntityCatalog}'s documentation for details, but for example * in your layer use:</p> * * <pre> * <<font class="function-name">filesystem</font>> * <<font class="function-name">folder</font> <font class="variable-name">name</font>=<font class="string">"xml"</font>> * <<font class="function-name">folder</font> <font class="variable-name">name</font>=<font class="string">"entities"</font>> * <<font class="function-name">folder</font> <font class="variable-name">name</font>=<font class="string">"NetBeans"</font>> * <<font class="function-name">file</font> <font class="variable-name">name</font>=<font class="string">"DTD_Foo_1_0"</font> * <font class="variable-name">url</font>=<font class="string">"nbres:/org/netbeans/modules/mymod/resources/foo-1_0.dtd"</font>> * <<font class="function-name">attr</font> <font class="variable-name">name</font>=<font class="string">"hint.originalPublicID"</font> * <font class="variable-name">stringvalue</font>=<font class="string">"-//NetBeans//DTD Foo 1.0//EN"</font>/> * </<font class="function-name">file</font>> * </<font class="function-name">folder</font>> * </<font class="function-name">folder</font>> * </<font class="function-name">folder</font>> * </<font class="function-name">filesystem</font>> * </pre> * * <p>Now the default system entity catalog will resolve the public ID * to the local copy in your module, not the network copy. * Additionally, anyone who mounts the "NetBeans Catalog" in the XML * Entity Catalogs node in the Runtime tab will be able to use your * local copy of the DTD automatically, for validation, code * completion, etc. (The network URL should really exist, though, for * the benefit of other tools!)</p></li> * * <li><p>You can also set an explicit entity resolver which maps that * particular public ID to some local copy of the DTD, if you do not * want to register it globally in the system for some reason. If * handed other public IDs, just return null to indicate that the * system ID should be loaded.</p></li> * * <li><p>In some cases where XML parsing is very * performance-sensitive, and you know that you do not need validation * and furthermore that the DTD defines no infoset (there are no * entity or character definitions, etc.), you can speed up the parse. * Turn off validation, but also supply a custom entity resolver that * does not even bother to load the DTD at all:</p> * * <pre> * <font class="keyword">public</font> <font class="type">InputSource</font> <font class="function-name">resolveEntity</font>(<font class="type">String</font> <font class="variable-name">pubid</font>, <font class="type">String</font> <font class="variable-name">sysid</font>) * <font class="keyword">throws</font> <font class="type">SAXException</font>, <font class="type">IOException</font> { * <font class="keyword">if</font> (pubid.equals(<font class="string">"-//NetBeans//DTD Foo 1.0//EN"</font>)) { * <font class="keyword">return</font> <font class="keyword">new</font> <font class="type">InputSource</font>(<font class="keyword">new</font> <font class="type">ByteArrayInputStream</font>(<font class="keyword">new</font> <font class="type">byte</font>[0])); * } <font class="keyword">else</font> { * <font class="keyword">return</font> EntityCatalog.getDefault().resolveEntity(pubid, sysid); * } * } * </pre></li> * * </ol> * * </div> * * @author Petr Kuzel * @since release 3.2 */ public final class XMLUtil extends Object { /** Forbids creating new XMLUtil */ private XMLUtil() { } // ~~~~~~~~~~~~~~~~~~~~~ SAX related ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /** Create a simple parser. * @return <code>createXMLReader(false, false)</code> */ public static XMLReader createXMLReader() throws SAXException { return createXMLReader(false, false); } /** Create a simple parser, possibly validating. * @param validate if true, a validating parser is returned * @return <code>createXMLReader(validate, false)</code> */ public static XMLReader createXMLReader(boolean validate) throws SAXException{ return createXMLReader(validate, false); } /** Create a SAX parser from the JAXP factory. * The result can be used to parse XML files. * * <p>See class Javadoc for hints on setting an entity resolver. * This parser has its entity resolver set to the system entity resolver chain. * * @param validate if true, a validating parser is returned * @param namespaceAware if true, a namespace aware parser is returned * * @throws FactoryConfigurationError Application developers should never need to directly catch errors of this type. * @throws SAXException if a parser fulfilling given parameters can not be created * * @return XMLReader configured according to passed parameters */ public static XMLReader createXMLReader(boolean validate, boolean namespaceAware) throws SAXException { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setValidating(validate); factory.setNamespaceAware(namespaceAware); try { return factory.newSAXParser().getXMLReader(); } catch (ParserConfigurationException ex) { throw new SAXException("Cannot create parser satisfying configuration parameters", ex); //NOI18N } } // ~~~~~~~~~~~~~~~~~~~~~ DOM related ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /** * Creates empty DOM Document using JAXP factoring. E.g.: * <p><pre> * Document doc = createDocument("book", null, null, null); * </pre><p> * creates new DOM of a well-formed document with root element named book. * * @param rootQName qualified name of root element. e.g. <code>myroot</code> or <code>ns:myroot</code> * @param namespaceURI URI of root element namespace or <code>null</code> * @param doctypePublicID public ID of DOCTYPE or <code>null</code> * @param doctypeSystemID system ID of DOCTYPE or <code>null</code> if no DOCTYPE * required and doctypePublicID is also <code>null</code> * * @throws DOMException if new DOM with passed parameters can not be created * @throws FactoryConfigurationError Application developers should never need to directly catch errors of this type. * * @return new DOM Document */ public static Document createDocument(String rootQName, String namespaceURI, String doctypePublicID, String doctypeSystemID) throws DOMException { DOMImplementation impl = getDOMImplementation(); if (doctypePublicID != null && doctypeSystemID == null) { throw new IllegalArgumentException("System ID cannot be null if public ID specified. "); //NOI18N } DocumentType dtd = null; if (doctypeSystemID != null) { dtd = impl.createDocumentType(rootQName, doctypePublicID, doctypeSystemID); } return impl.createDocument(namespaceURI, rootQName, dtd); } /** * Obtains DOMImpementaton interface providing a number of methods for performing * operations that are independent of any particular DOM instance. * * @throw DOMException <code>NOT_SUPPORTED_ERR</code> if cannot get DOMImplementation * @throw FactoryConfigurationError Application developers should never need to directly catch errors of this type. * * @return DOMImplementation implementation */ private static DOMImplementation getDOMImplementation() throws DOMException { //can be made public DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); try { return factory.newDocumentBuilder().getDOMImplementation(); } catch (ParserConfigurationException ex) { throw new DOMException(DOMException.NOT_SUPPORTED_ERR , "Cannot create parser satisfying configuration parameters"); //NOI18N } } /** * Create from factory a DocumentBuilder and let it create a org.w3c.dom.Document. * This method takes InputSource. After successful finish the document tree is returned. * * @param input a parser input (for URL users use: <code>new InputSource(url.toExternalForm())</code> * @param validate if true validating parser is used * @param namespaceAware if true DOM is created by namespace aware parser * @param errorHandler a error handler to notify about exception or <code>null</code> * @param entityResolver SAX entity resolver or <code>null</code>; see class Javadoc for hints * * @throws IOException if an I/O problem during parsing occurs * @throws SAXException is thrown if a parser error occurs * @throws FactoryConfigurationError Application developers should never need to directly catch errors of this type. * * @return document representing given input, or null if a parsing error occurs */ public static Document parse ( InputSource input, boolean validate, boolean namespaceAware, ErrorHandler errorHandler, EntityResolver entityResolver ) throws IOException, SAXException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setValidating(validate); factory.setNamespaceAware(namespaceAware); DocumentBuilder builder = null; try { builder = factory.newDocumentBuilder(); } catch (ParserConfigurationException ex) { throw new SAXException("Cannot create parser satisfying configuration parameters", ex); //NOI18N } if (errorHandler != null) { builder.setErrorHandler(errorHandler); } if (entityResolver != null) { builder.setEntityResolver(entityResolver); } return builder.parse(input); } /** * Write Document into OutputStream using given encoding. * It is a shortcut for writing configurations etc. It guarantees * just that data will be written. Structure and indentation may change. * * @param doc DOM Document to be written * @param out data sink * @param enc XML defined encoding name (i.e. IANA defined, one of UTF-8, UNICODE, ASCII) * * @throws IOException if an I/O exception occurs */ public static void write(Document doc, OutputStream out, String enc) throws IOException { XMLUtilImpl.write(doc, out, enc); } /** * Escape passed string as XML attibute value * (<code><</code>, <code>&</code>, <code>'</code> and <code>"</code> * will be escaped. * Note: An XML processor returns normalized value that can be different. * * @param val a string to be escaped * * @return escaped value * @throws CharConversionException if val contains an improper XML character * * @since 1.40 */ public static String toAttributeValue(String val) throws CharConversionException { if (val == null) throw new CharConversionException("null"); // NOI18N if (checkAttributeCharacters(val)) return val; StringBuffer buf = new StringBuffer(); for (int i = 0; i<val.length(); i++) { char ch = val.charAt(i); if ('<' == ch) { buf.append("<"); continue; } else if ('&' == ch) { buf.append("&"); continue; } else if ('\'' == ch) { buf.append("'"); continue; } else if ('"' == ch) { buf.append("""); continue; } buf.append(ch); } return buf.toString(); } /** * Escape passed string as XML element content (<code><</code>, * <code>&</code> and <code>><code> in <code>]]></code> sequences). * * @param val a string to be escaped * * @return escaped value * @throws CharConversionException if val contains an improper XML character * * @since 1.40 */ public static String toElementContent(String val) throws CharConversionException { if (val == null) throw new CharConversionException("null"); // NOI18N if (checkContentCharacters(val)) return val; StringBuffer buf = new StringBuffer(); for (int i = 0; i<val.length(); i++) { char ch = val.charAt(i); if ('<' == ch) { buf.append("<"); continue; } else if ('&' == ch) { buf.append("&"); continue; } else if ('>' == ch && i>1 && val.charAt(i-2) == ']' && val.charAt(i-1) == ']') { buf.append(">"); continue; } buf.append(ch); } return buf.toString(); } /* public static String toCDATA(String val) throws IOException { } */ private static final char[] DEC2HEX = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; /** * Can be used to encode values that contain invalid XML characters. * At SAX parser end must be used pair method to get original value. * * @param val data to be converted * @param start offset * @param len count * * @since 1.29 */ public static String toHex(byte[] val, int start, int len) { StringBuffer buf = new StringBuffer(); for (int i = 0; i<len; i++) { byte b = val[start + i]; buf.append(DEC2HEX[(b & 0xf0) >> 4]); buf.append(DEC2HEX[b & 0x0f]); } return buf.toString(); } /** * Decodes data encoded using {@link #toHex(byte[],int,int) toHex}. * * @param hex data to be converted * @param start offset * @param len count * * @throws IOException if input does not represent hex encoded value * * @since 1.29 */ public static byte[] fromHex(char[] hex, int start, int len) throws IOException { if (hex == null) throw new IOException("null"); int i = hex.length; if (i % 2 != 0) throw new IOException("odd length"); byte[] magic = new byte[i/2]; for (;i>0; i-=2) { String g = new String(hex, i-2, 2); try { magic[(i/2) -1] = (byte) Integer.parseInt(g, 16); } catch (NumberFormatException ex) { throw new IOException(ex.getLocalizedMessage()); } } return magic; } /** * Check if all passed characters match XML expression [2]. * @return true if no escaping necessary * @throws CharConversionException if contains invalid chars */ private static boolean checkAttributeCharacters(String chars) throws CharConversionException { boolean escape = false; for (int i = 0; i<chars.length(); i++) { char ch = chars.charAt(i); if (((int)ch) <= 93) { // we are UNICODE ']' switch (ch) { case 0x9: case 0xA: case 0xD: continue; case '\'': case '"': case '<': case '&': escape = true; continue; default: if (((int) ch) < 0x20) { throw new CharConversionException("Invalid XML character &#" + ((int)ch) + ";."); } } } } return escape == false; } /** * Check if all passed characters match XML expression [2]. * @return true if no escaping necessary * @throws CharConversionException if contains invalid chars */ private static boolean checkContentCharacters(String chars) throws CharConversionException { boolean escape = false; for (int i = 0; i<chars.length(); i++) { char ch = chars.charAt(i); if (((int)ch) <= 93) { // we are UNICODE ']' switch (ch) { case 0x9: case 0xA: case 0xD: continue; case '>': // only ]]> is dangerous if (escape) continue; escape = i > 0 && (chars.charAt(i - 1) == ']'); continue; case '<': case '&': escape = true; continue; default: if (((int) ch) < 0x20) { throw new CharConversionException("Invalid XML character &#" + ((int)ch) + ";."); } } } } return escape == false; } }