package marubinotto.util.xml; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.StringReader; import java.io.StringWriter; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Locale; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Source; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import marubinotto.util.Assert; import org.apache.commons.lang.UnhandledException; import org.w3c.dom.Attr; import org.w3c.dom.CharacterData; import org.w3c.dom.Comment; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; import org.xml.sax.SAXException; /** * <p>Utilities related to handling DOM.</p> * * <p>This utility class uses JAXP(Java API for XML Processing) * to parse and transform XML documents internally. JAXP is independent of a particular * XML processing implementation. To switch parser implementation behind JAXP, * you can find detail instructions at {@link DocumentBuilderFactory#newInstance}.</p> * * <h4>XML parser configuration</h4> * <p>The list below describes the configurations of JAXP configured * by this utility as default.</p> * <ul> * <li>{@link DocumentBuilderFactory#isCoalescing} => false</li> * <li>{@link DocumentBuilderFactory#isExpandEntityReferences} => true</li> * <li>{@link DocumentBuilderFactory#isIgnoringComments} => false</li> * <li>{@link DocumentBuilderFactory#isIgnoringElementContentWhitespace} => false</li> * <li>{@link DocumentBuilderFactory#isNamespaceAware} => true</li> * </ul> * * <p>Check the XML1.0 W3C Recommendation * (<a href="http://www.w3.org/TR/2000/REC-xml-20001006"> * http://www.w3.org/TR/2000/REC-xml-20001006</a>) * for more detail about XML specification.</p> * * @version $Id: DomUtils.java 1286 2008-03-20 15:39:37Z morita $ */ public class DomUtils { public static final String XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"; public static final String XML_SCHEMA_NAMESPACE = "http://www.w3.org/2001/XMLSchema-instance"; public static Document buildNewDocument() { DocumentBuilder builder = createDocumentBuilder(false); return builder.newDocument(); } /** * <p>Builds a DOM tree parsing the given String.</p> */ public static Document buildDocument(String xml) throws SAXException, IOException { Assert.Arg.notNull(xml, "xml"); DocumentBuilder builder = createDocumentBuilder(false); return builder.parse(new InputSource(new StringReader(xml))); } /** * <p>Builds a DOM tree parsing the specificed file.</p> */ public static Document buildDocument(File xmlFile) throws SAXException, IOException { Assert.Arg.notNull(xmlFile, "xmlFile"); DocumentBuilder builder = createDocumentBuilder(false); return builder.parse(xmlFile); } /** * <p>Builds a DOM tree parsing the given stream data.</p> */ public static Document buildDocument(InputStream input) throws SAXException, IOException { Assert.Arg.notNull(input, "input"); DocumentBuilder builder = createDocumentBuilder(false); return builder.parse(input); } /** * <p>Converts the given DOM node to String.</p> * * <p>If the given node is an <b>Attr</b> or * <b>CharacterData (CDATASection, Comment, Text),</b> * the node will be converted to its value.</p> */ public static String toString(Node node) { Assert.Arg.notNull(node, "node"); if (node instanceof Attr) { return ((Attr)node).getValue(); } else if (node instanceof CharacterData) { return ((CharacterData)node).getData(); } StringWriter buffer = new StringWriter(); try { serialize(node, new StreamResult(buffer), "UTF-8"); } catch (TransformerException e) { throw new UnhandledException(e); } return buffer.toString(); } /** * <p>Serializes the given DOM node to the specified file.</p> * * @param node node to be serialized * @param file file to which the given node will be serialized * @param encoding name of a supported XML charset * @throws IOException */ public static void serialize(Node node, File file, String encoding) throws TransformerException, IOException { // J2SE 5.0 StreamResult bug? // serialize(node, new StreamResult(file), encoding); OutputStream output = new BufferedOutputStream(new FileOutputStream(file)); try { serialize(node, output, encoding); } finally { output.close(); } } /** * <p>Serializes the given DOM node to the given OutputStream.</p> * * @param node node to be serialized * @param output OutputStream to which the given node will be serialized * @param encoding name of a supported XML charset */ public static void serialize(Node node, OutputStream output, String encoding) throws TransformerException { serialize(node, new StreamResult(output), encoding); } /** * Returns a boolean value indicating whether the given Element is a root Element. */ public static boolean isRootElement(Element element) { Assert.Arg.notNull(element, "element"); return element.getParentNode() instanceof Document; } /** * Returns the language code of the given node. * The language code is specified with an "xml:lang" attribute. * If the given node does not have an "xml:lang" attribute, * the parents of the node will be examined recursively. * If no "xml:lang" attributes are found, this method returns null. */ public static String getLang(Node node) { Assert.Arg.notNull(node, "node"); while (true) { if (node instanceof Element) { Attr lang = ((Element) node).getAttributeNodeNS(XML_NAMESPACE, "lang"); if (lang != null) { return lang.getValue(); } } node = node.getParentNode(); if (node == null) { break; } } return null; } /** * Selects the node from the given list, that is the first node * of those which correspond to the specified locale(language code). * If a corresponding node is not found, the first node of those which * do not have an "xml:lang" attribute will be returned as a default. */ public static Node selectByLang(List<Node> nodes, Locale locale) { Assert.Arg.notNull(nodes, "nodes"); Assert.Arg.notNull(locale, "locale"); if (nodes.size() == 0) { return null; } Node firstNoLangNode = null; for (Iterator<Node> i = nodes.iterator(); i.hasNext();) { Node node = i.next(); String lang = getLang(node); if (lang == null && firstNoLangNode == null) { firstNoLangNode = node; } if (lang != null && lang.equals(locale.getLanguage())) { return node; } } return firstNoLangNode; } /** * Sets the specified locale(language code) to the given element * as an "xml:lang" attribute. */ public static void setLang(Element element, Locale locale) { Assert.Arg.notNull(element, "element"); Assert.Arg.notNull(locale, "locale"); element.setAttributeNS(XML_NAMESPACE, "xml:lang", locale.getLanguage()); } /** * Adds the given XML fragment to the given node as the last child. * The context of namespace will be applied to the fragment. */ public static Node addFragment(String fragment, Node node) throws SAXException, IOException { Assert.Arg.notNull(fragment, "fragment"); Assert.Arg.notNull(node, "node"); return node.appendChild(buildFragment(fragment, node)); } /** * Adds the given XML fragment to the given node as the first child. * The context of namespace will be applied to the fragment. */ public static Node addFragmentFirst(String fragment, Node node) throws SAXException, IOException { Assert.Arg.notNull(fragment, "fragment"); Assert.Arg.notNull(node, "node"); if (node.getChildNodes().getLength() > 0) { return node.insertBefore(buildFragment(fragment, node), node.getFirstChild()); } else { return node.appendChild(buildFragment(fragment, node)); } } /** * <p>Evaluates the given node as a string.</p> * * <p>If the given node is an <b>Attr</b> or * <b>CharacterData (CDATASection, Comment, Text),</b> * Its value will be returned.</p> * * <p>If the given node is an element, * returns the textual content directly held under this element as a string. * This includes all text within this single element, * including whitespace and CDATA sections if they exist. * The call does not recurse into child elements. * If no textual value exists for the element, a null value is returned.</p> */ public static String getValue(Node node) { Assert.Arg.notNull(node, "node"); if (node instanceof CharacterData) { return ((CharacterData)node).getData(); } else if (node instanceof Element) { return getText((Element)node); } else { return node.getNodeValue(); } } /** * <p>Sets the String value to the given node.</p> * * <p>If the given node is an element, * Sets the content of the element to be the text given. * All existing text content and non-text context is removed.</p> */ public static void setValue(Node node, String value) { Assert.Arg.notNull(node, "node"); if (node instanceof CharacterData) { ((CharacterData)node).setData(value); } else if (node instanceof Element) { removeAllChildNodes(node); node.appendChild(node.getOwnerDocument().createTextNode(value)); } else { node.setNodeValue(value); } } /** * Removes all child nodes from the given node. */ public static void removeAllChildNodes(Node node) { NodeList children = node.getChildNodes(); while (children.getLength() > 0) { node.removeChild(children.item(0)); } } /** * Returns all namespace declaration attributes that * the ancestor-or-self nodes of the given node have. * A name of a namespace declaration attribute starts with "xmlns". */ public static Attr[] collectNamespaceDefinitions(Node node) { Assert.Arg.notNull(node, "node"); List<Attr> nsAttrs = new ArrayList<Attr>(); do { if (!(node instanceof Element)) { continue; } NamedNodeMap attrs = ((Element)node).getAttributes(); for (int i = 0; i < attrs.getLength(); i++) { Attr attr = (Attr)attrs.item(i); if (attr.getName().startsWith("xmlns")) { nsAttrs.add(attr); } } } while ((node = node.getParentNode()) != null); return (Attr[])nsAttrs.toArray(new Attr[0]); } public static List<Node> toList(NodeList nodeList) { Assert.Arg.notNull(nodeList, "nodeList"); List<Node> nodes = new ArrayList<Node>(nodeList.getLength()); for (int i = 0; i < nodeList.getLength(); i++) { nodes.add(nodeList.item(i)); } return nodes; } public static List<Element> getChildElements(Node node, String name) { Assert.Arg.notNull(node, "node"); Assert.Arg.notNull(name, "name"); List<Element> elements = new ArrayList<Element>(); NodeList children = node.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { Node child = children.item(i); if (child.getNodeType() == Node.ELEMENT_NODE && child.getNodeName().equals(name)) { elements.add((Element)child); } } return elements; } public static Element getFirstChildElement(Node node, String name) { Assert.Arg.notNull(node, "node"); Assert.Arg.notNull(name, "name"); NodeList children = node.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { Node child = children.item(i); if (child.getNodeType() == Node.ELEMENT_NODE && child.getNodeName().equals(name)) { return (Element)child; } } return null; } // Private methods private static DocumentBuilder createDocumentBuilder(boolean validating) { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setCoalescing(false); factory.setExpandEntityReferences(true); factory.setIgnoringComments(false); factory.setIgnoringElementContentWhitespace(false); factory.setNamespaceAware(true); factory.setValidating(validating); try { return factory.newDocumentBuilder(); } catch (ParserConfigurationException e) { throw new UnhandledException(e); } } private static void serialize(Node node, StreamResult result, String encoding) throws TransformerException { Assert.Arg.notNull(node, "node"); Assert.Arg.notNull(result, "result"); Source source = new DOMSource(node); Transformer transformer = TransformerFactory.newInstance().newTransformer(); if (encoding != null) { transformer.setOutputProperty(OutputKeys.ENCODING, encoding); } if (node instanceof Document) { transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no"); } else { transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); } transformer.transform(source, result); } private static Node buildFragment(String fragment, Node context) throws SAXException, IOException { Attr[] nsAttrs = collectNamespaceDefinitions(context); StringBuffer startTag = new StringBuffer("<Fragment"); for (int i = 0; i < nsAttrs.length; i++) { startTag.append(" " + nsAttrs[i]); } startTag.append(">"); fragment = startTag.toString() + fragment + "</Fragment>"; Node fragmentTree = buildDocument(fragment) .getDocumentElement().getFirstChild(); return context.getOwnerDocument().importNode(fragmentTree, true); } /** * Returns the textual content directly held under this element as a string. * This includes all text within this single element, * including whitespace and CDATA sections if they exist. * The call does not recurse into child elements. * If no textual value exists for the element, a null value is returned. */ public static String getText(Element element) { StringBuffer text = new StringBuffer(); NodeList children = element.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { Node child = children.item(i); if (child instanceof CharacterData && !(child instanceof Comment)) { text.append(((CharacterData)child).getData()); } } if (text.length() > 0) { return text.toString(); } else { return null; } } private DomUtils() { } }