/******************************************************************************* * Copyright (c) 2014 Open Door Logistics (www.opendoorlogistics.com) * All rights reserved. This program and the accompanying materials * are made available under the terms of the GNU Lesser Public License v3 * which accompanies this distribution, and is available at http://www.gnu.org/licenses/lgpl.txt ******************************************************************************/ package com.opendoorlogistics.core.utils; import java.io.ByteArrayInputStream; import java.io.File; import java.io.StringWriter; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.apache.commons.io.IOUtils; import org.w3c.dom.Document; import org.w3c.dom.Node; import com.sun.org.apache.xml.internal.serialize.OutputFormat; import com.sun.org.apache.xml.internal.serialize.XMLSerializer; final public class XMLUtils { private XMLUtils() { } public static String toString(Node node) { try { TransformerFactory transformerFactory = TransformerFactory.newInstance(); Transformer transformer = transformerFactory.newTransformer(); DOMSource source = new DOMSource(node); StringWriter writer = new StringWriter(); StreamResult result = new StreamResult(writer); transformer.transform(source, result); return writer.toString(); } catch (Throwable e) { throw new RuntimeException(e); } } public static OutputFormat getPrettyPrintFormat() { OutputFormat format = new OutputFormat(); format.setLineWidth(120); format.setIndenting(true); format.setIndent(2); format.setEncoding("UTF-8"); return format; } public static String toString(Node doc, OutputFormat format) { try { StringWriter stringOut = new StringWriter(); XMLSerializer serial = new XMLSerializer(stringOut, format); serial.serialize(doc); return stringOut.toString(); } catch (Throwable e) { throw new RuntimeException(e); } } public static Document load(File file) { try { if (file.exists()) { DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); Document doc = dBuilder.parse(file); TextNodesRemover.cleanEmptyTextNodes(doc); return doc; } } catch (Throwable e) { throw new RuntimeException(e); } return null; } public static Document parse(String xml) { try { DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); Document doc = dBuilder.parse(new ByteArrayInputStream(xml.getBytes("UTF-8"))); TextNodesRemover.cleanEmptyTextNodes(doc); return doc; } catch (Throwable e) { throw new RuntimeException(e); } } /** * See http://stackoverflow.com/questions/16641835/strange-xml-indentation. Removes text nodes that only contains whitespace. The conditions for removing * text nodes, besides only containing whitespace, are: If the parent node has at least one child of any of the following types, all whitespace-only * text-node children will be removed: - ELEMENT child - CDATA child - COMMENT child * * The purpose of this is to make the format() method (that use a Transformer for formatting) more consistent regarding indenting and line breaks. */ private static class TextNodesRemover { private static void cleanEmptyTextNodes(Node parentNode) { boolean removeEmptyTextNodes = false; Node childNode = parentNode.getFirstChild(); while (childNode != null) { removeEmptyTextNodes |= checkNodeTypes(childNode); childNode = childNode.getNextSibling(); } if (removeEmptyTextNodes) { removeEmptyTextNodes(parentNode); } } private static void removeEmptyTextNodes(Node parentNode) { Node childNode = parentNode.getFirstChild(); while (childNode != null) { // grab the "nextSibling" before the child node is removed Node nextChild = childNode.getNextSibling(); short nodeType = childNode.getNodeType(); if (nodeType == Node.TEXT_NODE) { boolean containsOnlyWhitespace = childNode.getNodeValue().trim().isEmpty(); if (containsOnlyWhitespace) { parentNode.removeChild(childNode); } } childNode = nextChild; } } private static boolean checkNodeTypes(Node childNode) { short nodeType = childNode.getNodeType(); if (nodeType == Node.ELEMENT_NODE) { cleanEmptyTextNodes(childNode); // recurse into subtree } if (nodeType == Node.ELEMENT_NODE || nodeType == Node.CDATA_SECTION_NODE || nodeType == Node.COMMENT_NODE) { return true; } else { return false; } } } }