package org.htmlcleaner;

import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * <p>DOM serializer - creates an XML DOM {@link Document} from a cleaned {@link TagNode} tree.</p>
 *
 * <p>Element and attribute names are created namespace-aware or not depending on
 * {@link CleanerProperties#isNamespacesAware()}. Text and attribute values are optionally
 * passed through {@code Utils.escapeXml} before being stored in the DOM.</p>
 */
public class DomSerializer {

    /** Cleaner settings consulted for namespace awareness, CDATA handling and escaping. */
    protected CleanerProperties props;

    /**
     * When {@code true}, attribute values and text content (except script/style content emitted
     * as CDATA) are run through {@code Utils.escapeXml} before being added to the DOM.
     */
    protected boolean escapeXml = true;

    /**
     * Creates a serializer with explicit control over escaping.
     *
     * @param props     cleaner properties used while building the DOM
     * @param escapeXml whether attribute values and text content should be XML-escaped
     */
    public DomSerializer(CleanerProperties props, boolean escapeXml) {
        this.props = props;
        this.escapeXml = escapeXml;
    }

    /**
     * Creates a serializer that XML-escapes attribute values and text content.
     *
     * @param props cleaner properties used while building the DOM
     */
    public DomSerializer(CleanerProperties props) {
        this(props, true);
    }

    /**
     * Builds a new DOM document whose document element corresponds to {@code rootNode}.
     *
     * @param rootNode root of the cleaned tag tree; becomes the document element
     * @return the newly created document
     * @throws ParserConfigurationException if a {@code DocumentBuilder} cannot be created
     */
    public Document createDOM(TagNode rootNode) throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        Document document = factory.newDocumentBuilder().newDocument();

        Element rootElement = createElement(rootNode, document);
        document.appendChild(rootElement);
        setAttributes(rootNode, rootElement);
        createSubnodes(document, rootElement, rootNode.getChildren());

        return document;
    }

    /**
     * Chooses the namespace URI to use for a prefix that has no declaration on the node or its
     * ancestors.
     *
     * <p>The reserved prefixes {@code xml} and {@code xmlns} are bound to fixed URIs; the DOM
     * specification requires {@code createElementNS}/{@code setAttributeNS} to raise
     * {@code NAMESPACE_ERR} when they are paired with any other URI. Every other undeclared
     * prefix keeps the historical fallback of using the prefix itself as the URI.</p>
     *
     * @param prefix the undeclared, non-null namespace prefix
     * @return the URI to pass to the namespace-aware DOM factory methods
     */
    private static String fallbackNamespaceUri(String prefix) {
        if (XMLConstants.XML_NS_PREFIX.equals(prefix)) {
            return XMLConstants.XML_NS_URI;
        }
        if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
            return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
        }
        return prefix;
    }

    /**
     * Creates the DOM element for a tag node, without attributes or children.
     *
     * <p>In namespace-aware mode the namespace URI is looked up first in the node's own
     * declarations and then via {@code getNamespaceURIOnPath}; a prefixed name with no
     * declaration falls back to {@link #fallbackNamespaceUri(String)}. In non-namespace-aware
     * mode a prefixed name is reduced with {@code Utils.getXmlName} and a plain element is
     * created.</p>
     *
     * @param node     tag node supplying the name and namespace declarations
     * @param document owner document used as element factory
     * @return the new, detached element
     */
    private Element createElement(TagNode node, Document document) {
        String name = node.getName();
        boolean nsAware = props.isNamespacesAware();
        String prefix = Utils.getXmlNSPrefix(name);
        Map<String, String> nsDeclarations = node.getNamespaceDeclarations();
        String nsURI = null;

        if (prefix != null) {
            if (nsAware) {
                if (nsDeclarations != null) {
                    nsURI = nsDeclarations.get(prefix);
                }
                if (nsURI == null) {
                    nsURI = node.getNamespaceURIOnPath(prefix);
                }
                if (nsURI == null) {
                    nsURI = fallbackNamespaceUri(prefix);
                }
            } else {
                name = Utils.getXmlName(name);
            }
        } else if (nsAware) {
            // Unprefixed name: look for a default namespace, declared under the empty key.
            if (nsDeclarations != null) {
                nsURI = nsDeclarations.get("");
            }
            if (nsURI == null) {
                // prefix is null here; the lookup is delegated unchanged to the node.
                nsURI = node.getNamespaceURIOnPath(prefix);
            }
        }

        if (nsAware && nsURI != null) {
            return document.createElementNS(nsURI, name);
        }
        return document.createElement(name);
    }

    /**
     * Copies all attributes of a tag node onto a DOM element.
     *
     * <p>Values are escaped first when {@link #escapeXml} is set. Prefixed attributes are set
     * with {@code setAttributeNS} in namespace-aware mode (undeclared prefixes resolved through
     * {@link #fallbackNamespaceUri(String)}), otherwise under the name returned by
     * {@code Utils.getXmlName}. Unprefixed attributes are set as-is.</p>
     *
     * @param node    tag node whose attributes are read
     * @param element DOM element receiving the attributes
     */
    private void setAttributes(TagNode node, Element element) {
        for (Map.Entry<String, String> entry : node.getAttributes().entrySet()) {
            String attrName = entry.getKey();
            String attrValue = entry.getValue();
            if (escapeXml) {
                attrValue = Utils.escapeXml(attrValue, props, true);
            }

            String attPrefix = Utils.getXmlNSPrefix(attrName);
            if (attPrefix == null) {
                element.setAttribute(attrName, attrValue);
            } else if (props.isNamespacesAware()) {
                String nsURI = node.getNamespaceURIOnPath(attPrefix);
                if (nsURI == null) {
                    nsURI = fallbackNamespaceUri(attPrefix);
                }
                element.setAttributeNS(nsURI, attrName, attrValue);
            } else {
                element.setAttribute(Utils.getXmlName(attrName), attrValue);
            }
        }
    }

    /**
     * Recursively converts a list of child tokens into DOM nodes appended to {@code element}.
     *
     * <p>Comment nodes become DOM comments, content nodes become text nodes (or CDATA sections
     * for script/style elements when {@code props.isUseCdataForScriptAndStyle()} is set), tag
     * nodes become child elements, and nested lists are flattened into the same parent. Items of
     * any other type are ignored.</p>
     *
     * @param document    owner document used as node factory
     * @param element     parent element receiving the created nodes
     * @param tagChildren child items to convert; may be {@code null}, in which case nothing happens
     */
    private void createSubnodes(Document document, Element element, List<?> tagChildren) {
        if (tagChildren == null) {
            return;
        }

        for (Object item : tagChildren) {
            if (item instanceof CommentNode) {
                CommentNode commentNode = (CommentNode) item;
                Comment comment = document.createComment(commentNode.getContent().toString());
                element.appendChild(comment);
            } else if (item instanceof ContentNode) {
                String nodeName = element.getNodeName();
                String content = item.toString();
                boolean specialCase = props.isUseCdataForScriptAndStyle()
                        && ("script".equalsIgnoreCase(nodeName) || "style".equalsIgnoreCase(nodeName));
                // CDATA content is stored verbatim, so it is never escaped.
                if (escapeXml && !specialCase) {
                    content = Utils.escapeXml(content, props, true);
                }
                element.appendChild(specialCase
                        ? document.createCDATASection(content)
                        : document.createTextNode(content));
            } else if (item instanceof TagNode) {
                TagNode subTagNode = (TagNode) item;
                Element subelement = createElement(subTagNode, document);
                setAttributes(subTagNode, subelement);
                // recursively create subnodes
                createSubnodes(document, subelement, subTagNode.getChildren());
                element.appendChild(subelement);
            } else if (item instanceof List) {
                // A nested list contributes its items to the same parent element.
                createSubnodes(document, element, (List<?>) item);
            }
        }
    }
}