/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package nokogiri.internals.c14n;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;

/**
 * DOM and XML accessibility and comfort functions.
 *
 * @author Christian Geuer-Pollmann
 */
public class XMLUtils
{

  /**
   * Constructor XMLUtils
   *
   */
  private
  XMLUtils()
  {
    // we don't allow instantiation
  }

  /**
   * Concatenates the data of all direct {@link Text} children of the given
   * element. Only nodes whose type is {@link Node#TEXT_NODE} contribute;
   * other children (and their descendants) are skipped.
   *
   * @param element the element whose direct text children are collected
   * @return the concatenated text of the direct text children
   */
  public static String
  getFullTextChildrenFromElement(Element element)
  {
    StringBuilder sb = new StringBuilder();

    Node child = element.getFirstChild();
    while (child != null) {
      if (child.getNodeType() == Node.TEXT_NODE) {
        sb.append(((Text)child).getData());
      }
      child = child.getNextSibling();
    }

    return sb.toString();
  }

  /**
   * This method returns the owner document of a particular node.
   * If the node itself is a {@link Document}, the node is returned;
   * {@link Node#getOwnerDocument} returns <CODE>null</CODE>
   * if the {@link Node} is a {@link Document}.
   *
   * @param node the node to inspect
   * @return the owner document of the node
   * @throws NullPointerException if <code>node</code> is <code>null</code>
   */
  public static Document
  getOwnerDocument(Node node)
  {
    if (node.getNodeType() == Node.DOCUMENT_NODE) {
      return (Document) node;
    }
    // No catch-and-rethrow here: wrapping a NullPointerException in a new
    // one built from its message only discarded the original stack trace.
    return node.getOwnerDocument();
  }

  /**
   * This method returns the owner document of the first node in the set
   * for which the lookup does not fail. A node that is itself a
   * {@link Document} is returned directly; for an attribute node the
   * document is obtained through its owner element.
   *
   * @param xpathNodeSet the nodes to inspect
   * @return the owner document
   * @throws NullPointerException if the set is empty or the lookup failed
   *         for every node in it
   */
  public static Document
  getOwnerDocument(Set<Node> xpathNodeSet)
  {
    NullPointerException lastFailure = null;
    for (Node node : xpathNodeSet) {
      int nodeType = node.getNodeType();
      if (nodeType == Node.DOCUMENT_NODE) {
        return (Document) node;
      }
      try {
        if (nodeType == Node.ATTRIBUTE_NODE) {
          return ((Attr)node).getOwnerElement().getOwnerDocument();
        }
        return node.getOwnerDocument();
      } catch (NullPointerException e) {
        // e.g. an attribute without owner element; try the next node
        lastFailure = e;
      }
    }

    if (lastFailure == null) {
      // Nothing was iterated: report that explicitly rather than failing
      // on a null dereference while building the exception.
      throw new NullPointerException("Cannot determine owner document: the node set is empty");
    }
    NullPointerException failure = new NullPointerException(lastFailure.getMessage());
    failure.initCause(lastFailure);
    throw failure;
  }

  /**
   * Copies the nodes of a {@link NodeList} into a new {@link Set}.
   *
   * @param xpathNodeSet the node list to convert; may be <code>null</code>
   * @return a new set holding the nodes of the list, empty if the list is
   *         <code>null</code>
   */
  public static Set<Node>
  convertNodelistToSet(NodeList xpathNodeSet)
  {
    if (xpathNodeSet == null) {
      return new HashSet<Node>();
    }

    int length = xpathNodeSet.getLength();
    Set<Node> set = new HashSet<Node>(length);

    for (int i = 0; i < length; i++) {
      set.add(xpathNodeSet.item(i));
    }

    return set;
  }

  /**
   * This method spreads all namespace attributes in a DOM document to their
   * children. This is needed because the XML Signature XPath transform
   * must evaluate the XPath against all nodes in the input, even against
   * XPath namespace nodes. Through a bug in XalanJ2, the namespace nodes are
   * not fully visible in the Xalan XPath model, so we have to do this by
   * hand in DOM spaces so that the nodes become visible in XPath space.
   *
   * <p>A document without a document element is left untouched.
   *
   * @param doc the document to modify in place
   * @see <A HREF="http://nagoya.apache.org/bugzilla/show_bug.cgi?id=2650">
   * Namespace axis resolution is not XPath compliant </A>
   */
  public static void
  circumventBug2650(Document doc)
  {
    Element documentElement = doc.getDocumentElement();
    if (documentElement == null) {
      // empty document: there is no element to carry namespace attributes
      return;
    }

    // if the document element has no xmlns definition, we add xmlns=""
    Attr xmlnsAttr =
      documentElement.getAttributeNodeNS(Constants.NamespaceSpecNS, "xmlns");

    if (xmlnsAttr == null) {
      documentElement.setAttributeNS(Constants.NamespaceSpecNS, "xmlns", "");
    }

    XMLUtils.circumventBug2650internal(doc);
  }

  /**
   * This is the work horse for {@link #circumventBug2650}. It walks the
   * tree below <code>node</code> iteratively and copies every attribute in
   * the namespace-declaration namespace from an element to each of its
   * child elements that does not already carry an attribute with the same
   * local name in that namespace.
   *
   * @param node the node at which the walk starts
   * @see <A HREF="http://nagoya.apache.org/bugzilla/show_bug.cgi?id=2650">
   * Namespace axis resolution is not XPath compliant </A>
   */
  @SuppressWarnings("fallthrough")
  private static void
  circumventBug2650internal(Node node)
  {
    Node parent = null;
    Node sibling = null;
    final String namespaceNs = Constants.NamespaceSpecNS;
    do {
      switch (node.getNodeType()) {
        case Node.ELEMENT_NODE :
          Element element = (Element) node;
          if (!element.hasChildNodes()) {
            // nothing to push declarations into and nothing to descend to
            break;
          }
          if (element.hasAttributes()) {
            NamedNodeMap attributes = element.getAttributes();
            int attributesLength = attributes.getLength();

            for (Node child = element.getFirstChild(); child != null;
                 child = child.getNextSibling()) {

              if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
              }
              Element childElement = (Element) child;

              for (int i = 0; i < attributesLength; i++) {
                Attr currentAttr = (Attr) attributes.item(i);
                if (!namespaceNs.equals(currentAttr.getNamespaceURI())) {
                  // not a namespace declaration
                  continue;
                }
                if (childElement.hasAttributeNS(namespaceNs,
                                                currentAttr.getLocalName())) {
                  // the child has its own declaration for this name; keep it
                  continue;
                }
                childElement.setAttributeNS(namespaceNs,
                                            currentAttr.getName(),
                                            currentAttr.getNodeValue());
              }
            }
          }
        // intentional fall-through: an element with children is descended
        // into exactly like an entity reference or a document
        case Node.ENTITY_REFERENCE_NODE :
        case Node.DOCUMENT_NODE :
          parent = node;
          sibling = node.getFirstChild();
          break;
      }
      // no further node on this level: climb until an ancestor has a
      // following sibling, or the top of the tree is reached
      while ((sibling == null) && (parent != null)) {
        sibling = parent.getNextSibling();
        parent = parent.getParentNode();
      }
      if (sibling == null) {
        return;
      }

      node = sibling;
      sibling = node.getNextSibling();
    } while (true);
  }

  /**
   * Selects a node with {@link #selectNode} and returns its first direct
   * child of type {@link Node#TEXT_NODE}.
   *
   * @param sibling the node at which the sibling search starts
   * @param uri the namespace URI to match
   * @param nodeName the local name to match
   * @param number zero-based index among the matching siblings
   * @return the first text child of the selected node, or <code>null</code>
   *         if no node was selected or it has no text child
   */
  public static Text
  selectNodeText(Node sibling, String uri, String nodeName, int number)
  {
    Node n = selectNode(sibling, uri, nodeName, number);
    if (n == null) {
      return null;
    }
    n = n.getFirstChild();
    while (n != null && n.getNodeType() != Node.TEXT_NODE) {
      n = n.getNextSibling();
    }
    return (Text)n;
  }

  /**
   * Starting at <code>sibling</code> and following next-sibling links,
   * returns the <code>number</code>-th (zero-based) node whose namespace
   * URI and local name match.
   *
   * @param sibling the node at which the search starts; may be <code>null</code>
   * @param uri the namespace URI to match
   * @param nodeName the local name to match
   * @param number zero-based index among the matching siblings
   * @return the matching node, or <code>null</code> if there are not enough
   *         matches
   */
  public static Element
  selectNode(Node sibling, String uri, String nodeName, int number)
  {
    while (sibling != null) {
      if (hasQualifiedName(sibling, uri, nodeName)) {
        if (number == 0) {
          return (Element)sibling;
        }
        number--;
      }
      sibling = sibling.getNextSibling();
    }
    return null;
  }

  /**
   * Starting at <code>sibling</code> and following next-sibling links,
   * collects every node whose namespace URI and local name match.
   *
   * @param sibling the node at which the search starts; may be <code>null</code>
   * @param uri the namespace URI to match
   * @param nodeName the local name to match
   * @return the matching nodes in sibling order; empty if there are none
   */
  public static Element[]
  selectNodes(Node sibling, String uri, String nodeName)
  {
    List<Element> list = new ArrayList<Element>();
    while (sibling != null) {
      if (hasQualifiedName(sibling, uri, nodeName)) {
        list.add((Element)sibling);
      }
      sibling = sibling.getNextSibling();
    }
    return list.toArray(new Element[list.size()]);
  }

  /**
   * Tells whether the node has a non-null namespace URI equal to
   * <code>uri</code> and a local name equal to <code>nodeName</code>.
   */
  private static boolean
  hasQualifiedName(Node node, String uri, String nodeName)
  {
    return node.getNamespaceURI() != null
           && node.getNamespaceURI().equals(uri)
           && node.getLocalName().equals(nodeName);
  }

  /**
   * Builds a new set holding every node of <code>inputSet</code> that is
   * not on the descendant-or-self axis of <code>signatureElement</code>.
   * The input set is not modified.
   *
   * @param signatureElement the root of the subtree to exclude
   * @param inputSet the nodes to filter
   * @return a new set without the excluded nodes
   */
  public static Set<Node>
  excludeNodeFromSet(Node signatureElement, Set<Node> inputSet)
  {
    Set<Node> resultSet = new HashSet<Node>();
    Iterator<Node> iterator = inputSet.iterator();

    while (iterator.hasNext()) {
      Node inputNode = iterator.next();

      if (!XMLUtils.isDescendantOrSelf(signatureElement, inputNode)) {
        resultSet.add(inputNode);
      }
    }
    return resultSet;
  }

  /**
   * Returns a string for the given node: for a text node the concatenated
   * data of all text siblings under the same parent (or the node's own
   * data if it has no parent), for an attribute or processing instruction
   * its node value.
   *
   * @param xpathnode the node to convert
   * @return the string for the node, or <code>null</code> for any other
   *         node type
   */
  public static String
  getStrFromNode(Node xpathnode)
  {
    if (xpathnode.getNodeType() == Node.TEXT_NODE) {
      Node parent = xpathnode.getParentNode();
      if (parent == null) {
        // detached text node: it has no siblings to merge with
        return ((Text) xpathnode).getData();
      }

      // we iterate over all siblings of the context node because eventually,
      // the text is "polluted" with pi's or comments
      StringBuilder sb = new StringBuilder();

      for (Node currentSibling = parent.getFirstChild();
           currentSibling != null;
           currentSibling = currentSibling.getNextSibling()) {
        if (currentSibling.getNodeType() == Node.TEXT_NODE) {
          sb.append(((Text) currentSibling).getData());
        }
      }

      return sb.toString();
    } else if (xpathnode.getNodeType() == Node.ATTRIBUTE_NODE) {
      return ((Attr) xpathnode).getNodeValue();
    } else if (xpathnode.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
      return ((ProcessingInstruction) xpathnode).getNodeValue();
    }

    return null;
  }

  /**
   * Returns true if the descendantOrSelf is on the descendant-or-self axis
   * of the context node. Attribute nodes are linked to the tree through
   * their owner element.
   *
   * @param ctx the context node
   * @param descendantOrSelf the node to test
   * @return true if the node is descendant
   */
  public static boolean
  isDescendantOrSelf(Node ctx, Node descendantOrSelf)
  {
    if (ctx == descendantOrSelf) {
      return true;
    }

    Node parent = descendantOrSelf;

    while (true) {
      if (parent == null) {
        return false;
      }

      if (parent == ctx) {
        return true;
      }

      if (parent.getNodeType() == Node.ATTRIBUTE_NODE) {
        parent = ((Attr) parent).getOwnerElement();
      } else {
        parent = parent.getParentNode();
      }
    }
  }

  /**
   * Returns the attribute value for the attribute with the specified name.
   * Returns null if there is no such attribute, or
   * the empty string if the attribute value is empty.
   *
   * <p>This works around a limitation of the DOM
   * <code>Element.getAttributeNode</code> method, which does not distinguish
   * between an unspecified attribute and an attribute with a value of
   * "" (it returns "" for both cases).
   *
   * @param elem the element containing the attribute
   * @param name the name of the attribute
   * @return the attribute value (may be null if unspecified)
   */
  public static String
  getAttributeValue(Element elem, String name)
  {
    Attr attr = elem.getAttributeNodeNS(null, name);
    return (attr == null) ? null : attr.getValue();
  }

  /**
   * This method is a tree-search to help prevent against wrapping attacks. It checks that no
   * two Elements have ID Attributes that match the "value" argument, if this is the case then
   * "false" is returned. Note that a return value of "true" does not necessarily mean that
   * a matching Element has been found, just that no wrapping attack has been detected.
   *
   * @param startNode the root of the subtree to search
   * @param value the ID value to look for; surrounding whitespace and one
   *        leading '#' are ignored
   * @return false if the ID value was found more than once, true otherwise
   */
  public static boolean
  protectAgainstWrappingAttack(Node startNode, String value)
  {
    Node startParent = startNode.getParentNode();
    Node processedNode = null;
    Element foundElement = null;

    String id = normalizeId(value);

    while (startNode != null) {
      if (startNode.getNodeType() == Node.ELEMENT_NODE) {
        Element se = (Element) startNode;

        NamedNodeMap attributes = se.getAttributes();
        if (attributes != null) {
          for (int i = 0; i < attributes.getLength(); i++) {
            Attr attr = (Attr)attributes.item(i);
            if (attr.isId() && id.equals(attr.getValue())) {
              if (foundElement == null) {
                // Continue searching to find duplicates
                foundElement = attr.getOwnerElement();
              } else {
                //log.debug("Multiple elements with the same 'Id' attribute value!");
                return false;
              }
            }
          }
        }
      }

      processedNode = startNode;
      startNode = startNode.getFirstChild();

      // no child, this node is done.
      if (startNode == null) {
        // close node processing, get sibling
        startNode = processedNode.getNextSibling();
      }

      // no more siblings, get parent, all children
      // of parent are processed.
      while (startNode == null) {
        processedNode = processedNode.getParentNode();
        if (processedNode == startParent) {
          return true;
        }
        // close parent node processing (processed node now)
        startNode = processedNode.getNextSibling();
      }
    }
    return true;
  }

  /**
   * This method is a tree-search to help prevent against wrapping attacks. It checks that no other
   * Element than the given "knownElement" argument has an ID attribute that matches the "value"
   * argument, which is the ID value of "knownElement". If this is the case then "false" is returned.
   *
   * @param startNode the root of the subtree to search
   * @param knownElement the element that is allowed to carry the ID value
   * @param value the ID value to look for; surrounding whitespace and one
   *        leading '#' are ignored
   * @return false if an element other than knownElement carries the ID
   *         value, true otherwise
   */
  public static boolean
  protectAgainstWrappingAttack(
    Node startNode, Element knownElement, String value
  )
  {
    Node startParent = startNode.getParentNode();
    Node processedNode = null;

    String id = normalizeId(value);

    while (startNode != null) {
      if (startNode.getNodeType() == Node.ELEMENT_NODE) {
        Element se = (Element) startNode;

        NamedNodeMap attributes = se.getAttributes();
        if (attributes != null) {
          for (int i = 0; i < attributes.getLength(); i++) {
            Attr attr = (Attr)attributes.item(i);
            if (attr.isId() && id.equals(attr.getValue()) && se != knownElement) {
              //log.debug("Multiple elements with the same 'Id' attribute value!");
              return false;
            }
          }
        }
      }

      processedNode = startNode;
      startNode = startNode.getFirstChild();

      // no child, this node is done.
      if (startNode == null) {
        // close node processing, get sibling
        startNode = processedNode.getNextSibling();
      }

      // no more siblings, get parent, all children
      // of parent are processed.
      while (startNode == null) {
        processedNode = processedNode.getParentNode();
        if (processedNode == startParent) {
          return true;
        }
        // close parent node processing (processed node now)
        startNode = processedNode.getNextSibling();
      }
    }
    return true;
  }

  /**
   * Trims the given ID reference and removes one leading '#', if present.
   * An empty or whitespace-only value yields the empty string instead of
   * failing on the first-character check.
   */
  private static String
  normalizeId(String value)
  {
    String id = value.trim();
    if (id.length() > 0 && id.charAt(0) == '#') {
      id = id.substring(1);
    }
    return id;
  }

}