/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.nio.xml;
import java.io.StringWriter;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Attr;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import com.rapidminer.tools.container.Pair;
/**
* Provides helper functions for XML DOM, like finding common ancestors of nodes,
* creating XPath expressions etc.
*
* @author Marius Helf
*
*/
public class XMLDomHelper {
/**
 * A simple mutable holder for a tuple of element name, attribute name, attribute namespace and
 * attribute value.
 *
 * {@link #hashCode()} and {@link #equals(Object)} are value based and take all four fields into
 * account, so instances can be stored in any container with "hash" in its name.
 *
 * @author Marius Helf
 *
 */
public static class AttributeNamespaceValue {
    private String element = null;
    private String name = null;
    private String namespace = null;
    private String value = null;

    public AttributeNamespaceValue() {
    }

    /** Returns the element name, or null if none was set. */
    public String getElement() {
        return element;
    }

    public void setElement(String element) {
        this.element = element;
    }

    /** Returns the attribute name, or null if none was set. */
    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    /** Returns the attribute namespace, or null if none was set. */
    public String getNamespace() {
        return namespace;
    }

    public void setNamespace(String namespace) {
        this.namespace = namespace;
    }

    /** Returns the attribute value, or null if none was set. */
    public String getValue() {
        return value;
    }

    public void setValue(String value) {
        this.value = value;
    }

    /**
     * Combines the hash codes of element, name, namespace and value (in that order) using the
     * usual 31-multiplier scheme; a null field contributes 0.
     */
    @Override
    public int hashCode() {
        int hash = 1;
        for (String part : new String[] { element, name, namespace, value }) {
            hash = 31 * hash + (part == null ? 0 : part.hashCode());
        }
        return hash;
    }

    /**
     * Two instances are equal if they are of exactly the same class and all four fields are
     * pairwise equal, where null only equals null.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj == null || obj.getClass() != getClass()) {
            return false;
        }
        AttributeNamespaceValue that = (AttributeNamespaceValue) obj;
        return sameText(element, that.element)
                && sameText(name, that.name)
                && sameText(namespace, that.namespace)
                && sameText(value, that.value);
    }

    /** Null-safe string comparison: null matches only null. */
    private static boolean sameText(String left, String right) {
        return left == null ? right == null : left.equals(right);
    }

    /**
     * Renders the tuple in an XPath-predicate-like form: element[@namespace:name="value"].
     * Element, namespace (together with its colon) and value are left out when they are null.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        if (element != null) {
            text.append(element);
        }
        text.append("[@");
        if (namespace != null) {
            text.append(namespace).append(":");
        }
        text.append(name).append("=\"");
        if (value != null) {
            text.append(value);
        }
        return text.append("\"]").toString();
    }
}
/**
 * Creates the list of (namespace, name) pairs that all given elements share at the end of their
 * ancestor paths.
 *
 * The elements are compared by namespace URI and name. If they all match, their direct ancestors
 * (see {@link #getDirectAncestors(Set)}) are compared in the same way, and so on, until a level
 * is reached at which the elements differ or at which not every element has a parent element.
 *
 * The first entry of the list represents the farthest common ancestor, the last entry represents
 * the given elements themselves. Each entry is a Pair with first=namespace URI (may be null) and
 * second=name. The name is the local name; for elements without a local name (e.g. created
 * without namespace support) the node name is used instead, so that such elements no longer
 * cause a NullPointerException.
 *
 * @param elements the elements to compare
 * @return the common (namespace, name) path; empty if elements is empty or if the elements
 *         themselves already differ in namespace or name
 */
public static List<Pair<String,String>> getCommonAncestorNames(Set<Element> elements) {
    LinkedList<Pair<String,String>> commonAncestors = new LinkedList<Pair<String,String>>();

    Iterator<Element> it = elements.iterator();
    if (!it.hasNext()) {
        return commonAncestors;
    }

    // the first element serves as reference, all others are compared to it
    Element referenceElement = it.next();
    String referenceElementNS = referenceElement.getNamespaceURI();
    String referenceElementName = referenceElement.getLocalName();
    if (referenceElementName == null) {
        // getLocalName() is null for nodes created without namespace support
        referenceElementName = referenceElement.getNodeName();
    }

    while (it.hasNext()) {
        Element currentElement = it.next();
        String currentElementNS = currentElement.getNamespaceURI();
        String currentElementName = currentElement.getLocalName();
        if (currentElementName == null) {
            currentElementName = currentElement.getNodeName();
        }

        // namespaces may be null, in which case both have to be null
        boolean sameNamespace = referenceElementNS == null
                ? currentElementNS == null
                : referenceElementNS.equals(currentElementNS);
        if (!sameNamespace || !referenceElementName.equals(currentElementName)) {
            // still empty at this point
            return commonAncestors;
        }
    }

    // all elements agree on this level: first collect what their parents have in common
    // (an empty set means at least one element has no parent element) ...
    Set<Element> directAncestors = getDirectAncestors(elements);
    if (!directAncestors.isEmpty()) {
        commonAncestors.addAll(getCommonAncestorNames(directAncestors));
    }
    // ... then append this level, so that the farthest ancestor comes first
    commonAncestors.add(new Pair<String,String>(referenceElementNS, referenceElementName));
    return commonAncestors;
}
/**
 * Returns the set of all attributes whose name, namespace and value are equal for all elements
 * in elements.
 *
 * Namespace declarations (attributes named "xmlns", attributes with an "xmlns:" prefix on nodes
 * without namespace support, and attributes in the namespace "http://www.w3.org/2000/xmlns/")
 * are ignored. The name of an attribute is its local name; for attributes without a local name
 * the plain attribute name is used, so that such attributes no longer cause a
 * NullPointerException.
 *
 * The element field of the returned {@link AttributeNamespaceValue} objects is not set.
 *
 * @param elements the elements whose attributes are intersected
 * @return the common attributes; never null, empty if elements is empty or if the elements have
 *         no attribute in common
 */
public static Set<AttributeNamespaceValue> getCommonAttributes(Set<Element> elements)
{
    Set<AttributeNamespaceValue> commonAttributeValueSet = null;
    for (Element element : elements) {
        // collect the (name, namespace, value) tuples of the current element
        Set<AttributeNamespaceValue> elementAttributeValueSet = new HashSet<AttributeNamespaceValue>();
        NamedNodeMap attributes = element.getAttributes();
        for (int i = 0; i < attributes.getLength(); ++i) {
            Attr attribute = (Attr)attributes.item(i);
            String namespace = attribute.getNamespaceURI();
            String name = attribute.getLocalName();
            if (name == null) {
                // getLocalName() is null for nodes created without namespace support
                name = attribute.getName();
            }

            boolean isNamespaceDeclaration = "xmlns".equals(name)
                    || name.startsWith("xmlns:")
                    || "http://www.w3.org/2000/xmlns/".equals(namespace);
            if (isNamespaceDeclaration) {
                continue;
            }

            AttributeNamespaceValue attributeNSValue = new AttributeNamespaceValue();
            attributeNSValue.setName(name);
            attributeNSValue.setNamespace(namespace);
            attributeNSValue.setValue(attribute.getValue());
            elementAttributeValueSet.add(attributeNSValue);
        }

        if (commonAttributeValueSet == null) {
            // first element: its attributes are the initial candidates
            commonAttributeValueSet = elementAttributeValueSet;
        } else {
            // intersect candidates with the attributes of the current element
            commonAttributeValueSet.retainAll(elementAttributeValueSet);
        }

        // nothing in common any more, no need to look at the remaining elements
        if (commonAttributeValueSet.isEmpty()) {
            return commonAttributeValueSet;
        }
    }

    if (commonAttributeValueSet == null) {
        // no elements given: return an empty set instead of null
        return new HashSet<AttributeNamespaceValue>();
    }
    return commonAttributeValueSet;
}
/**
 * Collects the parent elements of all given elements.
 *
 * As soon as one element is found whose parent node is missing or is not an {@link Element}
 * (e.g. the document node), an empty set is returned, regardless of the parents found so far.
 *
 * @param elements the elements whose parents are requested
 * @return the set of parent elements, or an empty set if any element has no parent element
 */
public static Set<Element> getDirectAncestors(Set<Element> elements) {
    Set<Element> parents = new HashSet<Element>();
    for (Element child : elements) {
        Node parent = child.getParentNode();
        if (!(parent instanceof Element)) {
            // instanceof is false for null as well, so this covers a missing parent too
            return new HashSet<Element>();
        }
        parents.add((Element) parent);
    }
    return parents;
}
/**
 * Returns the XPath to retrieve targetElement from rootElement.
 *
 * rootElement may be null (or not be an ancestor of targetElement at all); in this case the
 * XPath starts with and includes the farthest ancestor element of targetElement. If
 * rootElement == targetElement, or if targetElement is null, an empty string is returned.
 *
 * The path steps are separated by "/" and the path has no leading "/". Each step consists of
 * the element's local name (or its node name, if it has no local name), preceded by
 * "prefix:" if the element has a namespace URI for which namespacesMap contains a non-empty
 * id. Elements whose namespace URI is not contained in namespacesMap are written without
 * prefix.
 *
 * @param rootElement the element the path is relative to; not part of the path; may be null
 * @param targetElement the element the path leads to
 * @param includeElementIndex Indicates if the element indices in the form elementName[n] should
 * be included in the XPath. The indices are determined by {@link #getElementIndex(Element)}.
 * @param namespacesMap Maps namespace ids to namespace URIs. May be null, in which case no
 * namespace prefixes are written.
 * @return the XPath expression leading from rootElement to targetElement
 */
public static String getXPath(Element rootElement, Element targetElement, boolean includeElementIndex, Map<String,String> namespacesMap) {
    // since we need the mapping the other way round (URI -> id), we invert the map
    Map<String,String> namespaceUriToIdMap = new HashMap<String, String>();
    if (namespacesMap != null) {
        for (Entry<String, String> entry : namespacesMap.entrySet()) {
            namespaceUriToIdMap.put(entry.getValue(), entry.getKey());
        }
    }

    // walk up from targetElement (up to, not including, rootElement); prepending each
    // element yields the path in document order
    LinkedList<Element> elementPath = new LinkedList<Element>();
    Element currentElement = targetElement;
    while (currentElement != null && currentElement != rootElement) {
        elementPath.addFirst(currentElement);
        Node parent = currentElement.getParentNode();
        // stop at the document node (or at a detached subtree root)
        currentElement = (parent instanceof Element) ? (Element) parent : null;
    }

    // construct XPath
    StringBuilder builder = new StringBuilder();
    for (Element pathElement : elementPath) {
        if (builder.length() > 0) {
            // don't include "/" at the beginning
            builder.append("/");
        }

        String namespaceUri = pathElement.getNamespaceURI();
        if (namespaceUri != null) {
            // only write a prefix if one is known; never emit "null:" or a bare ":"
            String namespaceId = namespaceUriToIdMap.get(namespaceUri);
            if (namespaceId != null && namespaceId.length() > 0) {
                builder.append(namespaceId);
                builder.append(":");
            }
        }

        String elementName = pathElement.getLocalName();
        if (elementName == null) {
            // getLocalName() is null for nodes created without namespace support
            elementName = pathElement.getNodeName();
        }
        builder.append(elementName);

        if (includeElementIndex) {
            builder.append("[");
            builder.append(getElementIndex(pathElement));
            builder.append("]");
        }
    }
    return builder.toString();
}
/**
 * Returns the 1-based index of element in the list of all elements with the same name and
 * namespace in its parent node, i.e. the n to be used in an XPath step of the form
 * elementName[n].
 *
 * The index is 1 plus the number of preceding sibling elements that match element in namespace
 * URI and name. Consequently an element without any preceding siblings, e.g. an element without
 * parent node, has the index 1.
 *
 * Names are compared by local name; for elements without a local name (e.g. created without
 * namespace support) the node name is used instead, so that such elements no longer cause a
 * NullPointerException.
 *
 * @param element the element whose index is requested
 * @return the 1-based index of element among its equally named siblings
 */
public static int getElementIndex(Element element) {
    String elementNS = element.getNamespaceURI();
    String elementName = element.getLocalName();
    if (elementName == null) {
        // getLocalName() is null for nodes created without namespace support
        elementName = element.getNodeName();
    }

    int index = 1;
    for (Node sibling = element.getPreviousSibling(); sibling != null; sibling = sibling.getPreviousSibling()) {
        if (!(sibling instanceof Element)) {
            // text, comments etc. do not count
            continue;
        }

        String siblingNS = sibling.getNamespaceURI();
        String siblingName = sibling.getLocalName();
        if (siblingName == null) {
            siblingName = sibling.getNodeName();
        }

        // namespaces may be null, in which case both have to be null
        boolean sameNamespace = elementNS == null ? siblingNS == null : elementNS.equals(siblingNS);
        if (sameNamespace && elementName.equals(siblingName)) {
            ++index;
        }
    }
    return index;
}
/**
 * Concatenates the string representations of all nodes in nodeList, in list order.
 *
 * Elements are serialized as XML markup without XML declaration. For all other nodes the text
 * content is appended; nodes whose text content is null (e.g. document nodes) contribute
 * nothing.
 *
 * @param nodeList the nodes to convert
 * @return the concatenated string; empty if nodeList is empty
 * @throws TransformerException if the transformer cannot be created or an element cannot be
 *         serialized
 */
public static String nodeListToString(NodeList nodeList) throws TransformerException {
    StringWriter stringWriter = new StringWriter();
    // created on demand and then reused, instead of building a new factory and transformer
    // for every single element
    Transformer transformer = null;
    for (int i = 0; i < nodeList.getLength(); ++i) {
        Node node = nodeList.item(i);
        if (node instanceof Element) {
            if (transformer == null) {
                transformer = TransformerFactory.newInstance().newTransformer();
                transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
            }
            transformer.transform(new DOMSource(node), new StreamResult(stringWriter));
        } else {
            String textContent = node.getTextContent();
            if (textContent != null) {
                // appending null would write the literal string "null"
                stringWriter.append(textContent);
            }
        }
    }
    return stringWriter.toString();
}
}