XMLTools.java example

Explorer
rapidminer-studio-master
- doc
  - doc
- src
/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 * 
 * Complete list of developers available at our web site:
 * 
 * http://rapidminer.com
 * 
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.io.process;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.logging.Level;

import javax.xml.XMLConstants;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.XMLGregorianCalendar;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import com.rapidminer.tools.LogService;
import com.rapidminer.tools.XMLException;


/**
 * This class offers several convenience methods for working with XML documents.
 *
 * @author Sebastian Land, Simon Fischer
 */
public class XMLTools {

	/** Schema URL constant for RapidMiner process XML. */
	public static final String SCHEMA_URL_PROCESS = "http://www.rapidminer.com/xml/schema/RapidMinerProcess";

	/** Validators created so far, keyed by the URI of their schema. */
	private static final Map<URI, Validator> VALIDATORS = new HashMap<URI, Validator>();

	/** Shared, namespace-aware factory from which all document builders are created. */
	private static final DocumentBuilderFactory BUILDER_FACTORY;

	static {
		BUILDER_FACTORY = DocumentBuilderFactory.newInstance();
		BUILDER_FACTORY.setNamespaceAware(true);
	}

	/**
	 * Obtains a fresh {@link DocumentBuilder} from the shared factory.
	 *
	 * A new builder is handed out on every call because a DocumentBuilder is not thread-safe and
	 * crashes when different threads try to parse at the same time. Access to the factory itself
	 * is serialized.
	 *
	 * @return a newly created document builder
	 * @throws IOException
	 *             if the factory fails to create a {@link DocumentBuilder}; the failure is also
	 *             logged as a warning
	 */
	private static DocumentBuilder createDocumentBuilder() throws IOException {
		DocumentBuilder builder;
		try {
			synchronized (BUILDER_FACTORY) {
				builder = BUILDER_FACTORY.newDocumentBuilder();
			}
		} catch (ParserConfigurationException e) {
			LogService.getRoot().log(Level.WARNING, "Unable to create document builder", e);
			throw new IOException(e);
		}
		return builder;
	}

	/**
	 * Returns the validator for the given schema, creating and caching it on first request.
	 *
	 * @param schemaURI
	 *            location of the XML schema; must not be null
	 * @return the cached or newly created validator
	 * @throws XMLException
	 *             if the schema cannot be parsed or the URI cannot be converted to a URL
	 */
	private static Validator getValidator(URI schemaURI) throws XMLException {
		if (schemaURI == null) {
			throw new NullPointerException("SchemaURL is null!");
		}
		synchronized (VALIDATORS) {
			if (VALIDATORS.containsKey(schemaURI)) {
				return VALIDATORS.get(schemaURI);
			}
			// first request for this schema: compile it and remember the resulting validator
			Validator created;
			try {
				SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
				created = schemaFactory.newSchema(schemaURI.toURL()).newValidator();
			} catch (SAXException e) {
				throw new XMLException("Cannot parse XML schema: " + e.getMessage(), e);
			} catch (MalformedURLException e) {
				throw new XMLException("Cannot parse XML schema: " + e.getMessage(), e);
			}
			VALIDATORS.put(schemaURI, created);
			return created;
		}
	}

	/**
	 * Convenience variant taking the schema location as {@link URL}. The URL is converted to a
	 * {@link URI} on every call, so this method should not be called since it is slower than
	 * {@link #parseAndValidate(InputStream, URI, String)}.
	 *
	 * @throws XMLException
	 *             if the URL cannot be converted to a URI, or if parsing or validation fails
	 */
	public static Document parseAndValidate(InputStream in, URL schemaURL, String sourceName) throws XMLException,
			IOException {
		URI schemaURI;
		try {
			schemaURI = new URI(schemaURL.toString());
		} catch (URISyntaxException e) {
			throw new XMLException("Could not resolve URL.", e);
		}
		return parseAndValidate(in, schemaURI, sourceName);
	}

	/**
	 * Parses the given stream into a DOM document and validates it against the schema at the given
	 * location. The schema URL might be given as URI for performance reasons.
	 *
	 * @param in
	 *            the stream to read the XML from
	 * @param schemaURL
	 *            location of the XML schema to validate against
	 * @param sourceName
	 *            name of the source, passed to the error handler that collects validation problems
	 * @return the document produced by the validator (the node of its {@link DOMResult}), not the
	 *         originally parsed tree
	 * @throws XMLException
	 *             if the input is not well-formed, if the schema cannot be loaded, or if the error
	 *             handler recorded errors during validation
	 * @throws IOException
	 *             if reading fails or no document builder can be created
	 */
	public static Document parseAndValidate(InputStream in, URI schemaURL, String sourceName) throws XMLException,
			IOException {
		XMLErrorHandler errorHandler = new XMLErrorHandler(sourceName);

		Document doc;
		try {
			doc = createDocumentBuilder().parse(in);
		} catch (SAXException e) {
			throw new XMLException(errorHandler.toString(), e);
		}

		Source source = new DOMSource(doc);
		DOMResult result = new DOMResult();
		Validator validator = getValidator(schemaURL);
		// Validators are cached per schema and therefore shared between callers, but a Validator is
		// neither thread-safe nor reentrant: installing the error handler and running the
		// validation must happen exclusively, otherwise concurrent calls overwrite each other's
		// handler and report errors to the wrong source.
		synchronized (validator) {
			validator.setErrorHandler(errorHandler);
			try {
				validator.validate(source, result);
			} catch (SAXException e) {
				throw new XMLException(errorHandler.toString(), e);
			}
		}
		if (errorHandler.hasErrors()) {
			throw new XMLException(errorHandler.toString());
		}
		return (Document) result.getNode();
	}

	/**
	 * Parses the given XML text into a DOM document. The string is handed to the parser as UTF-8
	 * encoded bytes.
	 */
	public static Document parse(String string) throws SAXException, IOException {
		DocumentBuilder builder = createDocumentBuilder();
		byte[] utf8Bytes = string.getBytes(Charset.forName("UTF-8"));
		return builder.parse(new ByteArrayInputStream(utf8Bytes));
	}

	/**
	 * Parses the XML read from the given stream into a DOM document, using a freshly created
	 * document builder.
	 */
	public static Document parse(InputStream in) throws SAXException, IOException {
		DocumentBuilder builder = createDocumentBuilder();
		return builder.parse(in);
	}

	/**
	 * Parses the content of the given file into a DOM document, using a freshly created document
	 * builder.
	 */
	public static Document parse(File file) throws SAXException, IOException {
		DocumentBuilder builder = createDocumentBuilder();
		return builder.parse(file);
	}

	/**
	 * Serializes the given document to a string. The document is written as UTF-8 into an
	 * in-memory buffer which is then decoded with the same charset.
	 *
	 * @throws XMLException
	 *             if the document cannot be serialized
	 */
	public static String toString(Document document) throws XMLException {
		final Charset charset = Charset.forName("UTF-8");
		ByteArrayOutputStream buffer = new ByteArrayOutputStream();
		stream(document, buffer, charset);
		byte[] serialized = buffer.toByteArray();
		return new String(serialized, charset);
	}

	/**
	 * Serializes the given document to a string using the given encoding for both the XML
	 * declaration and the intermediate byte representation.
	 *
	 * @param document
	 *            the document to serialize
	 * @param encoding
	 *            the charset to use; if null, UTF-8 is used
	 * @return the serialized document
	 * @throws XMLException
	 *             if the document cannot be serialized
	 * @deprecated use {@link #toString(Document)} instead
	 */
	@Deprecated
	public static String toString(Document document, Charset encoding) throws XMLException {
		// The stream(...) methods substitute UTF-8 for a null encoding. Decode with that same
		// charset here instead of failing with a NullPointerException in the String constructor.
		Charset effectiveEncoding = encoding != null ? encoding : Charset.forName("UTF-8");
		ByteArrayOutputStream buf = new ByteArrayOutputStream();
		stream(document, buf, effectiveEncoding);
		return new String(buf.toByteArray(), effectiveEncoding);
	}

	/**
	 * Writes the given document to the given file, replacing any existing content.
	 *
	 * @param document
	 *            the document to serialize
	 * @param file
	 *            the target file
	 * @param encoding
	 *            the charset to write with
	 * @throws XMLException
	 *             if the file cannot be opened, written or closed, or if the document cannot be
	 *             serialized
	 */
	public static void stream(Document document, File file, Charset encoding) throws XMLException {
		OutputStream out = null;
		try {
			out = new FileOutputStream(file);
			stream(document, out, encoding);
			// Close explicitly on the success path: a failing close means the file may be
			// incomplete, which must be reported rather than silently ignored.
			out.close();
			out = null;
		} catch (IOException e) {
			throw new XMLException("Cannot save XML to " + file + ": " + e, e);
		} finally {
			if (out != null) {
				try {
					out.close();
				} catch (IOException ignored) {
					// another exception is already propagating; do not mask it with this one
				}
			}
		}
	}

	/**
	 * Serializes the given document to the given output stream by wrapping it into a
	 * {@link DOMSource} and delegating to {@link #stream(DOMSource, OutputStream, Charset)}.
	 */
	public static void stream(Document document, OutputStream out, Charset encoding) throws XMLException {
		DOMSource documentSource = new DOMSource(document);
		stream(documentSource, out, encoding);
	}

	/**
	 * Serializes the given source to the given output stream. If no encoding is given, UTF-8 is
	 * used.
	 */
	public static void stream(DOMSource source, OutputStream out, Charset encoding) throws XMLException {
		final Charset effectiveEncoding = (encoding == null) ? Charset.forName("UTF-8") : encoding;
		// the transformer gets a Writer rather than the raw stream to work around a Java bug,
		// see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6296446
		OutputStreamWriter writer = new OutputStreamWriter(out, effectiveEncoding);
		stream(source, new StreamResult(writer), effectiveEncoding);
	}

	/**
	 * Serializes the given document into the given transformation result by wrapping it into a
	 * {@link DOMSource} and delegating to {@link #stream(DOMSource, Result, Charset)}.
	 */
	public static void stream(Document document, Result result, Charset encoding) throws XMLException {
		DOMSource documentSource = new DOMSource(document);
		stream(documentSource, result, encoding);
	}

	/**
	 * Serializes the given source into the given result without any additional output properties.
	 */
	public static void stream(DOMSource source, Result result, Charset encoding) throws XMLException {
		final Properties noExtraProperties = null;
		stream(source, result, encoding, noExtraProperties);
	}

	/**
	 * Serializes the given source into the given result using an indenting identity transformer.
	 *
	 * Output properties are applied in this order: indentation defaults first, then the given
	 * outputProperties (if any), and finally the encoding (if any), so later settings replace
	 * earlier ones.
	 *
	 * @param source
	 *            the DOM source to serialize
	 * @param result
	 *            the target of the transformation
	 * @param encoding
	 *            the output encoding; may be null, in which case none is set explicitly
	 * @param outputProperties
	 *            additional transformer output properties; may be null
	 * @throws XMLException
	 *             if the transformer cannot be created or the transformation fails
	 */
	public static void stream(DOMSource source, Result result, Charset encoding, Properties outputProperties)
			throws XMLException {
		Transformer serializer;
		try {
			TransformerFactory factory = TransformerFactory.newInstance();
			try {
				factory.setAttribute("indent-number", Integer.valueOf(2));
			} catch (IllegalArgumentException e) {
				// attribute is implementation specific and may be unknown; ignore
			}
			serializer = factory.newTransformer();
			serializer.setOutputProperty(OutputKeys.INDENT, "yes");
			try {
				serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
			} catch (IllegalArgumentException e) {
				// property is implementation specific and may be unknown; ignore
			}
			if (outputProperties != null) {
				serializer.setOutputProperties(outputProperties);
			}
			if (encoding != null) {
				serializer.setOutputProperty(OutputKeys.ENCODING, encoding.name());
			}
		} catch (TransformerConfigurationException e) {
			throw new XMLException("Cannot transform XML: " + e, e);
		} catch (TransformerFactoryConfigurationError e) {
			throw new XMLException("Cannot transform XML: " + e, e);
		}

		try {
			serializer.transform(source, result);
		} catch (TransformerException e) {
			throw new XMLException("Cannot transform XML: " + e, e);
		}
	}

	/**
	 * Same as {@link #getTagContents(Element, String, boolean)} with exceptions switched off:
	 * this method never throws an {@link XMLException} and returns null if the content cannot be
	 * retrieved.
	 */
	public static String getTagContents(Element element, String tag) {
		String contents;
		try {
			contents = getTagContents(element, tag, false);
		} catch (XMLException e) {
			// cannot happen, exceptions are disabled by the flag above
			contents = null;
		}
		return contents;
	}

	/**
	 * Returns the text content of the first child element with the given tag name, or the given
	 * default if {@link #getTagContents(Element, String)} yields null.
	 */
	public static String getTagContents(Element element, String tag, String deflt) {
		final String contents = getTagContents(element, tag);
		return contents != null ? contents : deflt;
	}

	/**
	 * For a structure like
	 * {@code <parent> <tagName>content</tagName> <something>else</something> ... </parent>} this
	 * returns "content".
	 *
	 * Only direct children of parent are considered, and the text content of the first child
	 * element named tagName is returned. If there is no such child, an {@link XMLException} is
	 * thrown if throwExceptionOnError is true; otherwise null is returned.
	 */
	public static String getTagContents(Element parent, String tagName, boolean throwExceptionOnError) throws XMLException {
		for (Node current = parent.getFirstChild(); current != null; current = current.getNextSibling()) {
			if (!(current instanceof Element)) {
				continue;
			}
			Element candidate = (Element) current;
			if (candidate.getTagName().equals(tagName)) {
				return candidate.getTextContent();
			}
		}
		if (!throwExceptionOnError) {
			return null;
		}
		throw new XMLException("Missing tag: <" + tagName + "> in <" + parent.getTagName() + ">.");
	}

	/**
	 * Parses the text content of the first direct child element with the given tag name as an
	 * integer.
	 *
	 * @param element
	 *            the parent element whose children are searched
	 * @param tag
	 *            the tag name of the child element to read
	 * @return the parsed value
	 * @throws XMLException
	 *             if no such child element exists, or if its text content is not a valid integer
	 *             (the {@link NumberFormatException} is attached as cause)
	 */
	public static int getTagContentsAsInt(Element element, String tag) throws XMLException {
		final String string = getTagContents(element, tag, true);
		try {
			return Integer.parseInt(string);
		} catch (NumberFormatException e) {
			// keep the parse failure as cause instead of dropping it
			throw new XMLException("Contents of tag <" + tag + "> must be integer, but found '" + string + "'.", e);
		}
	}

	/**
	 * Parses the text content of the first direct child element with the given tag name as an
	 * integer, falling back to a default if there is no such child.
	 *
	 * @param element
	 *            the parent element whose children are searched
	 * @param tag
	 *            the tag name of the child element to read
	 * @param dfltValue
	 *            the value returned if no such child element exists
	 * @return the parsed value or the default
	 * @throws XMLException
	 *             if the child exists but its text content is not a valid integer (the
	 *             {@link NumberFormatException} is attached as cause)
	 */
	public static int getTagContentsAsInt(Element element, String tag, int dfltValue) throws XMLException {
		final String string = getTagContents(element, tag, false);
		if (string == null) {
			return dfltValue;
		}
		try {
			return Integer.parseInt(string);
		} catch (NumberFormatException e) {
			// keep the parse failure as cause instead of dropping it
			throw new XMLException("Contents of tag <" + tag + "> must be integer, but found '" + string + "'.", e);
		}
	}

	/**
	 * Parses the text content of the first direct child element with the given tag name as a long.
	 *
	 * @param element
	 *            the parent element whose children are searched
	 * @param tag
	 *            the tag name of the child element to read
	 * @return the parsed value
	 * @throws XMLException
	 *             if no such child element exists, or if its text content is not a valid long (the
	 *             {@link NumberFormatException} is attached as cause)
	 */
	public static long getTagContentsAsLong(Element element, String tag) throws XMLException {
		final String string = getTagContents(element, tag, true);
		try {
			return Long.parseLong(string);
		} catch (NumberFormatException e) {
			// keep the parse failure as cause instead of dropping it
			throw new XMLException("Contents of tag <" + tag + "> must be integer, but found '" + string + "'.", e);
		}
	}

	/**
	 * Parses the text content of the first direct child element with the given tag name as a long,
	 * falling back to a default if there is no such child.
	 *
	 * @param element
	 *            the parent element whose children are searched
	 * @param tag
	 *            the tag name of the child element to read
	 * @param dfltValue
	 *            the value returned if no such child element exists; note that the default is
	 *            declared as int although the result is a long
	 * @return the parsed value or the default
	 * @throws XMLException
	 *             if the child exists but its text content is not a valid long (the
	 *             {@link NumberFormatException} is attached as cause)
	 */
	public static long getTagContentsAsLong(Element element, String tag, int dfltValue) throws XMLException {
		final String string = getTagContents(element, tag, false);
		if (string == null) {
			return dfltValue;
		}
		try {
			return Long.parseLong(string);
		} catch (NumberFormatException e) {
			// keep the parse failure as cause instead of dropping it
			throw new XMLException("Contents of tag <" + tag + "> must be integer, but found '" + string + "'.", e);
		}
	}

	/**
	 * Parses the text content of the first direct child element with the given tag name as a
	 * double, falling back to a default if there is no such child.
	 *
	 * @param element
	 *            the parent element whose children are searched
	 * @param tag
	 *            the tag name of the child element to read
	 * @param dfltValue
	 *            the value returned if no such child element exists
	 * @return the parsed value or the default
	 * @throws XMLException
	 *             if the child exists but its text content is not a valid double (the
	 *             {@link NumberFormatException} is attached as cause)
	 */
	public static double getTagContentsAsDouble(Element element, String tag, double dfltValue) throws XMLException {
		final String string = getTagContents(element, tag, false);
		if (string == null) {
			return dfltValue;
		}
		try {
			return Double.parseDouble(string);
		} catch (NumberFormatException e) {
			// keep the parse failure as cause instead of dropping it
			throw new XMLException("Contents of tag <" + tag + "> must be double, but found '" + string + "'.", e);
		}
	}

	/**
	 * Interprets the text content of the first direct child element with the given tag name as a
	 * boolean, falling back to a default if there is no such child.
	 *
	 * The content is interpreted by {@link Boolean#parseBoolean(String)}: it yields true only if
	 * the text equals "true" ignoring case, and false for any other text. Invalid content is
	 * therefore not reported as an error.
	 *
	 * @param parent
	 *            the parent element whose children are searched
	 * @param tagName
	 *            the tag name of the child element to read
	 * @param dflt
	 *            the value returned if no such child element exists
	 * @return the parsed value or the default
	 * @throws XMLException
	 *             declared for compatibility with existing callers
	 */
	public static boolean getTagContentsAsBoolean(Element parent, String tagName, boolean dflt) throws XMLException {
		String string = getTagContents(parent, tagName, false);
		if (string == null) {
			return dflt;
		}
		// Boolean.parseBoolean never throws, so the former NumberFormatException handler was
		// unreachable and has been removed
		return Boolean.parseBoolean(string);
	}

	/**
	 * Sets the text of the first direct child of parent with the given tag name. An existing
	 * child is emptied and receives a single text node with the given text; if parent has no such
	 * child, a new one is created and appended. A null value is treated as the empty string.
	 */
	public static void setTagContents(Element parent, String tagName, String value) {
		final String text = (value == null) ? "" : value;

		// locate the first direct child element carrying the requested tag name
		Element target = null;
		Node current = parent.getFirstChild();
		while (current != null && target == null) {
			if (current instanceof Element && ((Element) current).getTagName().equals(tagName)) {
				target = (Element) current;
			}
			current = current.getNextSibling();
		}

		if (target != null) {
			Node stale;
			while ((stale = target.getFirstChild()) != null) {
				target.removeChild(stale);
			}
		} else {
			target = parent.getOwnerDocument().createElement(tagName);
			parent.appendChild(target);
		}
		target.appendChild(parent.getOwnerDocument().createTextNode(text));
	}

	/**
	 * Removes all direct child elements with the given name from the given element. Elements of
	 * that name nested deeper in the tree are left untouched.
	 *
	 * @param parentElement
	 *            the element whose children are removed
	 * @param name
	 *            the tag name of the children to remove; the special value "*" matches all child
	 *            elements
	 */
	public static void deleteTagContents(Element parentElement, String name) {
		// Walk the direct children only. getElementsByTagName would also return nested
		// descendants, and calling removeChild for those on parentElement throws a DOMException.
		final boolean matchAll = "*".equals(name);
		Node child = parentElement.getFirstChild();
		while (child != null) {
			// remember the successor first, removing the node detaches it from its siblings
			Node next = child.getNextSibling();
			if (child instanceof Element && (matchAll || ((Element) child).getTagName().equals(name))) {
				parentElement.removeChild(child);
			}
			child = next;
		}
	}

	/**
	 * Converts the given date into an {@link XMLGregorianCalendar} by way of a
	 * {@link GregorianCalendar} set to that date.
	 *
	 * @return the converted date, or null if date is null
	 * @throws RuntimeException
	 *             if no {@link DatatypeFactory} can be created
	 */
	public static XMLGregorianCalendar getXMLGregorianCalendar(Date date) {
		if (date == null) {
			return null;
		}
		GregorianCalendar calendar = new GregorianCalendar();
		calendar.setTime(date);
		try {
			return DatatypeFactory.newInstance().newXMLGregorianCalendar(calendar);
		} catch (DatatypeConfigurationException e) {
			throw new RuntimeException("Failed to create XMLGregorianCalendar: " + e, e);
		}
	}

	/**
	 * Returns the single direct child element of the given element with the given tag name. An
	 * {@link XMLException} is thrown if there is no such element or if there is more than one.
	 */
	public static Element getUniqueInnerTag(Element element, String tagName) throws XMLException {
		final boolean obligatory = true;
		return getUniqueInnerTag(element, tagName, obligatory);
	}

	/**
	 * Returns the single direct child element of the given element with the given tag name.
	 *
	 * @param element
	 *            the element whose direct children are searched
	 * @param tagName
	 *            the tag name to look for
	 * @param obligatory
	 *            whether a missing child is an error
	 * @return the matching child, or null if there is none and obligatory is false
	 * @throws XMLException
	 *             if the child is missing although obligatory is true, or, in any case, if more
	 *             than one matching child exists
	 */
	public static Element getUniqueInnerTag(Element element, String tagName, boolean obligatory) throws XMLException {
		NodeList children = element.getChildNodes();
		Collection<Element> elements = new ArrayList<Element>();
		for (int i = 0; i < children.getLength(); i++) {
			if (children.item(i) instanceof Element) {
				Element child = (Element) children.item(i);
				if (tagName.equals(child.getTagName())) {
					elements.add(child);
				}
			}
		}
		switch (elements.size()) {
			case 0:
				if (obligatory) {
					throw new XMLException("Missing inner tag <" + tagName + "> inside <" + element.getTagName() + ">.");
				} else {
					return null;
				}
			case 1:
				return elements.iterator().next();
			default:
				// report the number of matching elements, not the number of all child nodes
				throw new XMLException("Inner tag <" + tagName + "> inside <" + element.getTagName()
						+ "> must be unique, but found " + elements.size() + ".");
		}

	}

	/**
	 * Collects, in document order, all elements that are direct children of the given element and
	 * whose node name equals the given tag name.
	 */
	public static Collection<Element> getChildElements(Element father, String tagName) {
		LinkedList<Element> matches = new LinkedList<Element>();
		for (Node current = father.getFirstChild(); current != null; current = current.getNextSibling()) {
			if (current instanceof Element && current.getNodeName().equals(tagName)) {
				matches.add((Element) current);
			}
		}
		return matches;
	}

	/**
	 * Collects, in document order, all elements that are direct children of the given element.
	 * Child nodes that are not elements, such as text, are skipped.
	 */
	public static Collection<Element> getChildElements(Element father) {
		LinkedList<Element> childElements = new LinkedList<Element>();
		for (Node current = father.getFirstChild(); current != null; current = current.getNextSibling()) {
			if (current instanceof Element) {
				childElements.add((Element) current);
			}
		}
		return childElements;
	}

	/**
	 * Returns the single direct child element with the given name of the given father element.
	 * If mandatory is true, an exception is thrown if the element is not present; otherwise null
	 * is returned in that case. If the child is ambiguous, an exception is thrown in any case.
	 */
	public static Element getChildElement(Element father, String tagName, boolean mandatory) throws XMLException {
		Collection<Element> matches = getChildElements(father, tagName);
		final int count = matches.size();
		if (count == 1) {
			return matches.iterator().next();
		}
		if (count > 1) {
			throw new XMLException("Child tag <" + tagName + "> inside <" + father.getTagName()
					+ "> must be unique, but found " + count + ".");
		}
		// no match at all
		if (mandatory) {
			throw new XMLException("Missing child tag <" + tagName + "> inside <" + father.getTagName() + ">.");
		}
		return null;
	}

	/**
	 * Same as {@link #getChildElement(Element, String, boolean)} with the child element being
	 * mandatory.
	 *
	 * @throws XMLException
	 *             if the child element is missing or not unique
	 */
	public static Element getUniqueChildElement(Element father, String tagName) throws XMLException {
		final boolean mandatory = true;
		return getChildElement(father, tagName, mandatory);
	}

	/**
	 * Creates an element with the given name and text content and appends it as last child to the
	 * given parent element.
	 */
	public static void addTag(Element parent, String name, String textValue) {
		Document owner = parent.getOwnerDocument();
		Element created = owner.createElement(name);
		created.setTextContent(textValue);
		parent.appendChild(created);
	}

	/**
	 * Creates a new, empty document.
	 *
	 * @return the new document, or null if no document builder could be created
	 */
	public static Document createDocument() {
		DocumentBuilder builder;
		try {
			builder = createDocumentBuilder();
		} catch (IOException e) {
			return null;
		}
		return builder.newDocument();
	}

	/**
	 * Creates an empty element with the given name, appends it as last child to the given
	 * fatherElement and returns it.
	 */
	public static Element addTag(Element fatherElement, String tagName) {
		Document owner = fatherElement.getOwnerDocument();
		Element created = owner.createElement(tagName);
		fatherElement.appendChild(created);
		return created;
	}

	/**
	 * Returns the direct child element of the given element with the given tag name. The child
	 * must be unique, otherwise an exception is raised. If the tag is missing, null is returned if
	 * optional is true; otherwise an exception is raised as well.
	 */
	public static Element getChildTag(Element element, String xmlTagName, boolean optional) throws XMLException {
		Element match = null;
		for (Node current = element.getFirstChild(); current != null; current = current.getNextSibling()) {
			if (!(current instanceof Element)) {
				continue;
			}
			Element candidate = (Element) current;
			if (!candidate.getTagName().equals(xmlTagName)) {
				continue;
			}
			if (match != null) {
				// second hit: the tag is ambiguous
				throw new XMLException("Tag <" + xmlTagName + "> in <" + element.getTagName() + "> must be unique.");
			}
			match = candidate;
		}
		if (match == null && !optional) {
			throw new XMLException("Tag <" + xmlTagName + "> in <" + element.getTagName() + "> is missing.");
		}
		return match;
	}

	/**
	 * Returns the text contents of all direct child elements with the given name as String array,
	 * in the order in which the children occur.
	 */
	public static String[] getChildTagsContentAsStringArray(Element father, String childElementName) {
		Collection<Element> matches = XMLTools.getChildElements(father, childElementName);
		String[] contents = new String[matches.size()];
		int index = 0;
		for (Element match : matches) {
			contents[index] = match.getTextContent();
			index++;
		}
		return contents;
	}

	/**
	 * Returns the text contents of all direct child elements with the given name as int array, in
	 * the order in which the children occur. Leading and trailing whitespace of each content is
	 * ignored.
	 *
	 * @throws XMLException
	 *             if one of the contents is not a valid integer
	 */
	public static int[] getChildTagsContentAsIntArray(Element father, String childElementName) throws XMLException {
		Collection<Element> matches = XMLTools.getChildElements(father, childElementName);
		int[] parsed = new int[matches.size()];
		int index = 0;
		for (Element match : matches) {
			String trimmed = match.getTextContent().trim();
			try {
				parsed[index] = Integer.parseInt(trimmed);
			} catch (NumberFormatException e) {
				throw new XMLException("Invalid format for element content of type " + childElementName, e);
			}
			index++;
		}
		return parsed;
	}

	/**
	 * Intended to return an XPath expression matching all given elements. <b>This method is not
	 * implemented yet and always returns the empty string.</b>
	 *
	 * The sketched algorithm is to group the elements by tag name and to build the longest common
	 * path per group, removing positional counters where the paths differ.
	 *
	 * @param document
	 *            the document containing the elements; currently unused
	 * @param elements
	 *            the elements to match; currently unused
	 * @return always the empty string
	 */
	public static String getXPath(Document document, Element... elements) {
		// TODO: implement. The former preparatory code computed values that were never used and
		// threw a ClassCastException for a document's root element, whose parent node is the
		// Document and not an Element. It has been removed until the algorithm is implemented.
		return "";
	}
}