DocumentUtils.java example

Explorer
visitmeta-master
/*
 * #%L
 * =====================================================
 *   _____                _     ____  _   _       _   _
 *  |_   _|_ __ _   _ ___| |_  / __ \| | | | ___ | | | |
 *    | | | '__| | | / __| __|/ / _` | |_| |/ __|| |_| |
 *    | | | |  | |_| \__ \ |_| | (_| |  _  |\__ \|  _  |
 *    |_| |_|   \__,_|___/\__|\ \__,_|_| |_||___/|_| |_|
 *                             \____/
 * 
 * =====================================================
 * 
 * Hochschule Hannover
 * (University of Applied Sciences and Arts, Hannover)
 * Faculty IV, Dept. of Computer Science
 * Ricklinger Stadtweg 118, 30459 Hannover, Germany
 * 
 * Email: trust@f4-i.fh-hannover.de
 * Website: http://trust.f4.hs-hannover.de/
 * 
 * This file is part of visitmeta-common, version 0.6.0,
 * implemented by the Trust@HsH research group at the Hochschule Hannover.
 * %%
 * Copyright (C) 2012 - 2016 Trust@HsH
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package de.hshannover.f4.trust.visitmeta.util;

import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Utility class for W3C {@link Document} objects.
 *
 * @author Bastian Hellmann
 *
 */
public class DocumentUtils {

	/**
	 * Private constructor, as the the class only contains static methods.
	 */
	private DocumentUtils() {
	}

	private static final Logger LOGGER = Logger.getLogger(DocumentUtils.class);

	private static final String[] NAME_TYPE_VALUE_ARRAY = new String[] {
			"name", "value", "type"};
	public static final Set<String> NAME_TYPE_VALUE = new HashSet<>(
			Arrays.asList(NAME_TYPE_VALUE_ARRAY));

	private static DocumentBuilderFactory mFactory;
	private static DocumentBuilder mBuilder;

	static {
		mFactory = DocumentBuilderFactory.newInstance();
		try {
			mBuilder = mFactory.newDocumentBuilder();
		} catch (ParserConfigurationException e) {
			LOGGER.error(e.getMessage());
		}
	}

	/**
	 * Takes escaped XML as a {@link String} and replaces any escaped character
	 * with its non-escaped version.
	 *
	 * <ul>
	 * <li>&amp; -> &
	 * <li>&lt; -> <
	 * <li>&gt; -> >
	 * <li>&quot; -> "
	 * <li>&apos; -> '
	 * </ul>
	 *
	 * @param input
	 * @return
	 */
	public static String deEscapeXml(String input) {
		String ret = input;

		String[] unwanted = {"&", "<", ">", """, "'"};
		String[] replaceBy = {"&", "<", ">", "\"", "'"};

		for (int i = 0; i < unwanted.length; i++) {
			ret = ret.replace(unwanted[i], replaceBy[i]);
		}

		return ret;
	}

	/**
	 * Parses a {@link String} containing a XML document to a {@link Document}.
	 *
	 * @param input
	 *            XML document as one {@link String}
	 * @return a {@link Document}
	 */
	public static synchronized Document parseXmlString(String input) {
		Document document = null;
		try {
			document = mBuilder.parse(new InputSource(new StringReader(input)));
		} catch (SAXException | IOException e) {
			LOGGER.error(e.getMessage());
		}

		return document;
	}

	/**
	 * Convenience method; takes an escaped XML document as input and returns a
	 * {@link Document} object.
	 *
	 * @param input
	 *            escaped XML as one {@link String}
	 * @return a {@link Document}
	 */
	public static Document parseEscapedXmlString(String input) {
		String deEscapedXml = deEscapeXml(input);
		return parseXmlString(deEscapedXml);
	}

	private static String extractSingleInformation(Document document, String key) {
		Element documentElement = document.getDocumentElement();

		NodeList childNodes = document.getChildNodes();
		String result = null;
		for (int i = 0; i < childNodes.getLength(); i++) {
			Node item = childNodes.item(i);

			if (item.getNodeName().equals(key)) {
				result = item.getNodeValue();
			}
		}

		if (result == null
				|| result.equals("")) {
			result = documentElement.getAttribute(key);
		}

		return result;
	}

	/**
	 * Tries to extract information of a given {@link Document} object, by
	 * looking for child elements and attributes at the root-node of the
	 * document.
	 *
	 * @param document
	 *            a {@link Document} object
	 * @param keys
	 *            a {@link Set} of keys that are looked up in the
	 *            {@link Document}
	 * @return a {@link Map} containing all keys and their found values
	 */
	public static Map<String, String> extractInformation(Document document,
			Set<String> keys) {
		HashMap<String, String> result = new HashMap<>();

		String tmpValue;
		for (String key : keys) {
			tmpValue = extractSingleInformation(document, key);
			if (tmpValue != null
					&& !tmpValue.isEmpty()) {
				result.put(key, tmpValue);
			}
		}

		return result;
	}
}