/* * #%L * ===================================================== * _____ _ ____ _ _ _ _ * |_ _|_ __ _ _ ___| |_ / __ \| | | | ___ | | | | * | | | '__| | | / __| __|/ / _` | |_| |/ __|| |_| | * | | | | | |_| \__ \ |_| | (_| | _ |\__ \| _ | * |_| |_| \__,_|___/\__|\ \__,_|_| |_||___/|_| |_| * \____/ * * ===================================================== * * Hochschule Hannover * (University of Applied Sciences and Arts, Hannover) * Faculty IV, Dept. of Computer Science * Ricklinger Stadtweg 118, 30459 Hannover, Germany * * Email: trust@f4-i.fh-hannover.de * Website: http://trust.f4.hs-hannover.de/ * * This file is part of visitmeta-common, version 0.6.0, * implemented by the Trust@HsH research group at the Hochschule Hannover. * %% * Copyright (C) 2012 - 2016 Trust@HsH * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ package de.hshannover.f4.trust.visitmeta.util; import java.io.IOException; import java.io.StringReader; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.apache.log4j.Logger; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; import org.xml.sax.SAXException; /** * Utility class for W3C {@link Document} objects. * * @author Bastian Hellmann * */ public class DocumentUtils { /** * Private constructor, as the the class only contains static methods. */ private DocumentUtils() { } private static final Logger LOGGER = Logger.getLogger(DocumentUtils.class); private static final String[] NAME_TYPE_VALUE_ARRAY = new String[] { "name", "value", "type"}; public static final Set<String> NAME_TYPE_VALUE = new HashSet<>( Arrays.asList(NAME_TYPE_VALUE_ARRAY)); private static DocumentBuilderFactory mFactory; private static DocumentBuilder mBuilder; static { mFactory = DocumentBuilderFactory.newInstance(); try { mBuilder = mFactory.newDocumentBuilder(); } catch (ParserConfigurationException e) { LOGGER.error(e.getMessage()); } } /** * Takes escaped XML as a {@link String} and replaces any escaped character * with its non-escaped version. * * <ul> * <li>&amp; -> & * <li>&lt; -> < * <li>&gt; -> > * <li>&quot; -> " * <li>&apos; -> ' * </ul> * * @param input * @return */ public static String deEscapeXml(String input) { String ret = input; String[] unwanted = {"&", "<", ">", """, "'"}; String[] replaceBy = {"&", "<", ">", "\"", "'"}; for (int i = 0; i < unwanted.length; i++) { ret = ret.replace(unwanted[i], replaceBy[i]); } return ret; } /** * Parses a {@link String} containing a XML document to a {@link Document}. * * @param input * XML document as one {@link String} * @return a {@link Document} */ public static synchronized Document parseXmlString(String input) { Document document = null; try { document = mBuilder.parse(new InputSource(new StringReader(input))); } catch (SAXException | IOException e) { LOGGER.error(e.getMessage()); } return document; } /** * Convenience method; takes an escaped XML document as input and returns a * {@link Document} object. * * @param input * escaped XML as one {@link String} * @return a {@link Document} */ public static Document parseEscapedXmlString(String input) { String deEscapedXml = deEscapeXml(input); return parseXmlString(deEscapedXml); } private static String extractSingleInformation(Document document, String key) { Element documentElement = document.getDocumentElement(); NodeList childNodes = document.getChildNodes(); String result = null; for (int i = 0; i < childNodes.getLength(); i++) { Node item = childNodes.item(i); if (item.getNodeName().equals(key)) { result = item.getNodeValue(); } } if (result == null || result.equals("")) { result = documentElement.getAttribute(key); } return result; } /** * Tries to extract information of a given {@link Document} object, by * looking for child elements and attributes at the root-node of the * document. * * @param document * a {@link Document} object * @param keys * a {@link Set} of keys that are looked up in the * {@link Document} * @return a {@link Map} containing all keys and their found values */ public static Map<String, String> extractInformation(Document document, Set<String> keys) { HashMap<String, String> result = new HashMap<>(); String tmpValue; for (String key : keys) { tmpValue = extractSingleInformation(document, key); if (tmpValue != null && !tmpValue.isEmpty()) { result.put(key, tmpValue); } } return result; } }