/* * Copyright (c) 2012 Data Harmonisation Panel * * All rights reserved. This program and the accompanying materials are made * available under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation, either version 3 of the License, * or (at your option) any later version. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution. If not, see <http://www.gnu.org/licenses/>. * * Contributors: * HUMBOLDT EU Integrated Project #030962 * Data Harmonisation Panel <http://www.dhpanel.eu> */ package eu.esdihumboldt.hale.common.core.io.project.util; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.StringReader; import java.net.URI; import java.util.HashMap; import java.util.Map; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.EntityResolver; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import com.google.common.io.ByteStreams; import de.fhg.igd.slf4jplus.ALogger; import de.fhg.igd.slf4jplus.ALoggerFactory; import eu.esdihumboldt.hale.common.core.io.report.IOReporter; import eu.esdihumboldt.hale.common.core.io.report.impl.IOMessageImpl; import eu.esdihumboldt.hale.common.core.io.supplier.DefaultInputSupplier; import eu.esdihumboldt.util.io.IOUtils; /** * Class for updating location values in XML files.<br> * Loads a XML file, searches for specified nodes with location values, resolves * the referenced resources, copies them next to the given XML file (or a * sub-directory) and adapts the location value in the XML file. This process is * done recursively. * * @author Patrick Lieb * @author Kai Schwierczek */ public class XMLPathUpdater { private static final ALogger log = ALoggerFactory.getLogger(XMLPathUpdater.class); /** * Updates the specified resource file.<br> * <br> * The specified file <code>xmlResource</code>, which was originally placed * at <code>oldFile</code> gets updated. All node values found by the XPath * expression <code>locationXPath</code> get copied (and the node values are * updated accordingly) to a new relative path (except for web resources if * <code>includeWebResources</code> is false). The copied files are then * checked the same way.<br> * Resources (identified by their absolute URI) will be copied only once. * Note however, that this is only true for a single call of this method. So * if this method is called multiple times for the same file and some * referenced resources are present in both calls, they are copied multiple * times. You should select your XPath expression accordingly. <br> * Example:<br> * resource file is 'C:/Local/Temp/1348138164029-0/watercourse/wfs_va.xsd' * <br> * oldFile is 'C:/igd/hale/watercourse/wfs_va.xsd'.<br> * wfs_va.xsd has one schema import with location * 'C:/igd/hale/watercourse/schemas/feature.xsd'<br> * So feature.xsd is copied into * 'C:/Local/Temp/1348138164029-0/watercourse/' (or a sub-directory) and the * import location in wfs_va.xsd will be adapted. * * @param xmlResource the XML resource file that gets updated * @param oldPath its original location, may be <code>null</code> in case it * didn't exist before * @param locationXPath a XPath expression to find nodes that should be * processed * @param includeWebResources whether web resources should be copied and * updates, too * @param reporter the reporter of the current IO process where errors * should be reported to * @throws IOException if an IO exception occurs */ public static void update(File xmlResource, URI oldPath, String locationXPath, boolean includeWebResources, IOReporter reporter) throws IOException { update(xmlResource, oldPath, locationXPath, includeWebResources, reporter, new HashMap<URI, File>()); } /** * Actual implementation of the update method. * * @param xmlResource the XML resource file that gets updated * @param oldPath its original location * @param locationXPath a XPath expression to find nodes that should be * processed * @param includeWebResources whether web resources should be copied and * updates, too * @param reporter the reporter of the current IO process where errors * should be reported to * @param updates a map of already copied files which is used and gets * filled by this method. Needed for multiple updates on the same * file. * @throws IOException if an IO exception occurs */ private static void update(File xmlResource, URI oldPath, String locationXPath, boolean includeWebResources, IOReporter reporter, Map<URI, File> updates) throws IOException { // every XML resource should be updated (and copied) only once // so we save the currently adapted resource in a map updates.put(oldPath, xmlResource); // counter for the directory because every resource should have its own // directory int count = 0; DocumentBuilder builder = null; try { builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); } catch (ParserConfigurationException e) { throw new IOException("Can not create a DocumentBuilder", e); } builder.setEntityResolver(new EntityResolver() { @Override public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { // FIXME some documentation would be nice why this is OK here?! return new InputSource(new StringReader("")); } }); Document doc = null; try { doc = builder.parse(xmlResource); } catch (SAXException e1) { // if the file is no XML file simply stop the recursion return; } // find schemaLocation of imports/includes via XPath XPath xpath = XPathFactory.newInstance().newXPath(); NodeList nodelist = null; try { nodelist = ((NodeList) xpath.evaluate(locationXPath, doc, XPathConstants.NODESET)); } catch (XPathExpressionException e) { throw new IOException("The XPath expression is wrong", e); } // iterate over all imports or includes and get the schemaLocations for (int i = 0; i < nodelist.getLength(); i++) { Node locationNode = nodelist.item(i); String location = locationNode.getNodeValue(); URI locationUri = null; try { locationUri = new URI(location); } catch (Exception e1) { reporter.error(new IOMessageImpl("The location is no valid URI.", e1)); continue; } if (!locationUri.isAbsolute()) { locationUri = oldPath.resolve(locationUri); } String scheme = locationUri.getScheme(); InputStream input = null; if (scheme != null) { // should the resource be included? if (includeWebResources || !(scheme.equals("http") || scheme.equals("https"))) { DefaultInputSupplier supplier = new DefaultInputSupplier(locationUri); input = supplier.getInput(); } else continue; } else { // file is invalid - at least report that reporter.error( new IOMessageImpl("Skipped resource because it cannot be loaded from " + locationUri.toString(), null)); continue; } // every file needs its own directory because of name conflicts String filename = location; if (location.contains("/")) filename = location.substring(location.lastIndexOf("/") + 1); filename = count + "/" + filename; File includednewFile = null; if (updates.containsKey(locationUri)) { // if the current XML schema is already updated we have to // find the relative path to this resource URI relative = IOUtils.getRelativePath(updates.get(locationUri).toURI(), xmlResource.toURI()); locationNode.setNodeValue(relative.toString()); } else if (input != null) { // we need the directory of the file File xmlResourceDir = xmlResource.getParentFile(); // path where the file should be copied to includednewFile = new File(xmlResourceDir, filename); try { includednewFile.getParentFile().mkdirs(); } catch (SecurityException e) { throw new IOException( "Can not create directories " + includednewFile.getParent(), e); } // copy to new directory OutputStream output = new FileOutputStream(includednewFile); ByteStreams.copy(input, output); output.close(); input.close(); // set new location in the XML resource locationNode.setNodeValue(filename); update(includednewFile, locationUri, locationXPath, includeWebResources, reporter, updates); count++; } // write new XML-File TransformerFactory transformerFactory = TransformerFactory.newInstance(); Transformer transformer = null; try { transformer = transformerFactory.newTransformer(); } catch (TransformerConfigurationException e) { log.debug("Can not create transformer for creating XMl file", e); return; } DOMSource source = new DOMSource(doc); StreamResult result = new StreamResult(xmlResource); try { transformer.transform(source, result); } catch (TransformerException e) { log.debug("Cannot create new XML file", e); return; } } } }