/** * The contents of this file are subject to the license and copyright * detailed in the LICENSE and NOTICE files at the root of the source * tree and available online at * * http://www.dspace.org/license/ */ package org.dspace.administer; import java.io.File; import java.io.IOException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import org.apache.xpath.XPathAPI; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; /** * @author Richard Jones * * This class provides the tools that registry importers might need to * use. Basically some utility methods. And actually, although it says * I am the author, really I ripped these methods off from other * classes */ public class RegistryImporter { /** * Load in the XML from file. * * @param filename * the filename to load from * * @return the DOM representation of the XML file * @throws IOException if IO error * @throws ParserConfigurationException if configuration parse error * @throws SAXException if XML parse error */ public static Document loadXML(String filename) throws IOException, ParserConfigurationException, SAXException { DocumentBuilder builder = DocumentBuilderFactory.newInstance() .newDocumentBuilder(); Document document = builder.parse(new File(filename)); return document; } /** * Get the CDATA of a particular element. For example, if the XML document * contains: * <P> * <code> * <foo><mimetype>application/pdf</mimetype></foo> * </code> * passing this the <code>foo</code> node and <code>mimetype</code> will * return <code>application/pdf</code>. * </P> * Why this isn't a core part of the XML API I do not know... * * @param parentElement * the element, whose child element you want the CDATA from * @param childName * the name of the element you want the CDATA from * @throws TransformerException if error * @return the CDATA as a <code>String</code> */ public static String getElementData(Node parentElement, String childName) throws TransformerException { // Grab the child node Node childNode = XPathAPI.selectSingleNode(parentElement, childName); if (childNode == null) { // No child node, so no values return null; } // Get the #text Node dataNode = childNode.getFirstChild(); if (dataNode == null) { return null; } // Get the data String value = dataNode.getNodeValue().trim(); return value; } /** * Get repeated CDATA for a particular element. For example, if the XML * document contains: * <P> * <code> * <foo> * <bar>val1</bar> * <bar>val2</bar> * </foo> * </code> * passing this the <code>foo</code> node and <code>bar</code> will * return <code>val1</code> and <code>val2</code>. * </P> * Why this also isn't a core part of the XML API I do not know... * * @param parentElement * the element, whose child element you want the CDATA from * @param childName * the name of the element you want the CDATA from * @throws TransformerException if error * @return the CDATA as a <code>String</code> */ public static String[] getRepeatedElementData(Node parentElement, String childName) throws TransformerException { // Grab the child node NodeList childNodes = XPathAPI.selectNodeList(parentElement, childName); String[] data = new String[childNodes.getLength()]; for (int i = 0; i < childNodes.getLength(); i++) { // Get the #text node Node dataNode = childNodes.item(i).getFirstChild(); // Get the data data[i] = dataNode.getNodeValue().trim(); } return data; } }