/*
* RegistryImporter.java
*
* Copyright (c) 2006, Imperial College London. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of Imperial College nor the names of their
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package org.dspace.administer;
import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.xpath.XPathAPI;
import org.dspace.administer.DCType;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.MetadataField;
import org.dspace.content.MetadataSchema;
import org.dspace.content.NonUniqueMetadataException;
import org.dspace.core.Context;
import org.dspace.core.LogManager;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.NamedNodeMap;
import org.xml.sax.SAXException;
/**
* @author Richard Jones
*
* This class provides the tools that registry importers might need to
* use. Basically some utility methods. And actually, although it says
* I am the author, really I ripped these methods off from other
* classes
*/
public class RegistryImporter
{
/**
* Load in the XML from file.
*
* @param filename
* the filename to load from
*
* @return the DOM representation of the XML file
*/
public static Document loadXML(String filename)
throws IOException, ParserConfigurationException, SAXException
{
DocumentBuilder builder = DocumentBuilderFactory.newInstance()
.newDocumentBuilder();
Document document = builder.parse(new File(filename));
return document;
}
/**
* Get the CDATA of a particular element. For example, if the XML document
* contains:
* <P>
* <code>
* <foo><mimetype>application/pdf</mimetype></foo>
* </code>
* passing this the <code>foo</code> node and <code>mimetype</code> will
* return <code>application/pdf</code>.
* </P>
* Why this isn't a core part of the XML API I do not know...
*
* @param parentElement
* the element, whose child element you want the CDATA from
* @param childName
* the name of the element you want the CDATA from
*
* @return the CDATA as a <code>String</code>
*/
public static String getElementData(Node parentElement, String childName)
throws TransformerException
{
// Grab the child node
Node childNode = XPathAPI.selectSingleNode(parentElement, childName);
if (childNode == null)
{
// No child node, so no values
return null;
}
// Get the #text
Node dataNode = childNode.getFirstChild();
if (dataNode == null)
{
return null;
}
// Get the data
String value = dataNode.getNodeValue().trim();
return value;
}
/**
* Get repeated CDATA for a particular element. For example, if the XML
* document contains:
* <P>
* <code>
* <foo>
* <bar>val1</bar>
* <bar>val2</bar>
* </foo>
* </code>
* passing this the <code>foo</code> node and <code>bar</code> will
* return <code>val1</code> and <code>val2</code>.
* </P>
* Why this also isn't a core part of the XML API I do not know...
*
* @param parentElement
* the element, whose child element you want the CDATA from
* @param childName
* the name of the element you want the CDATA from
*
* @return the CDATA as a <code>String</code>
*/
public static String[] getRepeatedElementData(Node parentElement,
String childName) throws TransformerException
{
// Grab the child node
NodeList childNodes = XPathAPI.selectNodeList(parentElement, childName);
String[] data = new String[childNodes.getLength()];
for (int i = 0; i < childNodes.getLength(); i++)
{
// Get the #text node
Node dataNode = childNodes.item(i).getFirstChild();
// Get the data
data[i] = dataNode.getNodeValue().trim();
}
return data;
}
}