/*
 * StructBuilder.java
 *
 * Copyright (c) 2006, Imperial College. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 *
 * - Neither the name of Imperial College nor the names of their
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */
package org.dspace.administer;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.SQLException;
import java.util.Enumeration;
import java.util.Hashtable;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.xpath.XPathAPI;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.core.Context;
import org.dspace.eperson.EPerson;
import org.jdom.Element;
import org.jdom.output.XMLOutputter;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * This class deals with importing community and collection structures from
 * an XML file.
 *
 * The XML file structure needs to be:
 *
 * <import_structure>
 *     <community>
 *         <name>....</name>
 *         <community>...</community>
 *         <collection>
 *             <name>....</name>
 *         </collection>
 *     </community>
 * </import_structure>
 *
 * it can be arbitrarily deep, and supports all the metadata elements
 * that make up the community and collection metadata.
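 *
 * As a purely illustrative (not normative) example, using the element names
 * that this class maps onto community and collection metadata in main(), a
 * fuller source document might look like:
 *
 * <import_structure>
 *     <community>
 *         <name>A Top Level Community</name>
 *         <description>A short description</description>
 *         <intro>Some introductory text</intro>
 *         <copyright>A copyright statement</copyright>
 *         <sidebar>Side bar text</sidebar>
 *         <collection>
 *             <name>A Collection</name>
 *             <description>A short description</description>
 *             <license>Deposit licence text</license>
 *             <provenance>Provenance description</provenance>
 *         </collection>
 *     </community>
 * </import_structure>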
 * See the system documentation for more details.
 *
 * @author Richard Jones
 */
public class StructBuilder
{
    /** the output XML document which will contain updated information about
     * the imported structure */
    private static org.jdom.Document xmlOutput = new org.jdom.Document(new Element("imported_structure"));

    /** a hashtable to hold metadata for the collection being worked on */
    private static Hashtable collectionMap = new Hashtable();

    /** a hashtable to hold metadata for the community being worked on */
    private static Hashtable communityMap = new Hashtable();

    /**
     * Main method to be run from the command line to import a structure into
     * DSpace.
     *
     * This is of the form:
     *
     * StructBuilder -f [xml source] -e [administrator email] -o [output file]
     *
     * The output file will contain the same structure as the source XML
     * document, but with the handle of each imported community and collection
     * added as an attribute.
     */
    public static void main(String[] argv)
        throws Exception
    {
        CommandLineParser parser = new PosixParser();

        Options options = new Options();

        options.addOption("f", "file", true, "file");
        options.addOption("e", "eperson", true, "eperson");
        options.addOption("o", "output", true, "output");

        CommandLine line = parser.parse(options, argv);

        String file = null;
        String eperson = null;
        String output = null;

        if (line.hasOption('f'))
        {
            file = line.getOptionValue('f');
        }

        if (line.hasOption('e'))
        {
            eperson = line.getOptionValue('e');
        }

        if (line.hasOption('o'))
        {
            output = line.getOptionValue('o');
        }

        if (output == null || eperson == null || file == null)
        {
            usage();
            System.exit(0);
        }

        // create a context
        Context context = new Context();

        // set the context
        context.setCurrentUser(EPerson.findByEmail(context, eperson));

        // load the XML
        Document document = loadXML(file);

        // run the preliminary validation, to be sure that the XML document
        // is properly structured
        validate(document);

        // load the mappings into the member variable hashtables
        communityMap.put("name", "name");
        communityMap.put("description", "short_description");
        communityMap.put("intro", "introductory_text");
        communityMap.put("copyright", "copyright_text");
        communityMap.put("sidebar", "side_bar_text");

        collectionMap.put("name", "name");
        collectionMap.put("description", "short_description");
        collectionMap.put("intro", "introductory_text");
        collectionMap.put("copyright", "copyright_text");
        collectionMap.put("sidebar", "side_bar_text");
        collectionMap.put("license", "license");
        collectionMap.put("provenance", "provenance_description");

        // get the top level community list
        NodeList first = XPathAPI.selectNodeList(document, "/import_structure/community");

        // run the import starting with the top level communities
        Element[] elements = handleCommunities(context, first, null);

        // generate the output
        Element root = xmlOutput.getRootElement();
        for (int i = 0; i < elements.length; i++)
        {
            root.addContent(elements[i]);
        }

        // finally write the string into the output file
        try
        {
            BufferedWriter out = new BufferedWriter(new FileWriter(output));
            out.write(new XMLOutputter().outputString(xmlOutput));
            out.close();
        }
        catch (IOException e)
        {
            System.out.println("Unable to write to output file " + output);
            System.exit(0);
        }

        context.complete();
    }

    /**
     * Output the usage information.
     */
    private static void usage()
    {
        System.out.println("Usage: java StructBuilder -f <source XML file> -o <output file> -e <eperson email>");
        System.out.println("Communities will be created from the top level, and a map of communities to handles will be returned in the output file");
        return;
    }

    /**
     * Validate the XML document.  This method does not return a value; if
     * validation fails it prints the errors encountered and halts execution.
     *
     * @param document the XML document object
     * @throws TransformerException
     */
    private static void validate(org.w3c.dom.Document document)
        throws TransformerException
    {
        StringBuffer err = new StringBuffer();
        boolean trip = false;

        err.append("The following errors were encountered parsing the source XML\n");
        err.append("No changes have been made to the DSpace instance\n\n");

        NodeList first = XPathAPI.selectNodeList(document, "/import_structure/community");
        if (first.getLength() == 0)
        {
            err.append("-There are no top level communities in the source document");
            System.out.println(err.toString());
            System.exit(0);
        }

        String errs = validateCommunities(first, 1);
        if (errs != null)
        {
            err.append(errs);
            trip = true;
        }

        if (trip)
        {
            System.out.println(err.toString());
            System.exit(0);
        }
    }

    /**
     * Validate the communities section of the XML document.  This returns a
     * string containing any errors encountered, or null if there were no
     * errors.
     *
     * @param communities the NodeList of communities to validate
     * @param level the level in the XML document that we are at, for the
     *            purposes of error reporting
     *
     * @return the errors that need to be generated by the calling method, or
     *             null if no errors
     */
    private static String validateCommunities(NodeList communities, int level)
        throws TransformerException
    {
        StringBuffer err = new StringBuffer();
        boolean trip = false;
        String errs = null;

        for (int i = 0; i < communities.getLength(); i++)
        {
            Node n = communities.item(i);
            NodeList name = XPathAPI.selectNodeList(n, "name");
            if (name.getLength() != 1)
            {
                String pos = Integer.toString(i + 1);
                err.append("-The level " + level + " community in position " + pos);
                err.append(" does not contain exactly one name field\n");
                trip = true;
            }

            // validate sub communities
            NodeList subCommunities = XPathAPI.selectNodeList(n, "community");
            String comErrs = validateCommunities(subCommunities, level + 1);
            if (comErrs != null)
            {
                err.append(comErrs);
                trip = true;
            }

            // validate collections
            NodeList collections = XPathAPI.selectNodeList(n, "collection");
            String colErrs = validateCollections(collections, level + 1);
            if (colErrs != null)
            {
                err.append(colErrs);
                trip = true;
            }
        }

        if (trip)
        {
            errs = err.toString();
        }

        return errs;
    }

    /**
     * Validate the collection section of the XML document.  This generates a
     * string containing any errors encountered, or returns null if no errors.
     *
     * @param collections a NodeList of collections to validate
     * @param level the level in the XML document, for the purposes of error
     *            reporting
     *
     * @return the errors to be generated by the calling method, or null if
     *             none
     */
    private static String validateCollections(NodeList collections, int level)
        throws TransformerException
    {
        StringBuffer err = new StringBuffer();
        boolean trip = false;
        String errs = null;

        for (int i = 0; i < collections.getLength(); i++)
        {
            Node n = collections.item(i);
            NodeList name = XPathAPI.selectNodeList(n, "name");
            if (name.getLength() != 1)
            {
                String pos = Integer.toString(i + 1);
                err.append("-The level " + level + " collection in position " + pos);
                err.append(" does not contain exactly one name field\n");
                trip = true;
            }
        }

        if (trip)
        {
            errs = err.toString();
        }

        return errs;
    }

    /**
     * Load in the XML from file.
     *
     * @param filename
     *            the filename to load from
     *
     * @return the DOM representation of the XML file
     */
    private static org.w3c.dom.Document loadXML(String filename)
        throws IOException, ParserConfigurationException, SAXException
    {
        DocumentBuilder builder = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder();

        org.w3c.dom.Document document = builder.parse(new File(filename));

        return document;
    }

    /**
     * Return the String value of a Node.
     *
     * @param node the node from which we want to extract the string value
     *
     * @return the string value of the node
     */
    public static String getStringValue(Node node)
    {
        String value = node.getNodeValue();

        if (node.hasChildNodes())
        {
            Node first = node.getFirstChild();

            if (first.getNodeType() == Node.TEXT_NODE)
            {
                return first.getNodeValue().trim();
            }
        }

        return value;
    }

    /**
     * Take a node list of communities and build the structure from them,
     * delegating to the relevant methods in this class for sub-communities
     * and collections.
     *
     * @param context the context of the request
     * @param communities a node list of communities to create along with
     *            their sub-structures
     * @param parent the parent community of the node list of communities to
     *            create
     *
     * @return an element array containing additional information regarding
     *             the created communities (e.g. the handles they have been
     *             assigned)
     */
    private static Element[] handleCommunities(Context context, NodeList communities, Community parent)
        throws TransformerException, SQLException, Exception
    {
        Element[] elements = new Element[communities.getLength()];

        for (int i = 0; i < communities.getLength(); i++)
        {
            Community community;
            Element element = new Element("community");

            // create the community or sub community
            if (parent != null)
            {
                community = parent.createSubcommunity();
            }
            else
            {
                community = Community.create(null, context);
            }

            // default the short description to be an empty string
            community.setMetadata("short_description", " ");

            // now update the metadata
            Node tn = communities.item(i);
            Enumeration keys = communityMap.keys();
            while (keys.hasMoreElements())
            {
                Node node = null;
                String key = (String) keys.nextElement();
                NodeList nl = XPathAPI.selectNodeList(tn, key);
                if (nl.getLength() == 1)
                {
                    node = nl.item(0);
                    community.setMetadata((String) communityMap.get(key), getStringValue(node));
                }
            }

            // FIXME: at the moment, if the community already exists by name
            // then this will throw a PSQLException on a duplicate key
            // violation.  Ideally we'd skip this row and continue to create
            // sub communities and so forth where they don't exist, but it's
            // proving difficult to isolate the community that already exists
            // without hitting the database directly.
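            //
            // One possible direction (an untested sketch only, assuming the
            // DSpace 1.x calls Community.findAll(Context) and
            // Community.getMetadata("name"), and ignoring name clashes
            // elsewhere in the hierarchy) would be to look for an existing
            // community with the same name and reuse it instead:
            //
            //     Community[] existing = Community.findAll(context);
            //     for (int k = 0; k < existing.length; k++)
            //     {
            //         if (community.getMetadata("name").equals(existing[k].getMetadata("name")))
            //         {
            //             // reuse existing[k] rather than the newly created one
            //         }
            //     }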
            community.update();

            // build the element with the handle that identifies the new
            // community, along with all the information that we imported here.
            // This looks like a lot of repetition of getting information from
            // above, but it's here to keep it separate from the create process
            // in case we want to move it or make it switchable later
            element.setAttribute("identifier", community.getHandle());

            Element nameElement = new Element("name");
            nameElement.setText(community.getMetadata("name"));
            element.addContent(nameElement);

            if (community.getMetadata("short_description") != null)
            {
                Element descriptionElement = new Element("description");
                descriptionElement.setText(community.getMetadata("short_description"));
                element.addContent(descriptionElement);
            }

            if (community.getMetadata("introductory_text") != null)
            {
                Element introElement = new Element("intro");
                introElement.setText(community.getMetadata("introductory_text"));
                element.addContent(introElement);
            }

            if (community.getMetadata("copyright_text") != null)
            {
                Element copyrightElement = new Element("copyright");
                copyrightElement.setText(community.getMetadata("copyright_text"));
                element.addContent(copyrightElement);
            }

            if (community.getMetadata("side_bar_text") != null)
            {
                Element sidebarElement = new Element("sidebar");
                sidebarElement.setText(community.getMetadata("side_bar_text"));
                element.addContent(sidebarElement);
            }

            // handle sub communities
            NodeList subCommunities = XPathAPI.selectNodeList(tn, "community");
            Element[] subCommunityElements = handleCommunities(context, subCommunities, community);

            // handle collections
            NodeList collections = XPathAPI.selectNodeList(tn, "collection");
            Element[] collectionElements = handleCollections(context, collections, community);

            int j;
            for (j = 0; j < subCommunityElements.length; j++)
            {
                element.addContent(subCommunityElements[j]);
            }
            for (j = 0; j < collectionElements.length; j++)
            {
                element.addContent(collectionElements[j]);
            }

            elements[i] = element;
        }

        return elements;
    }

    /**
     * Take a node list of collections and create the structure from them.
     *
     * @param context the context of the request
     * @param collections the node list of collections to be created
     * @param parent the parent community to which the collections belong
     *
     * @return an Element array containing additional information about the
     *             created collections (e.g. the handle)
     */
    private static Element[] handleCollections(Context context, NodeList collections, Community parent)
        throws TransformerException, SQLException, AuthorizeException, IOException, Exception
    {
        Element[] elements = new Element[collections.getLength()];

        for (int i = 0; i < collections.getLength(); i++)
        {
            Element element = new Element("collection");
            Collection collection = parent.createCollection();

            // default the short description to the empty string
            collection.setMetadata("short_description", " ");

            // import the rest of the metadata
            Node tn = collections.item(i);
            Enumeration keys = collectionMap.keys();
            while (keys.hasMoreElements())
            {
                Node node = null;
                String key = (String) keys.nextElement();
                NodeList nl = XPathAPI.selectNodeList(tn, key);
                if (nl.getLength() == 1)
                {
                    node = nl.item(0);
                    collection.setMetadata((String) collectionMap.get(key), getStringValue(node));
                }
            }

            collection.update();

            element.setAttribute("identifier", collection.getHandle());

            Element nameElement = new Element("name");
            nameElement.setText(collection.getMetadata("name"));
            element.addContent(nameElement);

            if (collection.getMetadata("short_description") != null)
            {
                Element descriptionElement = new Element("description");
                descriptionElement.setText(collection.getMetadata("short_description"));
                element.addContent(descriptionElement);
            }

            if (collection.getMetadata("introductory_text") != null)
            {
                Element introElement = new Element("intro");
                introElement.setText(collection.getMetadata("introductory_text"));
                element.addContent(introElement);
            }

            if (collection.getMetadata("copyright_text") != null)
            {
                Element copyrightElement = new Element("copyright");
                copyrightElement.setText(collection.getMetadata("copyright_text"));
                element.addContent(copyrightElement);
            }

            if (collection.getMetadata("side_bar_text") != null)
            {
                Element sidebarElement = new Element("sidebar");
                sidebarElement.setText(collection.getMetadata("side_bar_text"));
                element.addContent(sidebarElement);
            }

            if (collection.getMetadata("license") != null)
            {
                Element licenseElement = new Element("license");
                licenseElement.setText(collection.getMetadata("license"));
                element.addContent(licenseElement);
            }

            if (collection.getMetadata("provenance_description") != null)
            {
                Element provenanceElement = new Element("provenance");
                provenanceElement.setText(collection.getMetadata("provenance_description"));
                element.addContent(provenanceElement);
            }

            elements[i] = element;
        }

        return elements;
    }
}