/*
 * MODSDisseminationCrosswalk.java
 *
 * Version: $Revision: 3761 $
 *
 * Date: $Date: 2009-05-07 04:18:02 +0000 (Thu, 07 May 2009) $
 *
 * Copyright (c) 2002-2009, The DSpace Foundation.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * - Neither the name of the DSpace Foundation nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */
package org.dspace.content.crosswalk;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;

import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DCValue;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.SelfNamedPlugin;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.Text;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;

/**
 * Configurable MODS Crosswalk
 * <p>
 * This class supports multiple dissemination crosswalks from DSpace
 * internal data to the MODS XML format
 * (see <a href="http://www.loc.gov/standards/mods/">http://www.loc.gov/standards/mods/</a>.)
 * <p>
 * It registers multiple Plugin names, which it reads from
 * the DSpace configuration as follows:
 *
 * <h3>Configuration</h3>
 * Every key starting with <code>"crosswalk.mods.properties."</code> describes a
 * MODS crosswalk.  Everything after the last period is the <em>plugin name</em>,
 * and the value is the pathname (relative to <code><em>dspace.dir</em>/config</code>)
 * of the crosswalk configuration file.
 * <p>
 * You can have two names point to the same crosswalk,
 * just add two configuration entries with the same value, e.g.
 * <pre>
 *    crosswalk.mods.properties.MODS = crosswalks/mods.properties
 *    crosswalk.mods.properties.default = crosswalks/mods.properties
 * </pre>
 * The first line creates a plugin with the name <code>"MODS"</code>
 * which is configured from the file <em>dspace-dir</em><code>/config/crosswalks/mods.properties</code>.
 * <p>
 * Since there is significant overhead in reading the properties file to
 * configure the crosswalk, and a crosswalk instance may be used any number
 * of times, we recommend caching one instance of the crosswalk for each
 * name and simply reusing those instances.  The PluginManager does this
 * by default.
 *
 * @author Larry Stone
 * @version $Revision: 3761 $
 */
public class MODSDisseminationCrosswalk extends SelfNamedPlugin
    implements DisseminationCrosswalk
{
    /** log4j category */
    private static final Logger log = Logger.getLogger(MODSDisseminationCrosswalk.class);

    /** Prefix of every DSpace configuration key that names a MODS crosswalk. */
    private final static String CONFIG_PREFIX = "crosswalk.mods.properties.";

    /**
     * Fill in the plugin alias table from DSpace configuration entries
     * for configuration files for flavors of MODS crosswalk:
     */
    private static String aliases[] = null;
    static
    {
        List aliasList = new ArrayList();
        Enumeration pe = ConfigurationManager.propertyNames();
        while (pe.hasMoreElements())
        {
            String key = (String)pe.nextElement();
            // The alias is whatever follows the fixed prefix in the key.
            if (key.startsWith(CONFIG_PREFIX))
            {
                aliasList.add(key.substring(CONFIG_PREFIX.length()));
            }
        }
        aliases = (String[])aliasList.toArray(new String[aliasList.size()]);
    }

    /**
     * Returns the plugin names collected by the static initializer, i.e.
     * the suffix of every configuration key that starts with
     * <code>"crosswalk.mods.properties."</code>.
     *
     * @return array of plugin names (the internal array, not a copy).
     */
    public static String[] getPluginNames()
    {
        return aliases;
    }

    /**
     * MODS namespace.
     */
    public static final Namespace MODS_NS =
        Namespace.getNamespace("mods", "http://www.loc.gov/mods/v3");

    /** XLink namespace, made available to configured XML fragments and XPaths. */
    private static final Namespace XLINK_NS =
        Namespace.getNamespace("xlink", "http://www.w3.org/1999/xlink");

    /** Namespaces reported by {@link #getNamespaces()}. */
    private static final Namespace namespaces[] = { MODS_NS, XLINK_NS };

    /** URL of MODS XML Schema */
    public static final String MODS_XSD =
        "http://www.loc.gov/standards/mods/v3/mods-3-1.xsd";

    /** Value for the schemaLocation attribute: namespace URI, a space, schema URL. */
    private static final String schemaLocation =
        MODS_NS.getURI()+" "+MODS_XSD;

    /** Outputter with default format; used only to render XML in log messages. */
    private static final XMLOutputter outputUgly = new XMLOutputter();

    /** Pretty-printing outputter (not referenced elsewhere in this class). */
    private static final XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());

    /** Parser used to turn configured XML fragments into JDOM elements. */
    private static final SAXBuilder builder = new SAXBuilder();

    /**
     * Crosswalk table for this instance: maps a qualified DC field name
     * (String) to its {@link modsTriple}.  Null until {@link #initMap()}
     * has completed successfully.
     */
    private HashMap modsMap = null;

    /**
     * Container for crosswalk mapping: expressed as "triple" of:
     * 1. QDC field name (really field.qualifier).
     * 2. XML subtree to add to MODS record.
     * 3. XPath expression showing places to plug in the value.
     */
    static class modsTriple
    {
        public String qdc = null;
        public Element xml = null;
        public XPath xpath = null;

        /**
         * Initialize from text versions of QDC, XML and XPath.
         * The DC stays a string; parse the XML with appropriate
         * namespaces; "compile" the XPath.
         *
         * @param qdc qualified DC field name, stored as-is.
         * @param xml XML fragment; it is wrapped in a temporary root element
         *            that declares the MODS and XLink prefixes before parsing.
         * @param xpath XPath expression to compile.
         * @return the new triple, or null (after logging an error) if the
         *         XPath cannot be compiled, the XML cannot be parsed, or the
         *         fragment does not begin with an element.
         */
        public static modsTriple create(String qdc, String xml, String xpath)
        {
            modsTriple result = new modsTriple();

            // Wrapper element whose only job is to declare the namespace
            // prefixes the fragment may use.
            final String prolog = "<mods xmlns:"+MODS_NS.getPrefix()+"=\""+MODS_NS.getURI()+"\" "+
                            "xmlns:"+XLINK_NS.getPrefix()+"=\""+XLINK_NS.getURI()+"\">";
            final String postlog = "</mods>";
            try
            {
                result.qdc = qdc;
                result.xpath = XPath.newInstance(xpath);
                result.xpath.addNamespace(MODS_NS.getPrefix(), MODS_NS.getURI());
                result.xpath.addNamespace(XLINK_NS);
                Document d = builder.build(new StringReader(prolog+xml+postlog));

                // The prototype is the first node inside the wrapper.  Check
                // it really is an element: an empty fragment or one starting
                // with text would otherwise raise an unchecked exception and
                // abort loading of the whole crosswalk table.
                List content = d.getRootElement().getContent();
                if (content.isEmpty() || !(content.get(0) instanceof Element))
                {
                    log.error("Error initializing modsTriple(\""+qdc+"\",\""+xml+"\",\""+xpath+"\"): XML fragment does not start with an element");
                    return null;
                }
                result.xml = (Element)content.get(0);
            }
            catch (JDOMException je)
            {
                log.error("Error initializing modsTriple(\""+qdc+"\",\""+xml+"\",\""+xpath+"\"): got "+je.toString());
                return null;
            }
            catch (IOException je)
            {
                log.error("Error initializing modsTriple(\""+qdc+"\",\""+xml+"\",\""+xpath+"\"): got "+je.toString());
                return null;
            }
            return result;
        }
    }

    /**
     * Initialize Crosswalk table from a properties file
     * which itself is the value of the DSpace configuration property
     * "crosswalk.mods.properties.X", where "X" is the alias name of this instance.
     * Each instance may be configured with a separate mapping table.
     *
     * The MODS crosswalk configuration properties follow the format:
     *
     *  {field-name} = {XML-fragment} | {XPath}
     *
     *  1. qualified DC field name is of the form
     *       {MDschema}.{element}.{qualifier}
     *
     *      e.g.  dc.contributor.author
     *
     *  2. XML fragment is prototype of metadata element, with empty or "%s"
     *     placeholders for value(s).  NOTE: Leave the %s's in because
     *     it's much easier then to see if something is broken.
     *
     *  3. XPath expression listing point(s) in the above XML where
     *     the value is to be inserted.  Context is the element itself.
     *
     * Example properties line:
     *
     *  dc.description.abstract = <mods:abstract>%s</mods:abstract> | text()
     *
     * Does nothing if the table has already been loaded.  Entries that are
     * malformed (no " | " separator, or rejected by modsTriple.create) are
     * logged and skipped.
     *
     * @throws CrosswalkInternalException if this instance has no plugin
     *   name, if no configuration file is configured for that name, or if
     *   the configuration file cannot be opened or read.
     */
    private void initMap()
        throws CrosswalkInternalException
    {
        if (modsMap != null)
        {
            return;
        }

        String myAlias = getPluginInstanceName();
        if (myAlias == null)
        {
            // Without a name there is no way to find the mapping file.
            // Fail here rather than leave modsMap null, which would surface
            // later as a NullPointerException when the table is consulted.
            String msg = "Must use PluginManager to instantiate MODSDisseminationCrosswalk so the class knows its name.";
            log.error(msg);
            throw new CrosswalkInternalException(msg);
        }

        String cmPropName = CONFIG_PREFIX+myAlias;
        String propsFilename = ConfigurationManager.getProperty(cmPropName);
        if (propsFilename == null)
        {
            String msg = "MODS crosswalk missing "+
                "configuration file for crosswalk named \""+myAlias+"\"";
            log.error(msg);
            throw new CrosswalkInternalException(msg);
        }

        // The configured path is relative to <dspace.dir>/config/.
        String parent = ConfigurationManager.getProperty("dspace.dir") +
            File.separator + "config" + File.separator;
        File propsFile = new File(parent, propsFilename);
        Properties modsConfig = new Properties();
        FileInputStream pfs = null;
        try
        {
            pfs = new FileInputStream(propsFile);
            modsConfig.load(pfs);
        }
        catch (IOException e)
        {
            log.error("Error opening or reading MODS properties file: "+propsFile.toString()+": "+e.toString(), e);
            throw new CrosswalkInternalException("MODS crosswalk cannot "+
                "open config file: "+e.toString());
        }
        finally
        {
            if (pfs != null)
            {
                try
                {
                    pfs.close();
                }
                catch (IOException ioe)
                {
                    // Best effort: the properties are already loaded (or the
                    // load failure is already being reported), so only log.
                    log.warn("Error closing MODS properties file: "+propsFile.toString()+": "+ioe.toString());
                }
            }
        }

        // Build the table in a local and assign the field only when it is
        // complete, so the field never refers to a partially filled map.
        HashMap map = new HashMap();
        Enumeration pe = modsConfig.propertyNames();
        while (pe.hasMoreElements())
        {
            String qdc = (String)pe.nextElement();
            String val = modsConfig.getProperty(qdc);

            // Split "XML-fragment | XPath" at the first whitespace-delimited bar.
            String pair[] = val.split("\\s+\\|\\s+", 2);
            if (pair.length < 2)
            {
                log.warn("Illegal MODS mapping in "+propsFile.toString()+", line = "+
                        qdc + " = " + val);
            }
            else
            {
                modsTriple trip = modsTriple.create(qdc, pair[0], pair[1]);
                if (trip != null)
                {
                    map.put(qdc, trip);
                }
            }
        }
        modsMap = map;
    }

    /**
     * Returns the namespaces used by this crosswalk (MODS and XLink).
     *
     * @return the internal namespace array, not a copy.
     */
    public Namespace[] getNamespaces()
    {
        return namespaces;
    }

    /**
     * Returns the schemaLocation value for MODS: the MODS namespace URI,
     * a space, and the URL of the MODS XML Schema.
     */
    public String getSchemaLocation()
    {
        return schemaLocation;
    }

    /**
     * Returns object's metadata in MODS format, as List of XML structure nodes.
     * Each element in the list carries its own schemaLocation attribute.
     *
     * @param dso the object to crosswalk; must be an Item.
     * @return list of JDOM Elements, one per metadata value that has a mapping.
     * @throws CrosswalkException if dso is not an Item or the crosswalk
     *   table cannot be loaded.
     */
    public List disseminateList(DSpaceObject dso)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        return disseminateListInternal(dso, true);
    }

    /**
     * Returns object's metadata in MODS format as a single
     * <code>mods</code> element in the MODS namespace.  The schemaLocation
     * attribute is set on the root only, not on the child elements.
     *
     * @param dso the object to crosswalk; must be an Item.
     * @return the root <code>mods</code> element containing the mapped metadata.
     * @throws CrosswalkException if dso is not an Item or the crosswalk
     *   table cannot be loaded.
     */
    public Element disseminateElement(DSpaceObject dso)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        Element root = new Element("mods", MODS_NS);
        // XSI_NS is not declared in this file; presumably it is inherited
        // from the DisseminationCrosswalk interface.
        root.setAttribute("schemaLocation", schemaLocation, XSI_NS);
        root.addContent(disseminateListInternal(dso,false));
        return root;
    }

    /**
     * Does the actual crosswalk: for every metadata value of the Item,
     * looks up the mapping for its qualified name, clones the prototype XML
     * and writes the value into every node selected by the mapping's XPath.
     * Values without a mapping are logged at WARN level and skipped; XPath
     * evaluation errors are logged at ERROR level and that value is skipped.
     *
     * @param dso the object to crosswalk; must be an Item.
     * @param addSchema when true, a schemaLocation attribute is set on
     *   every generated element.
     * @return list of generated JDOM Elements.
     * @throws CrosswalkObjectNotSupported if dso is not an Item.
     * @throws CrosswalkInternalException if the crosswalk table cannot be loaded.
     */
    private List disseminateListInternal(DSpaceObject dso, boolean addSchema)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        if (dso.getType() != Constants.ITEM)
        {
            throw new CrosswalkObjectNotSupported("MODSDisseminationCrosswalk can only crosswalk an Item.");
        }
        Item item = (Item)dso;
        initMap();

        DCValue[] dc = item.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);
        List result = new ArrayList(dc.length);
        for (int i = 0; i < dc.length; i++)
        {
            // Compose qualified DC name - schema.element[.qualifier]
            // e.g. "dc.title", "dc.subject.lcc", "lom.Classification.Keyword"
            String qdc = dc[i].schema+"."+
                         ((dc[i].qualifier == null) ? dc[i].element
                            : (dc[i].element + "." + dc[i].qualifier));

            modsTriple trip = (modsTriple)modsMap.get(qdc);
            if (trip == null)
            {
                log.warn("WARNING: "+getPluginInstanceName()+": No MODS mapping for \"" + qdc+"\"");
            }
            else
            {
                try
                {
                    // Work on a copy so the shared prototype stays pristine.
                    Element me = (Element)trip.xml.clone();
                    if (addSchema)
                    {
                        me.setAttribute("schemaLocation", schemaLocation, XSI_NS);
                    }
                    Iterator ni = trip.xpath.selectNodes(me).iterator();
                    if (!ni.hasNext())
                    {
                        log.warn("XPath \""+trip.xpath.getXPath()+
                          "\" found no elements in \""+
                          outputUgly.outputString(me)+
                          "\", qdc="+qdc);
                    }
                    // Plug the value into each selected node, whatever its kind.
                    while (ni.hasNext())
                    {
                        Object what = ni.next();
                        if (what instanceof Element)
                        {
                            ((Element)what).setText(dc[i].value);
                        }
                        else if (what instanceof Attribute)
                        {
                            ((Attribute)what).setValue(dc[i].value);
                        }
                        else if (what instanceof Text)
                        {
                            ((Text)what).setText(dc[i].value);
                        }
                        else
                        {
                            log.warn("Got unknown object from XPath, class="+what.getClass().getName());
                        }
                    }
                    result.add(me);
                }
                catch (JDOMException je)
                {
                    log.error("Error following XPath in modsTriple: context="+
                        outputUgly.outputString(trip.xml)+
                        ", xpath="+trip.xpath.getXPath()+", exception="+
                        je.toString());
                }
            }
        }
        return result;
    }

    /**
     * Tells whether this crosswalk can disseminate the given object.
     * Only Items are supported, matching the check made when disseminating
     * (which throws CrosswalkObjectNotSupported for anything else).
     *
     * @param dso the object to test; null yields false.
     * @return true if dso is an Item.
     */
    public boolean canDisseminate(DSpaceObject dso)
    {
        return dso != null && dso.getType() == Constants.ITEM;
    }

    /**
     * Always returns false.  Presumably this tells callers to prefer
     * disseminateElement over disseminateList; confirm against the
     * DisseminationCrosswalk interface.
     */
    public boolean preferList()
    {
        return false;
    }
}