/* * QDCCrosswalk.java * * Version: $Revision: 3705 $ * * Date: $Date: 2009-04-11 18:02:24 +0100 (Sat, 11 Apr 2009) $ * * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts * Institute of Technology. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * - Neither the name of the Hewlett-Packard Company nor the name of the * Massachusetts Institute of Technology nor the names of their * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. 
*/
package org.dspace.content.crosswalk;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DCValue;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.MetadataSchema;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.SelfNamedPlugin;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;

/**
 * Configurable QDC Crosswalk
 * <p>
 * This class supports multiple dissemination crosswalks from DSpace
 * internal data to the Qualified Dublin Core XML format
 * (see <a href="http://dublincore.org/">http://dublincore.org/</a>).
 * <p>
 * It registers multiple Plugin names, which it reads from
 * the DSpace configuration as follows:
 *
 * <h3>Configuration</h3>
 * Every key starting with <code>"crosswalk.qdc.properties."</code> describes a
 * QDC crosswalk.  Everything after the last period is the <em>plugin instance</em>,
 * and the value is the pathname (relative to <code><em>dspace.dir</em>/config</code>)
 * of the crosswalk configuration file.
 * <p>
 * You can have two aliases point to the same crosswalk,
 * just add two configuration entries with the same value, e.g.
 * <pre>
 *    crosswalk.qdc.properties.QDC = xwalk/qdc.properties
 *    crosswalk.qdc.properties.default = xwalk/qdc.properties
 * </pre>
 * The first line creates a plugin with the name <code>"QDC"</code>
 * which is configured from the file
 * <em>dspace-dir</em><code>/config/xwalk/qdc.properties</code>.
 * <p>
 * Since there is significant overhead in reading the properties file to
 * configure the crosswalk, and a crosswalk instance may be used any number
 * of times, we recommend caching one instance of the crosswalk for each
 * alias and simply reusing those instances.  The PluginManager does
 * this by default.
 * <p>
 * Each named crosswalk has two other types of configuration lines:
 * <p>
 * XML Namespaces: all XML namespace prefixes used in the XML fragments below
 * <em>must</em> be defined in the configuration as follows.  Add a line of
 * the form: <pre>
 *  crosswalk.qdc.namespace.{NAME}.{prefix} = {namespace-URI}</pre>
 * e.g. for the namespaces <code>dc</code> and <code>dcterms</code>
 * in the plugin named <code>QDC</code>, add these lines:
 * <pre>crosswalk.qdc.namespace.QDC.dc = http://purl.org/dc/elements/1.1/
 * crosswalk.qdc.namespace.QDC.dcterms = http://purl.org/dc/terms/</pre>
 *
 * <p>
 * Finally, you need to declare an XML Schema location for the plugin, with
 * a line of the form <pre>
 *  crosswalk.qdc.schemaLocation.{NAME} = {schema-location}</pre>
 * for example,
 * <pre>crosswalk.qdc.schemaLocation.QDC = \
 *  http://purl.org/dc/terms/ \
 *  http://dublincore.org/schemas/xmls/qdc/2003/04/02/qualifieddc.xsd</pre>
 *
 * @author Larry Stone
 * @version $Revision: 3705 $
 */
public class QDCCrosswalk extends SelfNamedPlugin
    implements DisseminationCrosswalk, IngestionCrosswalk
{
    /** log4j category */
    private static final Logger log = Logger.getLogger(QDCCrosswalk.class);

    // map of qdc field name (e.g. "dc.title") to its prototype JDOM Element
    private final Map<String, Element> qdc2element = new HashMap<String, Element>();

    // map of qualified tag name (see makeQualifiedTagName) to qdc field name
    private final Map<String, String> element2qdc = new HashMap<String, String>();

    // the XML namespaces from config file for this name.
    private Namespace namespaces[] = null;

    private static final Namespace DCTERMS_NS =
        Namespace.getNamespace("dcterms", "http://purl.org/dc/terms/");

    // sentinel: set only once init() has completed successfully.
    private boolean inited = false;

    // my plugin name
    private String myName = null;

    // prefix of all DSpace Configuration entries.
    private static final String CONFIG_PREFIX = "crosswalk.qdc";

    // XML schemaLocation fragment for this crosswalk, from config.
    private String schemaLocation = null;

    private static final Namespace XLINK_NS =
        Namespace.getNamespace("xlink", "http://www.w3.org/TR/xlink");

    private static XMLOutputter outputUgly = new XMLOutputter();
    private static XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());

    // NOTE(review): this builder is shared by every instance of the class;
    // confirm SAXBuilder is safe for that if instances initialize concurrently.
    private static SAXBuilder builder = new SAXBuilder();

    // GWaller 1/11/10 IssueID #486 lang attributes are only allowed on elements that don't
    // have one of the following type attribute values (according to the dcterms schema).
    // NOTE: these are simply attribute values, not element names within a namespace.
    // The prefix e.g. dcterms should match what is specified in QDC.properties
    private static final String[] TYPE_VALUES_LANG_PROHIBITED =
    {
        "dcterms:MESH", "dcterms:DDC", "dcterms:LCC", "dcterms:UDC",
        "dcterms:Period", "dcterms:W3CDTF", "dcterms:DCMIType", "dcterms:IMT",
        "dcterms:URI", "dcterms:ISO639-2", "dcterms:RFC1766", "dcterms:RFC3066",
        "dcterms:Point", "dcterms:ISO3166", "dcterms:Box", "dcterms:TGN"
    };

    /**
     * Plugin-name table, filled in from DSpace configuration entries
     * for configuration files for flavors of QDC crosswalk: every key
     * starting with "crosswalk.qdc.properties." contributes the rest of
     * the key as a plugin name.
     */
    private static final String aliases[];
    static
    {
        List<String> aliasList = new ArrayList<String>();
        String propname = CONFIG_PREFIX + ".properties.";
        Enumeration<?> pe = ConfigurationManager.propertyNames();
        while (pe.hasMoreElements())
        {
            String key = (String) pe.nextElement();
            if (key.startsWith(propname))
            {
                aliasList.add(key.substring(propname.length()));
            }
        }
        aliases = aliasList.toArray(new String[aliasList.size()]);
    }

    /**
     * Returns the plugin names collected from the DSpace configuration
     * when this class was loaded.
     *
     * @return array of plugin (alias) names; empty if none are configured.
     */
    public static String[] getPluginNames()
    {
        return aliases;
    }

    /**
     * Utility: build the hashtable key used to recognise an XML element
     * on ingest.
     * <p>
     * The key is the namespace prefix followed by ":" (added whenever the
     * element reports a non-null namespace), then the element name, then
     * the value of its type attribute appended directly with no separator.
     * The xsi-namespaced type attribute is preferred; an un-namespaced
     * <code>type</code> attribute is used only when the xsi one is missing
     * or empty.  With no type attribute the key is just prefix and name.
     *
     * @param element the element to build a key for.
     * @return the lookup key for <code>element</code>.
     */
    private String makeQualifiedTagName(Element element)
    {
        StringBuilder tagName = new StringBuilder();
        Namespace ns = element.getNamespace();
        if (ns != null)
        {
            tagName.append(ns.getPrefix()).append(":");
        }
        tagName.append(element.getName());

        String qualifier = element.getAttributeValue("type", DisseminationCrosswalk.XSI_NS);
        if (qualifier == null || qualifier.length() < 1)
        {
            // fall back to a type attribute that is in no namespace
            qualifier = element.getAttributeValue("type");
        }
        if (qualifier != null && qualifier.length() >= 1)
        {
            tagName.append(qualifier);
        }
        return tagName.toString();
    }

    /**
     * Initialize Crosswalk table from a properties file
     * which itself is the value of the DSpace configuration property
     * "crosswalk.qdc.properties.X", where "X" is the alias name of this instance.
     * Each instance may be configured with a separate mapping table.
     *
     * The QDC crosswalk configuration properties follow the format:
     *
     *  {qdc-element} = {XML-fragment}
     *
     *  1. qualified DC field name is of the form (qualifier is optional)
     *       {MDschema}.{element}.{qualifier}
     *
     *      e.g.  dc.contributor.author
     *            dc.title
     *
     *  2. XML fragment is prototype of metadata element, with empty
     *     placeholders for value).
     *
     * Example properties line:
     *
     *  dc.coverage.temporal = <dcterms:temporal />
     *
     * The instance is marked as initialized only after everything above
     * has succeeded, so a failed attempt is reported again on the next
     * call instead of leaving the crosswalk silently running on empty or
     * half-filled mapping tables.
     *
     * @throws CrosswalkException if the plugin name or the properties file
     *   is not configured, or an XML fragment cannot be parsed.
     * @throws IOException if the properties file cannot be read.
     */
    private void init()
        throws CrosswalkException, IOException
    {
        if (inited)
        {
            return;
        }

        myName = getPluginInstanceName();
        if (myName == null)
        {
            throw new CrosswalkInternalException("Cannot determine plugin name, "+
                       "You must use PluginManager to instantiate QDCCrosswalk so the instance knows its name.");
        }

        // grovel DSpace configuration for namespaces
        List<Namespace> nsList = new ArrayList<Namespace>();
        String propname = CONFIG_PREFIX + ".namespace."+ myName +".";
        Enumeration<?> pe = ConfigurationManager.propertyNames();
        while (pe.hasMoreElements())
        {
            String key = (String) pe.nextElement();
            if (key.startsWith(propname))
            {
                nsList.add(Namespace.getNamespace(key.substring(propname.length()),
                                                  ConfigurationManager.getProperty(key)));
            }
        }
        nsList.add(Namespace.XML_NAMESPACE);
        // GWaller 1/11/10 IssueID #486 type attribute belongs to the XSI prefix
        nsList.add(DisseminationCrosswalk.XSI_NS);
        namespaces = nsList.toArray(new Namespace[nsList.size()]);

        // get XML schemaLocation fragment from config
        schemaLocation = ConfigurationManager.getProperty(CONFIG_PREFIX + ".schemaLocation."+ myName);

        // read properties
        String cmPropName = CONFIG_PREFIX+".properties."+myName;
        String propsFilename = ConfigurationManager.getProperty(cmPropName);
        if (propsFilename == null)
        {
            throw new CrosswalkInternalException("Configuration error: "+
                "No properties file configured for QDC crosswalk named \""+myName+"\"");
        }

        String parent = ConfigurationManager.getProperty("dspace.dir") +
            File.separator + "config" + File.separator;
        File propsFile = new File(parent, propsFilename);
        Properties qdcProps = new Properties();
        FileInputStream pfs = null;
        try
        {
            pfs = new FileInputStream(propsFile);
            qdcProps.load(pfs);
        }
        finally
        {
            if (pfs != null)
            {
                try
                {
                    pfs.close();
                }
                catch (IOException ignored)
                {
                    // nothing useful can be done about a failed close here
                }
            }
        }

        // grovel properties to initialize qdc->element and element->qdc maps.
        // evaluate the XML fragment with a wrapper including namespaces.
        String postlog = "</wrapper>";
        StringBuilder prologb = new StringBuilder("<wrapper");
        for (int i = 0; i < namespaces.length; ++i)
        {
            prologb.append(" xmlns:");
            prologb.append(namespaces[i].getPrefix());
            prologb.append("=\"");
            prologb.append(namespaces[i].getURI());
            prologb.append("\"");
        }
        prologb.append(">");
        String prolog = prologb.toString();

        // Build into local maps first so that a failure part-way through
        // leaves the instance tables untouched.
        Map<String, Element> newQdc2element = new HashMap<String, Element>();
        Map<String, String> newElement2qdc = new HashMap<String, String>();
        Enumeration<?> names = qdcProps.propertyNames();
        while (names.hasMoreElements())
        {
            String qdc = (String) names.nextElement();
            String val = qdcProps.getProperty(qdc);
            try
            {
                Document d = builder.build(new StringReader(prolog+val+postlog));
                // Take the first child *element*; an empty or text-only value
                // is a configuration error, not a reason for an unchecked crash.
                List<?> children = d.getRootElement().getChildren();
                if (children.isEmpty())
                {
                    throw new CrosswalkInternalException("Failed parsing XML fragment in properties file: "+
                        "no XML element found in value for \""+qdc+"\"");
                }
                Element element = (Element) children.get(0);
                newQdc2element.put(qdc, element);
                newElement2qdc.put(makeQualifiedTagName(element), qdc);
                if (log.isDebugEnabled())
                {
                    log.debug("Building Maps: qdc=\""+qdc+"\", element=\""+element.toString()+"\"");
                }
            }
            catch (org.jdom.JDOMException je)
            {
                throw new CrosswalkInternalException("Failed parsing XML fragment in properties file: \""+prolog+val+postlog+"\": "+je.toString());
            }
        }

        qdc2element.putAll(newQdc2element);
        element2qdc.putAll(newElement2qdc);
        inited = true;
    }

    /**
     * Returns the XML namespaces configured for this crosswalk, plus the
     * XML and XSI namespaces which are always appended.
     * <p>
     * Initialization is attempted first on a best-effort basis: a failure
     * is logged rather than thrown, in which case the result may be
     * <code>null</code> if the namespaces had not been read yet.
     *
     * @return array of namespaces, possibly <code>null</code> when
     *   initialization failed.
     */
    public Namespace[] getNamespaces()
    {
        try
        {
            init();
        }
        catch (Exception e)
        {
            log.warn("Failed to initialize QDC crosswalk while getting namespaces", e);
        }
        return namespaces;
    }

    /**
     * Returns the schemaLocation value configured for this crosswalk.
     * <p>
     * Initialization is attempted first on a best-effort basis: a failure
     * is logged rather than thrown.
     *
     * @return the configured schemaLocation string, or <code>null</code>
     *   if none has been read from the configuration.
     */
    public String getSchemaLocation()
    {
        try
        {
            init();
        }
        catch (Exception e)
        {
            log.warn("Failed to initialize QDC crosswalk while getting schemaLocation", e);
        }
        return schemaLocation;
    }

    /**
     * Returns object's metadata in Qualified Dublin Core format, as a
     * list of XML elements, one per mapped metadata value.  Each element
     * carries the schemaLocation attribute when one is configured.
     *
     * @param dso the object to crosswalk; must be an Item.
     * @return list of JDOM Elements.
     */
    public List disseminateList(DSpaceObject dso)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        return disseminateListInternal(dso, true);
    }

    /**
     * Decides whether an xml:lang attribute may be put on an element.
     * GWaller 1/11/10 IssueID #486 lang attributes aren't allowed on
     * elements with some type attributes.
     *
     * @param e the element about to receive a lang attribute.
     * @return false when the element has an xsi-namespaced type attribute
     *   whose value matches (ignoring case) one of the prohibited type
     *   values; true otherwise.
     */
    private boolean allowedLangAttribute(Element e)
    {
        // Check the element has a type attribute first
        Attribute elementTypeAtt = e.getAttribute("type", DisseminationCrosswalk.XSI_NS);
        if (elementTypeAtt == null)
        {
            return true;
        }

        // Now check the value of the type attribute to see if it's one of
        // the types that can't have a lang attribute
        String typeValue = elementTypeAtt.getValue();
        for (String typeToCompare : TYPE_VALUES_LANG_PROHIBITED)
        {
            if (typeValue.equalsIgnoreCase(typeToCompare))
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Crosswalks every metadata value of an Item to a clone of the
     * prototype element configured for its field.
     *
     * @param dso the object to crosswalk; must be an Item.
     * @param addSchema whether to add the schemaLocation attribute (when
     *   one is configured) to each generated element.
     * @return list of JDOM Elements, in the order the metadata values
     *   were returned by the Item; values with no mapping are skipped.
     * @throws CrosswalkObjectNotSupported if <code>dso</code> is not an Item.
     */
    private List disseminateListInternal(DSpaceObject dso, boolean addSchema)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        if (dso.getType() != Constants.ITEM)
        {
            throw new CrosswalkObjectNotSupported("QDCCrosswalk can only crosswalk an Item.");
        }
        Item item = (Item) dso;
        init();

        DCValue[] dc = item.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);
        List<Element> result = new ArrayList<Element>(dc.length);
        for (DCValue value : dc)
        {
            // Compose qualified DC name - schema.element[.qualifier]
            // e.g. "dc.title", "dc.subject.lcc", "lom.Classification.Keyword"
            String qdc = value.schema+"."+
                         ((value.qualifier == null) ? value.element
                            : (value.element + "." + value.qualifier));

            Element elt = qdc2element.get(qdc);
            if (elt == null)
            {
                // only complain about missing elements in the DC schema:
                if (value.schema.equals(MetadataSchema.DC_SCHEMA))
                {
                    log.warn("WARNING: "+myName+": No QDC mapping for \"" + qdc+"\"");
                }
                continue;
            }

            // never modify the prototype itself; work on a copy
            Element qe = (Element) elt.clone();
            qe.setText(value.value);
            if (addSchema && schemaLocation != null)
            {
                qe.setAttribute("schemaLocation", schemaLocation, XSI_NS);
            }
            if (value.language != null && allowedLangAttribute(qe))
            {
                qe.setAttribute("lang", value.language, Namespace.XML_NAMESPACE);
            }
            result.add(qe);
        }
        return result;
    }

    /**
     * Returns object's metadata as a single <code>qualifieddc</code>
     * element (in the dcterms namespace) wrapping the crosswalked
     * elements.  The schemaLocation attribute, when configured, is put on
     * the root only and not on the children.
     *
     * @param dso the object to crosswalk; must be an Item.
     * @return the root element.
     */
    public Element disseminateElement(DSpaceObject dso)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        init();
        Element root = new Element("qualifieddc", DCTERMS_NS);
        if (schemaLocation != null)
        {
            root.setAttribute("schemaLocation", schemaLocation, XSI_NS);
        }
        root.addContent(disseminateListInternal(dso, false));
        return root;
    }

    /**
     * Always answers true, whatever object is given.
     */
    public boolean canDisseminate(DSpaceObject dso)
    {
        return true;
    }

    /**
     * Ingests the children of a <code>qualifieddc</code> root element
     * into the given object.
     *
     * @throws MetadataValidationException if the root element is not
     *   named "qualifieddc".
     */
    public void ingest(Context context, DSpaceObject dso, Element root)
        throws CrosswalkException, IOException, SQLException, AuthorizeException
    {
        init();

        // NOTE: don't bother comparing namespace on root element
        // because DCMI doesn't specify one, and every app uses its
        // own..  just give up in the face of this madness and accept
        // anything with the right name.
        if (!(root.getName().equals("qualifieddc")))
        {
            throw new MetadataValidationException("Wrong root element for Qualified DC: "+root.toString());
        }
        ingest(context, dso, root.getChildren());
    }

    /**
     * Ingests a list of XML elements as metadata values of an Item.
     * Elements with no configured mapping are logged and skipped; a
     * nested <code>qualifieddc</code> element is descended into.
     *
     * @param ml list of JDOM Elements to ingest.
     * @throws CrosswalkInternalException if <code>dso</code> is not an
     *   Item, or a mapped field name does not have two or three
     *   dot-separated parts.
     */
    public void ingest(Context context, DSpaceObject dso, List ml)
        throws CrosswalkException, IOException, SQLException, AuthorizeException
    {
        init();

        // for now, forget about any targets but item.
        if (dso.getType() != Constants.ITEM)
        {
            throw new CrosswalkInternalException("Wrong target object type, QDCCrosswalk can only crosswalk to an Item.");
        }
        Item item = (Item) dso;

        Iterator mi = ml.iterator();
        while (mi.hasNext())
        {
            Element me = (Element) mi.next();
            String key = makeQualifiedTagName(me);

            // if the root element gets passed here, recurse:
            if (me.getName().equals("qualifieddc"))
            {
                ingest(context, dso, me.getChildren());
                continue;
            }

            String mapped = element2qdc.get(key);
            if (mapped == null)
            {
                log.warn("WARNING: "+myName+": No mapping for Element=\"" + key+"\" to qdc.");
                continue;
            }

            String qdc[] = mapped.split("\\.");

            // get language - prefer xml:lang, accept lang.
            String lang = me.getAttributeValue("lang", Namespace.XML_NAMESPACE);
            if (lang == null)
            {
                lang = me.getAttributeValue("lang");
            }

            if (qdc.length == 3)
            {
                item.addMetadata(qdc[0], qdc[1], qdc[2], lang, me.getText());
            }
            else if (qdc.length == 2)
            {
                item.addMetadata(qdc[0], qdc[1], null, lang, me.getText());
            }
            else
            {
                throw new CrosswalkInternalException("Unrecognized format in QDC element identifier for key=\""+key+"\", qdc=\""+mapped+"\"");
            }
        }
    }

    /**
     * Always answers true: the list form of dissemination is preferred.
     */
    public boolean preferList()
    {
        return true;
    }
}