/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.crosswalk;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DCValue;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.Item;
import org.dspace.content.Site;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.SelfNamedPlugin;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.Text;
import org.jdom.Verifier;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;
/**
* Configurable MODS Crosswalk
* <p>
* This class supports multiple dissemination crosswalks from DSpace
* internal data to the MODS XML format
* (see <a href="http://www.loc.gov/standards/mods/">http://www.loc.gov/standards/mods/</a>.)
* <p>
* It registers multiple Plugin names, which it reads from
* the DSpace configuration as follows:
*
* <h3>Configuration</h3>
* Every key starting with <code>"crosswalk.mods.properties."</code> describes a
* MODS crosswalk. Everything after the last period is the <em>plugin name</em>,
* and the value is the pathname (relative to <code><em>dspace.dir</em>/config</code>)
* of the crosswalk configuration file.
* <p>
* You can have two names point to the same crosswalk,
* just add two configuration entries with the same value, e.g.
* <pre>
* crosswalk.mods.properties.MODS = crosswalks/mods.properties
* crosswalk.mods.properties.default = crosswalks/mods.properties
* </pre>
* The first line creates a plugin with the name <code>"MODS"</code>
* which is configured from the file <em>dspace-dir</em><code>/config/crosswalks/mods.properties</code>.
* <p>
* Since there is significant overhead in reading the properties file to
* configure the crosswalk, and a crosswalk instance may be used any number
* of times, we recommend caching one instance of the crosswalk for each
* name and simply reusing those instances. The PluginManager does this
* by default.
*
* @author Larry Stone
* @author Scott Phillips
* @version $Revision$
*/
public class MODSDisseminationCrosswalk extends SelfNamedPlugin
    implements DisseminationCrosswalk
{
    /** log4j category */
    private static final Logger log = Logger.getLogger(MODSDisseminationCrosswalk.class);

    /** Prefix of every DSpace configuration key that declares a MODS crosswalk. */
    private static final String CONFIG_PREFIX = "crosswalk.mods.properties.";

    /** Prefix prepended to an object's handle to form its identifier URI. */
    private static final String HANDLE_URI_PREFIX = "http://hdl.handle.net/";

    /**
     * Plugin alias table, filled in from DSpace configuration entries
     * for configuration files for flavors of MODS crosswalk.
     */
    private static final String[] aliases;

    static
    {
        List<String> aliasList = new ArrayList<String>();
        @SuppressWarnings("unchecked")
        Enumeration<String> pe = (Enumeration<String>)ConfigurationManager.propertyNames();
        while (pe.hasMoreElements())
        {
            String key = pe.nextElement();
            if (key.startsWith(CONFIG_PREFIX))
            {
                // Everything after the prefix is the plugin name.
                aliasList.add(key.substring(CONFIG_PREFIX.length()));
            }
        }
        aliases = aliasList.toArray(new String[aliasList.size()]);
    }

    /**
     * Return the names under which this self-named plugin is registered.
     *
     * @return a fresh copy of the alias table, so callers cannot modify it
     */
    public static String[] getPluginNames()
    {
        return (String[]) ArrayUtils.clone(aliases);
    }

    /**
     * MODS namespace.
     */
    public static final Namespace MODS_NS =
        Namespace.getNamespace("mods", "http://www.loc.gov/mods/v3");

    private static final Namespace XLINK_NS =
        Namespace.getNamespace("xlink", "http://www.w3.org/1999/xlink");

    private static final Namespace[] namespaces = { MODS_NS, XLINK_NS };

    /** URL of MODS XML Schema */
    public static final String MODS_XSD =
        "http://www.loc.gov/standards/mods/v3/mods-3-1.xsd";

    private static final String schemaLocation =
        MODS_NS.getURI()+" "+MODS_XSD;

    /** Outputter used only to render XML fragments in log messages. */
    private static final XMLOutputter outputUgly = new XMLOutputter();

    /** Parser for the XML fragments found in crosswalk properties files. */
    private static final SAXBuilder builder = new SAXBuilder();

    /** Mapping table of this instance, keyed by qualified DC field name; null until initMap() succeeds. */
    private Map<String, modsTriple> modsMap = null;

    /**
     * Container for crosswalk mapping: expressed as "triple" of:
     *  1. QDC field name (really field.qualifier).
     *  2. XML subtree to add to MODS record.
     *  3. XPath expression showing places to plug in the value.
     */
    static class modsTriple
    {
        public String qdc = null;
        public Element xml = null;
        public XPath xpath = null;

        /**
         * Initialize from text versions of QDC, XML and XPath.
         * The DC stays a string; parse the XML with appropriate
         * namespaces; "compile" the XPath.
         *
         * @param qdc   qualified DC field name
         * @param xml   text of the XML fragment; it is parsed inside a wrapper
         *              element that declares the MODS and XLink prefixes
         * @param xpath text of the XPath expression
         * @return the new triple, or null (after logging an error) if the
         *         XPath or XML cannot be parsed or the fragment contains no element
         */
        public static modsTriple create(String qdc, String xml, String xpath)
        {
            modsTriple result = new modsTriple();

            // Wrapper element that puts the namespace prefixes in scope for the fragment.
            final String prolog = "<mods xmlns:"+MODS_NS.getPrefix()+"=\""+MODS_NS.getURI()+"\" "+
                            "xmlns:"+XLINK_NS.getPrefix()+"=\""+XLINK_NS.getURI()+"\">";
            final String postlog = "</mods>";
            try
            {
                result.qdc = qdc;
                result.xpath = XPath.newInstance(xpath);
                result.xpath.addNamespace(MODS_NS.getPrefix(), MODS_NS.getURI());
                result.xpath.addNamespace(XLINK_NS);
                Document d = builder.build(new StringReader(prolog+xml+postlog));

                // Take the first child *element*; a fragment consisting only of
                // text (or nothing) is a configuration error, not a crash.
                List<?> children = d.getRootElement().getChildren();
                if (children.isEmpty())
                {
                    log.error("Error initializing modsTriple(\""+qdc+"\",\""+xml+"\",\""+xpath+"\"): XML fragment contains no element");
                    return null;
                }
                result.xml = (Element)children.get(0);
            }
            catch (JDOMException je)
            {
                log.error("Error initializing modsTriple(\""+qdc+"\",\""+xml+"\",\""+xpath+"\"): got "+je.toString(), je);
                return null;
            }
            catch (IOException ioe)
            {
                log.error("Error initializing modsTriple(\""+qdc+"\",\""+xml+"\",\""+xpath+"\"): got "+ioe.toString(), ioe);
                return null;
            }
            return result;
        }
    }

    /**
     * Initialize Crosswalk table from a properties file
     * which itself is the value of the DSpace configuration property
     * "crosswalk.mods.properties.X", where "X" is the alias name of this instance.
     * Each instance may be configured with a separate mapping table.
     *
     * The MODS crosswalk configuration properties follow the format:
     *
     *  {field-name} = {XML-fragment} | {XPath}
     *
     *  1. qualified DC field name is of the form
     *       {MDschema}.{element}.{qualifier}
     *
     *      e.g.  dc.contributor.author
     *
     *  2. XML fragment is prototype of metadata element, with empty or "%s"
     *     placeholders for value(s).  NOTE: Leave the %s's in because
     *     it's much easier then to see if something is broken.
     *
     *  3. XPath expression listing point(s) in the above XML where
     *     the value is to be inserted.  Context is the element itself.
     *
     * Example properties line:
     *
     *  dc.description.abstract = <mods:abstract>%s</mods:abstract> | text()
     *
     * Does nothing if the table has already been loaded.
     *
     * @throws CrosswalkInternalException if this instance has no plugin name,
     *         if no configuration file is configured for that name, or if the
     *         configuration file cannot be opened or read
     */
    private void initMap()
        throws CrosswalkInternalException
    {
        if (modsMap != null)
        {
            return;
        }

        String myAlias = getPluginInstanceName();
        if (myAlias == null)
        {
            // Returning silently here would leave modsMap null and cause a
            // NullPointerException in the caller, so fail explicitly instead.
            String msg = "Must use PluginManager to instantiate MODSDisseminationCrosswalk so the class knows its name.";
            log.error(msg);
            throw new CrosswalkInternalException(msg);
        }

        String cmPropName = CONFIG_PREFIX+myAlias;
        String propsFilename = ConfigurationManager.getProperty(cmPropName);
        if (propsFilename == null)
        {
            String msg = "MODS crosswalk missing "+
                "configuration file for crosswalk named \""+myAlias+"\"";
            log.error(msg);
            throw new CrosswalkInternalException(msg);
        }

        // The configured path is relative to {dspace.dir}/config.
        String parent = ConfigurationManager.getProperty("dspace.dir") +
            File.separator + "config" + File.separator;
        File propsFile = new File(parent, propsFilename);
        Properties modsConfig = new Properties();
        FileInputStream pfs = null;
        try
        {
            pfs = new FileInputStream(propsFile);
            modsConfig.load(pfs);
        }
        catch (IOException e)
        {
            log.error("Error opening or reading MODS properties file: "+propsFile.toString()+": "+e.toString(), e);
            throw new CrosswalkInternalException("MODS crosswalk cannot "+
                "open config file: "+e.toString(), e);
        }
        finally
        {
            if (pfs != null)
            {
                try
                {
                    pfs.close();
                }
                catch (IOException ioe)
                {
                    // The properties are already loaded (or the load failure is
                    // already being reported), so only record the close failure.
                    log.warn("Error closing MODS properties file: "+propsFile.toString()+": "+ioe.toString(), ioe);
                }
            }
        }

        // Build the table locally and publish it only once it is complete,
        // so a partially filled table is never visible through modsMap.
        Map<String, modsTriple> map = new HashMap<String, modsTriple>();
        @SuppressWarnings("unchecked")
        Enumeration<String> pe = (Enumeration<String>)modsConfig.propertyNames();
        while (pe.hasMoreElements())
        {
            String qdc = pe.nextElement();
            String val = modsConfig.getProperty(qdc);

            // Split "{XML-fragment} | {XPath}" on the first whitespace-delimited bar.
            String[] pair = val.split("\\s+\\|\\s+", 2);
            if (pair.length < 2)
            {
                log.warn("Illegal MODS mapping in " + propsFile.toString() + ", line = " +
                        qdc + " = " + val);
            }
            else
            {
                modsTriple trip = modsTriple.create(qdc, pair[0], pair[1]);
                if (trip != null)
                {
                    map.put(qdc, trip);
                }
            }
        }
        modsMap = map;
    }

    /**
     * Return the namespaces used by this crosswalk (MODS and XLink).
     *
     * @return a fresh copy of the namespace array
     */
    public Namespace[] getNamespaces()
    {
        return (Namespace[]) ArrayUtils.clone(namespaces);
    }

    /**
     * Return the MODS schema location: the MODS namespace URI, a space,
     * and the URL of the MODS XML Schema.
     */
    public String getSchemaLocation()
    {
        return schemaLocation;
    }

    /**
     * Returns object's metadata in MODS format, as List of XML structure nodes.
     * Each returned element carries its own schemaLocation attribute.
     *
     * @param dso the object to disseminate
     * @return list of MODS elements, one per mapped metadata value
     */
    public List<Element> disseminateList(DSpaceObject dso)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        return disseminateListInternal(dso, true);
    }

    /**
     * Disseminate an Item, Collection, Community, or Site to MODS.
     *
     * @param dso the object to disseminate
     * @return a "mods" root element, carrying the schemaLocation attribute,
     *         that contains the crosswalked metadata elements
     */
    public Element disseminateElement(DSpaceObject dso)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        Element root = new Element("mods", MODS_NS);
        root.setAttribute("schemaLocation", schemaLocation, XSI_NS);
        root.addContent(disseminateListInternal(dso, false));
        return root;
    }

    /**
     * Crosswalk the metadata of an object into a list of MODS elements.
     * Values whose field has no mapping are skipped with a warning.
     *
     * @param dso       the object to disseminate
     * @param addSchema whether to set the schemaLocation attribute on every
     *                  element of the result
     * @return list of MODS elements, one per mapped metadata value
     * @throws CrosswalkObjectNotSupported if the object is not an Item,
     *         Collection, Community, or Site
     */
    private List<Element> disseminateListInternal(DSpaceObject dso, boolean addSchema)
        throws CrosswalkException, IOException, SQLException, AuthorizeException
    {
        DCValue[] dcvs = null;
        if (dso.getType() == Constants.ITEM)
        {
            dcvs = item2Metadata((Item) dso);
        }
        else if (dso.getType() == Constants.COLLECTION)
        {
            dcvs = collection2Metadata((Collection) dso);
        }
        else if (dso.getType() == Constants.COMMUNITY)
        {
            dcvs = community2Metadata((Community) dso);
        }
        else if (dso.getType() == Constants.SITE)
        {
            dcvs = site2Metadata((Site) dso);
        }
        else
        {
            throw new CrosswalkObjectNotSupported(
                    "MODSDisseminationCrosswalk can only crosswalk Items, Collections, Communities, or Sites");
        }

        initMap();

        List<Element> result = new ArrayList<Element>(dcvs.length);
        for (DCValue dcv : dcvs)
        {
            // Lookup key is schema.element[.qualifier]
            String qdc = dcv.schema + "." + dcv.element;
            if (dcv.qualifier != null)
            {
                qdc += "." + dcv.qualifier;
            }

            modsTriple trip = modsMap.get(qdc);
            if (trip == null)
            {
                log.warn("WARNING: " + getPluginInstanceName() + ": No MODS mapping for \"" + qdc + "\"");
                continue;
            }

            try
            {
                // Work on a copy so the prototype in the table stays pristine.
                Element me = (Element)trip.xml.clone();
                if (addSchema)
                {
                    me.setAttribute("schemaLocation", schemaLocation, XSI_NS);
                }

                Iterator<?> ni = trip.xpath.selectNodes(me).iterator();
                if (!ni.hasNext())
                {
                    log.warn("XPath \"" + trip.xpath.getXPath() +
                            "\" found no elements in \"" +
                            outputUgly.outputString(me) +
                            "\", qdc=" + qdc);
                }

                // The same cleaned value goes into every selected node.
                String cleaned = checkedString(dcv.value);
                while (ni.hasNext())
                {
                    Object what = ni.next();
                    if (what instanceof Element)
                    {
                        ((Element) what).setText(cleaned);
                    }
                    else if (what instanceof Attribute)
                    {
                        ((Attribute) what).setValue(cleaned);
                    }
                    else if (what instanceof Text)
                    {
                        ((Text) what).setText(cleaned);
                    }
                    else
                    {
                        log.warn("Got unknown object from XPath, class=" + what.getClass().getName());
                    }
                }
                result.add(me);
            }
            catch (JDOMException je)
            {
                log.error("Error following XPath in modsTriple: context="+
                    outputUgly.outputString(trip.xml)+
                    ", xpath="+trip.xpath.getXPath()+", exception="+
                    je.toString(), je);
            }
        }
        return result;
    }

    /**
     * ModsCrosswalk can disseminate: Items, Collections, Communities, and Site.
     */
    public boolean canDisseminate(DSpaceObject dso)
    {
        return (dso.getType() == Constants.ITEM ||
                dso.getType() == Constants.COLLECTION ||
                dso.getType() == Constants.COMMUNITY ||
                dso.getType() == Constants.SITE);
    }

    /**
     * ModsCrosswalk prefers element form over list.
     */
    public boolean preferList()
    {
        return false;
    }

    /**
     * Generate a list of metadata elements for the given DSpace
     * site.
     *
     * @param site
     *            The site to derive metadata from
     * @return the handle URI, the site URL and the site name, each as a
     *         "dc" value and each only if it is available
     */
    protected DCValue[] site2Metadata(Site site)
    {
        List<DCValue> metadata = new ArrayList<DCValue>();

        String handle = site.getHandle();
        String title = site.getName();
        String url = site.getURL();

        // Test the handle itself: the concatenated URI can never be null.
        if (handle != null)
        {
            metadata.add(createDCValue("identifier", "uri", HANDLE_URI_PREFIX + handle));
        }

        //FIXME: adding two URIs for now (site handle and URL), in case site isn't using handles
        if (url != null)
        {
            metadata.add(createDCValue("identifier", "uri", url));
        }

        if (title != null)
        {
            metadata.add(createDCValue("title", null, title));
        }

        return metadata.toArray(new DCValue[metadata.size()]);
    }

    /**
     * Generate a list of metadata elements for the given DSpace
     * community.
     *
     * @param community
     *            The community to derive metadata from
     * @return the community's descriptive fields as "dc" values; fields
     *         that are null are omitted
     */
    protected DCValue[] community2Metadata(Community community)
    {
        List<DCValue> metadata = new ArrayList<DCValue>();

        String description = community.getMetadata("introductory_text");
        String description_abstract = community
                .getMetadata("short_description");
        String description_table = community.getMetadata("side_bar_text");
        String handle = community.getHandle();
        String rights = community.getMetadata("copyright_text");
        String title = community.getMetadata("name");

        if (description != null)
        {
            metadata.add(createDCValue("description", null, description));
        }

        if (description_abstract != null)
        {
            metadata.add(createDCValue("description", "abstract", description_abstract));
        }

        if (description_table != null)
        {
            metadata.add(createDCValue("description", "tableofcontents", description_table));
        }

        // Test the handle itself: the concatenated URI can never be null.
        if (handle != null)
        {
            metadata.add(createDCValue("identifier", "uri", HANDLE_URI_PREFIX + handle));
        }

        if (rights != null)
        {
            metadata.add(createDCValue("rights", null, rights));
        }

        if (title != null)
        {
            metadata.add(createDCValue("title", null, title));
        }

        return metadata.toArray(new DCValue[metadata.size()]);
    }

    /**
     * Generate a list of metadata elements for the given DSpace
     * collection.
     *
     * @param collection
     *            The collection to derive metadata from
     * @return the collection's descriptive fields as "dc" values; fields
     *         that are null are omitted
     */
    protected DCValue[] collection2Metadata(Collection collection)
    {
        List<DCValue> metadata = new ArrayList<DCValue>();

        String description = collection.getMetadata("introductory_text");
        String description_abstract = collection
                .getMetadata("short_description");
        String description_table = collection.getMetadata("side_bar_text");
        String handle = collection.getHandle();
        String provenance = collection.getMetadata("provenance_description");
        String rights = collection.getMetadata("copyright_text");
        String rights_license = collection.getMetadata("license");
        String title = collection.getMetadata("name");

        if (description != null)
        {
            metadata.add(createDCValue("description", null, description));
        }

        if (description_abstract != null)
        {
            metadata.add(createDCValue("description", "abstract", description_abstract));
        }

        if (description_table != null)
        {
            metadata.add(createDCValue("description", "tableofcontents", description_table));
        }

        // Test the handle itself: the concatenated URI can never be null.
        if (handle != null)
        {
            metadata.add(createDCValue("identifier", "uri", HANDLE_URI_PREFIX + handle));
        }

        if (provenance != null)
        {
            metadata.add(createDCValue("provenance", null, provenance));
        }

        if (rights != null)
        {
            metadata.add(createDCValue("rights", null, rights));
        }

        if (rights_license != null)
        {
            metadata.add(createDCValue("rights", "license", rights_license));
        }

        if (title != null)
        {
            metadata.add(createDCValue("title", null, title));
        }

        return metadata.toArray(new DCValue[metadata.size()]);
    }

    /**
     * Generate a list of metadata elements for the given DSpace item.
     *
     * @param item
     *            The item to derive metadata from
     * @return the values returned by the item for a wildcard
     *         (Item.ANY) schema, element, qualifier and language
     */
    protected DCValue[] item2Metadata(Item item)
    {
        return item.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);
    }

    /**
     * Build a metadata value in the "dc" schema.
     *
     * @param element   element name
     * @param qualifier qualifier, or null for an unqualified field
     * @param value     the metadata value
     * @return a new DCValue with the given fields set
     */
    private DCValue createDCValue(String element, String qualifier, String value)
    {
        DCValue dcv = new DCValue();
        dcv.schema = "dc";
        dcv.element = element;
        dcv.qualifier = qualifier;
        dcv.value = value;
        return dcv;
    }

    /**
     * Check for non-XML characters and filter them out.
     *
     * @param value the text to check; may be null
     * @return null if value is null; value itself if the JDOM Verifier
     *         accepts it; otherwise a copy without the illegal characters
     */
    private String checkedString(String value)
    {
        if (value == null)
        {
            return null;
        }
        String reason = Verifier.checkCharacterData(value);
        if (reason == null)
        {
            return value;
        }

        if (log.isDebugEnabled())
        {
            log.debug("Filtering out non-XML characters in string, reason=" + reason);
        }

        // Filter by code point so that legal supplementary characters
        // (stored as surrogate pairs) are kept rather than dropped half by half.
        String filtered = filterXMLCharacters(value, true);
        if (Verifier.checkCharacterData(filtered) != null)
        {
            // The Verifier in use still rejects the result; fall back to
            // testing every char on its own, which also removes all surrogates.
            filtered = filterXMLCharacters(filtered, false);
        }
        return filtered;
    }

    /**
     * Copy a string, keeping only what Verifier.isXMLCharacter accepts.
     *
     * @param value       the text to filter; must not be null
     * @param byCodePoint true to test whole code points, false to test
     *                    each char separately
     * @return the filtered copy
     */
    private String filterXMLCharacters(String value, boolean byCodePoint)
    {
        StringBuilder result = new StringBuilder(value.length());
        int i = 0;
        while (i < value.length())
        {
            int c = byCodePoint ? value.codePointAt(i) : value.charAt(i);
            if (Verifier.isXMLCharacter(c))
            {
                result.appendCodePoint(c);
            }
            // Advance past both chars of a surrogate pair when one was read.
            i += byCodePoint ? Character.charCount(c) : 1;
        }
        return result.toString();
    }
}