/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.crosswalk;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.*;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.CollectionService;
import org.dspace.content.service.CommunityService;
import org.dspace.content.service.ItemService;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.SelfNamedPlugin;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.Text;
import org.jdom.Verifier;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;
/**
* Configurable MODS Crosswalk
* <p>
* This class supports multiple dissemination crosswalks from DSpace
* internal data to the MODS XML format
* (see <a href="http://www.loc.gov/standards/mods/">http://www.loc.gov/standards/mods/</a>.)
* <p>
* It registers multiple Plugin names, which it reads from
* the DSpace configuration as follows:
*
* <h3>Configuration</h3>
* Every key starting with <code>"crosswalk.mods.properties."</code> describes a
* MODS crosswalk. Everything after the last period is the <em>plugin name</em>,
* and the value is the pathname (relative to <code><em>dspace.dir</em>/config</code>)
* of the crosswalk configuration file.
* <p>
* You can have two names point to the same crosswalk,
* just add two configuration entries with the same value, e.g.
* <pre>
* crosswalk.mods.properties.MODS = crosswalks/mods.properties
* crosswalk.mods.properties.default = crosswalks/mods.properties
* </pre>
* The first line creates a plugin with the name <code>"MODS"</code>
* which is configured from the file <em>dspace-dir</em><code>/config/crosswalks/mods.properties</code>.
* <p>
* Since there is significant overhead in reading the properties file to
* configure the crosswalk, and a crosswalk instance may be used any number
* of times, we recommend caching one instance of the crosswalk for each
* name and simply reusing those instances. The PluginService does this
* by default.
*
* @author Larry Stone
* @author Scott Phillips
* @version $Revision$
*/
public class MODSDisseminationCrosswalk extends SelfNamedPlugin
implements DisseminationCrosswalk
{
/** log4j category used by this crosswalk for all of its log output. */
private static Logger log =
    Logger.getLogger(MODSDisseminationCrosswalk.class);

/** Prefix of every DSpace configuration key that names a MODS crosswalk. */
private static final String CONFIG_PREFIX =
    "crosswalk.mods.properties.";

/** Service used to read the metadata fields of a Community. */
protected final CommunityService communityService =
    ContentServiceFactory.getInstance().getCommunityService();

/** Service used to read the metadata fields of a Collection. */
protected final CollectionService collectionService =
    ContentServiceFactory.getInstance().getCollectionService();

/** Service used to read the metadata values of an Item. */
protected final ItemService itemService =
    ContentServiceFactory.getInstance().getItemService();
/**
 * Plugin alias table. It holds, for every DSpace configuration key that
 * starts with {@code CONFIG_PREFIX}, the part of the key that follows the
 * prefix. It is filled in once by the static initializer below.
 */
private static String[] aliases = null;

static
{
    List<String> found = new ArrayList<String>();
    for (Enumeration<String> keys = (Enumeration<String>) ConfigurationManager.propertyNames();
         keys.hasMoreElements();)
    {
        String name = keys.nextElement();
        if (!name.startsWith(CONFIG_PREFIX))
        {
            // Not a MODS crosswalk declaration.
            continue;
        }
        found.add(name.substring(CONFIG_PREFIX.length()));
    }
    aliases = found.toArray(new String[found.size()]);
}
/**
 * Return the plugin names (aliases) collected from the DSpace configuration.
 *
 * @return a copy of the alias table, so callers cannot modify the original
 */
public static String[] getPluginNames()
{
    final String[] copy = (String[]) ArrayUtils.clone(aliases);
    return copy;
}
/**
* MODS namespace.
*/
public static final Namespace MODS_NS =
Namespace.getNamespace("mods", "http://www.loc.gov/mods/v3");
private static final Namespace XLINK_NS =
Namespace.getNamespace("xlink", "http://www.w3.org/1999/xlink");
private static final Namespace namespaces[] = { MODS_NS, XLINK_NS };
/** URL of MODS XML Schema */
public static final String MODS_XSD =
"http://www.loc.gov/standards/mods/v3/mods-3-1.xsd";
private static final String schemaLocation =
MODS_NS.getURI()+" "+MODS_XSD;
private static XMLOutputter outputUgly = new XMLOutputter();
private static SAXBuilder builder = new SAXBuilder();
private Map<String, modsTriple> modsMap = null;
/**
* Container for crosswalk mapping: expressed as "triple" of:
* 1. QDC field name (really field.qualifier).
* 2. XML subtree to add to MODS record.
* 3. XPath expression showing places to plug in the value.
*/
static class modsTriple
{
public String qdc = null;
public Element xml = null;
public XPath xpath = null;
/**
* Initialize from text versions of QDC, XML and XPath.
* The DC stays a string; parse the XML with appropriate
* namespaces; "compile" the XPath.
*/
public static modsTriple create(String qdc, String xml, String xpath)
{
modsTriple result = new modsTriple();
final String prolog = "<mods xmlns:"+MODS_NS.getPrefix()+"=\""+MODS_NS.getURI()+"\" "+
"xmlns:"+XLINK_NS.getPrefix()+"=\""+XLINK_NS.getURI()+"\">";
final String postlog = "</mods>";
try
{
result.qdc = qdc;
result.xpath = XPath.newInstance(xpath);
result.xpath.addNamespace(MODS_NS.getPrefix(), MODS_NS.getURI());
result.xpath.addNamespace(XLINK_NS);
Document d = builder.build(new StringReader(prolog+xml+postlog));
result.xml = (Element)d.getRootElement().getContent(0);
}
catch (JDOMException je)
{
log.error("Error initializing modsTriple(\""+qdc+"\",\""+xml+"\",\""+xpath+"\"): got "+je.toString());
return null;
}
catch (IOException je)
{
log.error("Error initializing modsTriple(\""+qdc+"\",\""+xml+"\",\""+xpath+"\"): got "+je.toString());
return null;
}
return result;
}
}
/**
 * Initialize Crosswalk table from a properties file
 * which itself is the value of the DSpace configuration property
 * "crosswalk.mods.properties.X", where "X" is the alias name of this instance.
 * Each instance may be configured with a separate mapping table.
 * Does nothing when the table has already been loaded.
 *
 * The MODS crosswalk configuration properties follow the format:
 *
 *  {field-name} = {XML-fragment} | {XPath}
 *
 *  1. qualified DC field name is of the form
 *       {MDschema}.{element}.{qualifier}
 *
 *      e.g.  dc.contributor.author
 *
 *  2. XML fragment is prototype of metadata element, with empty or "%s"
 *     placeholders for value(s).  NOTE: Leave the %s's in because
 *     it's much easier then to see if something is broken.
 *
 *  3. XPath expression listing point(s) in the above XML where
 *     the value is to be inserted.  Context is the element itself.
 *
 * Example properties line:
 *
 *  dc.description.abstract = {@code <mods:abstract>%s</mods:abstract> | text()}
 *
 * Lines that do not contain the " | " separator, or whose XML/XPath cannot
 * be parsed, are logged and skipped.
 *
 * @throws CrosswalkInternalException if this instance has no plugin name,
 *         if no properties file is configured for that name, or if the
 *         properties file cannot be opened or read
 */
private void initMap()
    throws CrosswalkInternalException
{
    if (modsMap != null)
    {
        return;
    }

    String myAlias = getPluginInstanceName();
    if (myAlias == null)
    {
        // Without a name there is no configuration key to look up. Fail
        // with a crosswalk exception here rather than leaving the table
        // null for the caller to trip over.
        String msg = "Must use PluginService to instantiate MODSDisseminationCrosswalk so the class knows its name.";
        log.error(msg);
        throw new CrosswalkInternalException(msg);
    }

    String cmPropName = CONFIG_PREFIX+myAlias;
    String propsFilename = ConfigurationManager.getProperty(cmPropName);
    if (propsFilename == null)
    {
        String msg = "MODS crosswalk missing "+
            "configuration file for crosswalk named \""+myAlias+"\"";
        log.error(msg);
        throw new CrosswalkInternalException(msg);
    }

    // The configured filename is relative to {dspace.dir}/config.
    String parent = ConfigurationManager.getProperty("dspace.dir") +
        File.separator + "config" + File.separator;
    File propsFile = new File(parent, propsFilename);
    Properties modsConfig = new Properties();
    FileInputStream pfs = null;
    try
    {
        pfs = new FileInputStream(propsFile);
        modsConfig.load(pfs);
    }
    catch (IOException e)
    {
        log.error("Error opening or reading MODS properties file: "+propsFile.toString()+": "+e.toString());
        throw new CrosswalkInternalException("MODS crosswalk cannot "+
            "open config file: "+e.toString(), e);
    }
    finally
    {
        if (pfs != null)
        {
            try
            {
                pfs.close();
            }
            catch (IOException ioe)
            {
                // The properties are already loaded, so a failed close is
                // not fatal; record it instead of discarding it silently.
                log.warn("Error closing MODS properties file: "+propsFile.toString()+": "+ioe.toString());
            }
        }
    }

    // Fill a local table and assign the field only once it is complete, so
    // the early return above never hands out a half-built table.
    Map<String, modsTriple> loaded = new HashMap<String, modsTriple>();
    for (String qdc : modsConfig.stringPropertyNames())
    {
        String val = modsConfig.getProperty(qdc);
        String[] pair = val.split("\\s+\\|\\s+", 2);
        if (pair.length < 2)
        {
            log.warn("Illegal MODS mapping in " + propsFile.toString() + ", line = " +
                qdc + " = " + val);
            continue;
        }
        modsTriple trip = modsTriple.create(qdc, pair[0], pair[1]);
        if (trip != null)
        {
            loaded.put(qdc, trip);
        }
    }
    modsMap = loaded;
}
/**
 * Return the namespaces used by this crosswalk (MODS and XLink).
 *
 * @return a copy of the namespace array, so callers cannot modify the original
 */
@Override
public Namespace[] getNamespaces()
{
    final Namespace[] copy = (Namespace[]) ArrayUtils.clone(namespaces);
    return copy;
}
/**
 * Return the MODS schema location.
 *
 * @return the MODS namespace URI and the MODS schema URL, separated by a space
 */
@Override
public String getSchemaLocation()
{
    final String location = schemaLocation;
    return location;
}
/**
 * Returns object's metadata in MODS format, as List of XML structure nodes.
 * Each returned element is given its own schemaLocation attribute.
 *
 * @param context context (not used by this method)
 * @param dso the object whose metadata is crosswalked
 * @return one MODS element per metadata value that has a mapping
 * @throws CrosswalkException if crosswalk error
 * @throws IOException if IO error
 * @throws SQLException if database error
 * @throws AuthorizeException if authorization error
 */
@Override
public List<Element> disseminateList(Context context, DSpaceObject dso)
    throws CrosswalkException, IOException, SQLException, AuthorizeException
{
    // List form has no common root, so the schema goes on every element.
    final boolean addSchema = true;
    return disseminateListInternal(dso, addSchema);
}
/**
 * Disseminate an Item, Collection, or Community to MODS.
 * The mapped elements are placed under a single {@code mods} root element
 * that carries the schemaLocation attribute.
 *
 * @param context context (not used by this method)
 * @param dso the object whose metadata is crosswalked
 * @return the {@code mods} root element
 * @throws CrosswalkException if crosswalk error
 * @throws IOException if IO error
 * @throws SQLException if database error
 * @throws AuthorizeException if authorization error
 */
@Override
public Element disseminateElement(Context context, DSpaceObject dso)
    throws CrosswalkException, IOException, SQLException, AuthorizeException
{
    // The root carries the schema, so the children are built without it.
    final List<Element> children = disseminateListInternal(dso, false);

    final Element mods = new Element("mods", MODS_NS);
    mods.setAttribute("schemaLocation", schemaLocation, XSI_NS);
    mods.addContent(children);
    return mods;
}
/**
 * Crosswalk the metadata of an Item, Collection, Community or Site into a
 * list of MODS elements.
 * <p>
 * For every metadata value the qualified field name
 * ({@code schema.element[.qualifier]}) is looked up in the mapping table;
 * the mapping's prototype element is cloned and the value is written into
 * every node selected by the mapping's XPath. Values without a mapping are
 * logged and skipped, as are mappings whose XPath evaluation fails.
 *
 * @param dso the object whose metadata is crosswalked
 * @param addSchema when true, each produced element gets a schemaLocation attribute
 * @return the produced MODS elements
 * @throws CrosswalkException if the object type is not supported or the
 *         mapping table cannot be initialized
 * @throws IOException if IO error
 * @throws SQLException if database error
 * @throws AuthorizeException if authorization error
 */
private List<Element> disseminateListInternal(DSpaceObject dso, boolean addSchema)
    throws CrosswalkException, IOException, SQLException, AuthorizeException
{
    List<MockMetadataValue> dcvs;
    if (dso.getType() == Constants.ITEM)
    {
        dcvs = item2Metadata((Item) dso);
    }
    else if (dso.getType() == Constants.COLLECTION)
    {
        dcvs = collection2Metadata((Collection) dso);
    }
    else if (dso.getType() == Constants.COMMUNITY)
    {
        dcvs = community2Metadata((Community) dso);
    }
    else if (dso.getType() == Constants.SITE)
    {
        dcvs = site2Metadata((Site) dso);
    }
    else
    {
        throw new CrosswalkObjectNotSupported(
            "MODSDisseminationCrosswalk can only crosswalk Items, Collections, Communities, or Sites");
    }

    initMap();
    if (modsMap == null)
    {
        // initMap() may come back without a table (e.g. the instance has no
        // plugin name); report that instead of failing on a null dereference.
        throw new CrosswalkInternalException(
            "MODS crosswalk mapping table is not initialized for crosswalk named \"" +
            getPluginInstanceName() + "\"");
    }

    List<Element> result = new ArrayList<Element>(dcvs.size());
    for (MockMetadataValue dcv : dcvs)
    {
        String qdc = dcv.getSchema() + "." + dcv.getElement();
        if (dcv.getQualifier() != null)
        {
            qdc += "." + dcv.getQualifier();
        }
        String value = dcv.getValue();

        modsTriple trip = modsMap.get(qdc);
        if (trip == null)
        {
            log.warn("WARNING: " + getPluginInstanceName() + ": No MODS mapping for \"" + qdc + "\"");
            continue;
        }

        try
        {
            // Work on a copy so the prototype stays reusable.
            Element me = (Element) trip.xml.clone();
            if (addSchema)
            {
                me.setAttribute("schemaLocation", schemaLocation, XSI_NS);
            }

            List<?> targets = trip.xpath.selectNodes(me);
            if (targets.isEmpty())
            {
                log.warn("XPath \"" + trip.xpath.getXPath() +
                    "\" found no elements in \"" +
                    outputUgly.outputString(me) +
                    "\", qdc=" + qdc);
            }
            for (Object what : targets)
            {
                if (what instanceof Element)
                {
                    ((Element) what).setText(checkedString(value));
                }
                else if (what instanceof Attribute)
                {
                    ((Attribute) what).setValue(checkedString(value));
                }
                else if (what instanceof Text)
                {
                    ((Text) what).setText(checkedString(value));
                }
                else
                {
                    log.warn("Got unknown object from XPath, class=" + what.getClass().getName());
                }
            }
            result.add(me);
        }
        catch (JDOMException je)
        {
            log.error("Error following XPath in modsTriple: context=" +
                outputUgly.outputString(trip.xml) +
                ", xpath=" + trip.xpath.getXPath() + ", exception=" +
                je.toString());
        }
    }
    return result;
}
/**
 * ModsCrosswalk can disseminate: Items, Collections, Communities, and Site.
 *
 * @param dso the object to test
 * @return true when the object's type is one of the four supported types
 */
@Override
public boolean canDisseminate(DSpaceObject dso)
{
    if (dso.getType() == Constants.ITEM)
    {
        return true;
    }
    if (dso.getType() == Constants.COLLECTION)
    {
        return true;
    }
    if (dso.getType() == Constants.COMMUNITY)
    {
        return true;
    }
    return dso.getType() == Constants.SITE;
}
/**
 * ModsCrosswalk prefers the single-element form over the list form.
 *
 * @return always false
 */
@Override
public boolean preferList()
{
    final boolean listPreferred = false;
    return listPreferred;
}
/**
 * Generate a list of metadata elements for the given DSpace
 * site.
 * <p>
 * Produces up to two {@code identifier.uri} values (the handle URI and the
 * site URL) and a {@code title}; each is emitted only when its source value
 * is present.
 *
 * @param site
 *            The site to derive metadata from
 * @return list of metadata
 */
protected List<MockMetadataValue> site2Metadata(Site site)
{
    List<MockMetadataValue> metadata = new ArrayList<>();

    String handle = site.getHandle();
    String title = site.getName();
    String url = site.getURL();

    // Test the handle itself: the concatenated URI is never null, so testing
    // it would let a missing handle through as "http://hdl.handle.net/null".
    if (handle != null)
    {
        metadata.add(createDCValue("identifier.uri", null, "http://hdl.handle.net/" + handle));
    }

    //FIXME: adding two URIs for now (site handle and URL), in case site isn't using handles
    if (url != null)
    {
        metadata.add(createDCValue("identifier.uri", null, url));
    }

    if (title != null)
    {
        metadata.add(createDCValue("title", null, title));
    }

    return metadata;
}
/**
 * Generate a list of metadata elements for the given DSpace
 * community.
 * <p>
 * Reads the community's introductory text, short description, side bar
 * text, copyright text and name, plus its handle, and emits a value for
 * each one that is present.
 *
 * @param community
 *            The community to derive metadata from
 * @return list of metadata
 */
protected List<MockMetadataValue> community2Metadata(Community community)
{
    List<MockMetadataValue> metadata = new ArrayList<>();

    String description = communityService.getMetadata(community, "introductory_text");
    String descriptionAbstract = communityService.getMetadata(community, "short_description");
    String descriptionTable = communityService.getMetadata(community, "side_bar_text");
    String handle = community.getHandle();
    String rights = communityService.getMetadata(community, "copyright_text");
    String title = communityService.getMetadata(community, "name");

    // Skip absent fields: a null value must not reach the crosswalk.
    if (description != null)
    {
        metadata.add(createDCValue("description", null, description));
    }

    if (descriptionAbstract != null)
    {
        metadata.add(createDCValue("description", "abstract", descriptionAbstract));
    }

    if (descriptionTable != null)
    {
        metadata.add(createDCValue("description", "tableofcontents", descriptionTable));
    }

    // Test the handle itself: the concatenated URI is never null, so testing
    // it would let a missing handle through as "http://hdl.handle.net/null".
    if (handle != null)
    {
        metadata.add(createDCValue("identifier.uri", null, "http://hdl.handle.net/" + handle));
    }

    if (rights != null)
    {
        metadata.add(createDCValue("rights", null, rights));
    }

    if (title != null)
    {
        metadata.add(createDCValue("title", null, title));
    }

    return metadata;
}
/**
 * Generate a list of metadata elements for the given DSpace
 * collection.
 * <p>
 * Reads the collection's introductory text, short description, side bar
 * text, provenance description, copyright text, license and name, plus its
 * handle, and emits a value for each one that is present.
 *
 * @param collection
 *            The collection to derive metadata from
 * @return list of metadata
 */
protected List<MockMetadataValue> collection2Metadata(Collection collection)
{
    List<MockMetadataValue> metadata = new ArrayList<>();

    String description = collectionService.getMetadata(collection, "introductory_text");
    String descriptionAbstract = collectionService.getMetadata(collection, "short_description");
    String descriptionTable = collectionService.getMetadata(collection, "side_bar_text");
    String handle = collection.getHandle();
    String provenance = collectionService.getMetadata(collection, "provenance_description");
    String rights = collectionService.getMetadata(collection, "copyright_text");
    String rightsLicense = collectionService.getMetadata(collection, "license");
    String title = collectionService.getMetadata(collection, "name");

    if (description != null)
    {
        metadata.add(createDCValue("description", null, description));
    }

    if (descriptionAbstract != null)
    {
        metadata.add(createDCValue("description", "abstract", descriptionAbstract));
    }

    if (descriptionTable != null)
    {
        metadata.add(createDCValue("description", "tableofcontents", descriptionTable));
    }

    // Test the handle itself: the concatenated URI is never null, so testing
    // it would let a missing handle through as "http://hdl.handle.net/null".
    if (handle != null)
    {
        metadata.add(createDCValue("identifier", "uri", "http://hdl.handle.net/" + handle));
    }

    if (provenance != null)
    {
        metadata.add(createDCValue("provenance", null, provenance));
    }

    if (rights != null)
    {
        metadata.add(createDCValue("rights", null, rights));
    }

    if (rightsLicense != null)
    {
        metadata.add(createDCValue("rights.license", null, rightsLicense));
    }

    if (title != null)
    {
        metadata.add(createDCValue("title", null, title));
    }

    return metadata;
}
/**
 * Generate a list of metadata elements for the given DSpace item.
 * All metadata values of the item are fetched (every schema, element,
 * qualifier and language) and each is wrapped in a MockMetadataValue.
 *
 * @param item
 *            The item to derive metadata from
 * @return list of metadata
 */
protected List<MockMetadataValue> item2Metadata(Item item)
{
    final List<MetadataValue> stored =
        itemService.getMetadata(item, Item.ANY, Item.ANY, Item.ANY, Item.ANY);

    final List<MockMetadataValue> wrapped = new ArrayList<>();
    for (MetadataValue original : stored)
    {
        wrapped.add(new MockMetadataValue(original));
    }
    return wrapped;
}
/**
 * Build a metadata value in the "dc" schema.
 *
 * @param element   element name to set
 * @param qualifier qualifier to set; may be null
 * @param value     text value to set
 * @return the populated value
 */
protected MockMetadataValue createDCValue(String element, String qualifier, String value)
{
    final MockMetadataValue created = new MockMetadataValue();
    created.setSchema("dc");
    created.setElement(element);
    created.setQualifier(qualifier);
    created.setValue(value);
    return created;
}
/**
 * Check for non-XML characters and strip them.
 *
 * @param value the text to check; may be null
 * @return null for null input; the same string when the JDOM Verifier
 *         accepts it as character data; otherwise a copy containing only
 *         the code points the Verifier accepts as XML characters
 */
private String checkedString(String value)
{
    if (value == null)
    {
        return null;
    }
    String reason = Verifier.checkCharacterData(value);
    if (reason == null)
    {
        return value;
    }

    if (log.isDebugEnabled())
    {
        log.debug("Filtering out non-XML characters in string, reason=" + reason);
    }

    // Walk by code point, not by char: a supplementary character is stored
    // as two surrogate chars, neither of which is an XML character on its
    // own, so a per-char filter would drop legal characters together with
    // the illegal ones. An unpaired surrogate still comes back as a single
    // code point in the surrogate range and is filtered out.
    StringBuilder result = new StringBuilder(value.length());
    int i = 0;
    while (i < value.length())
    {
        int codePoint = value.codePointAt(i);
        if (Verifier.isXMLCharacter(codePoint))
        {
            result.appendCodePoint(codePoint);
        }
        i += Character.charCount(codePoint);
    }
    return result.toString();
}
}