/*
* UKETDDCCrosswalk.java
*
* Version: $Revision: 3705 $
*
* Date: $Date: 2009-04-11 17:02:24 +0000 (Sat, 11 Apr 2009) $
*
* Copyright (c) 2002-2009, Hewlett-Packard Company and Massachusetts
* Institute of Technology. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the Hewlett-Packard Company nor the name of the
* Massachusetts Institute of Technology nor the names of their
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package org.dspace.app.oai;
import java.sql.SQLException;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Utils;
import org.dspace.search.HarvestedItemInfo;
import org.dspace.content.*;
import ORG.oclc.oai.server.crosswalk.Crosswalk;
import ORG.oclc.oai.server.verb.CannotDisseminateFormatException;
/**
 * A Crosswalk implementation that extracts qualified Dublin Core from
 * DSpace items into the uketd_dc format.
 *
 * It supports the writing of UKETD_DC metadata in a METS document and
 * makes the schema URIs available for inclusion in such a METS document.
 * For this reason, the writing of the metadata itself
 * ({@link #writeMetadata(Item)}) is separated from the writing of the
 * namespace/schema header ({@link #writeMetadataWithSchema(Item)}), so
 * that <code>writeMetadata</code> emits no header information.
 *
 * All element text is XML-escaped in exactly one place (the private
 * <code>makeTermsElement</code> helper); values must therefore be passed to
 * the element helpers un-escaped.
 *
 * @author Paul Needham (Cranfield University)
 * @author Jon Bell &amp; Stuart Lewis (Aberystwyth University)
 */
public class UKETDDCCrosswalk extends Crosswalk
{
    /**
     * Matches either a run of code points that are not legal XML 1.0
     * characters, or a single character that must be escaped in element
     * content (&amp;, &lt; or &gt;).
     *
     * XML 1.0 allows #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
     * [#x10000-#x10FFFF]. The class below lists the complement of that set,
     * so no supplementary-plane escape is needed: a well-formed surrogate
     * pair is matched as one code point above U+FFFF and is left alone,
     * while an unpaired surrogate falls in U+D800-U+DFFF and is replaced.
     */
    private static final Pattern XML_UNSAFE_PATTERN = Pattern.compile(
            "([\\u0000-\\u0008\\u000b\\u000c\\u000e-\\u001f\\ud800-\\udfff\\ufffe\\uffff]+|[&<>])");

    // String constants for metadata schemas...

    /** Qualified name of the root element that wraps a UKETD_DC record. */
    private static final String UKETD_ROOT = "uketd_dc:uketddc";

    /** The identifier for the uketd namespace. */
    private static final String UKETD_NS = "uketd_dc";

    /** The URI of the uketd namespace. */
    private static final String UKETD_URI = "http://naca.central.cranfield.ac.uk/ethos-oai/2.0/";

    /** The identifier for the namespace of the DC used in the UKETD_DC metadata set. */
    private static final String DC_NS = "dc";

    /** The URI of the DC namespace. */
    private static final String DC_URI = "http://purl.org/dc/elements/1.1/";

    /** The identifier for the namespace of the qualified DC terms used in UKETD_DC. */
    private static final String DC_TERMS_NS = "dcterms";

    /** The URI of the DC terms namespace. */
    private static final String DC_TERMS_URI = "http://purl.org/dc/terms/";

    /** Identifier of the UKETD terms namespace. */
    private static final String UKETD_TERMS_NS = "uketdterms";

    /** The URI of the uketd terms namespace. */
    private static final String UKETD_TERMS_URI = "http://naca.central.cranfield.ac.uk/ethos-oai/terms/";

    /** The xsi namespace declaration (identifier and URI) used for UKETD records. */
    private static final String XSI_DECLARATION = "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"";

    /** The xsi schema location attribute name, used in UKETD records. */
    private static final String SCHEMA_LOCATION_ATTR = "xsi:schemaLocation";

    /** The URI of the uketd location namespace. */
    private static final String UKETD_SCHEMA_LOC_NS = "http://naca.central.cranfield.ac.uk/ethos-oai/2.0/";

    /** The URI of the uketd location. */
    private static final String UKETD_SCHEMA_LOC_URI = "http://naca.central.cranfield.ac.uk/ethos-oai/2.0/uketd_dc.xsd";

    /**
     * UKETDDCCrosswalk constructor. Passes the schema location
     * (namespace URI, a space, then the XSD URI) to the superclass.
     *
     * @param properties Not used
     */
    public UKETDDCCrosswalk(Properties properties)
    {
        super(UKETD_SCHEMA_LOC_NS + " " + UKETD_SCHEMA_LOC_URI);
    }

    /**
     * Returns the identifier for the UKETD namespace.
     *
     * @return the UKETD namespace identifier
     */
    public String getUketdNs()
    {
        return UKETD_NS;
    }

    /**
     * Returns the URI of the UKETD namespace.
     *
     * @return the UKETD namespace URI
     */
    public String getUketdUri()
    {
        return UKETD_URI;
    }

    /**
     * Returns the identifier for the Dublin Core namespace.
     *
     * @return the DC namespace identifier
     */
    public String getDcNs()
    {
        return DC_NS;
    }

    /**
     * Returns the URI of the Dublin Core namespace.
     *
     * @return the DC namespace URI
     */
    public String getDcUri()
    {
        return DC_URI;
    }

    /**
     * Returns the identifier for the DC terms (qualifiers) namespace.
     *
     * @return the DC terms namespace identifier
     */
    public String getDcTermsNs()
    {
        return DC_TERMS_NS;
    }

    /**
     * Returns the URI of the DC terms namespace.
     *
     * @return the DC terms namespace URI
     */
    public String getDcTermsUri()
    {
        return DC_TERMS_URI;
    }

    /**
     * Returns the identifier for the UKETD terms namespace.
     *
     * @return the UKETD terms namespace identifier
     */
    public String getUketdTermsNs()
    {
        return UKETD_TERMS_NS;
    }

    /**
     * Returns the URI of the UKETD terms namespace.
     *
     * @return the UKETD terms namespace URI
     */
    public String getUketdTermsUri()
    {
        return UKETD_TERMS_URI;
    }

    /**
     * Returns the identifier for the UKETD schema location.
     *
     * @return the UKETD schema location namespace
     */
    public String getUketdSchemaLocNs()
    {
        return UKETD_SCHEMA_LOC_NS;
    }

    /**
     * Returns the URI of the UKETD schema location.
     *
     * @return the UKETD schema (XSD) URI
     */
    public String getUketdSchemaLocUri()
    {
        return UKETD_SCHEMA_LOC_URI;
    }

    /**
     * Shows what items UKETD_DC OAI-PMH is available for.
     * This is every item in the repository.
     *
     * @param nativeItem the item being asked about (not inspected)
     * @return always <code>true</code>
     */
    public boolean isAvailableFor(Object nativeItem)
    {
        // We have DC for everything
        return true;
    }

    /**
     * Creates the metadata necessary for the UKETD_DC crosswalk.
     * Adds the name space details and schemas to the metadata itself.
     * It therefore creates a complete OAI-PMH record that matches
     * the UKETD_DC metadata prefix.
     *
     * @param nativeItem the harvested item; it is cast to
     *            <code>HarvestedItemInfo</code> without a type check
     * @return The OAI-PMH xml
     * @throws CannotDisseminateFormatException declared by the signature;
     *             not thrown by this implementation itself
     */
    public String createMetadata(Object nativeItem)
            throws CannotDisseminateFormatException
    {
        // Get the Item
        Item item = ((HarvestedItemInfo) nativeItem).item;

        // Write the record out
        return writeMetadataWithSchema(item);
    }

    /**
     * Write the item's metadata, headed by the schema namespace
     * details. Separated from createMetadata, so UKETD_METS can
     * use the method with an Item, not an Object (nativeItem).
     *
     * @param item The org.dspace.content.Item
     * @return a String, the item's metadata in UKETD_DC format.
     */
    public String writeMetadataWithSchema(Item item)
    {
        StringBuilder record = new StringBuilder();

        // Opening tag with every namespace declaration used by the body
        record.append('<').append(UKETD_ROOT).append(' ');
        appendNamespaceDeclaration(record, UKETD_NS, UKETD_URI);
        appendNamespaceDeclaration(record, DC_NS, DC_URI);
        appendNamespaceDeclaration(record, DC_TERMS_NS, DC_TERMS_URI);
        appendNamespaceDeclaration(record, UKETD_TERMS_NS, UKETD_TERMS_URI);
        record.append(XSI_DECLARATION).append(' ');
        record.append(SCHEMA_LOCATION_ATTR).append("=\"")
              .append(UKETD_SCHEMA_LOC_NS).append(' ')
              .append(UKETD_SCHEMA_LOC_URI).append("\">\n");

        record.append(writeMetadata(item));

        record.append("</").append(UKETD_ROOT).append(">\n");
        return record.toString();
    }

    /**
     * Writes the UKETD_DC metadata for the specified item.
     * It gets hold of the Dublin Core for an Item and converts it to
     * UKETD_DC, including the splitting of the unqualified Dublin Core
     * publisher and type fields on semicolons, and then appends one
     * identifier/checksum pair per non-internal bitstream in the item's
     * ORIGINAL bundles. No schema information is included.
     *
     * This method does no checking of the correctness of the
     * metadata format. A SQLException raised while reading the bitstreams
     * is swallowed, in which case the bitstream elements may be missing
     * or incomplete.
     *
     * @param item a org.dspace.content.Item
     * @return a String, the item's metadata in UKETD_DC xml.
     */
    public String writeMetadata(Item item)
    {
        // The string we are constructing
        StringBuilder metadata = new StringBuilder();

        // Get all the DC
        DCValue[] allDC = item.getMetadata(MetadataSchema.DC_SCHEMA, Item.ANY, Item.ANY, Item.ANY);
        for (DCValue dc : allDC)
        {
            // Values are passed on un-escaped; escaping happens once, when
            // the element text is written.
            crosswalkValue(dc.element, dc.qualifier, dc.value, metadata);
        }

        // Generate bitstream URIs
        appendBitstreamElements(item, metadata);

        // Return the metadata - all done!
        return metadata.toString();
    }

    /**
     * Appends one <code>xmlns:prefix="uri" </code> declaration (with a
     * trailing space) to the buffer.
     */
    private void appendNamespaceDeclaration(StringBuilder buffer, String prefix, String uri)
    {
        buffer.append("xmlns:").append(prefix).append("=\"").append(uri).append("\" ");
    }

    /**
     * Dispatches a single DC value to the mapping for its element.
     * Elements without a mapping are omitted from the output.
     */
    private void crosswalkValue(String element, String qualifier,
                                String value, StringBuilder out)
    {
        if ("title".equals(element))
        {
            crosswalkTitle(qualifier, value, out);
        }
        else if ("contributor".equals(element))
        {
            crosswalkContributor(qualifier, value, out);
        }
        else if ("subject".equals(element))
        {
            crosswalkSubject(qualifier, value, out);
        }
        else if ("description".equals(element))
        {
            crosswalkDescription(qualifier, value, out);
        }
        else if ("publisher".equals(element))
        {
            crosswalkPublisher(qualifier, value, out);
        }
        else if ("date".equals(element))
        {
            crosswalkDate(qualifier, value, out);
        }
        else if ("type".equals(element))
        {
            crosswalkType(qualifier, value, out);
        }
        else if ("language".equals(element))
        {
            crosswalkLanguage(qualifier, value, out);
        }
        else if ("relation".equals(element))
        {
            crosswalkRelation(qualifier, value, out);
        }
        else if ("format".equals(element))
        {
            crosswalkFormat(qualifier, value, out);
        }
        else if ("identifier".equals(element))
        {
            crosswalkIdentifier(qualifier, value, out);
        }
        else if ("rights".equals(element))
        {
            crosswalkRights(qualifier, value, out);
        }
    }

    /** title -> dc:title; title.alternative -> dcterms:alternative; other qualifiers omitted. */
    private void crosswalkTitle(String qualifier, String value, StringBuilder out)
    {
        if (qualifier == null)
        {
            makeDCElement("title", null, value, out);
        }
        else if ("alternative".equals(qualifier))
        {
            makeDCTermsElement(qualifier, null, value, out);
        }
    }

    /**
     * contributor.author -> dc:creator; advisor/sponsor -> uketdterms:qualifier;
     * funder -> uketdterms:sponsor; any other qualifier -> dcterms:qualifier;
     * unqualified -> dc:contributor.
     */
    private void crosswalkContributor(String qualifier, String value, StringBuilder out)
    {
        if (qualifier == null)
        {
            makeDCElement("contributor", null, value, out);
        }
        else if ("author".equals(qualifier))
        {
            makeDCElement("creator", null, value, out);
        }
        else if ("advisor".equals(qualifier) || "sponsor".equals(qualifier))
        {
            makeUKDCTermsElement(qualifier, null, value, out);
        }
        else if ("funder".equals(qualifier))
        {
            makeUKDCTermsElement("sponsor", null, value, out);
        }
        else
        {
            makeDCTermsElement(qualifier, null, value, out);
        }
    }

    /**
     * subject.{ddc,lcc,lcsh,mesh,udc} -> dc:subject xsi:type="dcterms:QUALIFIER"
     * (upper-cased); everything else -> plain dc:subject.
     */
    private void crosswalkSubject(String qualifier, String value, StringBuilder out)
    {
        boolean knownScheme = "ddc".equals(qualifier) || "lcc".equals(qualifier)
                || "lcsh".equals(qualifier) || "mesh".equals(qualifier)
                || "udc".equals(qualifier);
        if (knownScheme)
        {
            makeDCElement("subject", qualifier.toUpperCase(), value, out);
        }
        else
        {
            makeDCElement("subject", null, value, out);
        }
    }

    /**
     * description -> dc:description; description.abstract -> dcterms:abstract;
     * description.sponsorship -> uketdterms:sponsor; other qualifiers omitted.
     */
    private void crosswalkDescription(String qualifier, String value, StringBuilder out)
    {
        if (qualifier == null)
        {
            makeDCElement("description", null, value, out);
        }
        else if ("abstract".equals(qualifier))
        {
            makeDCTermsElement(qualifier, null, value, out);
        }
        else if ("sponsorship".equals(qualifier))
        {
            makeUKDCTermsElement("sponsor", null, value, out);
        }
    }

    /**
     * publisher.department / publisher.commercial -> uketdterms:qualifier;
     * other qualifiers omitted. An unqualified publisher is split on
     * semicolons: the first part becomes uketdterms:institution, the second
     * and (if present) third parts become a single uketdterms:department.
     */
    private void crosswalkPublisher(String qualifier, String value, StringBuilder out)
    {
        if (qualifier != null)
        {
            if ("department".equals(qualifier) || "commercial".equals(qualifier))
            {
                makeUKDCTermsElement(qualifier, null, value, out);
            }
            return;
        }

        String[] parts = splitOnSemicolons(value);
        if (parts.length == 0)
        {
            // null value, or nothing but separators: there is no institution to report
            return;
        }
        makeUKDCTermsElement("institution", null, parts[0], out);

        if (parts.length > 1)
        {
            StringBuilder department = new StringBuilder(parts[1]);
            if (parts.length > 2)
            {
                // the separator is only written between two parts, never trailing
                department.append("; ").append(parts[2]);
            }
            if (department.toString().trim().length() > 0)
            {
                makeUKDCTermsElement("department", null, department.toString(), out);
            }
        }
    }

    /** date.issued -> dcterms:issued; every other date -> dc:date. */
    private void crosswalkDate(String qualifier, String value, StringBuilder out)
    {
        if ("issued".equals(qualifier))
        {
            makeDCTermsElement(qualifier, null, value, out);
        }
        else
        {
            makeDCElement("date", null, value, out);
        }
    }

    /**
     * type.qualificationlevel / type.qualificationname -> uketdterms:qualifier;
     * other qualifiers omitted. An unqualified type is split on semicolons
     * into dc:type, uketdterms:qualificationlevel and
     * uketdterms:qualificationname (second and third parts are optional).
     */
    private void crosswalkType(String qualifier, String value, StringBuilder out)
    {
        if (qualifier != null)
        {
            if ("qualificationlevel".equals(qualifier) || "qualificationname".equals(qualifier))
            {
                makeUKDCTermsElement(qualifier, null, value, out);
            }
            return;
        }

        String[] parts = splitOnSemicolons(value);
        if (parts.length == 0)
        {
            // null value, or nothing but separators
            return;
        }
        makeDCElement("type", null, parts[0], out);
        if (parts.length > 1)
        {
            makeUKDCTermsElement("qualificationlevel", null, parts[1], out);
        }
        if (parts.length > 2)
        {
            makeUKDCTermsElement("qualificationname", null, parts[2], out);
        }
    }

    /** language.iso -> dc:language xsi:type="dcterms:ISO639-2"; otherwise plain dc:language. */
    private void crosswalkLanguage(String qualifier, String value, StringBuilder out)
    {
        if ("iso".equals(qualifier))
        {
            makeDCElement("language", "ISO639-2", value, out);
        }
        else
        {
            makeDCElement("language", null, value, out);
        }
    }

    /**
     * relation.hasversion -> dcterms:hasVersion; relation.references /
     * relation.requires -> dcterms:qualifier; otherwise dc:relation.
     */
    private void crosswalkRelation(String qualifier, String value, StringBuilder out)
    {
        if ("hasversion".equals(qualifier))
        {
            // hasVersion is a DC *terms* refinement, so it belongs in the
            // dcterms namespace (as identifier.citation already does).
            makeDCTermsElement("hasVersion", null, value, out);
        }
        else if ("references".equals(qualifier) || "requires".equals(qualifier))
        {
            makeDCTermsElement(qualifier, null, value, out);
        }
        else
        {
            makeDCElement("relation", null, value, out);
        }
    }

    /**
     * format -> dc:format; format.extent -> dcterms:extent;
     * format.mimetype -> dc:format xsi:type="dcterms:IMT"; other qualifiers omitted.
     */
    private void crosswalkFormat(String qualifier, String value, StringBuilder out)
    {
        if (qualifier == null)
        {
            makeDCElement("format", null, value, out);
        }
        else if ("extent".equals(qualifier))
        {
            makeDCTermsElement(qualifier, null, value, out);
        }
        else if ("mimetype".equals(qualifier))
        {
            makeDCElement("format", "IMT", value, out);
        }
    }

    /**
     * identifier -> dc:identifier; identifier.uri -> dcterms:isReferencedBy
     * xsi:type="dcterms:URI"; identifier.citation -> dcterms:hasVersion;
     * identifier.grantnumber -> uketdterms:grantnumber; other qualifiers omitted.
     */
    private void crosswalkIdentifier(String qualifier, String value, StringBuilder out)
    {
        if (qualifier == null)
        {
            makeDCElement("identifier", null, value, out);
        }
        else if ("uri".equals(qualifier))
        {
            makeDCTermsElement("isReferencedBy", "URI", value, out);
        }
        else if ("citation".equals(qualifier))
        {
            makeDCTermsElement("hasVersion", null, value, out);
        }
        else if ("grantnumber".equals(qualifier))
        {
            makeUKDCTermsElement(qualifier, null, value, out);
        }
    }

    /** rights.embargodate / rights.embargoreason -> uketdterms:qualifier; otherwise dc:rights. */
    private void crosswalkRights(String qualifier, String value, StringBuilder out)
    {
        if ("embargodate".equals(qualifier) || "embargoreason".equals(qualifier))
        {
            makeUKDCTermsElement(qualifier, null, value, out);
        }
        else
        {
            makeDCElement("rights", null, value, out);
        }
    }

    /**
     * Splits an un-escaped metadata value on semicolons.
     *
     * @return the parts, or an empty array when the value is null or
     *         consists only of separators (String.split drops trailing
     *         empty strings, so such input has no first part)
     */
    private String[] splitOnSemicolons(String value)
    {
        if (value == null)
        {
            return new String[0];
        }
        return value.split(";");
    }

    /**
     * Appends, for every non-internal bitstream in the item's ORIGINAL
     * bundles, a dc:identifier (xsi:type dcterms:URI) holding the bitstream
     * URL and a uketdterms:checksum typed with the checksum algorithm.
     */
    private void appendBitstreamElements(Item item, StringBuilder out)
    {
        try
        {
            // Everything up to the sequence id is the same for all bitstreams
            String urlPrefix = ConfigurationManager.getProperty("dspace.url")
                    + "/bitstream/" + item.getHandle() + "/";

            // Iterate through each bundle
            for (Bundle bundle : item.getBundles("ORIGINAL"))
            {
                // Iterate through each bitstream
                for (Bitstream bitstream : bundle.getBitstreams())
                {
                    // Skip internal types
                    if (bitstream.getFormat().isInternal())
                    {
                        continue;
                    }

                    // NOTE(review): the bitstream name is XML-escaped when
                    // written, but is not URL-encoded here - confirm whether
                    // harvesters need percent-encoding for unusual names.
                    String url = urlPrefix + bitstream.getSequenceID()
                            + "/" + bitstream.getName();
                    makeDCElement("identifier", "URI", url, out);
                    makeUKDCTermsElement("checksum",
                            bitstream.getChecksumAlgorithm(),
                            bitstream.getChecksum(), out);
                }
            }
        }
        catch (SQLException ignored)
        {
            // Deliberate best effort: writeMetadata is documented not to
            // throw, so the record is returned with whatever bitstream
            // elements were written before the failure.
        }
    }

    /**
     * Private wrapper method to create a DC element; a qualifier, when
     * given, is written as an xsi:type in the dcterms namespace.
     *
     * @param element The element name
     * @param qualifier The qualifier name (or null)
     * @param value The un-escaped value of the element
     * @param buffer The buffer to add the element to
     */
    private void makeDCElement(String element, String qualifier,
                               String value, StringBuilder buffer)
    {
        makeTermsElement(element, qualifier, value, buffer, DC_NS, DC_TERMS_NS);
    }

    /**
     * Private wrapper method to create a DCterms term element.
     *
     * @param element The element name
     * @param qualifier The qualifier name (or null)
     * @param value The un-escaped value of the element
     * @param buffer The buffer to add the element to
     */
    private void makeDCTermsElement(String element, String qualifier,
                                    String value, StringBuilder buffer)
    {
        makeTermsElement(element, qualifier, value, buffer, DC_TERMS_NS, DC_TERMS_NS);
    }

    /**
     * Private wrapper method to create a UKETD DC term element.
     *
     * @param element The element name
     * @param qualifier The qualifier name (or null)
     * @param value The un-escaped value of the element
     * @param buffer The buffer to add the element to
     */
    private void makeUKDCTermsElement(String element, String qualifier,
                                      String value, StringBuilder buffer)
    {
        makeTermsElement(element, qualifier, value, buffer, UKETD_TERMS_NS, UKETD_TERMS_NS);
    }

    /**
     * Appends one XML element, followed by a newline, to the buffer.
     * The value is trimmed and XML-escaped here, so callers must pass it
     * un-escaped. A null value produces an empty element (and the
     * qualifier is then not written).
     *
     * @param element The element name
     * @param qualifier The qualifier name written as xsi:type (or null)
     * @param value The un-escaped value of the element (or null)
     * @param buffer The buffer to add the element to
     * @param namespace The namespace prefix of the element
     * @param terms The namespace prefix used for the xsi:type qualifier
     */
    private void makeTermsElement(String element, String qualifier,
                                  String value, StringBuilder buffer,
                                  String namespace, String terms)
    {
        String tag = namespace + ":" + element;

        if (value == null)
        {
            buffer.append('<').append(tag).append(" />\n");
            return;
        }

        buffer.append('<').append(tag);
        if (qualifier != null)
        {
            buffer.append(" xsi:type=\"").append(terms).append(':')
                  .append(qualifier).append('"');
        }
        buffer.append('>')
              .append(escapeXmlText(value.trim()))
              .append("</").append(tag).append(">\n");
    }

    /**
     * Escapes &amp;, &lt; and &gt; and replaces each run of characters
     * that are not legal in XML 1.0 with a single space.
     *
     * @param text the raw text (not null)
     * @return text that is safe to use as XML element content
     */
    private static String escapeXmlText(String text)
    {
        // Matcher.appendReplacement needs a StringBuffer
        StringBuffer escaped = new StringBuffer(text.length());
        Matcher unsafe = XML_UNSAFE_PATTERN.matcher(text);
        while (unsafe.find())
        {
            // The match is either one markup character or a run of
            // characters that XML cannot represent at all.
            String hit = unsafe.group();
            String replacement;
            if ("&".equals(hit))
            {
                replacement = "&amp;";
            }
            else if ("<".equals(hit))
            {
                replacement = "&lt;";
            }
            else if (">".equals(hit))
            {
                replacement = "&gt;";
            }
            else
            {
                replacement = " ";
            }
            unsafe.appendReplacement(escaped, replacement);
        }
        // add bit of the string after the final match
        unsafe.appendTail(escaped);
        return escaped.toString();
    }
}