/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.crosswalk;
import java.io.IOException;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.BitstreamFormat;
import org.dspace.content.Bundle;
import org.dspace.content.DSpaceObject;
import org.dspace.content.FormatIdentifier;
import org.dspace.content.Item;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.jdom.Element;
import org.jdom.Namespace;
/**
 * PREMIS Crosswalk
 * <p>
 * Translate between DSpace Bitstream properties and PREMIS metadata format
 * (see <a href="http://www.oclc.org/research/projects/pmwg/">
 * http://www.oclc.org/research/projects/pmwg/</a> for details).
 * This is intended to implement the requirements of the DSpace METS SIP
 * specification for both ingest and dissemination.
 * <p>
 * Both directions only accept a Bitstream; any other object type is
 * rejected with {@code CrosswalkObjectNotSupported}.
 *
 * @author Larry Stone
 * @version $Revision$
 */
public class PREMISCrosswalk
    implements IngestionCrosswalk, DisseminationCrosswalk
{
    /** log4j category */
    private static final Logger log = Logger.getLogger(PREMISCrosswalk.class);

    private static final Namespace PREMIS_NS =
        Namespace.getNamespace("premis", "http://www.loc.gov/standards/premis");

    // XML schemaLocation fragment for this crosswalk: the PREMIS namespace
    // URI followed by the (hard-coded) location of the PREMIS v1.0 schema.
    private final String schemaLocation =
        PREMIS_NS.getURI()+" http://www.loc.gov/standards/premis/PREMIS-v1-0.xsd";

    private static final Namespace namespaces[] = { PREMIS_NS };

    /*----------- Submission functions -------------------*/

    /**
     * Ingest PREMIS metadata given its root element, which must be named
     * "premis". The children of the root are handed to
     * {@link #ingest(Context, DSpaceObject, List)}.
     *
     * @param context DSpace context
     * @param dso target object; must be a Bitstream
     * @param root the "premis" root element
     * @throws CrosswalkException if the root element has the wrong name, or
     *         the metadata is rejected by the list-based ingest
     */
    public void ingest(Context context, DSpaceObject dso, Element root)
        throws CrosswalkException, IOException, SQLException, AuthorizeException
    {
        if (!(root.getName().equals("premis")))
        {
            throw new MetadataValidationException("Wrong root element for PREMIS: " + root.toString());
        }
        ingest(context, dso, root.getChildren());
    }

    /**
     * Ingest a list of PREMIS elements into a Bitstream.
     * <p>
     * For each "object" element: the originalName (if any) becomes the
     * bitstream name, the size and fixity in objectCharacteristics are
     * checked against the bitstream, and the bitstream format is set from
     * the formatName (treated as a MIME type) or, failing that, guessed by
     * {@code FormatIdentifier}. Nested "premis" wrappers are processed
     * recursively; all other elements are skipped. The bitstream is updated
     * once the whole list has been processed.
     *
     * @param context DSpace context
     * @param dso target object; must be a Bitstream
     * @param ml PREMIS elements to ingest
     * @throws CrosswalkException if dso is not a Bitstream, or the size or
     *         checksum recorded in PREMIS disagrees with the bitstream
     */
    public void ingest(Context context, DSpaceObject dso, List<Element> ml)
        throws CrosswalkException, IOException, SQLException, AuthorizeException
    {
        // we only understand how to crosswalk PREMIS to a Bitstream.
        if (dso.getType() != Constants.BITSTREAM)
        {
            throw new CrosswalkObjectNotSupported("Wrong target object type, PREMISCrosswalk can only crosswalk to a Bitstream.");
        }

        Bitstream bitstream = (Bitstream)dso;

        // Deliberately declared outside the loop: values found in an earlier
        // "object" element remain in effect for later ones.
        String MIMEType = null;
        String bsName = null;

        for (Element me : ml)
        {
            if (me.getName().equals("premis"))
            {
                // if we're fed a <premis> wrapper object, recurse on its guts:
                ingest(context, dso, me.getChildren());
            }
            else if (me.getName().equals("object"))
            {
                // "object" section:
                // originalName becomes the new bitstream name
                Element on = me.getChild("originalName", PREMIS_NS);
                if (on != null)
                {
                    bsName = on.getTextTrim();
                }

                // Reconcile technical metadata with bitstream content;
                // check that length and message digest (checksum) match.
                // XXX FIXME: wait for Checksum Checker code to add better test.
                Element oc = me.getChild("objectCharacteristics", PREMIS_NS);
                if (oc != null)
                {
                    verifySize(bitstream, oc.getChildTextTrim("size", PREMIS_NS));
                    verifyFixity(bitstream, oc.getChild("fixity", PREMIS_NS));

                    // Look for formatDesignation/formatName, which is
                    // MIME Type.  Match with DSpace bitstream format.
                    Element format = oc.getChild("format", PREMIS_NS);
                    if (format != null)
                    {
                        Element fd = format.getChild("formatDesignation", PREMIS_NS);
                        if (fd != null)
                        {
                            MIMEType = fd.getChildTextTrim("formatName", PREMIS_NS);
                        }
                    }
                }

                // Apply new bitstream name if we found it.
                if (bsName != null)
                {
                    bitstream.setName(bsName);
                    log.debug("Changing bitstream id="+String.valueOf(bitstream.getID())+" name to: "+bsName);
                }

                // reconcile bitstream format; if there's a MIMEtype,
                // get it from that, otherwise try to divine from file extension
                // (guessFormat() looks at bitstream Name, which we just set)
                BitstreamFormat bf = (MIMEType == null) ? null :
                        BitstreamFormat.findByMIMEType(context, MIMEType);
                if (bf == null)
                {
                    bf = FormatIdentifier.guessFormat(context, bitstream);
                }
                if (bf != null)
                {
                    bitstream.setFormat(bf);
                }
            }
            else
            {
                log.debug("Skipping element: " + me.toString());
            }
        }
        bitstream.update();
    }

    /**
     * Check the size recorded in PREMIS against the actual bitstream size.
     * The value is parsed as a long so that sizes beyond the int range are
     * compared correctly instead of being rejected as malformed.
     * (The throws clause simply mirrors the public ingest methods.)
     *
     * @param bitstream bitstream being ingested into
     * @param ssize text of the PREMIS size element, or null if absent
     *        (in which case nothing is checked)
     * @throws CrosswalkException if the value is not a number or does not
     *         equal the bitstream's size
     */
    private void verifySize(Bitstream bitstream, String ssize)
        throws CrosswalkException, IOException, SQLException, AuthorizeException
    {
        if (ssize == null)
        {
            return;
        }

        long size;
        try
        {
            size = Long.parseLong(ssize);
        }
        catch (NumberFormatException ne)
        {
            throw new MetadataValidationException("Bad number value in PREMIS object/objectCharacteristics/size: "+ssize, ne);
        }

        if (bitstream.getSize() != size)
        {
            throw new MetadataValidationException(
                    "Bitstream size (" + String.valueOf(bitstream.getSize()) +
                    ") does not match size in PREMIS (" + ssize + "), rejecting it.");
        }
    }

    /**
     * Check the message digest recorded in PREMIS against the bitstream's
     * checksum. The digests are only compared when both sides name the same
     * algorithm; otherwise a warning is logged and the check is skipped.
     * (The throws clause simply mirrors the public ingest methods.)
     *
     * @param bitstream bitstream being ingested into
     * @param fixity the PREMIS fixity element, or null if absent
     *        (in which case nothing is checked)
     * @throws CrosswalkException if the algorithms match but the digests differ
     */
    private void verifyFixity(Bitstream bitstream, Element fixity)
        throws CrosswalkException, IOException, SQLException, AuthorizeException
    {
        if (fixity == null)
        {
            return;
        }

        String alg = fixity.getChildTextTrim("messageDigestAlgorithm", PREMIS_NS);
        String md = fixity.getChildTextTrim("messageDigest", PREMIS_NS);
        String b_alg = bitstream.getChecksumAlgorithm();
        String b_md = bitstream.getChecksum();

        if (!StringUtils.equals(alg, b_alg))
        {
            log.warn("Cannot test checksum on bitstream=" + bitstream.getName() +
                     ", algorithm in PREMIS is different: " + alg);
            return;
        }

        if (!StringUtils.equals(md, b_md))
        {
            throw new MetadataValidationException("Bitstream " + alg + " Checksum does not match value in PREMIS (" + b_md + " != " + md + "), for bitstream: " + bitstream.getName());
        }
        log.debug("Bitstream checksum agrees with PREMIS: " + bitstream.getName());
    }

    /*----------- Dissemination functions -------------------*/

    /**
     * @return a copy of the namespaces used by this crosswalk (PREMIS only)
     */
    public Namespace[] getNamespaces()
    {
        return (Namespace[]) ArrayUtils.clone(namespaces);
    }

    /**
     * @return the schemaLocation fragment: PREMIS namespace URI, a space,
     *         and the schema URL
     */
    public String getSchemaLocation()
    {
        return schemaLocation;
    }

    /**
     * @param dso object to test
     * @return true only if dso is a Bitstream
     */
    public boolean canDisseminate(DSpaceObject dso)
    {
        //PREMISCrosswalk can only crosswalk a Bitstream
        return (dso.getType() == Constants.BITSTREAM);
    }

    /**
     * Build a PREMIS "premis" element describing a Bitstream. The single
     * "object" child carries, in order: objectIdentifier (type "URL"),
     * objectCategory ("File"), objectCharacteristics (fixity when both
     * checksum and algorithm are available, size, and format with the MIME
     * type as formatName) and, when a name or source exists, originalName.
     *
     * @param dso object to disseminate; must be a Bitstream
     * @return the root "premis" element
     * @throws CrosswalkException if dso is not a Bitstream
     */
    public Element disseminateElement(DSpaceObject dso)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        if (dso.getType() != Constants.BITSTREAM)
        {
            throw new CrosswalkObjectNotSupported("PREMISCrosswalk can only crosswalk a Bitstream.");
        }
        Bitstream bitstream = (Bitstream)dso;

        Element premis = new Element("premis", PREMIS_NS);
        Element object = new Element("object", PREMIS_NS);
        premis.addContent(object);

        // objectIdentifier is required
        Element oid = new Element("objectIdentifier", PREMIS_NS);
        Element oit = new Element("objectIdentifierType", PREMIS_NS);
        oit.setText("URL");
        oid.addContent(oit);
        Element oiv = new Element("objectIdentifierValue", PREMIS_NS);
        oiv.setText(makeObjectIdentifierValue(bitstream));
        oid.addContent(oiv);
        object.addContent(oid);

        // objectCategory is fixed value, "File".
        Element oc = new Element("objectCategory", PREMIS_NS);
        oc.setText("File");
        object.addContent(oc);

        Element ochar = new Element("objectCharacteristics", PREMIS_NS);
        object.addContent(ochar);

        // checksum if available
        String cks = bitstream.getChecksum();
        String cka = bitstream.getChecksumAlgorithm();
        if (cks != null && cka != null)
        {
            Element fixity = new Element("fixity", PREMIS_NS);
            Element mda = new Element("messageDigestAlgorithm", PREMIS_NS);
            mda.setText(cka);
            fixity.addContent(mda);
            Element md = new Element("messageDigest", PREMIS_NS);
            md.setText(cks);
            fixity.addContent(md);
            ochar.addContent(fixity);
        }

        // size
        Element size = new Element("size", PREMIS_NS);
        size.setText(String.valueOf(bitstream.getSize()));
        ochar.addContent(size);

        // Punt and set formatName to the MIME type; the best we can
        // do for now in the absence of any usable global format registries.
        // objectCharacteristics/format/formatDesignation/
        //   formatName <- MIME Type
        Element format = new Element("format", PREMIS_NS);
        Element formatDes = new Element("formatDesignation", PREMIS_NS);
        Element formatName = new Element("formatName", PREMIS_NS);
        formatName.setText(bitstream.getFormat().getMIMEType());
        formatDes.addContent(formatName);
        format.addContent(formatDes);
        ochar.addContent(format);

        // originalName <- name (or source if none)
        String oname = bitstream.getName();
        if (oname == null)
        {
            oname = bitstream.getSource();
        }
        if (oname != null)
        {
            Element on = new Element("originalName", PREMIS_NS);
            on.setText(oname);
            object.addContent(on);
        }

        return premis;
    }

    /**
     * Compute the objectIdentifierValue for a bitstream. By preference:
     * <ol>
     * <li>a URL of the form
     *     {@code <dspace.url>/bitstream/<handle>/<sequence id>/<name>}, when
     *     both the "dspace.url" property and a parent Item handle are
     *     available;</li>
     * <li>otherwise just the (encoded) bitstream name.</li>
     * </ol>
     * When the bitstream has no name, one is made up from the sequence ID
     * and the first extension of its format.
     * (The throws clause simply mirrors the public dissemination methods.)
     *
     * @param bitstream bitstream to identify
     * @return text for the objectIdentifierValue element
     */
    private String makeObjectIdentifierValue(Bitstream bitstream)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        String sid = String.valueOf(bitstream.getSequenceID());
        String baseUrl = ConfigurationManager.getProperty("dspace.url");
        String handle = null;

        // get handle of parent Item of this bitstream, if there is one:
        Bundle[] bn = bitstream.getBundles();
        if (bn.length > 0)
        {
            Item bi[] = bn[0].getItems();
            if (bi.length > 0)
            {
                handle = bi[0].getHandle();
            }
        }

        // get or make up name for bitstream:
        String bsName = bitstream.getName();
        if (bsName == null)
        {
            String ext[] = bitstream.getFormat().getExtensions();
            String suffix = "";
            if (ext.length > 0 && ext[0] != null && ext[0].length() > 0)
            {
                // Separate the extension from the base name with a dot,
                // unless the registered extension already carries its own.
                suffix = ext[0].startsWith(".") ? ext[0] : "." + ext[0];
            }
            bsName = "bitstream_" + sid + suffix;
        }

        // NOTE(review): URLEncoder applies form encoding, so a "/" in the
        // handle becomes "%2F" and spaces become "+". Confirm that consumers
        // of this identifier expect that before changing it.
        if (handle != null && baseUrl != null)
        {
            return baseUrl
                    + "/bitstream/"
                    + URLEncoder.encode(handle, "UTF-8")
                    + "/"
                    + sid
                    + "/"
                    + URLEncoder.encode(bsName, "UTF-8");
        }
        return URLEncoder.encode(bsName, "UTF-8");
    }

    /**
     * Disseminate as a list, which always holds exactly the one element
     * produced by {@link #disseminateElement(DSpaceObject)}.
     *
     * @param dso object to disseminate; must be a Bitstream
     * @return single-element list containing the "premis" element
     */
    public List<Element> disseminateList(DSpaceObject dso)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        List<Element> result = new ArrayList<Element>(1);
        result.add(disseminateElement(dso));
        return result;
    }

    /**
     * @return false: this crosswalk prefers to produce a single root
     *         element rather than a list
     */
    public boolean preferList()
    {
        return false;
    }
}