/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.packager;
import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
import java.util.Arrays;
import java.util.ArrayList;
import org.apache.log4j.Logger;
import org.dspace.app.util.Util;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bundle;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.Site;
import org.dspace.content.crosswalk.CrosswalkException;
import org.dspace.core.Constants;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Context;
import org.dspace.license.CreativeCommons;
import edu.harvard.hul.ois.mets.Agent;
import edu.harvard.hul.ois.mets.Loctype;
import edu.harvard.hul.ois.mets.Mets;
import edu.harvard.hul.ois.mets.MetsHdr;
import edu.harvard.hul.ois.mets.Name;
import edu.harvard.hul.ois.mets.Role;
import edu.harvard.hul.ois.mets.Div;
import edu.harvard.hul.ois.mets.Mptr;
import edu.harvard.hul.ois.mets.StructMap;
import edu.harvard.hul.ois.mets.Type;
import edu.harvard.hul.ois.mets.helper.MetsException;
import edu.harvard.hul.ois.mets.helper.PCData;
import java.util.Date;
import org.dspace.core.Utils;
/**
* Subclass of the METS packager framework to disseminate a DSpace
* Archival Information Package (AIP). The AIP is intended to be, foremost,
* a _complete_ and _accurate_ representation of one object in the DSpace
* object model. An AIP contains all of the information needed to restore
* the object precisely in another DSpace archive instance.
* <p>
* Configuration keys:
* <p>
* The following take as values a space-and-or-comma-separated list
* of plugin names that name *either* a DisseminationCrosswalk or
* StreamDisseminationCrosswalk plugin. Shown are the default values.
* The value may be a simple crosswalk name, or a METS MDsec-name followed by
* a colon and the crosswalk name e.g. "DSpaceDepositLicense:DSPACE_DEPLICENSE"
*
* # MD types to put in the sourceMD section of the object.
* aip.disseminate.sourceMD = AIP-TECHMD
*
* # MD types to put in the techMD section of the object (and member Bitstreams if an Item)
* aip.disseminate.techMD = PREMIS
*
* # MD types to put in digiprovMD section of the object.
* #aip.disseminate.digiprovMD =
*
* # MD types to put in the rightsMD section of the object.
* aip.disseminate.rightsMD = DSpaceDepositLicense:DSPACE_DEPLICENSE, \
* CreativeCommonsRDF:DSPACE_CCRDF, CreativeCommonsText:DSPACE_CCTXT, METSRights
*
* # MD types to put in the dmdSec elements corresponding to the object.
* aip.disseminate.dmd = MODS, DIM
*
* @author Larry Stone
* @author Tim Donohue
* @version $Revision: 1.1 $
* @see AbstractMETSDisseminator
* @see AbstractPackageDisseminator
*/
public class DSpaceAIPDisseminator extends AbstractMETSDisseminator
{
/** log4j logger for this class. */
private static final Logger log = Logger.getLogger(DSpaceAIPDisseminator.class);
/**
 * Unique identifier for the profile of the METS document.
 * To ensure uniqueness, it is the URL that the XML schema document would
 * have _if_ there were to be one. There is no schema at this time.
 */
public static final String PROFILE_1_0 =
"http://www.dspace.org/schema/aip/mets_aip_1_0.xsd";
/** TYPE of the div containing AIP's parent handle in its mptr. */
public static final String PARENT_DIV_TYPE = "AIP Parent Link";
// Default MDTYPE value for the deposit license -- "magic string".
// NOTE: format is <label-for-METS>:<DSpace-crosswalk-name>
private static final String DSPACE_DEPOSIT_LICENSE_MDTYPE =
"DSpaceDepositLicense:DSPACE_DEPLICENSE";
// Default MDTYPE value for the Creative Commons license in RDF form -- "magic string".
// NOTE: format is <label-for-METS>:<DSpace-crosswalk-name>
private static final String CREATIVE_COMMONS_RDF_MDTYPE =
"CreativeCommonsRDF:DSPACE_CCRDF";
// Default MDTYPE value for the Creative Commons license in text form -- "magic string".
// NOTE: format is <label-for-METS>:<DSpace-crosswalk-name>
private static final String CREATIVE_COMMONS_TEXT_MDTYPE =
"CreativeCommonsText:DSPACE_CCTXT";
// Dissemination parameters passed to the AIP Disseminator; stored by
// disseminate() and read later by getBundleList().
private PackageParameters disseminateParams = null;
// Bundle names to filter on when building an AIP. Starts out empty and is
// filled in by getBundleList() from the 'filterBundles' / 'includeBundles' options.
private List<String> filterBundles = new ArrayList<String>();
// Whether 'filterBundles' specifies an exclusion list (default) or an inclusion list.
// getBundleList() switches this to false when the option value starts with '+'.
private boolean excludeBundles = true;
/**
 * Disseminate a single DSpace object as an AIP.
 * <p>
 * The passed-in parameters are remembered so that the other methods of this
 * class (e.g. the bundle filter) can consult them. Any bundle filter computed
 * for a previous call is discarded, so that a reused disseminator instance
 * never applies a filter derived from an earlier set of parameters.
 * <p>
 * If the <code>updatedAfter</code> option is present and the object is an
 * Item, the Item is only disseminated when its last-modified date is later
 * than the given date. (This only works for Items, as DSpace doesn't store a
 * last-modified date for Collections or Communities.)
 *
 * @param context DSpace Context
 * @param dso     DSpace object to disseminate
 * @param params  Packager Parameters (options)
 * @param pkgFile file the package is written to
 * @throws IOException if the <code>updatedAfter</code> value cannot be parsed
 *                     as an ISO-8601 date
 * @throws PackageValidationException propagated from the superclass dissemination
 * @throws CrosswalkException propagated from the superclass dissemination
 * @throws AuthorizeException propagated from the superclass dissemination
 * @throws SQLException propagated from the superclass dissemination
 */
@Override
public void disseminate(Context context, DSpaceObject dso,
                        PackageParameters params, File pkgFile)
    throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException
{
    // Before disseminating anything, save the passed-in PackageParameters,
    // so they can be used by all methods.
    disseminateParams = params;

    // The bundle filter is derived lazily from the parameters; drop whatever
    // was cached for an earlier call so it gets recomputed from 'params'.
    filterBundles = new ArrayList<String>();
    excludeBundles = true;

    // If the user asked to only disseminate objects updated *after* a
    // specific date, skip Items that have not changed since then.
    if (params != null && params.containsKey("updatedAfter") && dso.getType() == Constants.ITEM)
    {
        Date afterDate = Utils.parseISO8601Date(params.getProperty("updatedAfter"));
        // if null is returned, we couldn't parse the date!
        if (afterDate == null)
        {
            throw new IOException("Invalid date passed in via 'updatedAfter' option. Date must be in ISO-8601 format, and include both a day and time (e.g. 2010-01-01T00:00:00).");
        }

        // An Item without a last-modified date cannot be shown to be
        // unchanged, so it is disseminated rather than skipped.
        Date lastModified = ((Item) dso).getLastModified();
        if (lastModified != null && !lastModified.after(afterDate))
        {
            return;
        }
    }

    // just do a normal dissemination as specified by AbstractMETSDisseminator
    super.disseminate(context, dso, params, pkgFile);
}
/**
 * Identify the METS profile that manifests produced by this packager
 * conform to.
 *
 * @return the profile identifier, {@link #PROFILE_1_0}
 */
@Override
public String getProfile()
{
    return DSpaceAIPDisseminator.PROFILE_1_0;
}
/**
 * Map a DSpace bundle name to the name of its METS fileGrp.
 * In an AIP the fileGrp carries the bundle name unchanged.
 *
 * @param bname name of DSpace bundle.
 * @return name of the fileGrp, identical to <code>bname</code>
 */
@Override
public String bundleToFileGrp(String bname)
{
    // Direct, one-to-one mapping: the fileGrp is named after the bundle.
    final String fileGrpName = bname;
    return fileGrpName;
}
/**
 * Create the metsHdr element for the AIP METS Manifest.
 * <p>
 * No CREATEDATE is written: for AIPs the md5 checksum should be identical
 * when no content has changed, and a creation timestamp would change the
 * checksum on every run.
 * LASTMODDATE is the last-modified time of the target object, and is only
 * set for Items.
 * Two agents are added: the custodian (the DSpace archive, named by the
 * Site handle) and the creator (the DSpace software version).
 *
 * @param context DSpace Context
 * @param dso current DSpace Object
 * @param params Packager Parameters
 * @return the populated metsHdr element
 */
@Override
public MetsHdr makeMetsHdr(Context context, DSpaceObject dso,
                           PackageParameters params)
{
    MetsHdr header = new MetsHdr();

    // Only Items contribute a LASTMODDATE.
    if (Constants.ITEM == dso.getType())
    {
        Item item = (Item) dso;
        header.setLASTMODDATE(item.getLastModified());
    }

    // Agent Custodian - name custodian, the DSpace Archive, by handle.
    Agent custodian = buildOtherTypeAgent(Role.CUSTODIAN, "DSpace Archive",
                                          Site.getSiteHandle());
    header.getContent().add(custodian);

    // Agent Creator - name creator, which is a specific version of DSpace.
    Agent creator = buildOtherTypeAgent(Role.CREATOR, "DSpace Software",
                                        "DSpace " + Util.getSourceVersion());
    header.getContent().add(creator);

    return header;
}

/**
 * Build a METS agent of TYPE "OTHER" carrying a single name element.
 *
 * @param role      ROLE attribute of the agent
 * @param otherType OTHERTYPE attribute of the agent
 * @param nameText  text content of the agent's name element
 * @return the new agent
 */
private static Agent buildOtherTypeAgent(Role role, String otherType, String nameText)
{
    Agent agent = new Agent();
    agent.setROLE(role);
    agent.setTYPE(Type.OTHER);
    agent.setOTHERTYPE(otherType);

    Name agentName = new Name();
    agentName.getContent().add(new PCData(nameText));
    agent.getContent().add(agentName);
    return agent;
}
/**
 * Return the names of all crosswalks to run for the dmdSec section(s) of
 * the METS Manifest.
 * <p>
 * The names come from the comma-separated <code>aip.disseminate.dmd</code>
 * configuration property. When that property is unset or blank, the
 * default is MODS and DIM (DSpace Internal Metadata).
 *
 * @param context DSpace Context
 * @param dso current DSpace Object
 * @param params Packager Parameters
 * @return array of crosswalk names to run
 * @throws SQLException part of the overridden signature; not raised directly here
 * @throws IOException part of the overridden signature; not raised directly here
 * @throws AuthorizeException part of the overridden signature; not raised directly here
 */
@Override
public String [] getDmdTypes(Context context, DSpaceObject dso, PackageParameters params)
    throws SQLException, IOException, AuthorizeException
{
    String dmdTypes = ConfigurationManager.getProperty("aip.disseminate.dmd");

    // A blank value must be treated like a missing one: splitting an empty
    // string yields a single empty element, i.e. a nameless crosswalk.
    if (dmdTypes == null || dmdTypes.trim().isEmpty())
    {
        return new String[] { "MODS", "DIM" };
    }

    return dmdTypes.trim().split("\\s*,\\s*");
}
/**
 * Return the names of all crosswalks to run for the techMD section of
 * the METS Manifest.
 * <p>
 * The names come from the comma-separated <code>aip.disseminate.techMD</code>
 * configuration property. When that property is unset or blank, the
 * default is PREMIS for Bitstreams and no crosswalk at all for any other
 * type of object.
 *
 * @param context DSpace Context
 * @param dso current DSpace Object
 * @param params Packager Parameters
 * @return array of crosswalk names to run (possibly empty)
 * @throws SQLException part of the overridden signature; not raised directly here
 * @throws IOException part of the overridden signature; not raised directly here
 * @throws AuthorizeException part of the overridden signature; not raised directly here
 */
@Override
public String[] getTechMdTypes(Context context, DSpaceObject dso, PackageParameters params)
    throws SQLException, IOException, AuthorizeException
{
    String techTypes = ConfigurationManager.getProperty("aip.disseminate.techMD");

    // Explicit, non-blank configuration wins. (A blank value would otherwise
    // split into a single empty crosswalk name.)
    if (techTypes != null && !techTypes.trim().isEmpty())
    {
        return techTypes.trim().split("\\s*,\\s*");
    }

    // Defaults: only Bitstreams get technical metadata.
    if (dso.getType() == Constants.BITSTREAM)
    {
        return new String[] { "PREMIS" };
    }
    return new String[0];
}
/**
 * Return the names of all crosswalks to run for the sourceMD section of
 * the METS Manifest.
 * <p>
 * The names come from the comma-separated
 * <code>aip.disseminate.sourceMD</code> configuration property. When that
 * property is unset or blank, the default is AIP-TECHMD.
 * <p>
 * In an AIP, the sourceMD element MUST include the original persistent
 * identifier (Handle) of the object, and the original persistent ID
 * (Handle) of its parent in the archive, so that it can be restored.
 *
 * @param context DSpace Context
 * @param dso current DSpace Object
 * @param params Packager Parameters
 * @return array of crosswalk names to run
 * @throws SQLException part of the overridden signature; not raised directly here
 * @throws IOException part of the overridden signature; not raised directly here
 * @throws AuthorizeException part of the overridden signature; not raised directly here
 */
@Override
public String[] getSourceMdTypes(Context context, DSpaceObject dso, PackageParameters params)
    throws SQLException, IOException, AuthorizeException
{
    String sourceTypes = ConfigurationManager.getProperty("aip.disseminate.sourceMD");

    // A blank value must be treated like a missing one: splitting an empty
    // string yields a single empty element, i.e. a nameless crosswalk.
    if (sourceTypes == null || sourceTypes.trim().isEmpty())
    {
        return new String[] { "AIP-TECHMD" };
    }

    return sourceTypes.trim().split("\\s*,\\s*");
}
/**
 * Return the names of all crosswalks to run for the digiprovMD section of
 * the METS Manifest.
 * <p>
 * The names come from the comma-separated
 * <code>aip.disseminate.digiprovMD</code> configuration property. When that
 * property is unset or blank, no crosswalks are returned.
 *
 * @param context DSpace Context
 * @param dso current DSpace Object
 * @param params Packager Parameters
 * @return array of crosswalk names to run (possibly empty)
 * @throws SQLException part of the overridden signature; not raised directly here
 * @throws IOException part of the overridden signature; not raised directly here
 * @throws AuthorizeException part of the overridden signature; not raised directly here
 */
@Override
public String[] getDigiprovMdTypes(Context context, DSpaceObject dso, PackageParameters params)
    throws SQLException, IOException, AuthorizeException
{
    String dpTypes = ConfigurationManager.getProperty("aip.disseminate.digiprovMD");

    // "key =" with nothing after it means "none"; splitting the empty string
    // would instead produce one empty crosswalk name.
    if (dpTypes == null || dpTypes.trim().isEmpty())
    {
        return new String[0];
    }

    return dpTypes.trim().split("\\s*,\\s*");
}
/**
 * Return the names of all crosswalks to run for the rightsMD section of
 * the METS Manifest.
 * <p>
 * The names come from the comma-separated
 * <code>aip.disseminate.rightsMD</code> configuration property. When that
 * property is unset or blank, the defaults are:
 * <ul>
 * <li>for Items, the Deposit License if the Item has one;</li>
 * <li>for Items, the Creative Commons license in RDF form if present,
 *     otherwise in text form if present;</li>
 * <li>for all objects, METSRights.</li>
 * </ul>
 *
 * @param context DSpace Context
 * @param dso current DSpace Object
 * @param params Packager Parameters
 * @return array of crosswalk names to run
 * @throws SQLException may be propagated from the license lookups
 * @throws IOException may be propagated from the license lookups
 * @throws AuthorizeException may be propagated from the license lookups
 */
@Override
public String[] getRightsMdTypes(Context context, DSpaceObject dso, PackageParameters params)
    throws SQLException, IOException, AuthorizeException
{
    String rTypes = ConfigurationManager.getProperty("aip.disseminate.rightsMD");

    // Explicit, non-blank configuration wins. (A blank value would otherwise
    // split into a single empty crosswalk name.)
    if (rTypes != null && !rTypes.trim().isEmpty())
    {
        return rTypes.trim().split("\\s*,\\s*");
    }

    // Unspecified in the configuration file: build the default settings.
    List<String> result = new ArrayList<String>();

    // Licenses only apply to an Item
    if (dso.getType() == Constants.ITEM)
    {
        Item item = (Item) dso;

        // By default, disseminate the Deposit License and any CC license
        // into the Item's rightsMD section.
        if (PackageUtils.findDepositLicense(context, item) != null)
        {
            result.add(DSPACE_DEPOSIT_LICENSE_MDTYPE);
        }

        // The RDF form of the CC license is preferred over the text form.
        if (CreativeCommons.getLicenseRdfBitstream(item) != null)
        {
            result.add(CREATIVE_COMMONS_RDF_MDTYPE);
        }
        else if (CreativeCommons.getLicenseTextBitstream(item) != null)
        {
            result.add(CREATIVE_COMMONS_TEXT_MDTYPE);
        }
    }

    // By default, also add METSRights info to the rightsMD
    result.add("METSRights");

    return result.toArray(new String[result.size()]);
}
/**
 * Adds another structMap element to contain the "parent link" that
 * is an essential part of every AIP. This is a structMap of one
 * div, which contains an mptr indicating the Handle of the parent
 * of this object in the archive. The div has a unique TYPE attribute
 * value, "AIP Parent Link", and the mptr has a LOCTYPE of "HANDLE"
 * and an xlink:href containing the raw Handle value.
 * <p>
 * Note that the parent Handle has to be stored here because the
 * parent is needed to create a DSpace Object when restoring the
 * AIP; it cannot be determined later once the ingester parses it
 * out of the metadata when the crosswalks are run. So, since the
 * crosswalks require an object to operate on, and creating the
 * object requires a parent, we cannot depend on metadata processed
 * by crosswalks (e.g. AIP techMd) for the parent, it has to be at
 * a higher level in the AIP manifest. The structMap is an obvious
 * and standards-compliant location for it.
 * <p>
 * The parent is the owning Collection of an Item, the first parent
 * Community of a Collection, and the parent Community of a Community
 * (or the Site for a top-level Community). No structMap is added when
 * no parent Handle can be determined, which is expected for a Site; for
 * an Item or Collection that has no parent a warning is logged.
 *
 * @param context DSpace context
 * @param dso Current DSpace object
 * @param params Packager Parameters
 * @param mets METS manifest
 * @throws SQLException may be propagated from the parent lookups
 * @throws IOException part of the overridden signature; not raised directly here
 * @throws AuthorizeException part of the overridden signature; not raised directly here
 * @throws MetsException part of the overridden signature; not raised directly here
 */
@Override
public void addStructMap(Context context, DSpaceObject dso,
                         PackageParameters params, Mets mets)
    throws SQLException, IOException, AuthorizeException, MetsException
{
    // find parent Handle
    String parentHandle = null;
    switch (dso.getType())
    {
        case Constants.ITEM:
        {
            Collection owner = ((Item) dso).getOwningCollection();
            if (owner != null)
            {
                parentHandle = owner.getHandle();
            }
            else
            {
                log.warn("Item " + dso.getHandle()
                        + " has no owning Collection; its AIP will not contain a parent link.");
            }
            break;
        }
        case Constants.COLLECTION:
        {
            Community[] parents = ((Collection) dso).getCommunities();
            if (parents != null && parents.length > 0)
            {
                parentHandle = parents[0].getHandle();
            }
            else
            {
                log.warn("Collection " + dso.getHandle()
                        + " has no parent Community; its AIP will not contain a parent link.");
            }
            break;
        }
        case Constants.COMMUNITY:
        {
            // A top-level Community hangs directly off the Site.
            Community parent = ((Community) dso).getParentCommunity();
            parentHandle = (parent == null) ? Site.getSiteHandle() : parent.getHandle();
            break;
        }
        case Constants.SITE:
        default:
            // no parent
            break;
    }

    // Parent Handle should only be null if we are creating a site-wide AIP
    if (parentHandle == null)
    {
        return;
    }

    // add a structMap to contain div pointing to parent:
    StructMap structMap = new StructMap();
    structMap.setID(gensym("struct"));
    structMap.setTYPE("LOGICAL");
    structMap.setLABEL("Parent");

    Div div0 = new Div();
    div0.setID(gensym("div"));
    div0.setTYPE(PARENT_DIV_TYPE);
    div0.setLABEL("Parent of this DSpace Object");

    Mptr mptr = new Mptr();
    mptr.setID(gensym("mptr"));
    mptr.setLOCTYPE(Loctype.HANDLE);
    mptr.setXlinkHref(parentHandle);

    div0.getContent().add(mptr);
    structMap.getContent().add(div0);
    mets.getContent().add(structMap);
}
/**
 * Decide whether a bundle's content belongs in the AIP.
 * <P>
 * The decision is driven by the bundle filter returned by
 * {@link #getBundleList()}, which is built from the "filterBundles"
 * (or legacy "includeBundles") option:
 * <ul>
 * <li>an inclusion filter consisting of the single entry "all"
 *     (the default when no option is given) includes every bundle;</li>
 * <li>any other inclusion filter includes only the listed bundles;</li>
 * <li>an exclusion filter includes every bundle except the listed ones.</li>
 * </ul>
 *
 * @param bundle Bundle to check for
 * @return true if bundle should be disseminated when disseminating Item AIPs
 */
@Override
public boolean includeBundle(Bundle bundle)
{
    // Must be called first: it also determines whether the list is an
    // inclusion or an exclusion filter.
    List<String> bundleList = getBundleList();

    // Check if we are disseminating all bundles
    boolean includeAll = !this.excludeBundles
            && bundleList.size() == 1
            && bundleList.get(0).equalsIgnoreCase("all");
    if (includeAll)
    {
        return true;
    }

    // Check the very list we were handed, so that an overriding
    // getBundleList() is honoured.
    boolean inList = bundleList.contains(bundle.getName());

    // Based on whether this is an inclusion or exclusion filter,
    // return whether this bundle should be included.
    return this.excludeBundles ? !inList : inList;
}
/**
 * Get our list of bundles to include/exclude in this AIP,
 * based on the passed in parameters.
 * <p>
 * The list is taken from the "filterBundles" option; if that is absent or
 * empty, the legacy "includeBundles" option is used and treated as an
 * inclusion list. If neither is available (or no parameters have been
 * stored yet) the filter defaults to "+all", i.e. include all bundles.
 * A value starting with '+' is an inclusion list, anything else an
 * exclusion list. Whitespace around the individual bundle names is ignored.
 * <p>
 * The result is cached in this object, together with the
 * inclusion/exclusion flag.
 *
 * @return List of bundles to filter on
 */
protected List<String> getBundleList()
{
    // Check if we already have our list of bundles to filter on, if so, just return it.
    if (this.filterBundles != null && !this.filterBundles.isEmpty())
    {
        return this.filterBundles;
    }

    // Parameters are only available once disseminate() has stored them.
    PackageParameters options = this.disseminateParams;

    // Check for 'filterBundles' option, as this allows for inclusion/exclusion of bundles.
    String bundleList = (options == null) ? null : options.getProperty("filterBundles");
    if (bundleList != null)
    {
        bundleList = bundleList.trim();
    }

    if (bundleList == null || bundleList.isEmpty())
    {
        // For backwards compatibility with DSpace 1.7.x, check the
        // 'includeBundles' option to see if a list of bundles was provided
        bundleList = (options == null) ? "+all" : options.getProperty("includeBundles", "+all").trim();

        // 'includeBundles' is always an inclusion list, so make sure it is marked as one
        if (!bundleList.startsWith("+"))
        {
            bundleList = "+" + bundleList;
        }
    }

    // At this point, 'bundleList' is *non-null*. If our filter list begins
    // with a '+', it names the bundles to *include*; otherwise all bundles
    // *except* the listed ones are included. Set the flag in both
    // directions so that a recomputation never inherits an old value.
    boolean inclusion = bundleList.startsWith("+");
    this.excludeBundles = !inclusion;
    if (inclusion)
    {
        // remove the preceding '+' from our bundle list
        bundleList = bundleList.substring(1);
    }

    // Split our list of bundles on commas, tolerating "A, B" style spacing
    String[] names = bundleList.split(",");
    List<String> parsed = new ArrayList<String>(names.length);
    for (String name : names)
    {
        parsed.add(name.trim());
    }

    this.filterBundles = parsed;
    return this.filterBundles;
}
/**
 * Describe the additional command-line options this packager accepts
 * through the <code>-o</code> / <code>--option</code> flags of the
 * Packager script.
 * <p>
 * The text is the superclass help followed by a description of the
 * <code>filterBundles</code> option.
 *
 * @return a string describing additional command-line options available
 * with this packager
 */
@Override
public String getParameterHelp()
{
    StringBuilder help = new StringBuilder();

    // Start with whatever the superclass documents ...
    help.append(super.getParameterHelp());

    // ... then append the option specific to this packager.
    help.append("\n\n");
    help.append("* filterBundles=[bundleList] ");
    help.append("List of bundles specifying which Bundles should be included in an AIP. If this list starts with a '+' symbol,");
    help.append(" then it represents a list of bundles to *include* in the AIP. By default, the list represents a list of bundles");
    help.append(" to *exclude* from the AIP.");

    return help.toString();
}
}