/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.packager;
import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Collection;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.crosswalk.CrosswalkException;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.CollectionService;
import org.dspace.content.service.ItemService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.LogManager;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;
import org.dspace.workflow.WorkflowException;
/**
* An abstract implementation of a DSpace Package Ingester, which
* implements a few helper/utility methods that most (all?) PackageIngesters
* may find useful.
* <P>
* First, implements recursive functionality in ingestAll() and replaceAll()
* methods of the PackageIngester interface. These methods are setup to
* recursively call ingest() and replace() respectively.
* <P>
* Finally, it also implements several utility methods (createDSpaceObject(),
* finishCreateItem(), updateDSpaceObject()) which subclasses may find useful.
* These methods allow subclasses to easily create/update objects without
* having to worry too much about normal DSpace submission workflows (which are
* taken care of in these utility methods).
* <P>
* All Package ingesters should either extend this abstract class
* or implement <code>PackageIngester</code> to better suit their needs.
*
* @author Tim Donohue
* @see PackageIngester
* @see org.dspace.core.service.PluginService
*/
public abstract class AbstractPackageIngester
implements PackageIngester
{
/** log4j category */
private static Logger log = Logger.getLogger(AbstractPackageIngester.class);
protected final CollectionService collectionService = ContentServiceFactory.getInstance().getCollectionService();
protected final ItemService itemService = ContentServiceFactory.getInstance().getItemService();
protected final HandleService handleService = HandleServiceFactory.getInstance().getHandleService();
/**
* References to other packages -- these are the next packages to ingest recursively
* Key = DSpace Object just ingested, Value = List of all packages relating to a DSpaceObject
**/
private Map<DSpaceObject,List<String>> packageReferences = new HashMap<DSpaceObject,List<String>>();
/**
* Map of all successfully ingested/replaced DSpace objects for current
* import process (used by ingestAll()/replaceAll()).
* The key is the package file (which was used to create the object),
* and the value is the Identifier (i.e. Handle) of the DSpaceObject created/replaced.
**/
private Map<File, String> pkgIngestedMap = new LinkedHashMap<File, String>();
/**
* Recursively create one or more DSpace Objects out of the contents
* of the ingested package (and all other referenced packages).
* The initial object is created under the indicated parent. All other
* objects are created based on their relationship to the initial object.
* <p>
* For example, a scenario may be to create a Collection based on a
* collection-level package, and also create an Item for every item-level
* package referenced by the collection-level package.
* <p>
* The output of this method is one or more newly created DSpaceObject Identifiers
* (i.e. Handles).
* <p>
* The packager <em>may</em> choose not to implement <code>ingestAll</code>,
* or simply forward the call to <code>ingest</code> if it is unable to support
* recursive ingestion.
* <p>
* The deposit license (Only significant for Item) is passed
* explicitly as a string since there is no place for it in many
* package formats. It is optional and may be given as
* <code>null</code>.
*
* @param context DSpace context.
* @param parent parent under which to create the initial object
* (may be null -- in which case ingester must determine parent from package
* or throw an error).
* @param pkgFile The initial package file to ingest
* @param params Properties-style list of options (interpreted by each packager).
* @param license may be null, which takes default license.
* @return List of DSpaceObjects created
*
* @throws PackageValidationException if initial package (or any referenced package)
* is unacceptable or there is a fatal error in creating a DSpaceObject
* @throws UnsupportedOperationException if this packager does not
* implement <code>ingestAll</code>
* @throws CrosswalkException if crosswalk error
* @throws IOException if IO error
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
* @throws WorkflowException if workflow error
*/
@Override
public List<String> ingestAll(Context context, DSpaceObject parent, File pkgFile,
PackageParameters params, String license)
throws PackageException, UnsupportedOperationException,
CrosswalkException, AuthorizeException,
SQLException, IOException, WorkflowException {
//If unset, make sure the Parameters specifies this is a recursive ingest
if(!params.recursiveModeEnabled())
{
params.setRecursiveModeEnabled(true);
}
//Initial DSpace Object to ingest
DSpaceObject dso = null;
// If we have not previously parsed/ingested this package file
// NOTE: This ensures we don't accidentally ingest the same package
// TWICE, e.g. an Item's package may be referenced from multiple
// Collection packages (if Item is mapped to multiple Collections)
if(!getIngestedMap().containsKey(pkgFile))
{
try
{
//actually ingest pkg using provided PackageIngester
dso = ingest(context, parent, pkgFile, params, license);
}
catch(IllegalStateException ie)
{
// NOTE: if we encounter an IllegalStateException, this means the
// handle is already in use and this object already exists.
//if we are skipping over (i.e. keeping) existing objects
if(params.keepExistingModeEnabled())
{
log.warn(LogManager.getHeader(context, "skip_package_ingest", "Object already exists, package-skipped=" + pkgFile.getName()));
}
else // Pass this exception on -- which essentially causes a full rollback of all changes (this is the default)
{
throw ie;
}
}
}
else
{
log.info(LogManager.getHeader(context, "skip_package_ingest", "Object was already ingested, package-skipped=" + pkgFile.getName()));
}
// As long as an object was successfully created from this package
if(dso!=null)
{
// Add to map of successfully ingested packages/objects (if not already added)
addToIngestedMap(pkgFile, dso);
//We can only recursively ingest non-Item packages
//(NOTE: Items have no children, as Bitstreams/Bundles are created from Item packages)
if(dso.getType()!=Constants.ITEM)
{
//Check if we found child package references when ingesting this latest DSpaceObject
List<String> childPkgRefs = getPackageReferences(dso);
//we can only recursively ingest child packages
//if we have references to them
if(childPkgRefs!=null && !childPkgRefs.isEmpty())
{
//Recursively ingest each child package, using this current object as the parent DSpace Object
for(String childPkgRef : childPkgRefs)
{
//Assume package reference is relative to current (parent) package location
File childPkg = new File(pkgFile.getAbsoluteFile().getParent(), childPkgRef);
// fun, it's recursive! -- ingested referenced package
// NOTE: we are passing "null" as the Parent object, since we want to restore to the
// Parent object specified in the child Package.
// (Just in case this child is only *mapped* to the current Collection)
ingestAll(context, null, childPkg, params, license);
// A Collection can map to Items that it does not "own".
// If a Collection package has an Item as a child, it
// should be mapped regardless of ownership.
if (Constants.COLLECTION == dso.getType())
{
// If this newly ingested parent object was a Collection,
// lookup the newly ingested child Item and make sure
// it is mapped to this Collection.
String childHandle = getIngestedMap().get(childPkg);
if(childHandle!=null)
{
Item childItem = (Item) handleService.resolveToObject(context, childHandle);
// Ensure Item is mapped to Collection that referenced it
Collection collection = (Collection) dso;
if (childItem!=null && !itemService.isIn(childItem, collection))
{
collectionService.addItem(context, collection, childItem);
}
}
}
}
}//end if child pkgs
}//end if not an Item
}//end if DSpaceObject not null
//Return list of all objects ingested
return getIngestedList();
}
/**
* Recursively replace one or more DSpace Objects out of the contents
* of the ingested package (and all other referenced packages).
* The initial object to replace is indicated by <code>dso</code>. All other
* objects are replaced based on information provided in the referenced packages.
* <p>
* For example, a scenario may be to replace a Collection based on a
* collection-level package, and also replace *every* Item in that collection
* based on the item-level packages referenced by the collection-level package.
* <p>
* Please note that since the <code>dso</code> input only specifies the
* initial object to replace, any additional objects to replace must be
* determined based on the referenced packages (or initial package itself).
* <p>
* The output of this method is one or more replaced DSpaceObject Identifiers
* (i.e. Handles).
* <p>
* The packager <em>may</em> choose not to implement <code>replaceAll</code>,
* since it somewhat contradicts the archival nature of DSpace. It also
* may choose to forward the call to <code>replace</code> if it is unable to
* support recursive replacement.
*
* @param context DSpace context.
* @param dso initial existing DSpace Object to be replaced, may be null
* if object to replace can be determined from package
* @param pkgFile The package file to ingest.
* @param params Properties-style list of options specific to this packager
* @return List of Identifiers of DSpaceObjects replaced
*
* @throws PackageValidationException if initial package (or any referenced package)
* is unacceptable or there is a fatal error in creating a DSpaceObject
* @throws UnsupportedOperationException if this packager does not
* implement <code>replaceAll</code>
* @throws CrosswalkException if crosswalk error
* @throws IOException if IO error
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
* @throws WorkflowException if workflow error
*/
@Override
public List<String> replaceAll(Context context, DSpaceObject dso,
File pkgFile, PackageParameters params)
throws PackageException, UnsupportedOperationException,
CrosswalkException, AuthorizeException,
SQLException, IOException, WorkflowException {
//If unset, make sure the Parameters specifies this is a recursive replace
if(!params.recursiveModeEnabled())
{
params.setRecursiveModeEnabled(true);
}
//Initial DSpace Object to replace
DSpaceObject replacedDso = null;
// If we have not previously parsed/ingested this package file
// NOTE: This ensures we don't accidentally ingest the same package
// TWICE, e.g. an Item's package may be referenced from multiple
// Collection packages (if Item is mapped to multiple Collections)
if(!getIngestedMap().containsKey(pkgFile))
{
//Actually ingest pkg using provided PackageIngester, and replace object
//NOTE: 'dso' may be null! If it is null, the PackageIngester must determine
// the object to be replaced from the package itself.
replacedDso = replace(context, dso, pkgFile, params);
}
else
{
log.info(LogManager.getHeader(context, "skip_package_replace", "Object was already replaced, package-skipped=" + pkgFile.getName()));
}
// As long as an object was successfully replaced from this package
if(replacedDso!=null)
{
// Add to map of successfully ingested packages/objects (if not already added)
addToIngestedMap(pkgFile, replacedDso);
//We can only recursively ingest non-Item packages
//(NOTE: Items have no children, as Bitstreams/Bundles are created from Item packages)
if(replacedDso.getType()!=Constants.ITEM)
{
//Check if we found child package references when replacing this latest DSpaceObject
List<String> childPkgRefs = getPackageReferences(replacedDso);
//we can only recursively ingest child packages
//if we have references to them
if(childPkgRefs!=null && !childPkgRefs.isEmpty())
{
//Recursively replace each child package
for(String childPkgRef : childPkgRefs)
{
//Assume package reference is relative to current package location
File childPkg = new File(pkgFile.getAbsoluteFile().getParent(), childPkgRef);
//fun, it's recursive! -- replaced referenced package as a child of current object
// Pass object to replace as 'null', as we don't know which object to replace.
// (it will therefore be looked up in the package itself)
replaceAll(context, null, childPkg, params);
// A Collection can map to Items that it does not "own".
// If a Collection package has an Item as a child, it
// should be mapped regardless of ownership.
if (Constants.COLLECTION == replacedDso.getType())
{
// If this newly ingested parent object was a Collection,
// lookup the newly ingested child Item and make sure
// it is mapped to this Collection.
String childHandle = getIngestedMap().get(childPkg);
if(childHandle!=null)
{
Item childItem = (Item) handleService.resolveToObject(context, childHandle);
// Ensure Item is mapped to Collection that referenced it
Collection collection = (Collection) replacedDso;
if (childItem!=null && !itemService.isIn(childItem, collection))
{
collectionService.addItem(context, collection, childItem);
}
}
}
}
}//end if child pkgs
}//end if not an Item
}//end if DSpaceObject not null
//Return list of all objects replaced
return getIngestedList();
}
/**
* During ingestion process, some submission information packages (SIPs)
* may reference other packages to be ingested (recursively).
* <P>
* This method collects all references to other packages, so that we
* can choose to recursively ingest them, as necessary, alongside the
* DSpaceObject created from the original SIP.
* <P>
* References are collected based on the DSpaceObject created from the SIP
* (this way we keep the context of these references).
*
* @param dso DSpaceObject whose SIP referenced another package
* @param packageRef A reference to another package, which can be ingested after this one
*/
public void addPackageReference(DSpaceObject dso, String packageRef)
{
List<String> packageRefValues = null;
// Check if we already have an entry for packages reference by this object
if(packageReferences.containsKey(dso))
{
packageRefValues = packageReferences.get(dso);
}
else
{
//Create a new empty list of references
packageRefValues = new ArrayList<String>();
}
//add this package reference to existing list and save
packageRefValues.add(packageRef);
packageReferences.put(dso, packageRefValues);
}
/**
* Return a list of known SIP references from a newly created DSpaceObject.
* <P>
* These references should detail where another package exists which
* should be ingested alongside the current DSpaceObject.
* <P>
* The <code>AbstractPackageIngester</code> or an equivalent SIP handler is expected
* to understand how to deal with these package references.
*
* @param dso DSpaceObject whose SIP referenced other SIPs
* @return List of Strings which are the references to external submission ingestion packages
* (may be null if no SIPs were referenced)
*/
public List<String> getPackageReferences(DSpaceObject dso)
{
return packageReferences.get(dso);
}
/**
* Add parsed package and resulting DSpaceObject to list of successfully
* ingested/replaced objects.
* @param pkgFile the package file that was used to create the object
* @param dso the DSpaceObject created/replaced
*/
protected void addToIngestedMap(File pkgFile, DSpaceObject dso)
{
// Add to list of successfully ingested packages
if(!pkgIngestedMap.containsKey(pkgFile))
{
pkgIngestedMap.put(pkgFile, dso.getHandle());
}
}
/**
* Return Map of all packages ingested and the DSpaceObjects which have been
* created/replaced by this instance of the Ingester.
*
* <P>
* The Map "key" is the package file which was parsed, and the "value"
* is the Identifier (i.e. Handle) of the DSpaceObject which was created/replaced.
*
* @return Map of DSpaceObjects which have been created/replaced.
*/
protected Map<File,String> getIngestedMap()
{
return pkgIngestedMap;
}
/**
* Return List of all DSpaceObject Identifiers which have been ingested/replaced by
* this instance of the Ingester.
* <P>
* This list can be useful in reporting back to the user what content has
* been added or replaced. It's used by ingestAll() and replaceAll() to
* return this list of everything that was ingested/replaced.
*
* @return List of Identifiers for DSpaceObjects which have been added/replaced
*/
protected List<String> getIngestedList()
{
// We have the list of ingested objects in our IngestedMap.
// So, we simply have to convert that Collection to a List
java.util.Collection<String> coll = pkgIngestedMap.values();
if(coll instanceof List)
return (List) coll;
else
return new ArrayList(coll);
}
}