/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *
 * University Of Edinburgh (EDINA)
 * Scotland
 *
 *
 * File Name           : PackageDetectorStep.java
 * Author              : gwaller
 * Approver            : Gareth Waller
 *
 * Notes               :
 *
 *
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * HISTORY
 * -------
 *
 * $LastChangedRevision$
 * $LastChangedDate$
 * $LastChangedBy$
 */
package uk.ac.jorum.submit.step;

import java.io.IOException;
import java.sql.SQLException;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.HttpSession;

import org.apache.log4j.Logger;
import org.dspace.app.util.SubmissionInfo;
import org.dspace.authorize.AuthorizeException;
import org.dspace.authorize.AuthorizeManager;
import org.dspace.content.Bitstream;
import org.dspace.content.BitstreamFormat;
import org.dspace.content.Bundle;
import org.dspace.content.Collection;
import org.dspace.content.DCValue;
import org.dspace.content.DSpaceObject;
import org.dspace.content.InstallItem;
import org.dspace.content.Item;
import org.dspace.content.WorkspaceItem;
import org.dspace.content.packager.PackageDetector;
import org.dspace.content.packager.PackageIngester;
import org.dspace.content.packager.PackageParameters;
import org.dspace.content.packager.PackageUtils;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.handle.HandleManager;
import org.dspace.submit.AbstractProcessingStep;

import uk.ac.jorum.dspace.utils.BundleUtils;
import uk.ac.jorum.utils.ExceptionLogger;

/**
 * Submission step that inspects what the user submitted (either a URL or one or more uploaded
 * files) and, where a registered {@link PackageDetector} recognises it as a content package,
 * hands it to the matching {@link PackageIngester}.
 * <p>
 * URLs starting with {@link #FEED_PREFIX} are treated as feeds and are only processed for
 * administrators. Any other URL is left untouched.
 *
 * @author gwaller
 */
public class PackageDetectorStep extends AbstractProcessingStep {

    private static final Logger logger = Logger.getLogger(PackageDetectorStep.class);

    // GWaller 2/10/09 Read validation param from config
    public static final String VALIDATE_KEY = "packagedetectorstep.validate.manifest";
    public static final boolean validate = ConfigurationManager.getBooleanProperty(VALIDATE_KEY, true);

    // GWaller 17/11/09 Support for feed urls
    public static final String FEED_PREFIX = "feed://";

    /**
     * Signals that a submitted URL could not be handled as a feed. {@link #doProcessing} catches
     * it, logs it and carries on as if a normal URL had been submitted.
     */
    class FeedProcessingException extends Exception {

        private static final long serialVersionUID = 1L;

        public FeedProcessingException() {
            super();
        }

        public FeedProcessingException(String message) {
            super(message);
        }
    }

    /**
     * Creates the configuration used by the PackageIngester class - this is essentially a set of properties.
     *
     * @return the PackageParams instance
     */
    private PackageParameters createPackagerConfig() {
        PackageParameters params = new PackageParameters();

        // Turn XML validation for manifest on/off depending on config
        params.addProperty(PackageIngester.VALIDATE_PARAM, Boolean.toString(validate));

        // Don't fail if a CC licence isn't matched - the user can still select this via the GUI in a later step
        params.addProperty(PackageIngester.FAIL_NO_LICENCE_PARAM, "false");

        // GWaller 6/5/10 IssueID#263 For the web interface only take the first web link found in the metadata
        /*
         * !! NOTE !!
         *
         * Setting this to be 1 mainly for an intraLibrary hack. It is possible for there to be multiple
         * links in metadata representing the same resource e.g.
         *  - link to the real web page
         *  - link to the item in intraLibrary (i.e. the preview command link) which redirects to the real web page link
         *
         * This is therefore making the assumption that the first listed link in the metadata is the web link
         * for the resource
         */
        params.addProperty(PackageIngester.MAX_METADATA_WEB_LINKS_TO_USE, "1");

        return params;
    }

    /**
     * Iterates through all the package detector classes returned by
     * {@link PackageUtils#getDetectorClasses()}, creates a detector instance, checks if the bitstream
     * is a package supported by the detector, and if so calls "ingest" on the detector's ingester
     * to process the package. Processing stops at the first detector that accepts the stream.
     * <p>
     * Any exception raised while detecting or ingesting is logged and swallowed; the stream is then
     * left where it was, i.e. it is treated as a regular file.
     *
     * @param context the DSpace context
     * @param subInfo the currently in progress submission created by XMLUI
     * @param bundleContainingStream the bundle currently holding <code>stream</code>; the stream is removed
     *            from this bundle once it has been processed as a package
     * @param stream the stream of bits to process i.e. the file
     * @param session the HttpSession bound to the current HTTP request (not used by this method)
     * @param copyMetadataToItem boolean indicating if the bundles and metadata found in the package (if the
     *            stream is indeed a supported package) should be copied to the submissioninfo item. This can
     *            be used if only one package is ingested and the metadata should appear in the GUI to aid the
     *            user when submitting. If the user is however submitting multiple packages in one submission,
     *            this should be set to false to force the user to enter metadata for this "wrapper" object
     *            pointing to the processed packages; the ingested package is then installed as its own item
     *            and its handle is recorded in the related content package bundle of the wrapper.
     * @param createArchive if true, the original stream is moved into the archived content package bundle
     * @param collections the collections passed to the ingester; if null, the collection of the submission
     *            item is used
     */
    private void checkPackageAndProcess(Context context,
                                        SubmissionInfo subInfo,
                                        Bundle bundleContainingStream,
                                        Bitstream stream,
                                        HttpSession session,
                                        boolean copyMetadataToItem,
                                        boolean createArchive,
                                        Collection[] collections) {
        Item item = subInfo.getSubmissionItem().getItem();

        if (collections == null) {
            collections = new Collection[] { subInfo.getSubmissionItem().getCollection() };
        }

        // GWaller 9/1//09 IssueID #133 - archived content package should not be in wrapper
        Bundle archivedBundle = null;

        // Cycle through all the supported package detectors and see if we have a valid match
        for (Class<? extends PackageDetector> detector : PackageUtils.getDetectorClasses()) {
            try {
                logger.debug("Checking for validPackage with detector: " + detector.getCanonicalName());
                PackageDetector detectorInst = detector.newInstance();
                detectorInst.setBitstream(stream); // need to set the stream the detector should look at

                // Check to see if we have a package this detector supports
                if (!detectorInst.isValidPackage()) {
                    continue;
                }

                // Found package - the ingester class is stored in this instance
                logger.debug("Detected valid package");
                Class<? extends PackageIngester> ingesterClass = detectorInst.ingesterClass();

                // Create an ingester instance and call ingest
                logger.debug("Instantiating ingester: " + ingesterClass.getCanonicalName());
                PackageIngester ingester = ingesterClass.newInstance();

                // Create the packager configuration
                PackageParameters params = createPackagerConfig();

                // TODO: Support multiple bitstreams and read collections from manifest!
                logger.debug("Calling ingest on " + ingester);
                WorkspaceItem ingestedPackage = ingester.ingest(context, collections, stream.retrieve(), params, null);

                // Add the bitstream to the list of processed streams so we don't expand the package twice
                // if the user moves back and forth
                logger.debug("adding stream name " + stream.getName() + " to processed list");

                if (copyMetadataToItem) {
                    // Single package: add item metadata from the package to the submission item.
                    /*
                     * Rather than deleting the original submission item in submissioninfo (i.e. the package zip),
                     * add new bitstreams containing the package data and metadata - this leaves the original zip
                     * there intact for archival purposes.
                     */
                    BundleUtils.copyBundlesAndResequence(ingestedPackage, item, null);

                    DCValue[] dcValues = ingestedPackage.getItem().getDC(Item.ANY, Item.ANY, Item.ANY);
                    for (DCValue v : dcValues) {
                        item.addMetadata(v.schema, v.element, v.qualifier, v.language, v.value);
                    }

                    /*
                     * Now tidy up the ingested package WorkspaceItem - delete the wrapper and item otherwise it
                     * will appear in unfinished submissions and also leave unnecessary duplicate rows in the
                     * item table. Only done when we have a single content package.
                     */
                    ingestedPackage.deleteAll();

                    // GWaller 9/1//09 IssueID #133 - archived content package should not be in wrapper
                    if (createArchive) {
                        archivedBundle = BundleUtils.getBundleByName(item, Constants.ARCHIVED_CONTENT_PACKAGE_BUNDLE);
                    }
                } else {
                    // Dealing with multiple content packages so don't copy anything to this 'wrapper'.
                    // GWaller 9/1//09 IssueID #133 - archived content package should not be in wrapper
                    if (createArchive) {
                        archivedBundle = BundleUtils.getBundleByName(ingestedPackage.getItem(),
                                Constants.ARCHIVED_CONTENT_PACKAGE_BUNDLE);
                    }
                }

                /*
                 * Now move the bitstream containing the original uploaded package into an archived bundle -
                 * means it won't get processed again if the user moves back and forth between the submission
                 * steps.
                 */
                // GWaller 9/1//09 IssueID #133 The archived bundle will be in this "item" instance if only a
                // single content package was deposited. If however multiple packages were deposited, the
                // "item" instance refers to the wrapper object - the archived bundle should not appear in
                // this! It instead appears in the child item installed below.
                if (createArchive) {
                    // GWaller 12/1/10 IssueID #161 Before adding the archive bitstream to the related item,
                    // reset the sequence number so that it will have a unique number when it is installed by
                    // installItem
                    stream.setSequenceID(-1); // if set to < 0, installItem will reassign the sequence num to a unique val on install
                    archivedBundle.addBitstream(stream);
                    archivedBundle.update();
                }

                // GWaller 11/1/10 IssueID #157 Must archive the zip *before* calling install item so that the
                // preview is created!
                if (!copyMetadataToItem) { // ie multiple packages submitted
                    // Install the ingested package - this assigns a handle
                    Item relatedItem = InstallItem.installItem(context, ingestedPackage);

                    // Add the handle to the related bundle
                    Bundle b = BundleUtils.getBundleByName(item, Constants.RELATED_CONTENT_PACKAGE_BUNDLE);
                    BitstreamFormat bs_format = BitstreamFormat.findByShortDescription(context, "Text");
                    BundleUtils.setBitstreamFromBytes(b,
                            relatedItem.getHandle(),
                            bs_format,
                            relatedItem.getHandle().getBytes(),
                            true);
                }

                bundleContainingStream.removeBitstream(stream);
                bundleContainingStream.update();

                // Stop iterating across package detectors - the stream has been handled
                break;
            } catch (Exception e) {
                ExceptionLogger.logException(logger, e);
                // Problem creating the detector instance (misconfiguration) or processing the package.
                // Just assume it is a regular file and let it through
            }
        }
    }

    /**
     * Examines the submission and expands any recognised content package.
     * <ul>
     * <li>If the submission holds a URL starting with {@link #FEED_PREFIX}, the feed is processed
     *     (administrators only).</li>
     * <li>If it holds any other URL, nothing is done.</li>
     * <li>Otherwise every bitstream in the first content bundle is offered to the package detectors.</li>
     * </ul>
     * A {@link FeedProcessingException} raised while handling a URL is logged and otherwise ignored,
     * so the URL is then treated as a normal one.
     *
     * @param context the DSpace context
     * @param request the current HTTP request
     * @param response the current HTTP response (not used by this step)
     * @param subInfo the in-progress submission
     * @return always {@link #STATUS_COMPLETE}
     */
    @Override
    public int doProcessing(Context context,
                            HttpServletRequest request,
                            HttpServletResponse response,
                            SubmissionInfo subInfo)
            throws ServletException, IOException, SQLException, AuthorizeException {
        logger.debug("PackageDetectorStep::doProcessing Entering ...");
        int result = STATUS_COMPLETE;

        Item item = subInfo.getSubmissionItem().getItem();

        // Check to see if the user submitted a URL as this may be a feed URL which we can process
        try {
            if (BundleUtils.checkUrl(subInfo)) {
                // User submitted a URL - now check to see if it is a feed url
                String url = BundleUtils.getFirstUrlInUrlBundle(item);
                if (url == null) {
                    // Item has a URL bundle but no url found!
                    throw new FeedProcessingException();
                }

                if (url.startsWith(FEED_PREFIX)) {
                    processFeedUrl(context, request, subInfo, item, url);
                } else {
                    logger.debug("User submitted a normal non feed url - no more processing required");
                }
            } else {
                logger.debug("File submitted");
                processUploadedFiles(context, request, subInfo, item);
            }
        } catch (FeedProcessingException f) {
            // Caught exception processing a feed - just return STATUS_COMPLETE and treat as a normal url
            ExceptionLogger.logException(logger, f);
        }

        logger.debug("PackageDetectorStep::doProcessing Leaving with " + result);
        return result;
    }

    /**
     * Handles a URL of the form <code>feed://[collectionHandle@]realUrl</code>: records the feed URL
     * and collection handles as bitstreams in the feed bundle and passes the URL bitstream to the
     * package detectors.
     *
     * @throws FeedProcessingException if the user is not an admin, the collection handle given before
     *             the '@' does not resolve to a collection, or nothing follows the '@'
     */
    private void processFeedUrl(Context context,
                                HttpServletRequest request,
                                SubmissionInfo subInfo,
                                Item item,
                                String url)
            throws ServletException, IOException, SQLException, AuthorizeException, FeedProcessingException {
        // We only want to support feed ingest via the GUI if the user is an admin
        if (!AuthorizeManager.isAdmin(context)) {
            logger.warn("Non-admin user attempting to process a feed url <" + url + ">");
            throw new FeedProcessingException("Non admin user attempted to enter a feed <" + url + ">");
        }

        // Obtain the FEED bundle (presumably created on demand by BundleUtils - confirm)
        Bundle feedBundle = BundleUtils.getBundleByName(item, Constants.FEED_BUNDLE);

        int atPos = url.indexOf("@");
        String colHandle = subInfo.getCollectionHandle(); // default collection chosen by the user
        String forcedCollectionHandle = null;
        Collection[] collections;
        String updatedUrl;

        if (atPos > -1) {
            forcedCollectionHandle = url.substring(FEED_PREFIX.length(), atPos);

            // Check the collection named in the URL exists. The handle taken from the URL is the one
            // that must be resolved here, not the default collection handle.
            DSpaceObject obj = HandleManager.resolveToObject(context, forcedCollectionHandle);
            if (obj == null) {
                logger.error("Supplied feed collection handle <" + forcedCollectionHandle + "> was not found");
                throw new FeedProcessingException("Feed collection handle <" + forcedCollectionHandle + "> was not found");
            }
            if (!(obj instanceof Collection)) {
                logger.error("Supplied feed collection does not resolve to a DSpace collection: " + forcedCollectionHandle);
                throw new FeedProcessingException("Feed collection handle <" + forcedCollectionHandle + "> is not a collection");
            }
            logger.debug("Found feed collection: " + forcedCollectionHandle);
            collections = new Collection[] { (Collection) obj };

            // String.substring(length) returns "" rather than throwing, so a trailing '@' has to be
            // detected explicitly.
            if (atPos + 1 >= url.length()) {
                logger.warn("Feed url doesn't contain any chars after @ symbol: " + url);
                throw new FeedProcessingException("Feed url doesn't contain any chars after @ symbol: " + url);
            }

            // Update the URL to remove the collection info
            updatedUrl = FEED_PREFIX + url.substring(atPos + 1);
            try {
                BundleUtils.setFirstUrlInUrlBundle(context, item, url, updatedUrl);
            } catch (IndexOutOfBoundsException ie) {
                // Kept from the original code in case the helper itself raises this - confirm whether it can
                logger.warn("Feed url doesn't contain any chars after @ symbol: " + url);
                throw new FeedProcessingException();
            }
        } else {
            // Use the default collection the user selected
            collections = new Collection[] { (Collection) (HandleManager.resolveToObject(context, colHandle)) };
            updatedUrl = url;
        }

        // Write the default collection handle and url to bitstreams in the feed bundle
        BitstreamFormat bs_format = BitstreamFormat.findByShortDescription(context, "Text");

        // set the URL as the primary bitstream
        Bitstream feedUrlBitstream = BundleUtils.setBitstreamFromBytes(feedBundle,
                Constants.FEED_BUNDLE_URL_BITSTREAM_NAME,
                bs_format,
                updatedUrl.getBytes(),
                true);

        BundleUtils.setBitstreamFromBytes(feedBundle,
                Constants.FEED_BUNDLE_DEF_COL_HANDLE_NAME,
                bs_format,
                colHandle.getBytes(),
                false);

        // Set forced collection handle if it was supplied
        if (forcedCollectionHandle != null) {
            BundleUtils.setBitstreamFromBytes(feedBundle,
                    Constants.FEED_BUNDLE_FORCE_COL_HANDLE_NAME,
                    bs_format,
                    forcedCollectionHandle.getBytes(),
                    false);
        }

        // Now process using the suitable ingester
        checkPackageAndProcess(context, subInfo, feedBundle, feedUrlBitstream, request.getSession(), true, false, collections);
    }

    /**
     * Offers every bitstream in the item's first content bundle to the package detectors. Package
     * metadata is copied onto the submission item only when exactly one bitstream is present.
     */
    private void processUploadedFiles(Context context,
                                      HttpServletRequest request,
                                      SubmissionInfo subInfo,
                                      Item item)
            throws ServletException, IOException, SQLException, AuthorizeException {
        // Uploaded file should be in a bitstream in the item bundle
        Bundle[] contentBundles = item.getBundles(Constants.CONTENT_BUNDLE_NAME);
        logger.debug("Found " + contentBundles.length + " bundles with name " + Constants.CONTENT_BUNDLE_NAME);

        // NOTE: Does it make sense to have multiple ORIGINAL Bundles?? Is the first one only ever used?
        if (contentBundles.length == 0) {
            return;
        }

        // The code in JorumUploadStep appears to add the content to the first bundle so use this
        Bundle firstBundle = contentBundles[0];
        Bitstream[] streams = firstBundle.getBitstreams();
        logger.debug("Found " + streams.length + " bitstreams for first content bundle");

        // If we are uploading multiple items at once then there would be multiple bitstreams
        boolean multipleFiles = streams.length > 1;

        // Cycle through all the streams and process them
        for (Bitstream s : streams) {
            // Only copy the metadata to the submission item if we aren't dealing with multiple files
            checkPackageAndProcess(context, subInfo, firstBundle, s, request.getSession(), !multipleFiles, true, null);
        }
    }

    /**
     * @return always 1 - this step has a single page
     */
    @Override
    public int getNumberOfPages(HttpServletRequest request, SubmissionInfo subInfo) throws ServletException {
        return 1;
    }
}