/* * ItemUpdate.java * * Version: $Revision: 3984 $ * * Date: $Date: 2009-06-29 22:33:25 -0400 (Mon, 29 Jun 2009) $ * * Copyright (c) 2002-2009, The DSpace Foundation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * - Neither the name of the DSpace Foundation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ package org.dspace.app.itemupdate; import java.io.BufferedWriter; import java.io.File; import java.io.FilenameFilter; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.PosixParser; import org.dspace.content.Item; import org.dspace.core.ConfigurationManager; import org.dspace.core.Context; import org.dspace.eperson.EPerson; /** * * Provides some batch editing capabilities for items in DSpace: * Metadata fields - Add, Delete * Bitstreams - Add, Delete * * The design has been for compatibility with ItemImporter * in the use of the DSpace archive format which is used to * specify changes on a per item basis. The directory names * to correspond to each item are arbitrary and will only be * used for logging purposes. The reference to the item is * from a required dc.identifier with the item handle to be * included in the dublin_core.xml (or similar metadata) file. * * Any combination of these actions is permitted in a single run of this class * The order of actions is important when used in combination. * It is the responsibility of the calling class (here, ItemUpdate) * to register UpdateAction classes in the order to which they are * to be performed. * * * It is unfortunate that so much code needs to be borrowed * from ItemImport as it is not reusable in private methods, etc. * Some of this has been placed into the MetadataUtilities class * for possible reuse elsewhere. * * * @author W. Hays based on a conceptual design by R. Rodgers * */ public class ItemUpdate { static public String SUPPRESS_UNDO_FILENAME = "suppress_undo"; static public final String CONTENTS_FILE = "contents"; static public final String DELETE_CONTENTS_FILE = "delete_contents"; static public String HANDLE_PREFIX = null; static public Map<String, String> filterAliases = new HashMap<String, String>(); static public boolean verbose = false; static { filterAliases.put("ORIGINAL", "org.dspace.app.itemupdate.OriginalBitstreamFilter"); filterAliases.put("ORIGINAL_AND_DERIVATIVES", "org.dspace.app.itemupdate.OriginalWithDerivativesBitstreamFilter"); filterAliases.put("TEXT", "org.dspace.app.itemupdate.DerivativeTextBitstreamFilter"); filterAliases.put("THUMBNAIL", "org.dspace.app.itemupdate.ThumbnailBitstreamFilter"); } // File listing filter to check for folders static FilenameFilter directoryFilter = new FilenameFilter() { public boolean accept(File dir, String n) { File f = new File(dir.getAbsolutePath() + File.separatorChar + n); return f.isDirectory(); } }; // File listing filter to check for files (not directories) static FilenameFilter fileFilter = new FilenameFilter() { public boolean accept(File dir, String n) { File f = new File(dir.getAbsolutePath() + File.separatorChar + n); return (f.isFile()); } }; // instance variables private ActionManager actionMgr = new ActionManager(); private List<String> undoActionList = new ArrayList<String>(); private String eperson; /** * * @param argv */ public static void main(String[] argv) { // create an options object and populate it CommandLineParser parser = new PosixParser(); Options options = new Options(); //processing basis for detemining items //item-specific changes with metadata in source directory with dublic_core.xml files options.addOption("s", "source", true, "root directory of source dspace archive "); //actions on items options.addOption("a", "addmetadata", true, "add metadata specified for each item; multiples separated by semicolon ';'"); options.addOption("d", "deletemetadata", true, "delete metadata specified for each item"); options.addOption("A", "addbitstreams", false, "add bitstreams as specified for each item"); // extra work to get optional argument Option delBitstreamOption = new Option("D", "deletebitstreams", true, "delete bitstreams as specified for each item"); delBitstreamOption.setOptionalArg(true); delBitstreamOption.setArgName("BitstreamFilter"); options.addOption(delBitstreamOption); //other params options.addOption("e", "eperson", true, "email of eperson doing the update"); options.addOption("i", "itemfield", true, "optional metadata field that containing item identifier; default is dc.identifier.uri"); options.addOption("F", "filter-properties", true, "filter class name; only for deleting bitstream"); options.addOption("v", "verbose", false, "verbose logging"); //special run states options.addOption("t", "test", false, "test run - do not actually import items"); options.addOption("P", "provenance", false, "suppress altering provenance field for bitstream changes"); options.addOption("h", "help", false, "help"); int status = 0; boolean isTest = false; boolean alterProvenance = true; String itemField = null; String metadataIndexName = null; Context context = null; ItemUpdate iu = new ItemUpdate(); try { CommandLine line = parser.parse(options, argv); if (line.hasOption('h')) { HelpFormatter myhelp = new HelpFormatter(); myhelp.printHelp("ItemUpdate", options); pr(""); pr("Examples:"); pr(" adding metadata: ItemUpdate -e jsmith@mit.edu -s sourcedir -a dc.contributor -a dc.subject "); pr(" deleting metadata: ItemUpdate -e jsmith@mit.edu -s sourcedir -d dc.description.other"); pr(" adding bitstreams: ItemUpdate -e jsmith@mit.edu -s sourcedir -A -i dc.identifier"); pr(" deleting bitstreams: ItemUpdate -e jsmith@mit.edu -s sourcedir -D ORIGINAL "); pr(""); System.exit(0); } if (line.hasOption('v')) { verbose = true; } if (line.hasOption('P')) { alterProvenance = false; pr("Suppressing changes to Provenance field option"); } iu.eperson = line.getOptionValue('e'); // db ID or email if (!line.hasOption('s')) // item specific changes from archive dir { pr("Missing source archive option"); System.exit(1); } String sourcedir = line.getOptionValue('s'); if (line.hasOption('t')) //test { isTest = true; pr("**Test Run** - not actually updating items."); } if (line.hasOption('i')) { itemField = line.getOptionValue('i'); } if (line.hasOption('d')) { String[] targetFields = line.getOptionValues('d'); DeleteMetadataAction delMetadataAction = (DeleteMetadataAction) iu.actionMgr.getUpdateAction(DeleteMetadataAction.class); delMetadataAction.addTargetFields(targetFields); //undo is an add for (String field : targetFields) { iu.undoActionList.add(" -a " + field + " "); } pr("Delete metadata for fields: "); for (String s : targetFields) { pr(" " + s); } } if (line.hasOption('a')) { String[] targetFields = line.getOptionValues('a'); AddMetadataAction addMetadataAction = (AddMetadataAction) iu.actionMgr.getUpdateAction(AddMetadataAction.class); addMetadataAction.addTargetFields(targetFields); //undo is a delete followed by an add of a replace record for target fields for (String field : targetFields) { iu.undoActionList.add(" -d " + field + " "); } for (String field : targetFields) { iu.undoActionList.add(" -a " + field + " "); } pr("Add metadata for fields: "); for (String s : targetFields) { pr(" " + s); } } if (line.hasOption('D')) // undo not supported { pr("Delete bitstreams "); String[] filterNames = line.getOptionValues('D'); if ((filterNames != null) && (filterNames.length > 1)) { pr("Error: Only one filter can be a used at a time."); System.exit(1); } String filterName = line.getOptionValue('D'); pr("Filter argument: " + filterName); if (filterName == null) // indicates using delete_contents files { DeleteBitstreamsAction delAction = (DeleteBitstreamsAction) iu.actionMgr.getUpdateAction(DeleteBitstreamsAction.class); delAction.setAlterProvenance(alterProvenance); } else { // check if param is on ALIAS list String filterClassname = filterAliases.get(filterName); if (filterClassname == null) { filterClassname = filterName; } BitstreamFilter filter = null; try { Class<?> cfilter = Class.forName(filterClassname); pr("BitstreamFilter class to instantiate: " + cfilter.toString()); filter = (BitstreamFilter) cfilter.newInstance(); //unfortunate cast, an erasure consequence } catch(Exception e) { pr("Error: Failure instantiating bitstream filter class: " + filterClassname); System.exit(1); } String filterPropertiesName = line.getOptionValue('F'); if (filterPropertiesName != null) //not always required { try { // TODO try multiple relative locations, e.g. source dir if (!filterPropertiesName.startsWith("/")) { filterPropertiesName = sourcedir + File.separator + filterPropertiesName; } filter.initProperties(filterPropertiesName); } catch(Exception e) { pr("Error: Failure finding properties file for bitstream filter class: " + filterPropertiesName); System.exit(1); } } DeleteBitstreamsByFilterAction delAction = (DeleteBitstreamsByFilterAction) iu.actionMgr.getUpdateAction(DeleteBitstreamsByFilterAction.class); delAction.setAlterProvenance(alterProvenance); delAction.setBitstreamFilter(filter); //undo not supported } } if (line.hasOption('A')) { pr("Add bitstreams "); AddBitstreamsAction addAction = (AddBitstreamsAction) iu.actionMgr.getUpdateAction(AddBitstreamsAction.class); addAction.setAlterProvenance(alterProvenance); iu.undoActionList.add(" -D "); // delete_contents file will be written, no arg required } if (!iu.actionMgr.hasActions()) { pr("Error - an action must be specified"); System.exit(1); } else { pr("Actions to be performed: "); for (UpdateAction ua : iu.actionMgr) { pr(" " + ua.getClass().getName()); } } pr("ItemUpdate - initializing run on " + (new Date()).toString()); context = new Context(); iu.setEPerson(context, iu.eperson); context.setIgnoreAuthorization(true); HANDLE_PREFIX = ConfigurationManager.getProperty("handle.canonical.prefix"); if (HANDLE_PREFIX == null || HANDLE_PREFIX.length() == 0) { HANDLE_PREFIX = "http://hdl.handle.net/"; } iu.processArchive(context, sourcedir, itemField, metadataIndexName, alterProvenance, isTest); context.complete(); // complete all transactions context.setIgnoreAuthorization(false); } catch (Exception e) { if (context != null && context.isValid()) { context.abort(); context.setIgnoreAuthorization(false); } e.printStackTrace(); pr(e.toString()); status = 1; } if (isTest) { pr("***End of Test Run***"); } else { pr("End."); } System.exit(status); } private void processArchive(Context context, String sourceDirPath, String itemField, String metadataIndexName, boolean alterProvenance, boolean isTest) throws Exception { // open and process the source directory File sourceDir = new File(sourceDirPath); if ((sourceDir == null) || !sourceDir.exists() || !sourceDir.isDirectory()) { pr("Error, cannot open archive source directory " + sourceDirPath); throw new Exception("error with archive source directory " + sourceDirPath); } String[] dircontents = sourceDir.list(directoryFilter); //just the names, not the path Arrays.sort(dircontents); //Undo is suppressed to prevent undo of undo boolean suppressUndo = false; File fSuppressUndo = new File(sourceDir, SUPPRESS_UNDO_FILENAME); if (fSuppressUndo.exists()) { suppressUndo = true; } File undoDir = null; //sibling directory of source archive if (!suppressUndo && !isTest) { undoDir = initUndoArchive(sourceDir); } int itemCount = 0; int successItemCount = 0; for (String dirname : dircontents) { itemCount++; pr(""); pr("processing item " + dirname); try { ItemArchive itarch = ItemArchive.create(context, new File(sourceDir, dirname), itemField); for (UpdateAction action : actionMgr) { pr("action: " + action.getClass().getName()); action.execute(context, itarch, isTest, suppressUndo); if (!isTest) { if (!suppressUndo) { itarch.writeUndo(undoDir); } } } if (!isTest) { Item item = itarch.getItem(); item.update(); //need to update before commit context.commit(); item.decache(); } ItemUpdate.pr("Item " + dirname + " completed"); successItemCount++; } catch(Exception e) { pr("Exception processing item " + dirname + ": " + e.toString()); } } if (!suppressUndo && !isTest) { StringBuilder sb = new StringBuilder("dsrun org.dspace.app.itemupdate.ItemUpdate "); sb.append(" -e ").append(this.eperson); sb.append(" -s ").append(undoDir); if (itemField != null) { sb.append(" -i ").append(itemField); } if (!alterProvenance) { sb.append(" -P "); } if (isTest) { sb.append(" -t "); } for (String actionOption : undoActionList) { sb.append(actionOption); } PrintWriter pw = null; try { File cmdFile = new File (undoDir.getParent(), undoDir.getName() + "_command.sh"); pw = new PrintWriter(new BufferedWriter(new FileWriter(cmdFile))); pw.println(sb.toString()); } finally { pw.close(); } } pr(""); pr("Done processing. Successful items: " + successItemCount + " of " + itemCount + " items in source archive"); pr(""); } /** * * to avoid overwriting the undo source tree on repeated processing * sequence numbers are added and checked * * @param sourceDir - the original source directory * @return the directory of the undo archive * @throws FileNotFoundException * @throws IOException */ private File initUndoArchive(File sourceDir) throws FileNotFoundException, IOException { File parentDir = sourceDir.getAbsoluteFile().getParentFile(); if (parentDir == null) { throw new FileNotFoundException("Parent directory of archive directory not found; unable to write UndoArchive; no processing performed"); } String sourceDirName = sourceDir.getName(); int seqNo = 1; File undoDir = new File(parentDir, "undo_" + sourceDirName + "_" + seqNo); while (undoDir.exists()) { undoDir = new File(parentDir, "undo_" + sourceDirName+ "_" + ++seqNo); //increment } // create root directory if (!undoDir.mkdir()) { pr("ERROR creating Undo Archive directory "); throw new IOException("ERROR creating Undo Archive directory "); } //Undo is suppressed to prevent undo of undo File fSuppressUndo = new File(undoDir, ItemUpdate.SUPPRESS_UNDO_FILENAME); try { fSuppressUndo.createNewFile(); } catch(IOException e) { pr("ERROR creating Suppress Undo File " + e.toString()); throw e; } return undoDir; } //private void write private void setEPerson(Context context, String eperson) throws Exception { if (eperson == null) { pr("Error - an eperson to do the importing must be specified"); pr(" (run with -h flag for details)"); throw new Exception("EPerson not specified."); } EPerson myEPerson = null; if (eperson.indexOf('@') != -1) { // @ sign, must be an email myEPerson = EPerson.findByEmail(context, eperson); } else { myEPerson = EPerson.find(context, Integer.parseInt(eperson)); } if (myEPerson == null) { pr("Error, eperson cannot be found: " + eperson); throw new Exception("Invalid EPerson"); } context.setCurrentUser(myEPerson); } /** * poor man's logging * As with ItemImport, API logging goes through log4j to the DSpace.log files * whereas the batch logging goes to the console to be captured there. * @param s */ static void pr(String s) { System.out.println(s); } /** * print if verbose flag is set * @param s */ static void prv(String s) { if (verbose) { System.out.println(s); } } } //end of class