/** * The contents of this file are subject to the license and copyright * detailed in the LICENSE and NOTICE files at the root of the source * tree and available online at * * http://www.dspace.org/license/ */ package org.dspace.app.mediafilter; import java.io.InputStream; import java.util.*; import org.dspace.app.mediafilter.service.MediaFilterService; import org.dspace.authorize.service.AuthorizeService; import org.dspace.content.*; import org.dspace.content.Collection; import org.dspace.content.service.*; import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.core.SelfNamedPlugin; import org.dspace.eperson.Group; import org.dspace.eperson.service.GroupService; import org.dspace.services.ConfigurationService; import org.springframework.beans.factory.InitializingBean; import org.springframework.beans.factory.annotation.Autowired; /** * MediaFilterManager is the class that invokes the media/format filters over the * repository's content. A few command line flags affect the operation of the * MFM: -v verbose outputs all extracted text to STDOUT; -f force forces all * bitstreams to be processed, even if they have been before; -n noindex does not * recreate index after processing bitstreams; -i [identifier] limits processing * scope to a community, collection or item; and -m [max] limits processing to a * maximum number of items. */ public class MediaFilterServiceImpl implements MediaFilterService, InitializingBean { @Autowired(required = true) protected AuthorizeService authorizeService; @Autowired(required = true) protected BitstreamFormatService bitstreamFormatService; @Autowired(required = true) protected BitstreamService bitstreamService; @Autowired(required = true) protected BundleService bundleService; @Autowired(required = true) protected CollectionService collectionService; @Autowired(required = true) protected CommunityService communityService; @Autowired(required = true) protected GroupService groupService; @Autowired(required = true) protected ItemService itemService; @Autowired(required = true) protected ConfigurationService configurationService; protected int max2Process = Integer.MAX_VALUE; // maximum number items to process protected int processed = 0; // number items processed protected Item currentItem = null; // current item being processed protected List<FormatFilter> filterClasses = null; protected Map<String, List<String>> filterFormats = new HashMap<>(); protected List<String> skipList = null; //list of identifiers to skip during processing protected final List<String> publicFiltersClasses = new ArrayList<>(); protected boolean isVerbose = false; protected boolean isQuiet = false; protected boolean isForce = false; // default to not forced protected MediaFilterServiceImpl() { } @Override public void afterPropertiesSet() throws Exception { String[] publicPermissionFilters = configurationService.getArrayProperty("filter.org.dspace.app.mediafilter.publicPermission"); if(publicPermissionFilters != null) { for(String filter : publicPermissionFilters) { publicFiltersClasses.add(filter.trim()); } } } @Override public void applyFiltersAllItems(Context context) throws Exception { if(skipList!=null) { //if a skip-list exists, we need to filter community-by-community //so we can respect what is in the skip-list List<Community> topLevelCommunities = communityService.findAllTop(context); for (Community topLevelCommunity : topLevelCommunities) { applyFiltersCommunity(context, topLevelCommunity); } } else { //otherwise, just find every item and process Iterator<Item> itemIterator = itemService.findAll(context); while (itemIterator.hasNext() && processed < max2Process) { applyFiltersItem(context, itemIterator.next()); } } } @Override public void applyFiltersCommunity(Context context, Community community) throws Exception { //only apply filters if community not in skip-list if(!inSkipList(community.getHandle())) { List<Community> subcommunities = community.getSubcommunities(); for (Community subcommunity : subcommunities) { applyFiltersCommunity(context, subcommunity); } List<Collection> collections = community.getCollections(); for (Collection collection : collections) { applyFiltersCollection(context, collection); } } } @Override public void applyFiltersCollection(Context context, Collection collection) throws Exception { //only apply filters if collection not in skip-list if(!inSkipList(collection.getHandle())) { Iterator<Item> itemIterator = itemService.findAllByCollection(context, collection); while (itemIterator.hasNext() && processed < max2Process) { applyFiltersItem(context, itemIterator.next()); } } } @Override public void applyFiltersItem(Context c, Item item) throws Exception { //only apply filters if item not in skip-list if(!inSkipList(item.getHandle())) { //cache this item in MediaFilterManager //so it can be accessed by MediaFilters as necessary currentItem = item; if (filterItem(c, item)) { // increment processed count ++processed; } // clear item objects from context cache and internal cache currentItem = null; } } @Override public boolean filterItem(Context context, Item myItem) throws Exception { // get 'original' bundles List<Bundle> myBundles = itemService.getBundles(myItem, "ORIGINAL"); boolean done = false; for (Bundle myBundle : myBundles) { // now look at all of the bitstreams List<Bitstream> myBitstreams = myBundle.getBitstreams(); for (Bitstream myBitstream : myBitstreams) { done |= filterBitstream(context, myItem, myBitstream); } } return done; } @Override public boolean filterBitstream(Context context, Item myItem, Bitstream myBitstream) throws Exception { boolean filtered = false; // iterate through filter classes. A single format may be actioned // by more than one filter for (FormatFilter filterClass : filterClasses) { //List fmts = (List)filterFormats.get(filterClasses[i].getClass().getName()); String pluginName = null; //if this filter class is a SelfNamedPlugin, //its list of supported formats is different for //differently named "plugin" if (SelfNamedPlugin.class.isAssignableFrom(filterClass.getClass())) { //get plugin instance name for this media filter pluginName = ((SelfNamedPlugin) filterClass).getPluginInstanceName(); } //Get list of supported formats for the filter (and possibly named plugin) //For SelfNamedPlugins, map key is: // <class-name><separator><plugin-name> //For other MediaFilters, map key is just: // <class-name> List<String> fmts = filterFormats.get(filterClass.getClass().getName() + (pluginName != null ? FILTER_PLUGIN_SEPARATOR + pluginName : "")); if (fmts.contains(myBitstream.getFormat(context).getShortDescription())) { try { // only update item if bitstream not skipped if (processBitstream(context, myItem, myBitstream, filterClass)) { itemService.update(context, myItem); // Make sure new bitstream has a sequence // number filtered = true; } } catch (Exception e) { String handle = myItem.getHandle(); List<Bundle> bundles = myBitstream.getBundles(); long size = myBitstream.getSize(); String checksum = myBitstream.getChecksum() + " (" + myBitstream.getChecksumAlgorithm() + ")"; int assetstore = myBitstream.getStoreNumber(); // Printout helpful information to find the errored bitstream. System.out.println("ERROR filtering, skipping bitstream:\n"); System.out.println("\tItem Handle: " + handle); for (Bundle bundle : bundles) { System.out.println("\tBundle Name: " + bundle.getName()); } System.out.println("\tFile Size: " + size); System.out.println("\tChecksum: " + checksum); System.out.println("\tAsset Store: " + assetstore); System.out.println(e); e.printStackTrace(); } } else if (filterClass instanceof SelfRegisterInputFormats) { // Filter implements self registration, so check to see if it should be applied // given the formats it claims to support SelfRegisterInputFormats srif = (SelfRegisterInputFormats) filterClass; boolean applyFilter = false; // Check MIME type String[] mimeTypes = srif.getInputMIMETypes(); if (mimeTypes != null) { for (String mimeType : mimeTypes) { if (mimeType.equalsIgnoreCase(myBitstream.getFormat(context).getMIMEType())) { applyFilter = true; } } } // Check description if (!applyFilter) { String[] descriptions = srif.getInputDescriptions(); if (descriptions != null) { for (String desc : descriptions) { if (desc.equalsIgnoreCase(myBitstream.getFormat(context).getShortDescription())) { applyFilter = true; } } } } // Check extensions if (!applyFilter) { String[] extensions = srif.getInputExtensions(); if (extensions != null) { for (String ext : extensions) { List<String> formatExtensions = myBitstream.getFormat(context).getExtensions(); if (formatExtensions != null && formatExtensions.contains(ext)) { applyFilter = true; } } } } // Filter claims to handle this type of file, so attempt to apply it if (applyFilter) { try { // only update item if bitstream not skipped if (processBitstream(context, myItem, myBitstream, filterClass)) { itemService.update(context, myItem); // Make sure new bitstream has a sequence // number filtered = true; } } catch (Exception e) { System.out.println("ERROR filtering, skipping bitstream #" + myBitstream.getID() + " " + e); e.printStackTrace(); } } } } return filtered; } @Override public boolean processBitstream(Context context, Item item, Bitstream source, FormatFilter formatFilter) throws Exception { //do pre-processing of this bitstream, and if it fails, skip this bitstream! if(!formatFilter.preProcessBitstream(context, item, source, isVerbose)) { return false; } boolean overWrite = isForce; // get bitstream filename, calculate destination filename String newName = formatFilter.getFilteredName(source.getName()); Bitstream existingBitstream = null; // is there an existing rendition? Bundle targetBundle = null; // bundle we're modifying List<Bundle> bundles = itemService.getBundles(item, formatFilter.getBundleName()); // check if destination bitstream exists if (bundles.size() > 0) { // only finds the last match (FIXME?) for (Bundle bundle : bundles) { List<Bitstream> bitstreams = bundle.getBitstreams(); for (Bitstream bitstream : bitstreams) { if (bitstream.getName().equals(newName)) { targetBundle = bundle; existingBitstream = bitstream; } } } } // if exists and overwrite = false, exit if (!overWrite && (existingBitstream != null)) { if (!isQuiet) { System.out.println("SKIPPED: bitstream " + source.getID() + " (item: " + item.getHandle() + ") because '" + newName + "' already exists"); } return false; } if(isVerbose) { System.out.println("PROCESSING: bitstream " + source.getID() + " (item: " + item.getHandle() + ")"); } InputStream destStream; try { System.out.println("File: " + newName); destStream = formatFilter.getDestinationStream(item, bitstreamService.retrieve(context, source), isVerbose); if (destStream == null) { if (!isQuiet) { System.out.println("SKIPPED: bitstream " + source.getID() + " (item: " + item.getHandle() + ") because filtering was unsuccessful"); } return false; } } catch (OutOfMemoryError oome) { System.out.println("!!! OutOfMemoryError !!!"); return false; } // create new bundle if needed if (bundles.size() < 1) { targetBundle = bundleService.create(context, item, formatFilter.getBundleName()); } else { // take the first match targetBundle = bundles.get(0); } Bitstream b = bitstreamService.create(context, targetBundle, destStream); // Now set the format and name of the bitstream b.setName(context, newName); b.setSource(context, "Written by FormatFilter " + formatFilter.getClass().getName() + " on " + DCDate.getCurrent() + " (GMT)."); b.setDescription(context, formatFilter.getDescription()); // Find the proper format BitstreamFormat bf = bitstreamFormatService.findByShortDescription(context, formatFilter.getFormatString()); bitstreamService.setFormat(context, b, bf); bitstreamService.update(context, b); //Set permissions on the derivative bitstream //- First remove any existing policies authorizeService.removeAllPolicies(context, b); //- Determine if this is a public-derivative format if(publicFiltersClasses.contains(formatFilter.getClass().getSimpleName())) { //- Set derivative bitstream to be publicly accessible Group anonymous = groupService.findByName(context, Group.ANONYMOUS); authorizeService.addPolicy(context, b, Constants.READ, anonymous); } else { //- Inherit policies from the source bitstream authorizeService.inheritPolicies(context, source, b); } // fixme - set date? // we are overwriting, so remove old bitstream if (existingBitstream != null) { bundleService.removeBitstream(context, targetBundle, existingBitstream); } if (!isQuiet) { System.out.println("FILTERED: bitstream " + source.getID() + " (item: " + item.getHandle() + ") and created '" + newName + "'"); } //do post-processing of the generated bitstream formatFilter.postProcessBitstream(context, item, b); return true; } @Override public Item getCurrentItem() { return currentItem; } @Override public boolean inSkipList(String identifier) { if(skipList!=null && skipList.contains(identifier)) { if (!isQuiet) { System.out.println("SKIP-LIST: skipped bitstreams within identifier " + identifier); } return true; } else { return false; } } @Override public void setVerbose(boolean isVerbose) { this.isVerbose = isVerbose; } @Override public void setQuiet(boolean isQuiet) { this.isQuiet = isQuiet; } @Override public void setForce(boolean isForce) { this.isForce = isForce; } @Override public void setMax2Process(int max2Process) { this.max2Process = max2Process; } @Override public void setFilterClasses(List<FormatFilter> filterClasses) { this.filterClasses = filterClasses; } @Override public void setSkipList(List<String> skipList) { this.skipList = skipList; } @Override public void setFilterFormats(Map<String, List<String>> filterFormats) { this.filterFormats = filterFormats; } }