package edu.harvard.iq.dataverse.engine.command.impl;

import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.search.IndexServiceBean;
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.dataaccess.DataFileIO;
import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand;
import edu.harvard.iq.dataverse.engine.command.CommandContext;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException;
import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.lang.StringUtils;

/**
 * Deletes a data file: both the database entity and the physical storage object.
 *
 * @author michael
 */
@RequiredPermissions(Permission.EditDataset)
public class DeleteDataFileCommand extends AbstractVoidCommand {

    private static final Logger logger = Logger.getLogger(DeleteDataFileCommand.class.getCanonicalName());

    private final DataFile doomed;
    private final boolean destroy;

    public DeleteDataFileCommand(DataFile doomed, DataverseRequest aRequest) {
        this(doomed, aRequest, false);
    }

    public DeleteDataFileCommand(DataFile doomed, DataverseRequest aRequest, boolean destroy) {
        super(aRequest, doomed.getOwner());
        this.doomed = doomed;
        this.destroy = destroy;
    }
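    // A hypothetical usage sketch (variable names are illustrative;
    // Dataverse commands are submitted through the command engine):
    //
    //   engineSvc.submit(new DeleteDataFileCommand(dataFile, request));       // delete a draft file
    //   engineSvc.submit(new DeleteDataFileCommand(dataFile, request, true)); // superuser-only destroy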
    @Override
    protected void executeImpl(CommandContext ctxt) throws CommandException {
        if (destroy) {
            // TODO: clean this logic up!
            // For now, if called as destroy, check for superuser access:
            if (doomed.getOwner().isReleased()
                    && (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser())) {
                throw new PermissionException("Destroy can only be called by superusers.",
                        this, Collections.singleton(Permission.DeleteDatasetDraft), doomed);
            }
        } else {
            // Since this is not a destroy, we want to make sure the file is a
            // draft; we run three sanity checks:
            // 1. confirm the file is not released;
            // 2. confirm the file is only attached to one version (i.e. only has one fileMetadata);
            // 3. confirm that version is not released.
            if (doomed.isReleased()
                    || doomed.getFileMetadatas().size() > 1
                    || doomed.getFileMetadata().getDatasetVersion().isReleased()) {
                throw new CommandException("Cannot delete file: the DataFile is published, is attached to more than one Dataset Version, or is attached to a released Dataset Version.", this);
            }
        }

        // We need to delete a bunch of files from the file system.
        // First we try to delete the data file itself; if that fails,
        // we throw an exception and abort the command without trying
        // to remove the object from the database.
        // TODO: !!
        // The code below assumes all the files are stored locally, on the filesystem!
        // -- L.A. 4.0.2
        logger.log(Level.FINE, "Delete command called on an unpublished DataFile {0}", doomed.getId());
        String fileSystemName = doomed.getStorageIdentifier();
        logger.log(Level.FINE, "Storage identifier for the file: {0}", fileSystemName);

        DataFileIO dataAccess = null;

        try {
            dataAccess = doomed.getAccessObject();
        } catch (IOException ioex) {
            throw new CommandExecutionException("Failed to initialize physical access driver.", ioex, this);
        }

        if (dataAccess != null) {
            // If this is a local file, we only want to attempt to delete it
            // if it actually exists on the filesystem:
            // TODO:
            // add a generic .exists() method to the dataAccess object.
            // -- L.A. 4.0
            boolean physicalFileExists = false;

            if (dataAccess.isLocalFile()) {
                try {
                    if (dataAccess.getFileSystemPath() != null
                            && dataAccess.getFileSystemPath().toFile().exists()) {
                        physicalFileExists = true;
                    }
                } catch (IOException ioex) {
                    // If we cannot even check for the file, assume it exists,
                    // and let the delete attempt below surface the real error:
                    physicalFileExists = true;
                }
            }

            if (physicalFileExists || !dataAccess.isLocalFile()) {
                try {
                    dataAccess.delete();
                } catch (IOException ex) {
                    throw new CommandExecutionException("Error deleting physical file object while deleting DataFile " + doomed.getId() + " from the database.", ex, this);
                }
                logger.log(Level.FINE, "Successfully deleted physical storage object (file) for the DataFile {0}", doomed.getId());
            }

            // Drop the reference to the dataAccess object - we will need to
            // purge the DataFile from the database (below), so we don't want
            // any objects in this transaction that reference it:
            dataAccess = null;

            // We may also have a few extra files associated with this object -
            // the preserved original that was used in the tabular data ingest,
            // cached R data frames, image thumbnails, etc.
            // We need to delete these too; failures, however, are less
            // important here: if we fail to delete any of these auxiliary
            // files, we just leave an error message in the log and proceed
            // to delete the database object.
            // Note that the assumption here is that all these auxiliary
            // files - the saved original, cached format conversions, etc. -
            // are stored on the local physical filesystem.
            // TODO: revisit and review this assumption! -- L.A. 4.0
            List<Path> victims = new ArrayList<>();

            // 1. The preserved original:
            Path filePath = doomed.getSavedOriginalFile();
            if (filePath != null) {
                victims.add(filePath);
            }

            // 2. Cached files:
            victims.addAll(listCachedFiles(doomed));

            // Delete them all:
            List<String> failures = new ArrayList<>();
            for (Path deadFile : victims) {
                try {
                    logger.log(Level.FINE, "Deleting cached file {0}", deadFile.toString());
                    Files.delete(deadFile);
                } catch (IOException ex) {
                    failures.add(deadFile.toString());
                }
            }

            if (!failures.isEmpty()) {
                String failedFiles = StringUtils.join(failures, ",");
                logger.log(Level.SEVERE, "Error deleting physical file(s) {0} while deleting DataFile {1}", new Object[]{failedFiles, doomed.getName()});
            }

            DataFile doomedAndMerged = ctxt.em().merge(doomed);
            ctxt.em().remove(doomedAndMerged);
            /**
             * @todo consider adding an em.flush here (despite the performance
             * impact) if you need to operate on the dataset below. Without the
             * flush, the dataset still thinks it has the file that was just
             * deleted.
             */
            // ctxt.em().flush();

            /**
             * We *could* re-index the entire dataset, but it is more efficient
             * to target the individual file for deletion - and that file
             * should always be a draft.
             *
             * See also https://redmine.hmdc.harvard.edu/issues/3786
             */
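            // The draft Solr document id is the shared file-id prefix plus the
            // database id and a "_draft" suffix; e.g., for DataFile 42 this
            // yields something like "datafile_42_draft" (the exact prefix is
            // whatever IndexServiceBean.solrDocIdentifierFile holds).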
            String indexingResult = ctxt.index().removeSolrDocFromIndex(IndexServiceBean.solrDocIdentifierFile + doomed.getId() + "_draft");
            /**
             * @todo check the indexing result for success or failure. Really,
             * we need an indexing queuing system:
             * https://redmine.hmdc.harvard.edu/issues/3643
             */
        }
    }

    private List<Path> listCachedFiles(DataFile dataFile) {
        List<Path> victims = new ArrayList<>();

        // Cached files for a given datafile are stored on the filesystem
        // as <filesystemname>.*; for example, <filename>.thumb64 or
        // <filename>.RData.
        final String baseName = dataFile.getStorageIdentifier();

        if (baseName == null || baseName.isEmpty()) {
            // Return an empty list, rather than null, so that the
            // victims.addAll() call above can never throw a NullPointerException:
            return victims;
        }

        Path datasetDirectory = dataFile.getOwner().getFileSystemDirectory();

        DirectoryStream.Filter<Path> filter = new DirectoryStream.Filter<Path>() {
            @Override
            public boolean accept(Path file) throws IOException {
                return (file.getFileName() != null
                        && file.getFileName().toString().startsWith(baseName + "."));
            }
        };

        try (DirectoryStream<Path> dirStream = Files.newDirectoryStream(datasetDirectory, filter)) {
            for (Path cachedFilePath : dirStream) {
                victims.add(cachedFilePath);
            }
        } catch (IOException ex) {
            logger.log(Level.WARNING, "Failed to list the cached files for DataFile {0}", dataFile.getId());
        }

        return victims;
    }
}
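// For reference, a hypothetical on-disk layout that this command cleans up,
// assuming a storage identifier of "f1a2b3" (all names illustrative):
//
//   f1a2b3          <- the main physical file, deleted via DataFileIO.delete()
//   f1a2b3.thumb64  <- cached thumbnail, matched and deleted via listCachedFiles()
//   f1a2b3.RData    <- cached R data frame, matched and deleted via listCachedFiles()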