/** * The contents of this file are subject to the license and copyright * detailed in the LICENSE file at the root of the source * tree and available online at * * https://github.com/keeps/roda */ package org.roda.core.plugins.plugins.base; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVPrinter; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.roda.core.RodaCoreFactory; import org.roda.core.data.common.RodaConstants; import org.roda.core.data.common.RodaConstants.PreservationEventType; import org.roda.core.data.exceptions.InvalidParameterException; import org.roda.core.data.v2.LiteOptionalWithCause; import org.roda.core.data.v2.ip.AIP; import org.roda.core.data.v2.jobs.Job; import org.roda.core.data.v2.jobs.PluginParameter; import org.roda.core.data.v2.jobs.PluginParameter.PluginParameterType; import org.roda.core.data.v2.jobs.PluginType; import org.roda.core.data.v2.jobs.Report; import org.roda.core.index.IndexService; import org.roda.core.model.ModelService; import org.roda.core.plugins.AbstractPlugin; import org.roda.core.plugins.Plugin; import org.roda.core.plugins.PluginException; import org.roda.core.plugins.RODAObjectProcessingLogic; import org.roda.core.plugins.RODAProcessingLogic; import org.roda.core.plugins.orchestrate.SimpleJobPluginInfo; import org.roda.core.plugins.plugins.PluginHelper; import org.roda.core.storage.StorageService; import org.roda.core.storage.fs.FSUtils; import org.roda.core.util.IdUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class InventoryReportPlugin extends AbstractPlugin<AIP> { private static final Logger LOGGER = LoggerFactory.getLogger(InventoryReportPlugin.class); public static final String EXPORT_CSV_TEMP_FOLDER = "CSV"; public static final String CSV_FILE_FIELDS = "parameter.csv.file.fields"; public static final String CSV_FILE_OUTPUT = "parameter.csv.file.output"; public static final String CSV_FILE_HEADERS = "parameter.csv.file.headers"; public static final String CSV_FILE_OUTPUT_DATA = "parameter.csv.file.output.data"; public static final String CSV_FILE_OUTPUT_DESCRIPTIVE = "parameter.csv.file.output.descriptive"; public static final String CSV_FILE_OTHER_METADATA_TYPES = "parameter.csv.file.output.other"; public static final String CSV_FIELD_SIP_ID = "sipId"; public static final String CSV_FIELD_AIP_ID = "aipId"; public static final String CSV_FIELD_REPRESENTATION_ID = "representationId"; public static final String CSV_FIELD_FILE_PATH = "filePath"; public static final String CSV_FIELD_FILE_ID = "fileId"; public static final String CSV_FIELD_ISDIRECTORY = "isDirectory"; public static final String CSV_FIELD_CHECKSUM_SHA1 = "SHA-1"; public static final String CSV_FIELD_CHECKSUM_SHA256 = "SHA-256"; public static final String CSV_FIELD_CHECKSUM_MD5 = "MD5"; public static final String CSV_FILE_TYPE = "type"; public enum CSV_LINE_TYPE { DATA, METADATA_DESCRIPTIVE, METADATA_OTHER } protected static final List<String> CHECKSUM_ALGORITHMS = Arrays.asList(CSV_FIELD_CHECKSUM_MD5, CSV_FIELD_CHECKSUM_SHA1, CSV_FIELD_CHECKSUM_SHA256); public static final String CSV_DEFAULT_FIELDS = StringUtils.join(Arrays.asList(CSV_FIELD_SIP_ID, CSV_FIELD_AIP_ID, CSV_FIELD_REPRESENTATION_ID, CSV_FIELD_FILE_PATH, CSV_FIELD_FILE_ID, CSV_FIELD_ISDIRECTORY, CSV_FILE_TYPE, CSV_FIELD_CHECKSUM_SHA256, CSV_FIELD_CHECKSUM_MD5, CSV_FIELD_CHECKSUM_SHA1), ","); public static final String CSV_DEFAULT_OUTPUT = "/tmp/output.csv"; public static final String CSV_DEFAULT_HEADERS = "true"; public static final String CSV_DEFAULT_OTHER_METADATA = "tika,siegfried"; private List<String> fields = null; private Path output; private boolean enableHeaders; private boolean outputDataInformation; private boolean outputDescriptiveMetadataInformation; private List<String> otherMetadataTypes; private static Map<String, PluginParameter> pluginParameters = new HashMap<>(); // TODO -> add plugin parameter type "LIST"... static { pluginParameters.put(CSV_FILE_FIELDS, new PluginParameter(CSV_FILE_FIELDS, "Attributes to include in the report", PluginParameterType.STRING, CSV_DEFAULT_FIELDS, true, false, "List of file attributes to include in the inventory export. The example includes all the possible options. Remove attributes as necessary.")); pluginParameters.put(CSV_FILE_OUTPUT, new PluginParameter(CSV_FILE_OUTPUT, "Report file path", PluginParameterType.STRING, CSV_DEFAULT_OUTPUT, true, false, "The full path and file name on the server where the inventory report file should be created.")); pluginParameters.put(CSV_FILE_HEADERS, new PluginParameter(CSV_FILE_HEADERS, "Include header line", PluginParameterType.BOOLEAN, CSV_DEFAULT_HEADERS, true, false, "Include a header line in the CSV inventory report.")); pluginParameters.put(CSV_FILE_OUTPUT_DATA, new PluginParameter(CSV_FILE_OUTPUT_DATA, "Include data files", PluginParameterType.BOOLEAN, CSV_DEFAULT_HEADERS, true, false, "Include in the inventory report information about data files that exist inside AIPs.")); pluginParameters.put(CSV_FILE_OUTPUT_DESCRIPTIVE, new PluginParameter(CSV_FILE_OUTPUT_DESCRIPTIVE, "Include descriptive metadata files", PluginParameterType.BOOLEAN, CSV_DEFAULT_HEADERS, true, false, "Include in the inventory report information about descriptive metadata files that exist inside AIPs.")); pluginParameters.put(CSV_FILE_OTHER_METADATA_TYPES, new PluginParameter(CSV_FILE_OTHER_METADATA_TYPES, "Include other metadata files", PluginParameterType.STRING, CSV_DEFAULT_OTHER_METADATA, true, false, "Include in the inventory report information about other metadata files that exist inside AIPs.")); } @Override public void init() throws PluginException { // do nothing } @Override public void shutdown() { // do nothing } @Override public String getName() { return "Inventory report"; } @Override public String getDescription() { return "Creates a report in CSV format that includes a listing of all AIP and its inner files (data and metadata) which also includes some of " + "their technical properties (e.g. sipId, aipId, representationId, filePath, SHA-256, MD5, SHA-1). The report will be stored in a folder on " + "the server side as defined by the user. To obtain the report, one needs access to the storage layer of the repository server.\nThis report" + " may be used to validate the completeness and correctness of an ingest process."; } @Override public String getVersionImpl() { return "1.0"; } @Override public List<PluginParameter> getParameters() { ArrayList<PluginParameter> parameters = new ArrayList<>(); parameters.add(pluginParameters.get(CSV_FILE_FIELDS)); PluginParameter outputPluginParameter = pluginParameters.get(CSV_FILE_OUTPUT); SimpleDateFormat df = new SimpleDateFormat(RodaConstants.DEFAULT_DATETIME_FORMAT); String reportName = "inventory_report_" + df.format(new Date()) + ".csv"; outputPluginParameter.setDefaultValue(RodaCoreFactory.getReportsDirectory().resolve(reportName).toString()); parameters.add(outputPluginParameter); parameters.add(pluginParameters.get(CSV_FILE_HEADERS)); parameters.add(pluginParameters.get(CSV_FILE_OUTPUT_DATA)); parameters.add(pluginParameters.get(CSV_FILE_OUTPUT_DESCRIPTIVE)); parameters.add(pluginParameters.get(CSV_FILE_OTHER_METADATA_TYPES)); return parameters; } @Override public void setParameterValues(Map<String, String> parameters) throws InvalidParameterException { super.setParameterValues(parameters); if (parameters.containsKey(CSV_FILE_FIELDS)) { String fieldsSTR = parameters.get(CSV_FILE_FIELDS); if (fieldsSTR != null && !"".equals(fieldsSTR.trim())) { fields = new ArrayList<>(); fields.addAll(Arrays.asList(fieldsSTR.split(","))); } } if (parameters.containsKey(CSV_FILE_OUTPUT)) { try { output = Paths.get(parameters.get(CSV_FILE_OUTPUT)); Path parent = output.getParent(); Files.createDirectories(parent); } catch (IOException e) { LOGGER.error("Error creating output parent folder.", e); } } if (parameters.containsKey(CSV_FILE_HEADERS)) { enableHeaders = Boolean.parseBoolean(parameters.get(CSV_FILE_HEADERS)); } if (parameters.containsKey(CSV_FILE_OUTPUT_DATA)) { outputDataInformation = Boolean.parseBoolean(parameters.get(CSV_FILE_OUTPUT_DATA)); } if (parameters.containsKey(CSV_FILE_OUTPUT_DESCRIPTIVE)) { outputDescriptiveMetadataInformation = Boolean.parseBoolean(parameters.get(CSV_FILE_OUTPUT_DESCRIPTIVE)); } if (parameters.containsKey(CSV_FILE_OTHER_METADATA_TYPES)) { String otherMetadataSTR = parameters.get(CSV_FILE_OTHER_METADATA_TYPES); if (otherMetadataSTR != null && !"".trim().equalsIgnoreCase(otherMetadataSTR)) { otherMetadataTypes = new ArrayList<>(); otherMetadataTypes.addAll(Arrays.asList(otherMetadataSTR.split(","))); } } } @Override public Report execute(IndexService index, ModelService model, StorageService storage, List<LiteOptionalWithCause> liteList) throws PluginException { final CSVPrinter csvFilePrinter = createCSVPrinter(); return PluginHelper.processObjects(this, new RODAObjectProcessingLogic<AIP>() { @Override public void process(IndexService index, ModelService model, StorageService storage, Report report, Job cachedJob, SimpleJobPluginInfo jobPluginInfo, Plugin<AIP> plugin, AIP object) { processAIP(model, storage, jobPluginInfo, csvFilePrinter, object); } }, new RODAProcessingLogic<AIP>() { @Override public void process(IndexService index, ModelService model, StorageService storage, Report report, Job cachedJob, SimpleJobPluginInfo jobPluginInfo, Plugin<AIP> plugin) { IOUtils.closeQuietly(csvFilePrinter); } }, index, model, storage, liteList); } private CSVPrinter createCSVPrinter() { Path jobCSVTempFolder = getJobCSVTempFolder(); Path csvTempFile = jobCSVTempFolder.resolve(IdUtils.createUUID() + ".csv"); CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator("\n"); try (BufferedWriter fileWriter = Files.newBufferedWriter(csvTempFile)) { return new CSVPrinter(fileWriter, csvFileFormat); } catch (IOException e) { LOGGER.error("Unable to instantiate CSVPrinter", e); return null; } } private void processAIP(ModelService model, StorageService storage, SimpleJobPluginInfo jobPluginInfo, CSVPrinter csvFilePrinter, AIP aip) { if (csvFilePrinter == null) { LOGGER.warn("CSVPrinter is NULL! Skipping..."); return; } try { if (outputDataInformation && aip.getRepresentations() != null) { List<List<String>> dataInformation = InventoryReportPluginUtils.getDataInformation(fields, aip, model, storage); csvFilePrinter.printRecords(dataInformation); } if (outputDescriptiveMetadataInformation && aip.getDescriptiveMetadata() != null) { List<List<String>> dataInformation = InventoryReportPluginUtils.getDescriptiveMetadataInformation(fields, aip, model, storage); csvFilePrinter.printRecords(dataInformation); } if (otherMetadataTypes != null && !otherMetadataTypes.isEmpty()) { for (String otherMetadataType : otherMetadataTypes) { List<List<String>> otherMetadataInformation = InventoryReportPluginUtils.getOtherMetadataInformation(fields, otherMetadataType, aip, model, storage); csvFilePrinter.printRecords(otherMetadataInformation); } } jobPluginInfo.incrementObjectsProcessedWithSuccess(); } catch (IOException e) { jobPluginInfo.incrementObjectsProcessedWithFailure(); } } @Override public Report beforeAllExecute(IndexService index, ModelService model, StorageService storage) throws PluginException { try { Path jobCSVTempFolder = getJobCSVTempFolder(); Files.createDirectories(jobCSVTempFolder); } catch (IOException e) { LOGGER.error("Error while creating plugin working dir", e); } try { Path reportsFolder = RodaCoreFactory.getRodaHomePath().resolve(RodaConstants.CORE_REPORT_FOLDER); if (FSUtils.exists(reportsFolder)) { Files.createDirectories(reportsFolder); } } catch (IOException e) { LOGGER.error("Error while creating report dir", e); } return new Report(); } private Path getJobCSVTempFolder() { Path wd = RodaCoreFactory.getWorkingDirectory(); Path csvExportTempFolder = wd.resolve(InventoryReportPlugin.EXPORT_CSV_TEMP_FOLDER); return csvExportTempFolder.resolve(PluginHelper.getJobId(this)); } @Override public Report afterAllExecute(IndexService index, ModelService model, StorageService storage) throws PluginException { CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator("\n"); Path csvTempFolder = getJobCSVTempFolder(); if (csvTempFolder != null) { List<Path> partials = new ArrayList<>(); try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(csvTempFolder); FileWriter fileWriter = new FileWriter(output.toFile()); CSVPrinter csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);) { if (enableHeaders) { csvFilePrinter.printRecord(fields); } for (Path path : directoryStream) { partials.add(path); } } catch (IOException e) { LOGGER.error("Error while merging partial CSVs", e); } try { InventoryReportPluginUtils.mergeFiles(partials, output); FSUtils.deletePathQuietly(csvTempFolder); } catch (IOException e) { LOGGER.error("Error while merging partial CSVs", e); } } return new Report(); } @Override public Plugin<AIP> cloneMe() { return new InventoryReportPlugin(); } @Override public PluginType getType() { return PluginType.MISC; } @Override public boolean areParameterValuesValid() { return true; } @Override public PreservationEventType getPreservationEventType() { return PreservationEventType.VALIDATION; } @Override public String getPreservationEventDescription() { return "Created a report in CSV format"; } @Override public String getPreservationEventSuccessMessage() { return "Created a report in CSV format successfully"; } @Override public String getPreservationEventFailureMessage() { return "Create of a report in CSV format failed"; } @Override public List<String> getCategories() { return Arrays.asList(RodaConstants.PLUGIN_CATEGORY_MANAGEMENT); } @Override public List<Class<AIP>> getObjectClasses() { return Arrays.asList(AIP.class); } }