/** * The contents of this file are subject to the license and copyright * detailed in the LICENSE file at the root of the source * tree and available online at * * https://github.com/keeps/roda */ package org.roda.core.common.validation; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import javax.xml.parsers.SAXParserFactory; import javax.xml.transform.Source; import javax.xml.transform.sax.SAXSource; import javax.xml.transform.stream.StreamSource; import javax.xml.validation.Schema; import javax.xml.validation.Validator; import org.apache.commons.io.IOUtils; import org.apache.commons.io.input.BOMInputStream; import org.apache.commons.lang3.StringUtils; import org.roda.core.RodaCoreFactory; import org.roda.core.common.RodaEntityResolver; import org.roda.core.data.common.RodaConstants; import org.roda.core.data.exceptions.AuthorizationDeniedException; import org.roda.core.data.exceptions.GenericException; import org.roda.core.data.exceptions.NotFoundException; import org.roda.core.data.exceptions.RequestNotValidException; import org.roda.core.data.v2.ip.StoragePath; import org.roda.core.data.v2.ip.metadata.DescriptiveMetadata; import org.roda.core.data.v2.ip.metadata.PreservationMetadata; import org.roda.core.data.v2.validation.ValidationException; import org.roda.core.data.v2.validation.ValidationIssue; import org.roda.core.data.v2.validation.ValidationReport; import org.roda.core.model.ModelService; import org.roda.core.model.utils.ModelUtils; import org.roda.core.storage.Binary; import org.roda.core.storage.ContentPayload; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; /** * * Set of validation methods (XML, etc) * * <p> * 1) When the following error appears (or something very similar), it means * that there is one or more xml schemas/dtds mentioned in the main xml schema * (the one being used to validate some xml) that are not accessible. This may * be due to lack of internet connection or local files that actually don't * exist. * </p> * <code>org.xml.sax.SAXParseException: src-resolve: Cannot resolve the name 'xml:lang' to a(n) 'attribute declaration' component.</code> * */ public class ValidationUtils { private static final Logger LOGGER = LoggerFactory.getLogger(ValidationUtils.class); /** Private empty constructor */ private ValidationUtils() { // do nothing } public static ValidationReport isAIPMetadataValid(boolean forceDescriptiveMetadataType, boolean validateDescriptiveMetadata, String fallbackMetadataType, String fallbackMetadataVersion, ModelService model, String aipId) throws GenericException, RequestNotValidException, AuthorizationDeniedException, NotFoundException, ValidationException { ValidationReport report = new ValidationReport(); report.setValid(true); List<DescriptiveMetadata> descriptiveMetadata = model.retrieveAIP(aipId).getDescriptiveMetadata(); for (DescriptiveMetadata dm : descriptiveMetadata) { StoragePath storagePath = ModelUtils.getDescriptiveMetadataStoragePath(dm); Binary binary = model.getStorage().getBinary(storagePath); if (forceDescriptiveMetadataType) { if (validateDescriptiveMetadata) { ValidationReport dmReport = validateDescriptiveBinary(binary.getContent(), fallbackMetadataType, fallbackMetadataVersion, false); consolidateReports(report, dmReport); } // XXX review why should a validation method update data Map<String, String> properties = new HashMap<>(); properties.put(RodaConstants.VERSION_ACTION, RodaConstants.VersionAction.METADATA_TYPE_FORCED.toString()); model.updateDescriptiveMetadata(aipId, dm.getId(), binary.getContent(), fallbackMetadataType, fallbackMetadataVersion, properties); report.setValid(true); LOGGER.debug("{} valid for metadata type {}", storagePath, fallbackMetadataType); } else if (validateDescriptiveMetadata) { String metadataType = dm.getType() != null ? dm.getType() : fallbackMetadataType; String metadataVersion = dm.getType() != null ? dm.getVersion() : fallbackMetadataVersion; ValidationReport dmReport = validateDescriptiveBinary(binary.getContent(), metadataType, metadataVersion, false); consolidateReports(report, dmReport); } } // TODO handle premis... return report; } public static ValidationReport consolidateReports(ValidationReport mainReport, ValidationReport innerReport) { mainReport.setValid(mainReport.isValid() && innerReport.isValid()); if (StringUtils.isNotBlank(mainReport.getMessage())) { mainReport.setMessage(mainReport.getMessage() + "\n" + innerReport.getMessage()); } else { mainReport.setMessage(innerReport.getMessage()); } mainReport.getIssues().addAll(innerReport.getIssues()); return mainReport; } public static ValidationReport isXMLValid(ContentPayload xmlPayload) { ValidationReport ret = new ValidationReport(); SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setValidating(false); factory.setNamespaceAware(true); RodaErrorHandler errorHandler = new RodaErrorHandler(); try (Reader reader = new InputStreamReader(new BOMInputStream(xmlPayload.createInputStream()))) { XMLReader xmlReader = XMLReaderFactory.createXMLReader(); xmlReader.setEntityResolver(new RodaEntityResolver()); InputSource inputSource = new InputSource(reader); xmlReader.setErrorHandler(errorHandler); xmlReader.parse(inputSource); ret.setValid(errorHandler.getErrors().isEmpty()); for (SAXParseException saxParseException : errorHandler.getErrors()) { ret.addIssue(convertSAXParseException(saxParseException)); } } catch (SAXException e) { ret.setValid(false); for (SAXParseException saxParseException : errorHandler.getErrors()) { ret.addIssue(convertSAXParseException(saxParseException)); } } catch (IOException e) { ret.setValid(false); ret.setMessage(e.getMessage()); } return ret; } /** * Validates all descriptive metadata files contained in the AIP * * @throws AuthorizationDeniedException * @throws NotFoundException * @throws RequestNotValidException * @throws GenericException * * @throws ValidationException */ public static ValidationReport isAIPDescriptiveMetadataValid(ModelService model, String aipId, boolean failIfNoSchema) throws GenericException, RequestNotValidException, NotFoundException, AuthorizationDeniedException { boolean valid = true; List<ValidationIssue> issues = new ArrayList<>(); List<DescriptiveMetadata> descriptiveMetadata = model.retrieveAIP(aipId).getDescriptiveMetadata(); for (DescriptiveMetadata dm : descriptiveMetadata) { ValidationReport report = isDescriptiveMetadataValid(model, dm, failIfNoSchema); valid &= report.isValid(); issues.addAll(report.getIssues()); } ValidationReport ret = new ValidationReport(); ret.setValid(valid); ret.setIssues(issues); return ret; } /** * Validates descriptive medatada (e.g. against its schema, but other * strategies may be used) * * @param failIfNoSchema * @throws AuthorizationDeniedException * @throws NotFoundException * @throws RequestNotValidException * @throws GenericException * @throws ValidationException */ public static ValidationReport isDescriptiveMetadataValid(ModelService model, DescriptiveMetadata metadata, boolean failIfNoSchema) throws GenericException, RequestNotValidException, NotFoundException, AuthorizationDeniedException { ValidationReport ret; if (metadata != null) { StoragePath storagePath = ModelUtils.getDescriptiveMetadataStoragePath(metadata.getAipId(), metadata.getRepresentationId(), metadata.getId()); Binary binary = model.getStorage().getBinary(storagePath); ret = validateDescriptiveBinary(binary.getContent(), metadata.getType(), metadata.getVersion(), failIfNoSchema); } else { ret = new ValidationReport(); ret.setValid(false); ret.setMessage("Metadata is NULL"); } return ret; } private static ValidationIssue convertSAXParseException(SAXParseException e) { ValidationIssue issue = new ValidationIssue(); issue.setMessage(e.getMessage()); issue.setLineNumber(e.getLineNumber()); issue.setColumnNumber(e.getColumnNumber()); return issue; } /** * Validates preservation medatada (e.g. against its schema, but other * strategies may be used) * * @param failIfNoSchema * @throws AuthorizationDeniedException * @throws NotFoundException * @throws RequestNotValidException * @throws GenericException * @throws ValidationException */ public static ValidationReport isPreservationMetadataValid(ModelService model, PreservationMetadata metadata, boolean failIfNoSchema) throws GenericException, RequestNotValidException, NotFoundException, AuthorizationDeniedException { StoragePath storagePath = ModelUtils.getPreservationMetadataStoragePath(metadata); Binary binary = model.getStorage().getBinary(storagePath); return validatePreservationBinary(binary, failIfNoSchema); } /** * Validates descriptive medatada (e.g. against its schema, but other * strategies may be used) * * @param descriptiveMetadataType * * @param failIfNoSchema * @throws ValidationException */ public static ValidationReport validateDescriptiveBinary(ContentPayload descriptiveMetadataPayload, String descriptiveMetadataType, String descriptiveMetadataVersion, boolean failIfNoSchema) { ValidationReport ret = new ValidationReport(); InputStream inputStream = null; Optional<Schema> xmlSchema = RodaCoreFactory.getRodaSchema(descriptiveMetadataType, descriptiveMetadataVersion); try { if (xmlSchema.isPresent()) { RodaErrorHandler errorHandler = new RodaErrorHandler(); try (InputStreamReader inputStreamReader = new InputStreamReader( new BOMInputStream(descriptiveMetadataPayload.createInputStream()))) { XMLReader xmlReader = XMLReaderFactory.createXMLReader(); xmlReader.setEntityResolver(new RodaEntityResolver()); InputSource inputSource = new InputSource(inputStreamReader); Source source = new SAXSource(xmlReader, inputSource); Validator validator = xmlSchema.get().newValidator(); validator.setErrorHandler(errorHandler); validator.validate(source); ret.setValid(errorHandler.getErrors().isEmpty()); for (SAXParseException saxParseException : errorHandler.getErrors()) { ret.addIssue(convertSAXParseException(saxParseException)); } } catch (SAXException e) { LOGGER.debug("Error validating descriptive binary " + descriptiveMetadataType, e); ret.setValid(false); for (SAXParseException saxParseException : errorHandler.getErrors()) { ret.addIssue(convertSAXParseException(saxParseException)); } } } else { if (failIfNoSchema) { LOGGER.error( "Will fail validating descriptive metadata with type '{}' and version '{}' because couldn't find its schema", descriptiveMetadataType, descriptiveMetadataVersion); ret.setValid(false); ret.setMessage("No schema to validate " + descriptiveMetadataType); } else { LOGGER.debug("Found no schema do validate descriptive metadata but will try to validate XML syntax..."); ret = isXMLValid(descriptiveMetadataPayload); } } } catch (IOException e) { LOGGER.error("Error validating descriptive metadata", e); ret.setValid(false); ret.setMessage(e.getMessage()); } finally { IOUtils.closeQuietly(inputStream); } return ret; } /** * Validates preservation medatada (e.g. against its schema, but other * strategies may be used) * * @param failIfNoSchema * * @param descriptiveMetadataId * * @param failIfNoSchema * @throws ValidationException */ public static ValidationReport validatePreservationBinary(Binary binary, boolean failIfNoSchema) { ValidationReport report = new ValidationReport(); InputStream inputStream = null; try { Optional<Schema> xmlSchema = RodaCoreFactory.getRodaSchema("premis-v2-0", null); if (xmlSchema.isPresent()) { inputStream = binary.getContent().createInputStream(); Source xmlFile = new StreamSource(inputStream); Validator validator = xmlSchema.get().newValidator(); RodaErrorHandler errorHandler = new RodaErrorHandler(); validator.setErrorHandler(errorHandler); try { validator.validate(xmlFile); report.setValid(errorHandler.getErrors().isEmpty()); for (SAXParseException saxParseException : errorHandler.getErrors()) { report.addIssue(convertSAXParseException(saxParseException)); } } catch (SAXException e) { LOGGER.error("Error validating preservation binary " + binary.getStoragePath(), e); report.setValid(false); for (SAXParseException saxParseException : errorHandler.getErrors()) { report.addIssue(convertSAXParseException(saxParseException)); } } } else if (failIfNoSchema) { report.setValid(false); report.setMessage("No schema to validate PREMIS"); } } catch (IOException e) { report.setValid(false); report.setMessage(e.getMessage()); } finally { IOUtils.closeQuietly(inputStream); } return report; } private static class RodaErrorHandler extends DefaultHandler { List<SAXParseException> errors; public RodaErrorHandler() { errors = new ArrayList<>(); } @Override public void warning(SAXParseException e) throws SAXException { errors.add(e); } @Override public void error(SAXParseException e) throws SAXException { errors.add(e); } @Override public void fatalError(SAXParseException e) throws SAXException { errors.add(e); } public List<SAXParseException> getErrors() { return errors; } @SuppressWarnings("unused") public void setErrors(List<SAXParseException> errors) { this.errors = errors; } } }