/***************************************************************************** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * ****************************************************************************/ package org.apache.padaf.preflight.helpers; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import org.apache.commons.io.IOUtils; import org.apache.padaf.preflight.DocumentHandler; import org.apache.padaf.preflight.ValidationConstants; import org.apache.padaf.preflight.ValidationException; import org.apache.padaf.preflight.ValidationResult.ValidationError; import org.apache.padaf.preflight.ValidatorConfig; import org.apache.padaf.preflight.utils.COSUtils; import org.apache.padaf.preflight.xmp.PDFAIdentificationValidation; import org.apache.padaf.preflight.xmp.RDFAboutAttributeConcordanceValidation; import org.apache.padaf.preflight.xmp.RDFAboutAttributeConcordanceValidation.DifferentRDFAboutException; import org.apache.padaf.preflight.xmp.SynchronizedMetaDataValidation; import org.apache.padaf.preflight.xmp.XpacketParsingException; import org.apache.padaf.preflight.xmp.RDFAboutAttributeConcordanceValidation.DifferentRDFAboutException; import org.apache.padaf.xmpbox.XMPMetadata; import org.apache.padaf.xmpbox.parser.PDFAExtentionSchemaPreprocessor; import org.apache.padaf.xmpbox.parser.XMPDocumentBuilder; import org.apache.padaf.xmpbox.parser.XmpExpectedRdfAboutAttribute; import org.apache.padaf.xmpbox.parser.XmpParsingException; import org.apache.padaf.xmpbox.parser.XmpPropertyFormatException; import org.apache.padaf.xmpbox.parser.XmpRequiredPropertyException; import org.apache.padaf.xmpbox.parser.XmpSchemaException; import org.apache.padaf.xmpbox.parser.XmpUnexpectedNamespacePrefixException; import org.apache.padaf.xmpbox.parser.XmpUnexpectedNamespaceURIException; import org.apache.padaf.xmpbox.parser.XmpUnknownPropertyException; import org.apache.padaf.xmpbox.parser.XmpUnknownSchemaException; import org.apache.padaf.xmpbox.parser.XmpUnknownValueTypeException; import org.apache.padaf.xmpbox.parser.XmpXpacketEndException; import org.apache.padaf.xmpbox.type.BadFieldValueException; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.common.PDStream; public class MetadataValidationHelper extends AbstractValidationHelper { public MetadataValidationHelper(ValidatorConfig cfg) throws ValidationException { super(cfg); } /** * Return the xpacket from the dictionary's stream */ public static byte[] getXpacket(COSDocument cdocument) throws IOException, XpacketParsingException { COSObject catalog = cdocument.getCatalog(); COSBase cb = catalog.getDictionaryObject(COSName.METADATA); if (cb == null) { // missing Metadata Key in catalog ValidationError error = new ValidationError( ValidationConstants.ERROR_METADATA_FORMAT, "Missing Metadata Key in catalog"); throw new XpacketParsingException("Failed while retrieving xpacket", error); } // no filter key COSDictionary metadataDictionnary = COSUtils.getAsDictionary(cb, cdocument); if (metadataDictionnary.getItem(COSName.FILTER) != null) { // should not be defined ValidationError error = new ValidationError( ValidationConstants.ERROR_SYNTAX_STREAM_INVALID_FILTER, "Filter specified in metadata dictionnary"); throw new XpacketParsingException("Failed while retrieving xpacket", error); } PDStream stream = PDStream.createFromCOS(metadataDictionnary); ByteArrayOutputStream bos = new ByteArrayOutputStream(); InputStream is = stream.createInputStream(); IOUtils.copy(is, bos); is.close(); bos.close(); return bos.toByteArray(); } public List<ValidationError> innerValidate(DocumentHandler handler) throws ValidationException { try { PDDocument document = handler.getDocument(); byte[] tmp = getXpacket(document.getDocument()); XMPDocumentBuilder builder; try { builder = new XMPDocumentBuilder(); builder.addPreprocessor(new PDFAExtentionSchemaPreprocessor()); } catch (XmpSchemaException e1) { throw new ValidationException(e1.getMessage(), e1); } XMPMetadata metadata; try { metadata = builder.parse(tmp); handler.setMetadata(metadata); } catch (XmpSchemaException e) { throw new ValidationException( "Parser: Internal Problem (failed to instanciate Schema object)", e); } catch (XmpXpacketEndException e) { throw new ValidationException("Unable to parse font metadata due to : " + e.getMessage(), e); } List<ValidationError> lve = new ArrayList<ValidationError>(); // 6.7.5 no deprecated attribute in xpacket processing instruction if (metadata.getXpacketBytes() != null) { lve.add(new ValidationError( ValidationConstants.ERROR_METADATA_XPACKET_DEPRECATED, "bytes attribute is forbidden")); } if (metadata.getXpacketEncoding() != null) { lve.add(new ValidationError( ValidationConstants.ERROR_METADATA_XPACKET_DEPRECATED, "encoding attribute is forbidden")); } // Call metadata synchronization checking lve.addAll(new SynchronizedMetaDataValidation() .validateMetadataSynchronization(document, metadata)); // Call PDF/A Identifier checking lve.addAll(new PDFAIdentificationValidation() .validatePDFAIdentifer(metadata)); // Call rdf:about checking try { new RDFAboutAttributeConcordanceValidation() .validateRDFAboutAttributes(metadata); } catch (DifferentRDFAboutException e) { lve.add(new ValidationError( ValidationConstants.ERROR_METADATA_RDF_ABOUT_ATTRIBUTE_INEQUAL_VALUE, e .getMessage())); } return lve; } catch (XpacketParsingException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); if (e.getError() != null) { lve.add(e.getError()); } else { lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_MAIN, "Unexpected error")); } return lve; } catch (XmpPropertyFormatException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError( ValidationConstants.ERROR_METADATA_PROPERTY_FORMAT, e.getMessage())); return lve; } catch (BadFieldValueException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_CATEGORY_PROPERTY_INVALID ,e.getMessage())); return lve; } catch (XmpExpectedRdfAboutAttribute e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_RDF_ABOUT_ATTRIBUTE_MISSING ,e.getMessage())); return lve; } catch (XmpUnknownPropertyException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError( ValidationConstants.ERROR_METADATA_PROPERTY_UNKNOWN, e.getMessage())); return lve; } catch (XmpUnknownSchemaException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError( ValidationConstants.ERROR_METADATA_ABSENT_DESCRIPTION_SCHEMA, e .getMessage())); return lve; } catch (XmpUnexpectedNamespaceURIException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError( ValidationConstants.ERROR_METADATA_WRONG_NS_URI, e.getMessage())); return lve; } catch (XmpUnexpectedNamespacePrefixException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError( ValidationConstants.ERROR_METADATA_ABSENT_DESCRIPTION_SCHEMA, e .getMessage())); return lve; } catch (XmpRequiredPropertyException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError( ValidationConstants.ERROR_METADATA_PROPERTY_MISSING, e.getMessage())); return lve; } catch (XmpUnknownValueTypeException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve .add(new ValidationError( ValidationConstants.ERROR_METADATA_UNKNOWN_VALUETYPE, e .getMessage())); return lve; } catch (XmpParsingException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_FORMAT, e .getMessage())); return lve; } catch (IOException e) { throw new ValidationException("Failed while validating", e); } } /** * Check if metadata dictionary has no stream filter * * @param doc * @return */ protected List<ValidationError> checkStreamFilterUsage(PDDocument doc) { List<ValidationError> ve = new ArrayList<ValidationError>(); List<?> filters = doc.getDocumentCatalog().getMetadata().getFilters(); if (filters != null && !filters.isEmpty()) { ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_MAIN, "Using stream filter on metadata dictionary is forbidden")); } return ve; } }