/******************************************************************************* * Copyright 2010 Atos Worldline SAS * * Licensed by Atos Worldline SAS under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * Atos Worldline SAS licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package net.padaf.preflight.xmp; import java.io.IOException; import java.util.ArrayList; import java.util.Calendar; import java.util.Iterator; import java.util.List; import net.padaf.preflight.ValidationConstants; import net.padaf.preflight.ValidationException; import net.padaf.preflight.ValidationResult.ValidationError; import net.padaf.xmpbox.XMPMetadata; import net.padaf.xmpbox.parser.DateConverter; import net.padaf.xmpbox.schema.AdobePDFSchema; import net.padaf.xmpbox.schema.DublinCoreSchema; import net.padaf.xmpbox.schema.XMPBasicSchema; import net.padaf.xmpbox.type.AbstractField; import net.padaf.xmpbox.type.TextType; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentInformation; /** * Class which check if document information available in a document are * synchronized with XMP * * @author Germain Costenobel * */ public class SynchronizedMetaDataValidation { /** * Analyze if Title embedded in Document Information dictionary and in XMP * properties are synchronized * * @param dico * Document Information Dictionary * @param dc * Dublin Core Schema * @param ve * The list of validation errors */ protected void analyzeTitleProperty(PDDocumentInformation dico, DublinCoreSchema dc, List<ValidationError> ve) { String title = dico.getTitle(); if (title != null) { if (dc != null) { // Check the x-default value, if not found, check with the first value // found if (dc.getTitle() != null) { if (dc.getTitleValue("x-default") != null) { if (!dc.getTitleValue("x-default").equals(title)) { ve.add(unsynchronizedMetaDataError("Title")); } } else { // This search of first value is made just to keep compatibility // with lot of PDF documents // which use title without lang definition // REM : MAY we have to delete this option in the future Iterator<AbstractField> it = dc.getTitle().getContainer() .getAllProperties().iterator(); if (it.hasNext()) { AbstractField tmp = it.next(); if (tmp instanceof TextType) { if (!((TextType) tmp).getStringValue().equals(title)) { ve.add(unsynchronizedMetaDataError("Title")); } } else { ve.add(AbsentXMPPropertyError("Title", "Property is badly defined")); } } else { ve .add(AbsentXMPPropertyError("Title", "Property is not defined")); } } } else { ve.add(AbsentXMPPropertyError("Title", "Property is not defined")); } } else { ve.add(AbsentSchemaMetaDataError("Title", "Dublin Core")); } } } /** * Analyze if Author(s) embedded in Document Information dictionary and in XMP * properties are synchronized * * @param dico * Document Information Dictionary * @param dc * Dublin Core Schema * @param ve * The list of validation errors */ protected void analyzeAuthorProperty(PDDocumentInformation dico, DublinCoreSchema dc, List<ValidationError> ve) { String author = dico.getAuthor(); if (author != null) { if (dc != null) { if (dc.getCreator() != null) { if (dc.getCreatorValue().size() != 1) { ve .add(AbsentXMPPropertyError( "Author", "In XMP metadata, Author(s) must be represented by a single entry in a text array (dc:creator) ")); } else { if (dc.getCreatorValue().get(0) == null) { ve.add(AbsentXMPPropertyError("Author", "Property is defined as null")); } else { if (!dc.getCreatorValue().get(0).equals(author)) { ve.add(unsynchronizedMetaDataError("Author")); } } } } else { ve.add(AbsentXMPPropertyError("Author", "Property is not defined in XMP Metadata")); } } else { ve.add(AbsentSchemaMetaDataError("Author", "Dublin Core")); } } } /** * Analyze if Subject(s) embedded in Document Information dictionary and in * XMP properties are synchronized * * @param dico * Document Information Dictionary * @param dc * Dublin Core Schema * @param ve * The list of validation errors */ protected void analyzeSubjectProperty(PDDocumentInformation dico, DublinCoreSchema dc, List<ValidationError> ve) { String subject = dico.getSubject(); if (subject != null) { if (dc != null) { // PDF/A Conformance Erratum (2007) specifies XMP Subject // as a Text type embedded in the dc:description["x-default"]. if (dc.getDescription() != null) { if (dc.getDescriptionValue("x-default") == null) { ve .add(AbsentXMPPropertyError("Subject", "Subject not found in XMP (dc:description[\"x-default\"] not found)")); } else { if (!dc.getDescriptionValue("x-default").equals(subject)) { ve.add(unsynchronizedMetaDataError("Subject")); } } } else { ve.add(AbsentXMPPropertyError("Subject", "Property is defined as null")); } } else { ve.add(AbsentSchemaMetaDataError("Subject", "Dublin Core")); } } } /** * Analyze if Keyword(s) embedded in Document Information dictionary and in * XMP properties are synchronized * * @param dico * Document Information Dictionary * @param pdf * PDF Schema * @param ve * The list of validation errors */ protected void analyzeKeywordsProperty(PDDocumentInformation dico, AdobePDFSchema pdf, List<ValidationError> ve) { String keyword = dico.getKeywords(); if (keyword != null) { if (pdf != null) { if (pdf.getKeywords() == null) { ve.add(AbsentXMPPropertyError("Keywords", "Property is not defined")); } else { if (!pdf.getKeywordsValue().equals(keyword)) { ve.add(unsynchronizedMetaDataError("Keywords")); } } } else { ve.add(AbsentSchemaMetaDataError("Keywords", "PDF")); } } } /** * Analyze if Producer embedded in Document Information dictionary and in XMP * properties are synchronized * * @param dico * Document Information Dictionary * @param pdf * PDF Schema * @param ve * The list of validation errors */ protected void analyzeProducerProperty(PDDocumentInformation dico, AdobePDFSchema pdf, List<ValidationError> ve) { String producer = dico.getProducer(); if (producer != null) { if (pdf != null) { if (pdf.getProducer() == null) { ve.add(AbsentXMPPropertyError("Producer", "Property is not defined")); } else { if (!pdf.getProducerValue().equals(producer)) { ve.add(unsynchronizedMetaDataError("Producer")); } } } else { ve.add(AbsentSchemaMetaDataError("Producer", "PDF")); } } } /** * Analyze if the creator tool embedded in Document Information dictionary and * in XMP properties are synchronized * * @param dico * Document Information Dictionary * @param xmp * XMP Basic Schema * @param ve * The list of validation errors * */ protected void analyzeCreatorToolProperty(PDDocumentInformation dico, XMPBasicSchema xmp, List<ValidationError> ve) { String creatorTool = dico.getCreator(); if (creatorTool != null) { if (xmp != null) { if (xmp.getCreatorTool() == null) { ve.add(AbsentXMPPropertyError("CreatorTool", "Property is not defined")); } else { if (!xmp.getCreatorToolValue().equals(creatorTool)) { ve.add(unsynchronizedMetaDataError("CreatorTool")); } } } else { ve.add(AbsentSchemaMetaDataError("CreatorTool", "PDF")); } } } /** * Analyze if the CreationDate embedded in Document Information dictionary and * in XMP properties are synchronized * * @param dico * Document Information Dictionary * @param xmp * XMP Basic Schema * @param ve * The list of validation errors * @throws ValidationException */ protected void analyzeCreationDateProperty(PDDocumentInformation dico, XMPBasicSchema xmp, List<ValidationError> ve) throws ValidationException { Calendar creationDate; try { creationDate = dico.getCreationDate(); } catch (IOException e) { // If there is an error while converting this property to a date throw formatAccessException("Document Information", "CreationDate", e); } if (creationDate != null) { if (xmp != null) { Calendar xmpCreationDate = xmp.getCreateDateValue(); if (xmpCreationDate == null) { ve.add(AbsentXMPPropertyError("CreationDate", "Property is not defined")); } else { if (!DateConverter.toISO8601(xmpCreationDate).equals( DateConverter.toISO8601(creationDate))) { ve.add(unsynchronizedMetaDataError("CreationDate")); } } } else { ve.add(AbsentSchemaMetaDataError("CreationDate", "Basic XMP")); } } } /** * Analyze if the ModifyDate embedded in Document Information dictionary and * in XMP properties are synchronized * * @param dico * Document Information Dictionary * @param xmp * XMP Basic Schema * @param ve * The list of validation errors * @throws ValidationException */ protected void analyzeModifyDateProperty(PDDocumentInformation dico, XMPBasicSchema xmp, List<ValidationError> ve) throws ValidationException { Calendar modifyDate; try { modifyDate = dico.getModificationDate(); if (modifyDate != null) { if (xmp != null) { Calendar xmpModifyDate = xmp.getModifyDateValue(); if (xmpModifyDate == null) { ve.add(AbsentXMPPropertyError("ModifyDate", "Property is not defined")); } else { if (!DateConverter.toISO8601(xmpModifyDate).equals( DateConverter.toISO8601(modifyDate))) { ve.add(unsynchronizedMetaDataError("ModificationDate")); } } } else { ve.add(AbsentSchemaMetaDataError("ModifyDate", "Basic XMP")); } } } catch (IOException e) { // If there is an error while converting this property to a date throw formatAccessException("Document Information", "ModifyDate", e); } } /** * Check if document information entries and XMP information are synchronized * * @param document * the PDF Document * @param metadata * the XMP MetaData * @return List of validation errors * @throws ValidationException */ public List<ValidationError> validateMetadataSynchronization(PDDocument document, XMPMetadata metadata) throws ValidationException { List<ValidationError> ve = new ArrayList<ValidationError>(); if (document == null) { throw new ValidationException("Document provided is null"); } else { PDDocumentInformation dico = document.getDocumentInformation(); if (metadata == null) { throw new ValidationException("Metadata provided are null"); } else { DublinCoreSchema dc = metadata.getDublinCoreSchema(); // TITLE analyzeTitleProperty(dico, dc, ve); // AUTHOR analyzeAuthorProperty(dico, dc, ve); // SUBJECT analyzeSubjectProperty(dico, dc, ve); AdobePDFSchema pdf = metadata.getAdobePDFSchema(); // KEYWORDS analyzeKeywordsProperty(dico, pdf, ve); // PRODUCER analyzeProducerProperty(dico, pdf, ve); XMPBasicSchema xmp = metadata.getXMPBasicSchema(); // CREATOR TOOL analyzeCreatorToolProperty(dico, xmp, ve); // CREATION DATE analyzeCreationDateProperty(dico, xmp, ve); // MODIFY DATE analyzeModifyDateProperty(dico, xmp, ve); } } return ve; } /** * Return a validationError formatted when a schema has not the expected * prefix * * @param prefFound * @param prefExpected * @param schema * @return */ protected ValidationError UnexpectedPrefixFoundError(String prefFound, String prefExpected, String schema) { StringBuilder sb = new StringBuilder(80); sb.append(schema).append(" found but prefix used is '").append(prefFound) .append("', prefix '").append(prefExpected).append("' is expected."); return new ValidationError( ValidationConstants.ERROR_METADATA_WRONG_NS_PREFIX, sb.toString()); } /** * Return an exception formatted on IOException when accessing metadata * * @param type * type of property (Document Info or XMP) * @param target * the name of the metadata * @param cause * the raised IOException * @return the generated exception */ protected ValidationException formatAccessException(String type, String target, Throwable cause) { StringBuilder sb = new StringBuilder(80); sb.append("Cannot treat ").append(type).append(" ").append(target).append( " property"); return new ValidationException(sb.toString(), cause); } /** * Return an exception formatted on IOException when accessing on metadata * schema * * @param target * the name of the schema * @param cause * the raised IOException * @return the generated exception */ protected ValidationException SchemaAccessException(String target, Throwable cause) { StringBuilder sb = new StringBuilder(80); sb.append("Cannot access to the ").append(target).append(" schema"); return new ValidationException(sb.toString(), cause); } /** * Return a formatted validation error when metadata are not synchronized * * @param target * the concerned property * @return the generated validation error */ protected ValidationError unsynchronizedMetaDataError(String target) { StringBuilder sb = new StringBuilder(80); sb .append(target) .append( " present in the document catalog dictionary doesn't match with XMP information"); return new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH, sb .toString()); } /** * Return a formatted validation error when a specific metadata schema can't * be found * * @param target * the concerned property * @param schema * the XMP schema which can't be found * @return the generated validation error */ protected ValidationError AbsentSchemaMetaDataError(String target, String schema) { StringBuilder sb = new StringBuilder(80); sb .append(target) .append( " present in the document catalog dictionary can't be found in XMP information (") .append(schema).append(" schema not declared)"); return new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH, sb .toString()); } /** * Return a formatted validation error when a specific XMP property can't be * found * * @param target * the concerned property * @param details * comments about the XMP property * @return the generated validation error */ protected ValidationError AbsentXMPPropertyError(String target, String details) { StringBuilder sb = new StringBuilder(80); sb .append(target) .append( " present in the document catalog dictionary can't be found in XMP information (") .append(details).append(")"); return new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH, sb .toString()); } }