// XMPUtil.java example
//
// Explorer
// jabref-master
// - src
package org.jabref.logic.xmp;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;

import javax.xml.transform.TransformerException;

import org.jabref.logic.TypedBibEntry;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.database.BibDatabaseMode;
import org.jabref.model.entry.Author;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;
import org.jabref.model.entry.Month;
import org.jabref.model.strings.StringUtil;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.jempbox.impl.DateConverter;
import org.apache.jempbox.impl.XMLUtil;
import org.apache.jempbox.xmp.XMPMetadata;
import org.apache.jempbox.xmp.XMPSchema;
import org.apache.jempbox.xmp.XMPSchemaDublinCore;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException;
import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
import org.w3c.dom.Document;

/**
 * XMPUtil provides support for reading and writing BibTeX data as XMP metadata
 * in PDF documents.
 */
public class XMPUtil {

    private static final Log LOGGER = LogFactory.getLog(XMPUtil.class);

    // Private constructor: this class only exposes static members and is not meant to be instantiated.
    private XMPUtil() {
    }

    /**
     * Reads the BibTeX entries stored in the PDF with the given file name.
     *
     * Convenience overload: wraps the name in a {@link File} and delegates to
     * {@link #readXMP(File, XMPPreferences)}.
     *
     * @param filename       The name of the PDF file to open.
     * @param xmpPreferences Preferences passed on unchanged to the delegate.
     * @return The entries found in the PDF, or an empty list.
     * @throws IOException If the delegate could not read the file.
     */
    public static List<BibEntry> readXMP(String filename, XMPPreferences xmpPreferences) throws IOException {
        File pdfFile = new File(filename);
        return XMPUtil.readXMP(pdfFile, xmpPreferences);
    }

    /**
     * Writes the given entry into the XMP stream of the PDF with the given file name.
     *
     * Existing BibTeX XMP data is overwritten, other existing metadata is kept.
     * An IOException is raised when the file cannot be read or written, so the
     * user can remove a lock or cancel the operation.
     *
     * Convenience overload: wraps the name in a {@link File} and delegates to
     * {@link #writeXMP(File, BibEntry, BibDatabase, XMPPreferences)}.
     *
     * @param fileName       The name of the PDF file to write to.
     * @param entry          The entry to write.
     * @param database       maybenull An optional database which the given entry belongs to, which will be used to
     *                       resolve strings. If the database is null the strings will not be resolved.
     * @param xmpPreferences Preferences passed on unchanged to the delegate.
     * @throws TransformerException If the entry was malformed or unsupported.
     * @throws IOException          If the file could not be written to or could not be found.
     */
    public static void writeXMP(String fileName, BibEntry entry,
                                BibDatabase database, XMPPreferences xmpPreferences) throws IOException, TransformerException {
        File pdfFile = new File(fileName);
        XMPUtil.writeXMP(pdfFile, entry, database, xmpPreferences);
    }

    /**
     * Reads the BibTeX entries from the XMP stream of the given PDF file.
     *
     * The file is opened as a stream, handed to
     * {@link #readXMP(InputStream, XMPPreferences)} and closed again before returning.
     *
     * @param file           The file to read from.
     * @param xmpPreferences Preferences passed on unchanged to the delegate.
     * @return The entries produced by the stream-based overload.
     * @throws IOException If the file cannot be read, so the user can remove a lock or cancel
     *                     the operation.
     */
    public static List<BibEntry> readXMP(File file, XMPPreferences xmpPreferences) throws IOException {
        try (FileInputStream pdfStream = new FileInputStream(file)) {
            return XMPUtil.readXMP(pdfStream, xmpPreferences);
        }
    }

    /**
     * Loads a PDF document from the given stream and, if it is encrypted, tries to
     * unlock it with the empty string as user password.
     *
     * On success the caller owns the returned document and has to close it. If
     * unlocking fails for any reason, the already loaded document is closed here
     * before the exception leaves this method, so no document is leaked.
     *
     * @param inputStream The stream containing the PDF.
     * @return The loaded (and, if necessary, decrypted) document.
     * @throws IOException If the document cannot be loaded or unlocked.
     */
    public static PDDocument loadWithAutomaticDecryption(InputStream inputStream) throws IOException {
        PDDocument doc = PDDocument.load(inputStream);

        if (doc.isEncrypted()) {
            // try the empty string as user password
            StandardDecryptionMaterial sdm = new StandardDecryptionMaterial("");
            boolean unlocked = false;
            try {
                doc.openProtection(sdm);
                unlocked = true;
            } catch (BadSecurityHandlerException | CryptographyException e) {
                // Pass the exception itself to the logger so the cause is not lost
                LOGGER.error("Cannot handle encrypted PDF: " + e.getMessage(), e);
                throw new EncryptedPdfsNotSupportedException();
            } finally {
                if (!unlocked) {
                    // Nobody receives the document on the failure path, so release it here
                    try {
                        doc.close();
                    } catch (IOException closeFailure) {
                        // Keep the original failure as the one reported to the caller
                        LOGGER.debug("Could not close PDF after failed decryption", closeFailure);
                    }
                }
            }
        }
        return doc;
    }

    /**
     * Reads BibTeX entries from a PDF supplied as an input stream.
     *
     * Sources are tried in this order, each one only if the previous ones yielded nothing:
     * BibTeX XMP schemas, Dublin Core XMP schemas, and finally the plain document information.
     *
     * @param inputStream    The input stream containing the PDF.
     * @param xmpPreferences Preferences handed to the Dublin Core conversion.
     * @return list of BibEntries retrieved from the stream. May be empty, but never null
     * @throws IOException If the PDF cannot be read, so the user can remove a lock or cancel
     *                     the operation.
     */
    public static List<BibEntry> readXMP(InputStream inputStream, XMPPreferences xmpPreferences)
            throws IOException {

        List<BibEntry> found = new LinkedList<>();

        try (PDDocument pdf = loadWithAutomaticDecryption(inputStream)) {
            Optional<XMPMetadata> xmp = XMPUtil.getXMPMetadata(pdf);

            if (xmp.isPresent()) {
                XMPMetadata metadata = xmp.get();

                // First choice: entries stored in the dedicated BibTeX schema
                List<XMPSchema> bibtexSchemas = metadata.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
                for (XMPSchema schema : bibtexSchemas) {
                    BibEntry fromBibtex = ((XMPSchemaBibtex) schema).getBibtexEntry();
                    if (fromBibtex.getType() == null) {
                        fromBibtex.setType(BibEntry.DEFAULT_TYPE);
                    }
                    found.add(fromBibtex);
                }

                // Second choice: fall back to Dublin Core if the BibTeX schema gave nothing
                if (found.isEmpty()) {
                    List<XMPSchema> dublinCoreSchemas = metadata.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
                    for (XMPSchema schema : dublinCoreSchemas) {
                        Optional<BibEntry> converted = XMPUtil.getBibtexEntryFromDublinCore(
                                (XMPSchemaDublinCore) schema, xmpPreferences);
                        if (!converted.isPresent()) {
                            continue;
                        }
                        BibEntry fromDublinCore = converted.get();
                        if (fromDublinCore.getType() == null) {
                            fromDublinCore.setType(BibEntry.DEFAULT_TYPE);
                        }
                        found.add(fromDublinCore);
                    }
                }
            }

            // Last resort: the non-XMP document information
            if (found.isEmpty()) {
                XMPUtil.getBibtexEntryFromDocumentInformation(pdf.getDocumentInformation())
                        .ifPresent(found::add);
            }
        }

        // Hand out the shared empty list when nothing was found at all
        return found.isEmpty() ? Collections.<BibEntry>emptyList() : found;
    }

    /**
     * Reads the BibTeX entries from the PDF at the given path.
     *
     * Convenience overload: converts the path to a {@link File} and delegates to
     * {@link #readXMP(File, XMPPreferences)}.
     *
     * @param filePath       The path of the PDF file to read.
     * @param xmpPreferences Preferences passed on unchanged to the delegate.
     * @return The entries produced by the file-based overload.
     * @throws IOException If the delegate could not read the file.
     */
    public static Collection<BibEntry> readXMP(Path filePath, XMPPreferences xmpPreferences) throws IOException {
        File pdfFile = filePath.toFile();
        return XMPUtil.readXMP(pdfFile, xmpPreferences);
    }

    /**
     * Helper function for retrieving a BibEntry from the
     * PDDocumentInformation in a PDF file.
     *
     * The BibEntry is built by mapping individual fields of the document
     * information to fields of a BibTeX entry: author, title and keywords map
     * to the fields of the same name and the subject maps to the abstract.
     * In addition every dictionary key starting with "bibtex/" is copied; the
     * remainder of the key is used as the field name, except for the type
     * header, which sets the entry type.
     *
     * Custom "bibtex/" keys whose value cannot be read as a string, or whose
     * field name would be empty, are skipped.
     *
     * @param di The document information from which to build a BibEntry.
     * @return The BibTeX entry found in the document information, or an empty
     *         Optional if no field could be set.
     */
    public static Optional<BibEntry> getBibtexEntryFromDocumentInformation(
            PDDocumentInformation di) {

        final String customKeyPrefix = "bibtex/";

        BibEntry entry = new BibEntry();
        entry.setType(BibEntry.DEFAULT_TYPE);

        String s = di.getAuthor();
        if (s != null) {
            entry.setField(FieldName.AUTHOR, s);
        }

        s = di.getTitle();
        if (s != null) {
            entry.setField(FieldName.TITLE, s);
        }

        s = di.getKeywords();
        if (s != null) {
            entry.setField(FieldName.KEYWORDS, s);
        }

        s = di.getSubject();
        if (s != null) {
            entry.setField(FieldName.ABSTRACT, s);
        }

        COSDictionary dict = di.getDictionary();
        for (Map.Entry<COSName, COSBase> o : dict.entrySet()) {
            String key = o.getKey().getName();
            if (!key.startsWith(customKeyPrefix)) {
                continue;
            }

            String value = dict.getString(key);
            String fieldName = key.substring(customKeyPrefix.length());
            if ((value == null) || fieldName.isEmpty()) {
                // No string value or no field name: nothing meaningful to store
                continue;
            }

            if (BibEntry.TYPE_HEADER.equals(fieldName)) {
                entry.setType(value);
            } else {
                entry.setField(fieldName, value);
            }
        }

        // Return empty Optional if no values were found
        return entry.getFieldNames().isEmpty() ? Optional.empty() : Optional.of(entry);
    }

    /**
     * Helper function for retrieving a BibEntry from the Dublin Core metadata
     * in a PDF file.
     *
     * The BibEntry is built by mapping individual Dublin Core properties to
     * BibTeX fields: contributors to editor, creators to author, the first
     * dc:date to year (and month), description to abstract, identifier to DOI,
     * publishers to publisher, rights and source to fields of the same name,
     * subjects to keywords, title to title and the first type to the entry type.
     * Relations of the form "bibtex/&lt;field&gt;/&lt;value&gt;" carry all remaining fields.
     *
     * @param dcSchema       The Dublin Core schema from which to build a BibEntry.
     * @param xmpPreferences Supplies the keyword separator used when adding the subjects.
     * @return The BibTeX entry found in the schema, or an empty Optional if the
     *         resulting entry has no fields.
     */
    public static Optional<BibEntry> getBibtexEntryFromDublinCore(XMPSchemaDublinCore dcSchema,
                                                                  XMPPreferences xmpPreferences) {

        final String relationPrefix = "bibtex/";
        BibEntry result = new BibEntry();

        // dc:contributor -> editor
        List<String> editors = dcSchema.getContributors();
        if ((editors != null) && !editors.isEmpty()) {
            result.setField(FieldName.EDITOR, String.join(" and ", editors));
        }

        // dc:creator -> author
        List<String> authors = dcSchema.getCreators();
        if ((authors != null) && !authors.isEmpty()) {
            result.setField(FieldName.AUTHOR, String.join(" and ", authors));
        }

        // dc:date -> year (+ month); only the first date is considered
        List<String> dateValues = dcSchema.getSequenceList("dc:date");
        if ((dateValues != null) && !dateValues.isEmpty()) {
            String firstDate = dateValues.get(0).trim();
            Calendar parsed;
            try {
                parsed = DateConverter.toCalendar(firstDate);
            } catch (IOException ignored) {
                // An unparseable date simply contributes no year/month
                parsed = null;
            }
            if (parsed != null) {
                result.setField(FieldName.YEAR, String.valueOf(parsed.get(Calendar.YEAR)));
                // A date string longer than four characters holds more than a bare year
                if (firstDate.length() > 4) {
                    Month.getMonthByNumber(parsed.get(Calendar.MONTH) + 1).ifPresent(result::setMonth);
                }
            }
        }

        // dc:description -> abstract
        String description = dcSchema.getDescription();
        if (description != null) {
            result.setField(FieldName.ABSTRACT, description);
        }

        // dc:identifier -> doi
        String identifier = dcSchema.getIdentifier();
        if (identifier != null) {
            result.setField(FieldName.DOI, identifier);
        }

        // dc:publisher -> publisher
        List<String> publisherNames = dcSchema.getPublishers();
        if ((publisherNames != null) && !publisherNames.isEmpty()) {
            result.setField(FieldName.PUBLISHER, String.join(" and ", publisherNames));
        }

        // dc:relation -> any other field (including the bibtexkey);
        // the relation attribute is abused to store "bibtex/<field>/<value>"
        List<String> relations = dcSchema.getRelationships();
        if (relations != null) {
            for (String relation : relations) {
                if (!relation.startsWith(relationPrefix)) {
                    continue;
                }
                String packed = relation.substring(relationPrefix.length());
                int slash = packed.indexOf('/');
                if (slash != -1) {
                    result.setField(packed.substring(0, slash), packed.substring(slash + 1));
                }
            }
        }

        // dc:rights -> rights
        String rights = dcSchema.getRights();
        if (rights != null) {
            result.setField("rights", rights);
        }

        // dc:source -> source
        String source = dcSchema.getSource();
        if (source != null) {
            result.setField("source", source);
        }

        // dc:subject -> keywords
        List<String> keywords = dcSchema.getSubjects();
        if (keywords != null) {
            result.addKeywords(keywords, xmpPreferences.getKeywordSeparator());
        }

        // dc:title -> title
        String title = dcSchema.getTitle();
        if (title != null) {
            result.setField(FieldName.TITLE, title);
        }

        // dc:type -> entry type; only the first type is considered
        List<String> types = dcSchema.getTypes();
        if ((types != null) && !types.isEmpty()) {
            String firstType = types.get(0);
            if (firstType != null) {
                result.setType(firstType);
            }
        }

        if (result.getFieldNames().isEmpty()) {
            return Optional.empty();
        }
        return Optional.of(result);
    }

    /**
     * Writes the given entry into the XMP stream of the given PDF file.
     *
     * Existing BibTeX XMP data is overwritten, other existing metadata is kept.
     * An IOException is raised when the file cannot be read or written, so the
     * user can remove a lock or cancel the operation.
     *
     * Convenience overload: wraps the entry in a one-element list and delegates
     * to the collection-based writeXMP overload.
     *
     * @param file           The file to write to.
     * @param entry          The entry to write.
     * @param database       maybenull An optional database which the given entry belongs to, which will be used to
     *                       resolve strings. If the database is null the strings will not be resolved.
     * @param xmpPreferences Preferences passed on unchanged to the delegate.
     * @throws TransformerException If the entry was malformed or unsupported.
     * @throws IOException          If the file could not be written to or could not be found.
     */
    public static void writeXMP(File file, BibEntry entry,
                                BibDatabase database, XMPPreferences xmpPreferences) throws IOException, TransformerException {
        List<BibEntry> singleEntry = new LinkedList<>(Collections.singletonList(entry));
        // NOTE(review): the meaning of the boolean flag is defined by the delegate, which is not visible here
        XMPUtil.writeXMP(file, singleEntry, database, true, xmpPreferences);
    }

    /**
     * Writes the given entries as XMP metadata to the given stream.
     *
     * One BibTeX schema is added per entry; the data written to the stream is a
     * complete XMP document.
     *
     * @param bibtexEntries  The entries to write XMP metadata for.
     * @param database       maybenull An optional database which the given entries belong to, which will be used
     *                       to resolve strings. If the database is null the strings will not be resolved.
     * @param outputStream   The stream the XMP document is saved to.
     * @param xmpPreferences Preferences handed to each schema when the entry is set.
     * @throws TransformerException Thrown if the entries could not be transformed to XMP.
     * @throws IOException          Thrown if an IOException occurred while writing to the stream.
     * @see #toXMP(Collection, BibDatabase, XMPPreferences) for the variant returning a String.
     */
    private static void toXMP(Collection<BibEntry> bibtexEntries,
                              BibDatabase database, OutputStream outputStream, XMPPreferences xmpPreferences)
            throws IOException, TransformerException {

        // Without a database there is nothing to resolve against
        Collection<BibEntry> entriesToWrite = (database == null)
                ? bibtexEntries
                : database.resolveForStrings(bibtexEntries, true);

        XMPMetadata xmpDocument = new XMPMetadata();

        for (BibEntry current : entriesToWrite) {
            XMPSchemaBibtex bibtexSchema = new XMPSchemaBibtex(xmpDocument);
            xmpDocument.addSchema(bibtexSchema);
            bibtexSchema.setBibtexEntry(current, xmpPreferences);
        }

        xmpDocument.save(outputStream);
    }

    /**
     * Convenience method for {@code toXMP(Collection, BibDatabase, OutputStream, XMPPreferences)}
     * returning a String containing the XMP metadata of the given collection of entries.
     *
     * The resulting metadata string is wrapped as a complete XMP document.
     *
     * The serialized bytes are decoded explicitly as UTF-8 instead of with the
     * platform default charset, so that non-ASCII field content does not depend
     * on the machine the code runs on.
     * NOTE(review): this assumes the XMP document is serialized as UTF-8 — confirm against JempBox.
     *
     * @param bibtexEntries  The entries to return XMP metadata for.
     * @param database       An optional database which the given entries belong to, which will be used
     *                       to resolve strings. If the database is null the strings will not be resolved.
     * @param xmpPreferences Preferences passed on unchanged to the stream-based overload.
     * @return The XMP representation of the given entries.
     * @throws TransformerException Thrown if the entries could not be transformed to XMP.
     */
    public static String toXMP(Collection<BibEntry> bibtexEntries,
                               BibDatabase database, XMPPreferences xmpPreferences) throws TransformerException {
        try {
            ByteArrayOutputStream bs = new ByteArrayOutputStream();
            XMPUtil.toXMP(bibtexEntries, database, bs, xmpPreferences);
            // The checked UnsupportedEncodingException is an IOException and is covered by the catch below
            return bs.toString("UTF-8");
        } catch (IOException e) {
            throw new TransformerException(e);
        }
    }

    /**
     * Reads the XMP metadata from a PDF supplied as an input stream.
     *
     * The PDF document opened from the stream is closed before returning.
     *
     * @param inputStream The inputStream representing a PDF-file to read the XMPMetadata from.
     * @return The XMPMetadata object found in the PDF, or an empty Optional if there is none.
     * @throws IOException If the PDF cannot be loaded or its metadata cannot be read.
     */
    private static Optional<XMPMetadata> readRawXMP(InputStream inputStream) throws IOException {
        try (PDDocument pdf = loadWithAutomaticDecryption(inputStream)) {
            Optional<XMPMetadata> metadata = XMPUtil.getXMPMetadata(pdf);
            return metadata;
        }
    }

    /**
     * Extracts and parses the XMP metadata stored in the catalog of the given document.
     *
     * The BibTeX namespace is registered on the parsed metadata so that it maps to
     * {@code XMPSchemaBibtex}.
     *
     * @param document The PDF document to inspect.
     * @return the parsed metadata, or an empty Optional if no metadata has been found
     * @throws IOException If the metadata stream cannot be read or parsed.
     */
    private static Optional<XMPMetadata> getXMPMetadata(PDDocument document) throws IOException {
        PDMetadata rawMetadata = document.getDocumentCatalog().getMetadata();

        if (rawMetadata != null) {
            Document xmlDocument;
            try (InputStream xmpStream = rawMetadata.createInputStream()) {
                xmlDocument = XMLUtil.parse(xmpStream);
            }
            XMPMetadata parsed = new XMPMetadata(xmlDocument);
            parsed.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);
            return Optional.of(parsed);
        }

        return Optional.empty();
    }

    /**
     * Reads the XMP metadata from the given PDF file, closing the file
     * afterwards.
     *
     * @param file The file to read the XMPMetadata from.
     * @return The XMPMetadata object found in the file, or an empty Optional if there is none.
     * @throws IOException If the file cannot be opened or read.
     */
    public static Optional<XMPMetadata> readRawXMP(File file) throws IOException {
        try (FileInputStream pdfStream = new FileInputStream(file)) {
            Optional<XMPMetadata> metadata = XMPUtil.readRawXMP(pdfStream);
            return metadata;
        }
    }

    /**
     * Copies the content of a BibTeX entry into the given Dublin Core schema.
     *
     * Field mapping (BibTeX field -> Dublin Core property):
     * <ul>
     * <li>editor -> dc:contributor (bag ProperName), one value per editor</li>
     * <li>author -> dc:creator (seq ProperName), one value per author</li>
     * <li>year + month -> dc:date (seq Date)</li>
     * <li>abstract -> dc:description (Lang Alt)</li>
     * <li>doi -> dc:identifier (Text)</li>
     * <li>publisher -> dc:publisher (bag ProperName)</li>
     * <li>keywords -> dc:subject (bag Text), split at commas</li>
     * <li>title -> dc:title (Lang Alt)</li>
     * <li>every other field (including the bibtex key) -> dc:relation as "bibtex/&lt;field&gt;/&lt;value&gt;"</li>
     * </ul>
     * dc:format is always set to "application/pdf" and dc:type receives the
     * display name of the entry type. dc:coverage, dc:language, dc:rights and
     * dc:source are not mapped.
     *
     * Fields listed in the XMP privacy filter are skipped when the filter is enabled.
     *
     * @param dcSchema       The schema to fill.
     * @param entry          The entry to take the values from.
     * @param database       maybenull An optional database used to resolve strings. If the database is null
     *                       the strings will not be resolved.
     * @param xmpPreferences Supplies the privacy filter settings.
     */
    private static void writeToDCSchema(XMPSchemaDublinCore dcSchema, BibEntry entry, BibDatabase database,
                                        XMPPreferences xmpPreferences) {

        BibEntry resolvedEntry;
        if (database == null) {
            resolvedEntry = entry;
        } else {
            resolvedEntry = database.resolveForStrings(entry, false);
        }

        // Query privacy filter settings
        boolean useXmpPrivacyFilter = xmpPreferences.isUseXMPPrivacyFilter();
        // Fields for which not to write XMP data later on:
        Set<String> filters = new TreeSet<>(xmpPreferences.getXmpPrivacyFilter());

        // Set all the values including key and entryType
        for (Entry<String, String> field : resolvedEntry.getFieldMap().entrySet()) {
            String fieldName = field.getKey();
            String fieldValue = field.getValue();

            if (useXmpPrivacyFilter && filters.contains(fieldName)) {
                continue;
            }

            if (FieldName.EDITOR.equals(fieldName)) {
                // Editor -> dc:contributor: contributors to the resource other than the authors
                for (Author author : AuthorList.parse(fieldValue).getAuthors()) {
                    dcSchema.addContributor(author.getFirstLast(false));
                }
            } else if (FieldName.AUTHOR.equals(fieldName)) {
                // Author -> dc:creator: the authors of the resource, in order of precedence
                for (Author author : AuthorList.parse(fieldValue).getAuthors()) {
                    dcSchema.addCreator(author.getFirstLast(false));
                }
            } else if (FieldName.MONTH.equals(fieldName)) {
                // Dealt with in year
            } else if (FieldName.YEAR.equals(fieldName)) {
                // Year + Month -> dc:date. The date is taken from the resolved entry so that
                // it is built from the same (string-resolved) values as all other fields.
                resolvedEntry.getPublicationDate()
                        .ifPresent(publicationDate -> dcSchema.addSequenceValue("dc:date", publicationDate));
            } else if (FieldName.ABSTRACT.equals(fieldName)) {
                // Abstract -> dc:description: a textual description of the content of the resource
                dcSchema.setDescription(fieldValue);
            } else if (FieldName.DOI.equals(fieldName)) {
                // DOI -> dc:identifier: unique identifier of the resource
                dcSchema.setIdentifier(fieldValue);
            } else if (FieldName.PUBLISHER.equals(fieldName)) {
                // Publisher -> dc:publisher
                dcSchema.addPublisher(fieldValue);
            } else if (FieldName.KEYWORDS.equals(fieldName)) {
                // Keywords -> dc:subject: one descriptive phrase or keyword per value
                for (String keyword : fieldValue.split(",")) {
                    dcSchema.addSubject(keyword.trim());
                }
            } else if (FieldName.TITLE.equals(fieldName)) {
                // Title -> dc:title: the name by which the resource is formally known
                dcSchema.setTitle(fieldValue);
            } else {
                // All others (including the bibtex key) get packaged in the relation attribute
                dcSchema.addRelation("bibtex/" + fieldName + '/' + fieldValue);
            }
        }

        // dc:format: the file format used when saving the resource
        dcSchema.setFormat("application/pdf");

        // entrytype -> dc:type: a document type; for example, novel, poem, or working paper
        TypedBibEntry typedEntry = new TypedBibEntry(entry, BibDatabaseMode.BIBTEX);
        String o = typedEntry.getTypeForDisplay();
        if (!o.isEmpty()) {
            dcSchema.addType(o);
        }
    }

    /**
     * Writes the given BibTeX entry as a Dublin Core XMP schema into the document.
     *
     * Convenience overload: wraps the entry in a one-element list and delegates to
     * the collection-based overload, which removes the Dublin Core schemas already
     * present in the document before adding the new one.
     *
     * @param document       The pdf document to write to.
     * @param entry          The BibTeX entry that is written as a schema.
     * @param database       maybenull An optional database which the given BibTeX entries belong to, which will be
     *                       used to resolve strings. If the database is null the strings will not be resolved.
     * @param xmpPreferences Preferences passed on unchanged to the delegate.
     * @throws IOException          If the delegate fails to read or write the metadata.
     * @throws TransformerException If the delegate fails to serialize the metadata.
     */
    public static void writeDublinCore(PDDocument document, BibEntry entry,
                                       BibDatabase database, XMPPreferences xmpPreferences) throws IOException, TransformerException {

        List<BibEntry> singleEntry = new ArrayList<>(Collections.singletonList(entry));
        XMPUtil.writeDublinCore(document, singleEntry, database, xmpPreferences);
    }

    /**
     * Try to write the given BibTeX entries as Dublin Core XMP schemas.
     *
     * Existing Dublin Core schemas in the document are removed; any other
     * metadata already present in the document is kept. The updated metadata
     * is serialized and set as the new metadata stream of the document catalog.
     *
     * @param document       The pdf document to write to.
     * @param entries        The BibTeX entries that are written as schemas
     * @param database       maybenull An optional database which the given BibTeX entries belong to, which will be
     *                       used to resolve strings. If the database is null the strings will not be resolved.
     * @param xmpPreferences Preferences handed on to the per-entry schema writer.
     * @throws IOException          If the existing metadata cannot be read or parsed.
     * @throws TransformerException If the updated metadata cannot be serialized.
     */
    private static void writeDublinCore(PDDocument document,
                                        Collection<BibEntry> entries, BibDatabase database, XMPPreferences xmpPreferences)
            throws IOException, TransformerException {

        Collection<BibEntry> resolvedEntries;
        if (database == null) {
            resolvedEntries = entries;
        } else {
            resolvedEntries = database.resolveForStrings(entries, false);
        }

        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();

        XMPMetadata meta;
        if (metaRaw == null) {
            meta = new XMPMetadata();
        } else {
            // Close the metadata stream once parsed, as getXMPMetadata does
            try (InputStream rawStream = metaRaw.createInputStream()) {
                meta = new XMPMetadata(XMLUtil.parse(rawStream));
            }
        }

        // Remove all current Dublin-Core schemas
        List<XMPSchema> schemas = meta
                .getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
        for (XMPSchema schema : schemas) {
            schema.getElement().getParentNode().removeChild(schema.getElement());
        }

        for (BibEntry entry : resolvedEntries) {
            XMPSchemaDublinCore dcSchema = new XMPSchemaDublinCore(meta);
            // Entries are already resolved above, hence no database is passed on
            XMPUtil.writeToDCSchema(dcSchema, entry, null, xmpPreferences);
            meta.addSchema(dcSchema);
        }

        // Save to stream and then input that stream to the PDF
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        meta.save(os);
        ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
        PDMetadata metadataStream = new PDMetadata(document, is, false);
        catalog.setMetadata(metadataStream);
    }

    /**
     * Writes the fields of the given BibTeX entry into the Document Information
     * (the properties) of the PDF.
     *
     * The author, title, keywords and abstract fields go to the author, title,
     * keywords and subject properties; every other field is stored as a custom
     * metadata value named "bibtex/" followed by the field name. Only properties
     * for fields present in the entry are touched, so existing values are
     * overridden only if the entry has the corresponding field set.
     *
     * If the privacy filter is enabled, a field listed in the filter is written
     * as null instead of its content. The entry type is always stored under
     * "bibtex/entrytype".
     *
     * @param document       The pdf document to write to.
     * @param entry          The BibTeX entry that is written into the PDF properties.
     * @param database       maybenull An optional database which the given entry belongs to, which will be used to
     *                       resolve strings. If the database is null the strings will not be resolved.
     * @param xmpPreferences Supplies the privacy filter switch and the names of the filtered fields.
     */
    private static void writeDocumentInformation(PDDocument document,
                                                 BibEntry entry, BibDatabase database, XMPPreferences xmpPreferences) {

        PDDocumentInformation info = document.getDocumentInformation();

        // Strings can only be resolved when a database is available
        BibEntry source = (database == null) ? entry : database.resolveForStrings(entry, false);

        // Privacy filter settings: whether it is active and which fields it hides
        boolean filterActive = xmpPreferences.isUseXMPPrivacyFilter();
        Set<String> hiddenFields = new TreeSet<>(xmpPreferences.getXmpPrivacyFilter());

        for (Entry<String, String> pair : source.getFieldMap().entrySet()) {
            String name = pair.getKey();
            String content = pair.getValue();

            // A hidden field is erased (written as null) instead of being added
            String value = (filterActive && hiddenFields.contains(name)) ? null : content;

            if (FieldName.AUTHOR.equals(name)) {
                info.setAuthor(value);
            } else if (FieldName.TITLE.equals(name)) {
                info.setTitle(value);
            } else if (FieldName.KEYWORDS.equals(name)) {
                info.setKeywords(value);
            } else if (FieldName.ABSTRACT.equals(name)) {
                // The abstract is kept in the subject property
                info.setSubject(value);
            } else {
                info.setCustomMetadataValue("bibtex/" + name, value);
            }
        }

        String entryType = StringUtil.capitalizeFirst(source.getType());
        info.setCustomMetadataValue("bibtex/entrytype", entryType);
    }

    /**
     * Try to write the given BibTeX entries in the XMP-stream of the given
     * PDF-file.
     *
     * Throws an IOException if the file cannot be read or written, so the user
     * can remove a lock or cancel the operation.
     *
     * The method will overwrite existing BibTeX-XMP-data, but keep other
     * existing metadata. The PDF document properties and the Dublin Core
     * schemas are only written when {@code writePDFInfo} is true and exactly
     * one entry is given.
     *
     * @param file           The file to write the entries to.
     * @param bibtexEntries  The entries to write to the file.
     * @param database       maybenull An optional database which the given bibtex entries belong to, which will be used
     *                       to resolve strings. If the database is null the strings will not be resolved.
     * @param writePDFInfo   Write information also in PDF document properties
     * @param xmpPreferences Preferences handed on to the document information, Dublin Core and BibTeX schema writers.
     * @throws TransformerException If the entry was malformed or unsupported, or if saving the document failed with a
     *                              COSVisitorException (which is wrapped).
     * @throws IOException          If the file could not be written to or could not be found. An
     *                              EncryptedPdfsNotSupportedException is thrown for encrypted documents.
     */
    public static void writeXMP(File file,
                                Collection<BibEntry> bibtexEntries, BibDatabase database,
                                boolean writePDFInfo, XMPPreferences xmpPreferences) throws IOException, TransformerException {

        Collection<BibEntry> resolvedEntries;
        if (database == null) {
            resolvedEntries = bibtexEntries;
        } else {
            resolvedEntries = database.resolveForStrings(bibtexEntries, false);
        }

        try (PDDocument document = PDDocument.load(file.getAbsoluteFile())) {
            if (document.isEncrypted()) {
                throw new EncryptedPdfsNotSupportedException();
            }

            if (writePDFInfo && (resolvedEntries.size() == 1)) {
                // The entries are already resolved here, so no database is passed on
                XMPUtil.writeDocumentInformation(document, resolvedEntries
                        .iterator().next(), null, xmpPreferences);
                XMPUtil.writeDublinCore(document, resolvedEntries, null, xmpPreferences);
            }

            PDDocumentCatalog catalog = document.getDocumentCatalog();
            PDMetadata metaRaw = catalog.getMetadata();

            XMPMetadata meta;
            if (metaRaw == null) {
                meta = new XMPMetadata();
            } else {
                // Close the raw metadata stream once it has been parsed
                try (InputStream rawMetadata = metaRaw.createInputStream()) {
                    meta = new XMPMetadata(XMLUtil.parse(rawMetadata));
                }
            }
            meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE,
                    XMPSchemaBibtex.class);

            // Remove all current Bibtex-schemas
            List<XMPSchema> schemas = meta
                    .getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
            for (XMPSchema schema : schemas) {
                schema.getElement().getParentNode().removeChild(schema.getElement());
            }

            // Add one fresh Bibtex-schema per entry
            for (BibEntry e : resolvedEntries) {
                XMPSchemaBibtex bibtex = new XMPSchemaBibtex(meta);
                meta.addSchema(bibtex);
                bibtex.setBibtexEntry(e, xmpPreferences);
            }

            // Save to stream and then input that stream to the PDF
            ByteArrayOutputStream os = new ByteArrayOutputStream();
            meta.save(os);
            ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
            PDMetadata metadataStream = new PDMetadata(document, is, false);
            catalog.setMetadata(metadataStream);

            // Save the document back to the file it was loaded from
            try {
                document.save(file.getAbsolutePath());
            } catch (COSVisitorException e) {
                LOGGER.debug("Could not write XMP metadata", e);
                throw new TransformerException("Could not write XMP metadata: " + e.getLocalizedMessage(), e);
            }
        }
    }

    /**
     * Opens the file at the given path for reading and checks it with
     * {@link #hasMetadata(InputStream, XMPPreferences)}.
     *
     * @param path           The path of the PDF to read.
     * @param xmpPreferences Passed on unchanged to the stream-based check.
     * @return the result of the stream-based check, or false if an IOException
     *         occurs (the exception is logged as an error).
     */
    public static boolean hasMetadata(Path path, XMPPreferences xmpPreferences) {
        boolean found;
        try (InputStream pdfStream = Files.newInputStream(path, StandardOpenOption.READ)) {
            found = XMPUtil.hasMetadata(pdfStream, xmpPreferences);
        } catch (IOException ioFailure) {
            LOGGER.error("XMP reading failed", ioFailure);
            found = false;
        }
        return found;
    }

    /**
     * Tries to read XMP metadata from the given stream and reports whether at
     * least one entry was found.
     *
     * Caution: This method is as expensive as reading the actual metadata
     * itself from the PDF, because it does exactly that.
     *
     * An encrypted PDF is logged at info level and a read failure at error
     * level; both are reported as "no metadata".
     *
     * @param inputStream    The inputStream to read the PDF from.
     * @param xmpPreferences Passed on unchanged to the XMP reader.
     * @return whether a BibEntry was found in the given PDF.
     */
    public static boolean hasMetadata(InputStream inputStream, XMPPreferences xmpPreferences) {
        boolean found;
        try {
            found = !XMPUtil.readXMP(inputStream, xmpPreferences).isEmpty();
        } catch (EncryptedPdfsNotSupportedException encrypted) {
            LOGGER.info("Encryption not supported by XMPUtil");
            found = false;
        } catch (IOException ioFailure) {
            LOGGER.error("XMP reading failed", ioFailure);
            found = false;
        }
        return found;
    }
}