package org.jabref.logic.xmp;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import javax.xml.transform.TransformerException;
import org.jabref.logic.TypedBibEntry;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.database.BibDatabaseMode;
import org.jabref.model.entry.Author;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;
import org.jabref.model.entry.Month;
import org.jabref.model.strings.StringUtil;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.jempbox.impl.DateConverter;
import org.apache.jempbox.impl.XMLUtil;
import org.apache.jempbox.xmp.XMPMetadata;
import org.apache.jempbox.xmp.XMPSchema;
import org.apache.jempbox.xmp.XMPSchemaDublinCore;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException;
import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
import org.w3c.dom.Document;
/**
* XMPUtil provides support for reading and writing BibTeX data as XMP metadata
* in PDF documents.
*/
public class XMPUtil {
// Logger shared by the static helpers of this class.
private static final Log LOGGER = LogFactory.getLog(XMPUtil.class);
// Private constructor: this class only offers static methods and is not meant to be instantiated.
private XMPUtil() {
}
/**
 * Reads the BibTeX entries stored in the metadata of the PDF with the given file name.
 *
 * Convenience overload that wraps the name in a File and delegates to
 * readXMP(File, XMPPreferences).
 *
 * @param filename       The name of the PDF file to read.
 * @param xmpPreferences The XMP preferences handed on to the reader.
 * @return The BibTeX entries found in the PDF, or an empty list.
 * @throws IOException If the file cannot be read.
 */
public static List<BibEntry> readXMP(String filename, XMPPreferences xmpPreferences) throws IOException {
    File pdfFile = new File(filename);
    return XMPUtil.readXMP(pdfFile, xmpPreferences);
}
/**
 * Writes the given BibTeX entry into the metadata of the PDF with the given file name.
 *
 * Convenience overload that wraps the name in a File and delegates to
 * writeXMP(File, BibEntry, BibDatabase, XMPPreferences).
 *
 * An IOException is thrown if the file cannot be read or written, so the user
 * can remove a lock or cancel the operation.
 *
 * @param fileName       The name of the PDF file to write to.
 * @param entry          The entry to write.
 * @param database       maybenull An optional database which the given entry belongs to, which will be used to
 *                       resolve strings. If the database is null the strings will not be resolved.
 * @param xmpPreferences The XMP preferences handed on to the writer.
 * @throws TransformerException If the entry was malformed or unsupported.
 * @throws IOException          If the file could not be written to or could not be found.
 */
public static void writeXMP(String fileName, BibEntry entry,
        BibDatabase database, XMPPreferences xmpPreferences) throws IOException, TransformerException {
    File pdfFile = new File(fileName);
    XMPUtil.writeXMP(pdfFile, entry, database, xmpPreferences);
}
/**
 * Reads the BibTeX entries stored in the metadata of the given PDF file.
 *
 * The file is opened as a stream, handed to readXMP(InputStream, XMPPreferences)
 * and closed again before this method returns.
 *
 * @param file           The PDF file to read from.
 * @param xmpPreferences The XMP preferences handed on to the reader.
 * @return The BibTeX entries found in the file, or an empty list.
 * @throws IOException If the file cannot be read, so the user can remove a lock or cancel
 *                     the operation.
 */
public static List<BibEntry> readXMP(File file, XMPPreferences xmpPreferences) throws IOException {
    try (FileInputStream pdfStream = new FileInputStream(file)) {
        return XMPUtil.readXMP(pdfStream, xmpPreferences);
    }
}
/**
 * Loads a PDF document from the given stream and, if it is encrypted, tries to
 * unlock it with the empty string as user password.
 *
 * If the document cannot be unlocked it is closed again before the exception is
 * propagated, so a failed attempt does not leave an open document behind.
 *
 * @param inputStream The stream containing the PDF.
 * @return The loaded (and, if necessary, decrypted) document. The caller is responsible for closing it.
 * @throws EncryptedPdfsNotSupportedException If the document is encrypted and cannot be opened with the
 *                                            empty password.
 * @throws IOException                        If the document cannot be read.
 */
public static PDDocument loadWithAutomaticDecryption(InputStream inputStream) throws IOException {
    PDDocument doc = PDDocument.load(inputStream);
    if (!doc.isEncrypted()) {
        return doc;
    }
    boolean unlocked = false;
    try {
        // try the empty string as user password
        doc.openProtection(new StandardDecryptionMaterial(""));
        unlocked = true;
        return doc;
    } catch (BadSecurityHandlerException | CryptographyException e) {
        // Pass the exception to the logger as well so the cause is not lost
        LOGGER.error("Cannot handle encrypted PDF: " + e.getMessage(), e);
        throw new EncryptedPdfsNotSupportedException();
    } finally {
        if (!unlocked) {
            // Nobody else holds a reference to the document on the failure path, so release it here
            try {
                doc.close();
            } catch (IOException closeFailure) {
                // Do not mask the exception that is already propagating
                LOGGER.debug("Could not close PDF after failed decryption", closeFailure);
            }
        }
    }
}
/**
 * Reads BibTeX entries from the metadata of the PDF contained in the given stream.
 *
 * Three sources are consulted, each one only if the previous yielded nothing:
 * the BibTeX XMP schemas, the Dublin Core XMP schemas, and finally the
 * document information dictionary of the PDF.
 *
 * @param inputStream    The stream containing the PDF to read from.
 * @param xmpPreferences The XMP preferences used when converting Dublin Core data.
 * @return list of BibEntries retrieved from the stream. May be empty, but never null
 * @throws IOException If the PDF cannot be read, so the user can remove a lock or cancel
 *                     the operation.
 */
public static List<BibEntry> readXMP(InputStream inputStream, XMPPreferences xmpPreferences)
        throws IOException {
    List<BibEntry> entries = new LinkedList<>();
    try (PDDocument document = loadWithAutomaticDecryption(inputStream)) {
        Optional<XMPMetadata> metadata = XMPUtil.getXMPMetadata(document);
        if (metadata.isPresent()) {
            // Preferred source: schemas in the BibTeX namespace
            List<XMPSchema> bibtexSchemas = metadata.get().getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
            for (XMPSchema schema : bibtexSchemas) {
                BibEntry fromBibtexSchema = ((XMPSchemaBibtex) schema).getBibtexEntry();
                if (fromBibtexSchema.getType() == null) {
                    fromBibtexSchema.setType(BibEntry.DEFAULT_TYPE);
                }
                entries.add(fromBibtexSchema);
            }
            // Fallback: Dublin Core schemas, only if no BibTeX schema produced an entry
            if (entries.isEmpty()) {
                List<XMPSchema> dublinCoreSchemas = metadata.get()
                        .getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
                for (XMPSchema schema : dublinCoreSchemas) {
                    Optional<BibEntry> converted = XMPUtil.getBibtexEntryFromDublinCore(
                            (XMPSchemaDublinCore) schema, xmpPreferences);
                    if (!converted.isPresent()) {
                        continue;
                    }
                    BibEntry fromDublinCore = converted.get();
                    if (fromDublinCore.getType() == null) {
                        fromDublinCore.setType(BibEntry.DEFAULT_TYPE);
                    }
                    entries.add(fromDublinCore);
                }
            }
        }
        // Last resort: the non-XMP document information of the PDF
        if (entries.isEmpty()) {
            PDDocumentInformation documentInformation = document.getDocumentInformation();
            XMPUtil.getBibtexEntryFromDocumentInformation(documentInformation).ifPresent(entries::add);
        }
    }
    // Hand out the shared empty list if no metadata was found at all
    return entries.isEmpty() ? Collections.emptyList() : entries;
}
/**
 * Reads the BibTeX entries stored in the metadata of the PDF at the given path.
 *
 * Convenience overload that converts the path to a File and delegates to
 * readXMP(File, XMPPreferences).
 *
 * @param filePath       The path of the PDF file to read.
 * @param xmpPreferences The XMP preferences handed on to the reader.
 * @return The BibTeX entries found in the file, possibly empty.
 * @throws IOException If the file cannot be read.
 */
public static Collection<BibEntry> readXMP(Path filePath, XMPPreferences xmpPreferences) throws IOException {
    File pdfFile = filePath.toFile();
    return readXMP(pdfFile, xmpPreferences);
}
/**
 * Helper function for retrieving a BibEntry from the
 * PDDocumentInformation in a PDF file.
 *
 * The BibEntry is built by mapping individual fields of the document
 * information to BibTeX fields: author -> author, title -> title,
 * keywords -> keywords and subject -> abstract. In addition, every
 * dictionary entry whose key starts with "bibtex/" is copied: the part of the
 * key after the prefix becomes the field name, except for the type header key,
 * which sets the entry type instead.
 *
 * Dictionary entries under "bibtex/" for which no string value can be read are
 * skipped instead of being passed on as null.
 *
 * @param di The document information from which to build a BibEntry.
 * @return The BibTeX entry found in the document information, or an empty Optional if no field was found.
 */
public static Optional<BibEntry> getBibtexEntryFromDocumentInformation(
        PDDocumentInformation di) {
    BibEntry entry = new BibEntry();
    entry.setType(BibEntry.DEFAULT_TYPE);
    String s = di.getAuthor();
    if (s != null) {
        entry.setField(FieldName.AUTHOR, s);
    }
    s = di.getTitle();
    if (s != null) {
        entry.setField(FieldName.TITLE, s);
    }
    s = di.getKeywords();
    if (s != null) {
        entry.setField(FieldName.KEYWORDS, s);
    }
    s = di.getSubject();
    if (s != null) {
        entry.setField(FieldName.ABSTRACT, s);
    }
    COSDictionary dict = di.getDictionary();
    for (Map.Entry<COSName, COSBase> o : dict.entrySet()) {
        String key = o.getKey().getName();
        if (!key.startsWith("bibtex/")) {
            continue;
        }
        String value = dict.getString(key);
        if (value == null) {
            // getString yields null when the entry is not a string object; there is
            // nothing sensible to store in that case, so keep what we have so far.
            continue;
        }
        key = key.substring("bibtex/".length());
        if (BibEntry.TYPE_HEADER.equals(key)) {
            entry.setType(value);
        } else {
            entry.setField(key, value);
        }
    }
    // Return empty Optional if no values were found
    return entry.getFieldNames().isEmpty() ? Optional.empty() : Optional.of(entry);
}
/**
 * Helper function for retrieving a BibEntry from the Dublin Core metadata
 * in a PDF file.
 *
 * The BibEntry is built by mapping individual properties of the Dublin Core
 * schema (like creator, title, subject) to fields of a BibTeX entry. Later
 * mappings overwrite earlier ones that target the same field.
 *
 * @param dcSchema       The Dublin Core schema from which to build a BibEntry.
 * @param xmpPreferences Supplies the separator used when adding the subjects as keywords.
 * @return The BibTeX entry built from the schema, or an empty Optional if no field was found.
 */
public static Optional<BibEntry> getBibtexEntryFromDublinCore(XMPSchemaDublinCore dcSchema,
        XMPPreferences xmpPreferences) {
    BibEntry entry = new BibEntry();

    // dc:contributor -> editor
    List<String> contributors = dcSchema.getContributors();
    if (contributors != null && !contributors.isEmpty()) {
        entry.setField(FieldName.EDITOR, String.join(" and ", contributors));
    }

    // dc:creator -> author
    List<String> creators = dcSchema.getCreators();
    if (creators != null && !creators.isEmpty()) {
        entry.setField(FieldName.AUTHOR, String.join(" and ", creators));
    }

    // dc:date -> year (+ month); only the first date is considered
    List<String> dates = dcSchema.getSequenceList("dc:date");
    if (dates != null && !dates.isEmpty()) {
        String firstDate = dates.get(0).trim();
        Calendar calendar;
        try {
            calendar = DateConverter.toCalendar(firstDate);
        } catch (IOException ignored) {
            // An unparsable date simply contributes no year/month
            calendar = null;
        }
        if (calendar != null) {
            entry.setField(FieldName.YEAR, String.valueOf(calendar.get(Calendar.YEAR)));
            // The month is only taken over if the text is longer than four characters
            if (firstDate.length() > 4) {
                // Calendar months are zero-based
                Month.getMonthByNumber(calendar.get(Calendar.MONTH) + 1).ifPresent(entry::setMonth);
            }
        }
    }

    // dc:description -> abstract
    String description = dcSchema.getDescription();
    if (description != null) {
        entry.setField(FieldName.ABSTRACT, description);
    }

    // dc:identifier -> doi
    String identifier = dcSchema.getIdentifier();
    if (identifier != null) {
        entry.setField(FieldName.DOI, identifier);
    }

    // dc:publisher -> publisher
    List<String> publishers = dcSchema.getPublishers();
    if (publishers != null && !publishers.isEmpty()) {
        entry.setField(FieldName.PUBLISHER, String.join(" and ", publishers));
    }

    // dc:relation -> arbitrary fields. The relation property is (ab)used to carry all
    // remaining BibTeX fields, encoded as "bibtex/<field>/<value>".
    List<String> relationships = dcSchema.getRelationships();
    if (relationships != null) {
        for (String relation : relationships) {
            if (!relation.startsWith("bibtex/")) {
                continue;
            }
            String fieldAndValue = relation.substring("bibtex/".length());
            int separator = fieldAndValue.indexOf('/');
            if (separator != -1) {
                entry.setField(fieldAndValue.substring(0, separator), fieldAndValue.substring(separator + 1));
            }
        }
    }

    // dc:rights -> rights
    String rights = dcSchema.getRights();
    if (rights != null) {
        entry.setField("rights", rights);
    }

    // dc:source -> source
    String source = dcSchema.getSource();
    if (source != null) {
        entry.setField("source", source);
    }

    // dc:subject -> keywords
    List<String> subjects = dcSchema.getSubjects();
    if (subjects != null) {
        entry.addKeywords(subjects, xmpPreferences.getKeywordSeparator());
    }

    // dc:title -> title
    String title = dcSchema.getTitle();
    if (title != null) {
        entry.setField(FieldName.TITLE, title);
    }

    // dc:type -> entry type; only the first type is considered
    List<String> types = dcSchema.getTypes();
    if (types != null && !types.isEmpty()) {
        String firstType = types.get(0);
        if (firstType != null) {
            entry.setType(firstType);
        }
    }

    if (entry.getFieldNames().isEmpty()) {
        return Optional.empty();
    }
    return Optional.of(entry);
}
/**
 * Writes the given BibTeX entry into the metadata of the given PDF file.
 *
 * Convenience overload of writeXMP(File, Collection, BibDatabase, boolean,
 * XMPPreferences): the entry is wrapped in a one-element list and the PDF
 * document properties are requested to be written as well.
 *
 * An IOException is thrown if the file cannot be read or written, so the user
 * can remove a lock or cancel the operation.
 *
 * @param file           The file to write to.
 * @param entry          The entry to write.
 * @param database       maybenull An optional database which the given entry belongs to, which will be used to
 *                       resolve strings. If the database is null the strings will not be resolved.
 * @param xmpPreferences The XMP preferences handed on to the writer.
 * @throws TransformerException If the entry was malformed or unsupported.
 * @throws IOException          If the file could not be written to or could not be found.
 */
public static void writeXMP(File file, BibEntry entry,
        BibDatabase database, XMPPreferences xmpPreferences) throws IOException, TransformerException {
    List<BibEntry> singleEntry = Collections.singletonList(entry);
    XMPUtil.writeXMP(file, singleEntry, database, true, xmpPreferences);
}
/**
 * Write the given BibTeX entries as XMP metadata text to the given stream.
 *
 * The text that is written to the stream contains a complete XMP document
 * with one BibTeX schema per entry.
 *
 * Strings are resolved with the in-place flag set to false, like every other
 * resolveForStrings call in this class, so that producing the XMP text does
 * not alter the entries handed in by the caller.
 *
 * @param bibtexEntries  The BibTeX entries to write XMP metadata for.
 * @param database       maybenull An optional database which the given entries belong to, which will be used
 *                       to resolve strings. If the database is null the strings will not be resolved.
 * @param outputStream   The stream the XMP document is written to.
 * @param xmpPreferences The XMP preferences handed on when filling the schemas.
 * @throws TransformerException Thrown if the bibtexEntries could not be transformed to XMP.
 * @throws IOException          Thrown if an IOException occurred while writing to the stream.
 * @see #toXMP(java.util.Collection, BibDatabase, XMPPreferences) for a variant returning the XMP document as
 *      a String.
 */
private static void toXMP(Collection<BibEntry> bibtexEntries,
        BibDatabase database, OutputStream outputStream, XMPPreferences xmpPreferences)
        throws IOException, TransformerException {
    Collection<BibEntry> resolvedEntries;
    if (database == null) {
        resolvedEntries = bibtexEntries;
    } else {
        // false: do not resolve in place, serialising must not modify the caller's entries
        resolvedEntries = database.resolveForStrings(bibtexEntries, false);
    }
    XMPMetadata x = new XMPMetadata();
    for (BibEntry e : resolvedEntries) {
        XMPSchemaBibtex schema = new XMPSchemaBibtex(x);
        x.addSchema(schema);
        schema.setBibtexEntry(e, xmpPreferences);
    }
    x.save(outputStream);
}
/**
 * Convenience method for toXMP(Collection, BibDatabase, OutputStream,
 * XMPPreferences) returning a String containing the XMP metadata of the given
 * collection of BibTeX entries.
 *
 * The resulting metadata string is wrapped as a complete XMP document.
 *
 * @param bibtexEntries  The BibTeX entries to return XMP metadata for.
 * @param database       maybenull An optional database which the given entries belong to, which will be used
 *                       to resolve strings. If the database is null the strings will not be resolved.
 * @param xmpPreferences The XMP preferences handed on when filling the schemas.
 * @return The XMP representation of the given bibtexEntries.
 * @throws TransformerException Thrown if the bibtexEntries could not be transformed to XMP; an IOException
 *                              raised while producing the text is wrapped in it.
 */
public static String toXMP(Collection<BibEntry> bibtexEntries,
        BibDatabase database, XMPPreferences xmpPreferences) throws TransformerException {
    try {
        ByteArrayOutputStream bs = new ByteArrayOutputStream();
        XMPUtil.toXMP(bibtexEntries, database, bs, xmpPreferences);
        // Decode explicitly as UTF-8 instead of the platform default charset, which would
        // garble non-ASCII characters on systems whose default is not UTF-8.
        // NOTE(review): relies on XMPMetadata.save writing UTF-8 (the JempBox default) - confirm
        // if the encoding is ever changed on the metadata object.
        return bs.toString("UTF-8");
    } catch (IOException e) {
        throw new TransformerException(e);
    }
}
/**
 * Reads the XMPMetadata from the PDF contained in the given stream. The PDF
 * document opened for this purpose is closed again before returning.
 *
 * @param inputStream The inputStream representing a PDF file to read the XMPMetadata from.
 * @return The XMPMetadata object found in the PDF, or an empty Optional if there is none.
 * @throws IOException If the PDF cannot be read.
 */
private static Optional<XMPMetadata> readRawXMP(InputStream inputStream) throws IOException {
    try (PDDocument pdf = loadWithAutomaticDecryption(inputStream)) {
        Optional<XMPMetadata> rawMetadata = XMPUtil.getXMPMetadata(pdf);
        return rawMetadata;
    }
}
/**
 * Extracts the XMP metadata from the catalog of the given document and
 * registers the BibTeX schema class for the BibTeX namespace on it.
 *
 * @param document The PDF document to read the metadata from.
 * @return empty Optional if no metadata has been found
 * @throws IOException If the metadata stream cannot be read or parsed.
 */
private static Optional<XMPMetadata> getXMPMetadata(PDDocument document) throws IOException {
    PDMetadata rawMetadata = document.getDocumentCatalog().getMetadata();
    if (rawMetadata == null) {
        return Optional.empty();
    }
    Document xmpDom;
    try (InputStream xmpStream = rawMetadata.createInputStream()) {
        xmpDom = XMLUtil.parse(xmpStream);
    }
    XMPMetadata xmp = new XMPMetadata(xmpDom);
    // Registers XMPSchemaBibtex as the schema class for the BibTeX namespace
    xmp.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);
    return Optional.of(xmp);
}
/**
 * Reads the XMPMetadata from the given PDF file, closing the file
 * afterwards.
 *
 * @param file The file to read the XMPMetadata from.
 * @return The XMPMetadata object found in the file, or an empty Optional if there is none.
 * @throws IOException If the file cannot be read.
 */
public static Optional<XMPMetadata> readRawXMP(File file) throws IOException {
    try (FileInputStream pdfStream = new FileInputStream(file)) {
        Optional<XMPMetadata> rawMetadata = XMPUtil.readRawXMP(pdfStream);
        return rawMetadata;
    }
}
/**
 * Fills the given Dublin Core schema with the data of the given BibTeX entry.
 *
 * Mapping of BibTeX fields to Dublin Core properties:
 *
 * editor    -> dc:contributor (bag ProperName), one value per editor
 * author    -> dc:creator (seq ProperName), one value per author
 * year      -> dc:date (seq Date), using the publication date of the entry (year and month)
 * month     -> not written on its own, covered by the date written for the year
 * abstract  -> dc:description (Lang Alt)
 * doi       -> dc:identifier (Text)
 * publisher -> dc:publisher (bag ProperName)
 * keywords  -> dc:subject (bag Text), one value per comma-separated keyword
 * title     -> dc:title (Lang Alt)
 * any other -> dc:relation, encoded as "bibtex/<field>/<value>"
 *
 * Additionally dc:format is set to "application/pdf" and the entry type is
 * added as dc:type. dc:coverage, dc:language, dc:rights and dc:source are
 * not written.
 *
 * Fields listed in the XMP privacy filter are skipped if the filter is enabled.
 *
 * All values, including the publication date and the type, are taken from the
 * string-resolved entry.
 *
 * @param dcSchema       The Dublin Core schema to fill.
 * @param entry          The BibTeX entry to take the data from.
 * @param database       maybenull An optional database which the given entry belongs to, which will be used to
 *                       resolve strings. If the database is null the strings will not be resolved.
 * @param xmpPreferences Supplies the privacy filter settings.
 */
private static void writeToDCSchema(XMPSchemaDublinCore dcSchema, BibEntry entry, BibDatabase database,
        XMPPreferences xmpPreferences) {
    BibEntry resolvedEntry;
    if (database == null) {
        resolvedEntry = entry;
    } else {
        resolvedEntry = database.resolveForStrings(entry, false);
    }
    // Query privacy filter settings
    boolean useXmpPrivacyFilter = xmpPreferences.isUseXMPPrivacyFilter();
    // Fields for which not to write XMP data later on:
    Set<String> filters = new TreeSet<>(xmpPreferences.getXmpPrivacyFilter());
    // Set all the values including key and entryType
    for (Entry<String, String> field : resolvedEntry.getFieldMap().entrySet()) {
        String fieldName = field.getKey();
        String fieldValue = field.getValue();
        if (useXmpPrivacyFilter && filters.contains(fieldName)) {
            continue;
        }
        if (FieldName.EDITOR.equals(fieldName)) {
            // Editor -> dc:contributor: contributors to the resource (other than the authors)
            for (Author editor : AuthorList.parse(fieldValue).getAuthors()) {
                dcSchema.addContributor(editor.getFirstLast(false));
            }
        } else if (FieldName.AUTHOR.equals(fieldName)) {
            // Author -> dc:creator: the authors of the resource, in order of precedence
            for (Author author : AuthorList.parse(fieldValue).getAuthors()) {
                dcSchema.addCreator(author.getFirstLast(false));
            }
        } else if (FieldName.MONTH.equals(fieldName)) {
            // Dealt with in year
            continue;
        } else if (FieldName.YEAR.equals(fieldName)) {
            // Year + Month -> dc:date. Read the date from the resolved entry so that
            // string references in year/month are expanded like every other field.
            resolvedEntry.getPublicationDate()
                         .ifPresent(publicationDate -> dcSchema.addSequenceValue("dc:date", publicationDate));
        } else if (FieldName.ABSTRACT.equals(fieldName)) {
            // Abstract -> dc:description: a textual description of the content of the resource
            dcSchema.setDescription(fieldValue);
        } else if (FieldName.DOI.equals(fieldName)) {
            // DOI -> dc:identifier: unique identifier of the resource
            dcSchema.setIdentifier(fieldValue);
        } else if (FieldName.PUBLISHER.equals(fieldName)) {
            // Publisher -> dc:publisher
            dcSchema.addPublisher(fieldValue);
        } else if (FieldName.KEYWORDS.equals(fieldName)) {
            // Keywords -> dc:subject: descriptive phrases or keywords for the topic of the resource
            // NOTE(review): keywords are split on ',' regardless of the keyword separator in
            // xmpPreferences that the Dublin Core reader uses - confirm this is intended.
            for (String keyword : fieldValue.split(",")) {
                dcSchema.addSubject(keyword.trim());
            }
        } else if (FieldName.TITLE.equals(fieldName)) {
            // Title -> dc:title: the name by which the resource is formally known
            dcSchema.setTitle(fieldValue);
        } else {
            // All others (including the bibtex key) get packaged in the relation attribute
            dcSchema.addRelation("bibtex/" + fieldName + '/' + fieldValue);
        }
    }
    // dc:format: the file format used when saving the resource
    dcSchema.setFormat("application/pdf");
    // entrytype -> dc:type: a document type; for example, novel, poem, or working paper
    TypedBibEntry typedEntry = new TypedBibEntry(resolvedEntry, BibDatabaseMode.BIBTEX);
    String displayType = typedEntry.getTypeForDisplay();
    if (!displayType.isEmpty()) {
        dcSchema.addType(displayType);
    }
}
/**
 * Try to write the given BibTeX entry as a Dublin Core XMP schema.
 *
 * Convenience overload that wraps the entry in a one-element list and
 * delegates to the collection variant of writeDublinCore, which removes the
 * Dublin Core schemas already present in the document before adding the new one.
 *
 * @param document       The pdf document to write to.
 * @param entry          The BibTeX entry that is written as a schema.
 * @param database       maybenull An optional database which the given BibTeX entry belongs to, which will be
 *                       used to resolve strings. If the database is null the strings will not be resolved.
 * @param xmpPreferences The XMP preferences handed on to the writer.
 * @throws IOException          If the existing metadata cannot be read.
 * @throws TransformerException If the metadata cannot be serialised.
 */
public static void writeDublinCore(PDDocument document, BibEntry entry,
        BibDatabase database, XMPPreferences xmpPreferences) throws IOException, TransformerException {
    List<BibEntry> singleEntry = Collections.singletonList(entry);
    XMPUtil.writeDublinCore(document, singleEntry, database, xmpPreferences);
}
/**
 * Try to write the given BibTeX entries as Dublin Core XMP schemas.
 *
 * Existing Dublin Core schemas in the document are removed; one new schema
 * is added per entry. The resulting metadata replaces the metadata stream in
 * the document catalog. The document itself is not saved here.
 *
 * @param document       The pdf document to write to.
 * @param entries        The BibTeX entries that are written as schemas
 * @param database       maybenull An optional database which the given BibTeX entries belong to, which will be
 *                       used to resolve strings. If the database is null the strings will not be resolved.
 * @param xmpPreferences The XMP preferences handed on when filling the schemas.
 * @throws IOException          If the existing metadata cannot be read or parsed.
 * @throws TransformerException If the metadata cannot be serialised.
 */
private static void writeDublinCore(PDDocument document,
        Collection<BibEntry> entries, BibDatabase database, XMPPreferences xmpPreferences)
        throws IOException, TransformerException {
    Collection<BibEntry> resolvedEntries;
    if (database == null) {
        resolvedEntries = entries;
    } else {
        resolvedEntries = database.resolveForStrings(entries, false);
    }
    PDDocumentCatalog catalog = document.getDocumentCatalog();
    PDMetadata metaRaw = catalog.getMetadata();
    XMPMetadata meta;
    if (metaRaw == null) {
        meta = new XMPMetadata();
    } else {
        // Close the metadata stream once it has been parsed
        try (InputStream rawStream = metaRaw.createInputStream()) {
            meta = new XMPMetadata(XMLUtil.parse(rawStream));
        }
    }
    // Remove all current Dublin-Core schemas
    List<XMPSchema> schemas = meta
            .getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
    for (XMPSchema schema : schemas) {
        schema.getElement().getParentNode().removeChild(schema.getElement());
    }
    for (BibEntry entry : resolvedEntries) {
        XMPSchemaDublinCore dcSchema = new XMPSchemaDublinCore(meta);
        // Strings have already been resolved above, hence no database is passed on
        XMPUtil.writeToDCSchema(dcSchema, entry, null, xmpPreferences);
        meta.addSchema(dcSchema);
    }
    // Save to stream and then input that stream to the PDF
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    meta.save(os);
    ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
    PDMetadata metadataStream = new PDMetadata(document, is, false);
    catalog.setMetadata(metadataStream);
}
/**
 * Try to write the given BibTeX entry into the document information (the
 * properties of the pdf).
 *
 * author, title, keywords and abstract are written to the author, title,
 * keywords and subject properties; every other field is stored as custom
 * metadata under the key "bibtex/<field>". Fields listed in the XMP privacy
 * filter are set to null instead if the filter is enabled. Finally the entry
 * type is stored under "bibtex/entrytype".
 *
 * Existing values are overridden if the entry has the corresponding field set.
 *
 * @param document       The pdf document to write to.
 * @param entry          The BibTeX entry that is written into the PDF properties.
 * @param database       maybenull An optional database which the given entry belongs to, which will be used to
 *                       resolve strings. If the database is null the strings will not be resolved.
 * @param xmpPreferences Supplies the privacy filter settings.
 */
private static void writeDocumentInformation(PDDocument document,
        BibEntry entry, BibDatabase database, XMPPreferences xmpPreferences) {
    PDDocumentInformation di = document.getDocumentInformation();
    BibEntry resolvedEntry = (database == null) ? entry : database.resolveForStrings(entry, false);

    // Query privacy filter settings
    boolean privacyFilterActive = xmpPreferences.isUseXMPPrivacyFilter();
    // Fields for which not to write XMP data later on:
    Set<String> filteredFields = new TreeSet<>(xmpPreferences.getXmpPrivacyFilter());

    // Set all the values including key and entryType
    for (Entry<String, String> field : resolvedEntry.getFieldMap().entrySet()) {
        String fieldName = field.getKey();
        // A filtered field is erased (written as null) instead of being added
        boolean erase = privacyFilterActive && filteredFields.contains(fieldName);
        String valueToWrite = erase ? null : field.getValue();

        if (FieldName.AUTHOR.equals(fieldName)) {
            di.setAuthor(valueToWrite);
        } else if (FieldName.TITLE.equals(fieldName)) {
            di.setTitle(valueToWrite);
        } else if (FieldName.KEYWORDS.equals(fieldName)) {
            di.setKeywords(valueToWrite);
        } else if (FieldName.ABSTRACT.equals(fieldName)) {
            di.setSubject(valueToWrite);
        } else {
            di.setCustomMetadataValue("bibtex/" + fieldName, valueToWrite);
        }
    }
    String entryType = StringUtil.capitalizeFirst(resolvedEntry.getType());
    di.setCustomMetadataValue("bibtex/entrytype", entryType);
}
/**
 * Try to write the given BibTeX entries in the XMP stream of the given
 * PDF file.
 *
 * Throws an IOException if the file cannot be read or written, so the user
 * can remove a lock or cancel the operation.
 *
 * Existing BibTeX XMP schemas are replaced by one schema per given entry.
 * If writePDFInfo is set and exactly one entry is given, the entry is also
 * written to the PDF document properties and as a Dublin Core schema
 * (replacing existing Dublin Core schemas). Encrypted documents are rejected.
 *
 * @param file           The file to write the entries to.
 * @param bibtexEntries  The entries to write to the file.
 * @param database       maybenull An optional database which the given entries belong to, which will be used
 *                       to resolve strings. If the database is null the strings will not be resolved.
 * @param writePDFInfo   Write information also in PDF document properties
 * @param xmpPreferences The XMP preferences handed on when filling the schemas.
 * @throws TransformerException If the entry was malformed or unsupported, or the document could not be saved.
 * @throws IOException          If the file could not be written to or could not be found.
 */
public static void writeXMP(File file,
        Collection<BibEntry> bibtexEntries, BibDatabase database,
        boolean writePDFInfo, XMPPreferences xmpPreferences) throws IOException, TransformerException {
    Collection<BibEntry> resolvedEntries;
    if (database == null) {
        resolvedEntries = bibtexEntries;
    } else {
        resolvedEntries = database.resolveForStrings(bibtexEntries, false);
    }
    try (PDDocument document = PDDocument.load(file.getAbsoluteFile())) {
        if (document.isEncrypted()) {
            throw new EncryptedPdfsNotSupportedException();
        }
        if (writePDFInfo && (resolvedEntries.size() == 1)) {
            // Strings have already been resolved above, hence no database is passed on
            XMPUtil.writeDocumentInformation(document, resolvedEntries
                    .iterator().next(), null, xmpPreferences);
            XMPUtil.writeDublinCore(document, resolvedEntries, null, xmpPreferences);
        }
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();
        XMPMetadata meta;
        if (metaRaw == null) {
            meta = new XMPMetadata();
        } else {
            // Close the metadata stream once it has been parsed
            try (InputStream rawStream = metaRaw.createInputStream()) {
                meta = new XMPMetadata(XMLUtil.parse(rawStream));
            }
        }
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE,
                XMPSchemaBibtex.class);
        // Remove all current Bibtex-schemas
        List<XMPSchema> schemas = meta
                .getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
        for (XMPSchema schema : schemas) {
            XMPSchemaBibtex bib = (XMPSchemaBibtex) schema;
            bib.getElement().getParentNode().removeChild(bib.getElement());
        }
        for (BibEntry e : resolvedEntries) {
            XMPSchemaBibtex bibtex = new XMPSchemaBibtex(meta);
            meta.addSchema(bibtex);
            bibtex.setBibtexEntry(e, xmpPreferences);
        }
        // Save to stream and then input that stream to the PDF
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        meta.save(os);
        ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
        PDMetadata metadataStream = new PDMetadata(document, is, false);
        catalog.setMetadata(metadataStream);
        // Save
        try {
            document.save(file.getAbsolutePath());
        } catch (COSVisitorException e) {
            LOGGER.debug("Could not write XMP metadata", e);
            throw new TransformerException("Could not write XMP metadata: " + e.getLocalizedMessage(), e);
        }
    }
}
/**
 * Checks whether BibTeX metadata can be read from the PDF at the given path.
 *
 * Opens the file for reading and delegates to hasMetadata(InputStream,
 * XMPPreferences). A failure to open or read the file is logged and reported
 * as "no metadata".
 *
 * @param path           The path of the PDF file to inspect.
 * @param xmpPreferences The XMP preferences handed on to the reader.
 * @return whether a BibEntry was found in the given PDF.
 */
public static boolean hasMetadata(Path path, XMPPreferences xmpPreferences) {
    boolean found;
    try (InputStream pdfStream = Files.newInputStream(path, StandardOpenOption.READ)) {
        found = hasMetadata(pdfStream, xmpPreferences);
    } catch (IOException e) {
        LOGGER.error("XMP reading failed", e);
        found = false;
    }
    return found;
}
/**
 * Will try to read BibTeX metadata from the PDF in the given stream, returning
 * whether any entry was found.
 *
 * Caution: This method is as expensive as reading the actual metadata
 * itself from the PDF, because that is exactly what it does.
 *
 * Unsupported encryption and read failures are logged and reported as
 * "no metadata".
 *
 * @param inputStream    The inputStream to read the PDF from.
 * @param xmpPreferences The XMP preferences handed on to the reader.
 * @return whether a BibEntry was found in the given PDF.
 */
public static boolean hasMetadata(InputStream inputStream, XMPPreferences xmpPreferences) {
    try {
        return !XMPUtil.readXMP(inputStream, xmpPreferences).isEmpty();
    } catch (EncryptedPdfsNotSupportedException ex) {
        LOGGER.info("Encryption not supported by XMPUtil");
    } catch (IOException e) {
        LOGGER.error("XMP reading failed", e);
    }
    // Only reached when one of the exceptions above was caught
    return false;
}
}