package org.rr.jeborker.metadata;
import static org.rr.commons.utils.StringUtil.EMPTY;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.io.Charsets;
import org.apache.jempbox.xmp.Thumbnail;
import org.apache.jempbox.xmp.XMPMetadata;
import org.apache.jempbox.xmp.XMPSchema;
import org.apache.jempbox.xmp.XMPSchemaBasic;
import org.apache.jempbox.xmp.XMPUtils;
import org.rr.commons.log.LoggerFactory;
import org.rr.commons.mufs.IResourceHandler;
import org.rr.commons.utils.Base64;
import org.rr.commons.utils.CommonUtils;
import org.rr.commons.utils.DateConversionUtils;
import org.rr.commons.utils.HTMLEntityConverter;
import org.rr.commons.utils.ListUtils;
import org.rr.commons.utils.StringUtil;
import org.rr.jeborker.db.item.EbookPropertyItem;
import org.rr.jeborker.metadata.pdf.PDFDocument;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
/**
 * {@link IMetadataReader} implementation for PDF files.
 * <p>
 * The reader collects the properties of the XMP schemas, the entries of the PDF info
 * dictionary and a cover image (XMP thumbnail or, as fallback, a cover taken from the PDF
 * content) and maps the well known ones onto an {@link EbookPropertyItem}.
 */
class PDFCommonMetadataReader extends APDFCommonMetadataHandler implements IMetadataReader {
/** The PDF resource handed to the constructor. */
private IResourceHandler ebookResource;
/** PDF document instance created in the constructor; source of the XMP bytes, the info entries and the content cover. */
private PDFDocument pdfDoc;
/**
 * Creates a reader bound to the given PDF resource.
 *
 * @param ebookResource the resource of the PDF to read. It is stored as-is and also used to
 *        obtain the {@link PDFDocument} instance (requested with {@code PDFDocument.ITEXT}).
 */
PDFCommonMetadataReader(final IResourceHandler ebookResource) {
    this.pdfDoc = PDFDocument.getPDFCommonDocumentInstance(PDFDocument.ITEXT, ebookResource);
    this.ebookResource = ebookResource;
}
/**
 * Returns the resource this reader works on.
 *
 * @return an immutable list holding exactly the resource handed to the constructor.
 */
@Override
public List<IResourceHandler> getEbookResource() {
    final List<IResourceHandler> handlers = Collections.singletonList(this.ebookResource);
    return handlers;
}
/**
 * Reads all metadata available for the PDF: the properties of every XMP schema, every
 * non-blank entry of the PDF info dictionary and, if one can be found, the cover image.
 * <p>
 * Info entries whose key ends with <code>Date</code> or <code>SourceModified</code> are
 * converted to a {@link Date} if the value can be parsed; otherwise the raw string is kept.
 * The cover is taken from the XMP thumbnail; if there is none, or reading it fails, the
 * cover extracted from the PDF content is used instead.
 *
 * @return The collected properties. An empty list is returned if reading failed altogether,
 *         never <code>null</code>.
 */
@Override
public List<MetadataProperty> readMetadata() {
    try {
        final ArrayList<MetadataProperty> result = new ArrayList<>();

        // 1. XMP schemas
        final byte[] xmpMetadataBytes = getXmpMetadata();
        if(xmpMetadataBytes != null) {
            final Document document = getDocument(xmpMetadataBytes, ebookResource);
            final XMPMetadata metadata = document != null ? new XMPMetadata(document) : new XMPMetadata();
            final List<XMPSchema> schemas = metadata.getSchemas();
            for (XMPSchema xmpSchema : schemas) {
                this.addSchemaProperties(result, xmpSchema);
            }
        }

        // 2. PDF info dictionary
        final Map<String, String> pdfInfo = getInfo();
        if(pdfInfo != null) {
            for (Entry<String, String> entry : pdfInfo.entrySet()) {
                final String key = entry.getKey();
                final String value = entry.getValue();
                try {
                    if(value == null || value.trim().isEmpty()) {
                        continue; //no sense having empty entries.
                    }
                    final boolean dateKey = key.endsWith("Date") || key.endsWith("SourceModified");
                    final Date dateValue = dateKey ? DateConversionUtils.toDate(value) : null;
                    if(dateValue != null) {
                        result.add(new MetadataProperty(key, dateValue, Date.class));
                    } else {
                        result.add(new MetadataProperty(key, value));
                    }
                } catch(Exception e) {
                    LoggerFactory.logWarning(this, "could not handle property " + key + " and value " + value, e);
                }
            }
        } else {
            LoggerFactory.logWarning(this, "Could not get metadata from " + ebookResource, new RuntimeException("dumpstack"));
        }

        // 3. Cover
        try {
            byte[] fetchThumbnail = null;
            try {
                fetchThumbnail = fetchXMPThumbnail(ebookResource);
            } catch (Exception e) {
                // A broken XMP thumbnail must not prevent the fallback to the content cover.
                LoggerFactory.logWarning(getClass(), "Could not read xmp thumbnail for pdf " + ebookResource, e);
            }
            if(fetchThumbnail == null) {
                fetchThumbnail = pdfDoc.fetchCoverFromPDFContent();
            }
            if(fetchThumbnail != null) {
                result.add(new MetadataProperty(IMetadataReader.COMMON_METADATA_TYPES.COVER.getName(), fetchThumbnail));
            }
        } catch (Exception e) {
            LoggerFactory.logWarning(getClass(), "Could not read cover for pdf " + ebookResource, e);
        }
        return result;
    } catch (Throwable e) {
        LoggerFactory.logWarning(getClass(), "Could not read metadata for pdf " + ebookResource, e);
    }
    return new ArrayList<MetadataProperty>(0);
}
/**
 * Fetches the XMP metadata bytes from the PDF document.
 *
 * @return The bytes as delivered by the PDF document or <code>null</code> if an
 *         {@link IOException} occurred. The failure is logged as warning.
 */
private byte[] getXmpMetadata() {
    byte[] xmpBytes = null;
    try {
        xmpBytes = pdfDoc.getXMPMetadata();
    } catch (IOException e) {
        LoggerFactory.logWarning(getClass(), "Could not read xmp metadata for pdf " + ebookResource, e);
    }
    return xmpBytes;
}
/**
 * Fetches the info entries from the PDF document.
 *
 * @return The key/value pairs as delivered by the PDF document or <code>null</code> if an
 *         {@link IOException} occurred. The failure is logged as warning.
 */
private Map<String, String> getInfo() {
    Map<String, String> info = null;
    try {
        info = pdfDoc.getInfo();
    } catch (IOException e) {
        LoggerFactory.logWarning(getClass(), "Could not read info metadata for pdf " + ebookResource, e);
    }
    return info;
}
/**
 * Converts the child elements of the given XMP schema into metadata properties and appends
 * them to <code>result</code>.
 * <ul>
 * <li>A schema child with exactly one nested element (the <code>&lt;rdf:Alt&gt;</code> or
 * <code>&lt;rdf:Seq&gt;</code> container) becomes one {@link PDFMetadataProperty} without
 * value that carries one child property, including its attributes, per container entry.
 * It is only added if the container has at least one entry.</li>
 * <li>A schema child without nested elements becomes a simple property with the text
 * content as value. Blank values are skipped. Values of tags ending with <code>Date</code>
 * or <code>SourceModified</code> are converted to {@link Date} if they can be parsed,
 * otherwise the raw text is kept.</li>
 * </ul>
 *
 * @param result The list the created properties are appended to.
 * @param schema The schema to read. <code>null</code> is ignored.
 * @throws IOException declared by the original signature and kept for its callers.
 */
private void addSchemaProperties(final ArrayList<MetadataProperty> result, final XMPSchema schema) throws IOException {
    if(schema == null) {
        return;
    }

    final Element schemaElement = schema.getElement();
    final List<Element> schemaChildren = getChildren(schemaElement);
    for (Element schemaChild : schemaChildren) {
        final String tagName = schemaChild.getTagName();
        final List<Element> rdfChildren = getChildren(schemaChild); //Need to handle <rdf:Alt> or <rdf:Seq>
        if(!rdfChildren.isEmpty()) {
            // NOTE(review): schema children with more than one nested element are still skipped.
            if(rdfChildren.size() == 1) {
                final Element rdfChild = rdfChildren.get(0);
                final String rdfChildName = rdfChild.getTagName();
                final PDFMetadataProperty pdfMetadataProperty = new PDFMetadataProperty(tagName, null, rdfChildName);
                final List<Element> valueChilds = getChildren(rdfChild);
                for (Element valueChild : valueChilds) {
                    //<rdf:li> childs
                    final PDFMetadataProperty valueChildProperty = new PDFMetadataProperty(valueChild.getTagName(), valueChild.getTextContent(), null);
                    final NamedNodeMap attributes = valueChild.getAttributes();
                    for (int i = 0; i < attributes.getLength(); i++) {
                        final Node item = attributes.item(i);
                        valueChildProperty.addAttribute(item.getNodeName(), item.getNodeValue());
                    }
                    pdfMetadataProperty.addChild(valueChildProperty);
                }
                // The structured property was previously built and then dropped.
                if(!valueChilds.isEmpty()) {
                    result.add(pdfMetadataProperty);
                }
            }
            continue;
        }

        Object value = schemaChild.getTextContent();
        if(tagName.endsWith("Date") || tagName.endsWith("SourceModified")) {
            final String stringValue = StringUtil.toString(value);
            if(stringValue.trim().isEmpty()) {
                continue; //no sense to add an empty Date.
            }
            try {
                // Malformed values like 2004-12-11T00:00:+0Z can not be parsed; keep the raw text then.
                final Date dateValue = DateConversionUtils.toDate(stringValue);
                if(dateValue != null) {
                    value = dateValue;
                }
            } catch(NumberFormatException e) {
                LoggerFactory.logWarning(this, "could not handle property " + tagName + " and value " + stringValue, e);
            }
        }

        if(!StringUtil.toString(value).trim().isEmpty()) {
            result.add(new PDFMetadataProperty(tagName, value, null));
        }
    }
}
/**
 * Clears the metadata of the given item and fills it from the given properties.
 * <p>
 * Property names are matched case insensitive and independent of the default locale.
 * Authors are taken from the <code>author</code> properties; only if there is none, the
 * <code>creator</code> properties are used as authors instead.
 *
 * @param metadataProperties The properties to transfer. With <code>null</code> the item is
 *        only cleared.
 * @param item The item to be filled.
 */
@Override
public void fillEbookPropertyItem(List<MetadataProperty> metadataProperties, EbookPropertyItem item) {
    item.clearMetadata();
    if(metadataProperties == null) {
        return;
    }

    final List<MetadataProperty> authorMetadataProperty = new ArrayList<>(2);
    final List<MetadataProperty> creatorMetadataProperty = new ArrayList<>(2);
    for (MetadataProperty metadataProperty : metadataProperties) {
        // Locale.ROOT: with a Turkish default locale "SeriesIndex" would become "series\u0131ndex".
        final String name = metadataProperty.getName().toLowerCase(java.util.Locale.ROOT);
        switch(name) {
            case "title":
                item.setTitle(metadataProperty.getValueAsString());
                break;
            case "author":
                authorMetadataProperty.add(metadataProperty);
                break;
            case "creator":
                // Only evaluated below if no author property exists at all.
                creatorMetadataProperty.add(metadataProperty);
                break;
            case "keywords": {
                final List<String> keywords = ListUtils.split(metadataProperty.getValueAsString(), ",");
                item.setKeywords(keywords);
                break;
            }
            case "description":
                item.setDescription(metadataProperty.getValueAsString());
                break;
            case "creationdate":
                item.setCreationDate(DateConversionUtils.toDate(metadataProperty.getValueAsString()));
                break;
            case "subject":
                item.setGenre(metadataProperty.getValueAsString());
                break;
            case "agesuggestion":
                item.setAgeSuggestion(metadataProperty.getValueAsString());
                break;
            case "rating": {
                final Number number = CommonUtils.toNumber(metadataProperty.getValueAsString());
                item.setRating(number != null ? number.intValue() : null);
                break;
            }
            case "seriesindex":
                item.setSeriesIndex(metadataProperty.getValueAsString());
                break;
            case "seriesname":
                item.setSeriesName(metadataProperty.getValueAsString());
                break;
            default:
                // The cover name is no compile time constant, so it can not be a case label.
                if(name.equals(IMetadataReader.COMMON_METADATA_TYPES.COVER.getName())) {
                    IMetadataReader.COMMON_METADATA_TYPES.COVER.fillItem(metadataProperty, item);
                }
                break;
        }
    }

    final List<MetadataProperty> authors = authorMetadataProperty.isEmpty() ? creatorMetadataProperty : authorMetadataProperty;
    for(MetadataProperty property : authors) {
        COMMON_METADATA_TYPES.AUTHOR.fillItem(property, item);
    }
}
/**
 * Fetches the thumbnail from the xmp metadata.
 * <p>
 * The thumbnail is looked up in the basic schema, first with the <code>xap</code> and then
 * with the <code>xmp</code> namespace prefix, and is Base64 decoded.
 *
 * @param ebookResource The pdf resource. With <code>null</code> no thumbnail is searched.
 *        The XMP bytes themselves are read from the PDF document created in the constructor.
 * @return The decoded thumbnail or <code>null</code> if no thumbnail is embedded, the XMP
 *         data is missing, invalid or not parsable, or the decoded data is not longer than
 *         5 bytes.
 * @throws Exception if reading or evaluating the XMP metadata fails.
 */
byte[] fetchXMPThumbnail(final IResourceHandler ebookResource) throws Exception {
    if(ebookResource == null) {
        return null;
    }

    final byte[] xmpMetadataBytes = pdfDoc.getXMPMetadata();
    if(xmpMetadataBytes == null || !XMPUtils.isValidXMP(xmpMetadataBytes)) {
        return null;
    }

    // getDocument can deliver null, readMetadata guards against it the same way.
    final Document document = getDocument(xmpMetadataBytes, ebookResource);
    if(document == null) {
        return null;
    }

    final XMPMetadata xmp = new XMPMetadata(document);
    final XMPSchemaBasic xmpBasicSchema = xmp.getBasicSchema(); //same as getXMPSchema("xap", xmp);
    if(xmpBasicSchema == null) {
        return null;
    }

    // Thumbnails could have xap: or xmp: namespace in the BasicSchema.
    Thumbnail thumbnail = xmpBasicSchema.getThumbnail(null, "xap");
    if(thumbnail == null) {
        thumbnail = xmpBasicSchema.getThumbnail(null, "xmp");
    }
    if(thumbnail == null) {
        return null;
    }

    final String image = thumbnail.getImage();
    if(image == null) {
        return null;
    }

    final byte[] decodeBase64 = Base64.decode(image);
    if(decodeBase64 != null && decodeBase64.length > 5) {
        return decodeBase64;
    }
    return null;
}
/**
 * Provides the XMP metadata of the PDF as text.
 * <p>
 * The XMP bytes are interpreted as UTF-8 and passed through the
 * {@link HTMLEntityConverter} entity decoding.
 *
 * @return The XMP text or <code>null</code> if the PDF has no XMP bytes or reading them
 *         failed. Both cases are logged.
 */
@Override
public String getPlainMetadata() {
    try {
        final byte[] rawXmp = pdfDoc.getXMPMetadata();
        if(rawXmp == null || rawXmp.length == 0) {
            LoggerFactory.logInfo(this, "Could not get plain metadata for " + ebookResource, null);
            return null;
        }
        final String encoded = new String(rawXmp, Charsets.UTF_8);
        final String decoded = new HTMLEntityConverter(encoded, -1).decodeEntities();
        return decoded;
    } catch (Exception e) {
        LoggerFactory.logWarning(this, "Could not get plain metadata for " + ebookResource, e);
        return null;
    }
}
/**
 * Lists the metadata properties this reader knows about, each one with an empty value.
 *
 * @return A new, modifiable list with the text properties first, followed by the three
 *         properties typed as {@link Date}.
 */
@Override
public List<MetadataProperty> getSupportedMetadata() {
    final String[] textNames = {
        "Author", "Title", "Creator", "Subject", "Producer",
        "AgeSuggestion", "Rating", "SeriesIndex", "SeriesName", "Description"
    };
    final String[] dateNames = { "ModDate", "CreationDate", "SourceModified" };

    final ArrayList<MetadataProperty> supported = new ArrayList<>();
    for (String textName : textNames) {
        supported.add(new MetadataProperty(textName, EMPTY));
    }
    for (String dateName : dateNames) {
        supported.add(new MetadataProperty(dateName, EMPTY, Date.class));
    }
    return supported;
}
/**
 * Tells the mime type of the text delivered by {@link #getPlainMetadata()}.
 *
 * @return Always <code>text/xml</code>.
 */
@Override
public String getPlainMetadataMime() {
    final String mime = "text/xml";
    return mime;
}
/**
 * Collects all properties named <code>Author</code> (case insensitive).
 *
 * @param create If <code>true</code> and no author property exists, a new
 *        <code>Author</code> property with an empty value is delivered instead.
 * @param props The properties to search. With <code>null</code> the metadata is freshly
 *        read using {@link #readMetadata()}.
 * @return An unmodifiable list with the author properties found or created.
 */
private List<MetadataProperty> getAuthorMetadata(boolean create, List<MetadataProperty> props) {
    final List<MetadataProperty> candidates = props == null ? readMetadata() : props;

    final ArrayList<MetadataProperty> authors = new ArrayList<>(2);
    for (MetadataProperty candidate : candidates) {
        if(candidate.getName().equalsIgnoreCase("Author")) {
            authors.add(candidate);
        }
    }

    // nothing found but the caller asked for a property: deliver a new, empty one.
    if(authors.isEmpty() && create) {
        authors.add(new MetadataProperty("Author", EMPTY));
    }
    return Collections.unmodifiableList(authors);
}
/**
 * Collects all properties belonging to the given common metadata type. The property name
 * of the type is matched case insensitive.
 *
 * @param create If <code>true</code> and no matching property exists, a new property with
 *        the name of the type and an empty value is delivered instead.
 * @param props The properties to search. With <code>null</code> the metadata is freshly
 *        read using {@link #readMetadata()}.
 * @param type The type to search for.
 * @return An unmodifiable list with the properties found or created, or <code>null</code>
 *         for a type this reader has no property name for.
 */
@Override
public List<MetadataProperty> getMetadataByType(boolean create, List<MetadataProperty> props, COMMON_METADATA_TYPES type) {
    // The name is used for the case insensitive search and for a newly created property.
    final String name;
    switch(type) {
        case GENRE:
            name = "Subject";
            break;
        case TITLE:
            name = "Title";
            break;
        case SERIES_NAME:
            // same spelling as in getSupportedMetadata
            name = "SeriesName";
            break;
        case RATING:
            name = "Rating";
            break;
        case AUTHOR:
            return this.getAuthorMetadata(create, props);
        case AGE_SUGGESTION:
            name = "AgeSuggestion";
            break;
        case DESCRIPTION:
            name = "Description";
            break;
        case ISBN:
            name = "Isbn";
            break;
        case LANGUAGE:
            name = "Language";
            break;
        case COVER:
            name = "Cover";
            break;
        default:
            return null;
    }

    final List<MetadataProperty> metadataProperties = props != null ? props : readMetadata();
    final ArrayList<MetadataProperty> result = new ArrayList<>(2);
    for (MetadataProperty property : metadataProperties) {
        if(property.getName().equalsIgnoreCase(name)) {
            result.add(property);
        }
    }

    //if the list is empty and a new property should be created, add a new, empty property for the type to the result.
    if(create && result.isEmpty()) {
        result.add(new MetadataProperty(name, EMPTY));
    }
    return Collections.unmodifiableList(result);
}
}