package org.rr.jeborker.metadata.pdf; import java.io.IOException; import java.io.OutputStream; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.logging.Level; import org.apache.commons.io.IOUtils; import org.apache.jempbox.xmp.XMPUtils; import org.rr.commons.log.LoggerFactory; import org.rr.commons.mufs.IResourceHandler; import org.rr.commons.mufs.ResourceHandlerFactory; import org.rr.commons.utils.CommonUtils; import com.itextpdf.text.DocumentException; import com.itextpdf.text.pdf.PRStream; import com.itextpdf.text.pdf.PdfName; import com.itextpdf.text.pdf.PdfObject; import com.itextpdf.text.pdf.PdfReader; import com.itextpdf.text.pdf.PdfStamper; import com.itextpdf.text.pdf.PdfStream; public abstract class PDFDocument { public static final int ITEXT = 0; private static final double MIN_IMAGE_COVER_WIDTH = 1.4d; private static final double MAX_IMAGE_COVER_WIDTH = 1.7d; private IResourceHandler pdfFile; /** * Store the PDF key/value info. If it's null, the Info in the * pdf file didn't get touched. */ protected Map<String, String> moreInfo; protected byte[] xmpMetadata; public static PDFDocument getPDFCommonDocumentInstance(int type, IResourceHandler pdfFile) { PDFDocument result = null; switch(type) { case ITEXT: result = new PDFDocument.ItextPDFDocument(); break; } result.setResourceHandler(pdfFile); return result; } /** * Set the pdf file containing the pdf data used for the {@link PDFDocument} instance. * @param pdfFile The pdf file to be used. */ protected void setResourceHandler(IResourceHandler pdfFile) { this.pdfFile = pdfFile; } /** * get the pdf file for this {@link PDFDocument} instance. * @return The desired {@link PDFDocument}. */ public IResourceHandler getResourceHandler() { return this.pdfFile; } /** * Read the xmp metadata as byte array. * @return The desired xmp bytes. */ public abstract byte[] getXMPMetadata() throws IOException; /** * Get a map with values contained in the pdf info block. * @return The desired info values. */ public abstract Map<String, String> getInfo() throws IOException; /** * Set the map values for the pdf. * @param info The metadata key/values to be written to the pdf. */ public void setInfo(Map<String, String> info) { this.moreInfo = info; } public void setXMPMetadata(byte[] xmpMetadata) { this.xmpMetadata = xmpMetadata; } public abstract byte[] fetchCoverFromPDFContent() throws IOException; /** * Write the previously set metadata to the file. * @throws IOException */ public abstract void write() throws IOException; private static class ItextPDFDocument extends PDFDocument { private PdfReader pdfReaderI; @Override public byte[] getXMPMetadata() throws IOException { if(this.xmpMetadata == null) { final PdfReader reader = getReader(); try { final byte[] xmpMetadataBytes = reader.getMetadata(); if(XMPUtils.isValidXMP(xmpMetadataBytes)) { this.xmpMetadata = xmpMetadataBytes; } } finally { dispose(); } } return this.xmpMetadata; } @Override public Map<String, String> getInfo() throws IOException { if(moreInfo == null) { final PdfReader reader = getReader(); try { moreInfo = reader.getInfo(); return moreInfo; } finally { dispose(); } } return moreInfo; } @Override public void write() throws IOException { final IResourceHandler ebookResource = getResourceHandler(); final IResourceHandler tmpEbookResourceLoader = ResourceHandlerFactory.getUniqueResourceHandler(ebookResource, "tmp"); PdfStamper stamper = null; OutputStream ebookResourceOutputStream = null; final PdfReader reader = this.getReader(); try { ebookResourceOutputStream = tmpEbookResourceLoader.getContentOutputStream(false); stamper = new PdfStamper(reader, ebookResourceOutputStream); byte[] xmp = this.xmpMetadata != null ? this.xmpMetadata : getXMPMetadata(); stamper.setXmpMetadata(XMPUtils.handleMissingXMPRootTag(xmp)); Map<String, String> info = this.moreInfo != null ? this.moreInfo : reader.getInfo(); if(this.moreInfo != null) { //to delete old entries, itext need to null them. HashMap<String, String> oldInfo = reader.getInfo(); HashMap<String, String> newInfo = new HashMap<String, String>(oldInfo.size() + this.moreInfo.size()); for (Iterator<String> it = oldInfo.keySet().iterator(); it.hasNext();) { newInfo.put(it.next(), null); } newInfo.putAll(info); stamper.setMoreInfo(newInfo); } } catch(Exception e) { throw new IOException(e); } finally { if (stamper != null) { try { stamper.close(); } catch (DocumentException e) { LoggerFactory.logWarning(this, "Could not close pdf stamper for " + ebookResource, e); } catch (IOException e) { LoggerFactory.logWarning(this, "Could not close pdf stamper for " + ebookResource, e); } } if (ebookResourceOutputStream != null) { try { ebookResourceOutputStream.flush(); } catch (IOException e) { } IOUtils.closeQuietly(ebookResourceOutputStream); } dispose(); //unlock the target pdf before moving it. if(tmpEbookResourceLoader.size() > 0) { //new temp pdf looks good. Move the new temp one over the old one. tmpEbookResourceLoader.moveTo(ebookResource, true); } else { tmpEbookResourceLoader.delete(); } } } public byte[] fetchCoverFromPDFContent() throws IOException { try { PDFRenderer renderer = PDFRenderer.getPDFRendererInstance(getResourceHandler()); return renderer.renderPagetoJpeg(1); } catch (Throwable e) { LoggerFactory.log(Level.WARNING, this, "could not render PDF " + getResourceHandler()); } final PdfReader reader = getReader(); try { int xrefSize = reader.getXrefSize(); for (int i = 0; i < xrefSize; i++) { //process the first ten xrefs. PdfObject pdfobj = reader.getPdfObject(i); if(pdfobj != null) { if (pdfobj.isStream()) { PdfStream stream = (PdfStream) pdfobj; PdfObject pdfsubtype = stream.get(PdfName.SUBTYPE); if (pdfsubtype == null || !pdfsubtype.toString().equals(PdfName.IMAGE.toString())) { continue; } // now you have a PDF stream object with an image byte[] img = PdfReader.getStreamBytesRaw((PRStream) stream); if(img.length > 1000) { int width = 0; int height = 0; try { width = Integer.parseInt(stream.get(PdfName.WIDTH).toString()); height = Integer.parseInt(stream.get(PdfName.HEIGHT).toString()); if(width <= 0 || height <= 0) { continue; } PdfObject bitspercomponent = stream.get(PdfName.BITSPERCOMPONENT); if(bitspercomponent!=null) { Number bitspercomponentNum = CommonUtils.toNumber(bitspercomponent.toString()); if(bitspercomponentNum!=null && bitspercomponentNum.intValue()==1) { //no b/w images continue; } } } catch(Exception e) {} double aspectRatio = ((double)height) / ((double)width); if(width > 150 && aspectRatio > MIN_IMAGE_COVER_WIDTH && aspectRatio < MAX_IMAGE_COVER_WIDTH) { return img; } } } } } } finally { dispose(); } return null; } private PdfReader getReader() throws IOException { if(this.pdfReaderI == null) { this.pdfReaderI = PDFUtils.getReader(getResourceHandler().toFile()); } return this.pdfReaderI; } private void dispose() { if(this.pdfReaderI != null) { this.pdfReaderI.close(); this.pdfReaderI = null; } } } }