package com.tom_roush.pdfbox.pdfparser;

import android.util.Log;

import com.tom_roush.pdfbox.cos.COSBase;
import com.tom_roush.pdfbox.cos.COSDictionary;
import com.tom_roush.pdfbox.cos.COSDocument;
import com.tom_roush.pdfbox.cos.COSName;
import com.tom_roush.pdfbox.cos.COSNull;
import com.tom_roush.pdfbox.cos.COSObject;
import com.tom_roush.pdfbox.io.IOUtils;
import com.tom_roush.pdfbox.io.RandomAccessRead;
import com.tom_roush.pdfbox.io.ScratchFile;
import com.tom_roush.pdfbox.pdmodel.PDDocument;
import com.tom_roush.pdfbox.pdmodel.encryption.AccessPermission;
import com.tom_roush.pdfbox.pdmodel.encryption.DecryptionMaterial;
import com.tom_roush.pdfbox.pdmodel.encryption.PDEncryption;
import com.tom_roush.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial;
import com.tom_roush.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;

import java.io.IOException;
import java.io.InputStream;
import java.security.KeyStore;

/**
 * Parser that reads a PDF or FDF source into a {@link COSDocument}.
 *
 * <p>When the trailer carries an {@code /Encrypt} entry, decryption is prepared from the
 * password and, if supplied, the key store and alias handed to the constructor.
 */
public class PDFParser extends COSParser
{
    private String password = "";
    private InputStream keyStoreInputStream = null;
    private String keyAlias = null;

    private AccessPermission accessPermission;

    /**
     * Constructor.
     *
     * @param source input representing the pdf.
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source) throws IOException
    {
        this(source, "", false);
    }

    /**
     * Constructor.
     *
     * @param source input representing the pdf.
     * @param useScratchFiles use a file based buffer for temporary storage.
     *
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source, boolean useScratchFiles) throws IOException
    {
        this(source, "", useScratchFiles);
    }

    /**
     * Constructor.
     *
     * @param source input representing the pdf.
     * @param decryptionPassword password to be used for decryption.
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source, String decryptionPassword) throws IOException
    {
        this(source, decryptionPassword, false);
    }

    /**
     * Constructor.
     *
     * @param source input representing the pdf.
     * @param decryptionPassword password to be used for decryption.
     * @param useScratchFiles use a buffer for temporary storage.
     *
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source, String decryptionPassword, boolean useScratchFiles)
            throws IOException
    {
        this(source, decryptionPassword, null, null, useScratchFiles);
    }

    /**
     * Constructor.
     *
     * @param source input representing the pdf.
     * @param decryptionPassword password to be used for decryption.
     * @param keyStore key store to be used for decryption when using public key security
     * @param alias alias to be used for decryption when using public key security
     *
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream keyStore,
            String alias) throws IOException
    {
        this(source, decryptionPassword, keyStore, alias, false);
    }

    /**
     * Constructor.
     *
     * @param source input representing the pdf.
     * @param decryptionPassword password to be used for decryption.
     * @param keyStore key store to be used for decryption when using public key security
     * @param alias alias to be used for decryption when using public key security
     * @param useScratchFiles use a buffer for temporary storage.
     *
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream keyStore,
            String alias, boolean useScratchFiles) throws IOException
    {
        super(source);
        fileLen = source.length();
        password = decryptionPassword;
        keyStoreInputStream = keyStore;
        keyAlias = alias;
        init(useScratchFiles);
    }

    /**
     * Constructor.
     *
     * @param source input representing the pdf.
     * @param decryptionPassword password to be used for decryption.
     * @param keyStore key store to be used for decryption when using public key security
     * @param alias alias to be used for decryption when using public key security
     * @param scratchFile buffer handler for temporary storage; it will be closed on
     *        {@link COSDocument#close()}
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream keyStore,
            String alias, ScratchFile scratchFile) throws IOException
    {
        super(source);
        fileLen = source.length();
        password = decryptionPassword;
        keyStoreInputStream = keyStore;
        keyAlias = alias;
        init(scratchFile);
    }

    /**
     * Applies the EOF lookup range system property and creates the document backed by the
     * given scratch file.
     *
     * @param scratchFile buffer handler passed on to the new {@link COSDocument}.
     * @throws IOException If something went wrong.
     */
    private void init(ScratchFile scratchFile) throws IOException
    {
        applyEofLookupRangeProperty();
        document = new COSDocument(scratchFile);
    }

    /**
     * Applies the EOF lookup range system property and creates the document.
     *
     * @param useScratchFiles flag passed on to the new {@link COSDocument}.
     * @throws IOException If something went wrong.
     */
    private void init(boolean useScratchFiles) throws IOException
    {
        applyEofLookupRangeProperty();
        document = new COSDocument(useScratchFiles);
    }

    /**
     * Reads the system property named by {@code SYSPROP_EOFLOOKUPRANGE} and, when it holds an
     * integer, forwards it to {@code setEOFLookupRange}. A non-integer value is logged as a
     * warning and otherwise ignored; an unset property changes nothing.
     */
    private void applyEofLookupRangeProperty()
    {
        String eofLookupRangeStr = System.getProperty(SYSPROP_EOFLOOKUPRANGE);
        if (eofLookupRangeStr == null)
        {
            return;
        }
        try
        {
            setEOFLookupRange(Integer.parseInt(eofLookupRangeStr));
        }
        catch (NumberFormatException nfe)
        {
            Log.w("PdfBox-Android", "System property " + SYSPROP_EOFLOOKUPRANGE
                    + " does not contain an integer value, but: '" + eofLookupRangeStr + "'");
        }
    }

    /**
     * This will get the PD document that was parsed. When you are done with
     * this document you must call close() on it to release resources.
     *
     * @return The document at the PD layer.
     *
     * @throws IOException If there is an error getting the document.
     */
    public PDDocument getPDDocument() throws IOException
    {
        return new PDDocument(getDocument(), source, accessPermission);
    }

    /**
     * The initial parse will first parse only the trailer, the xrefstart and all xref tables to
     * have a pointer (offset) to all the pdf's objects. It can handle linearized pdfs, which will
     * have an xref at the end pointing to an xref at the beginning of the file. Last the root
     * object is parsed.
     *
     * @throws IOException If something went wrong.
     */
    protected void initialParse() throws IOException
    {
        COSDictionary trailer = null;
        // parse startxref
        long startXRefOffset = getStartxrefOffset();
        if (startXRefOffset > -1)
        {
            trailer = parseXref(startXRefOffset);
        }
        else if (isLenient())
        {
            trailer = rebuildTrailer();
        }
        // prepare decryption if necessary
        prepareDecryption();

        parseTrailerValuesDynamically(trailer);

        COSObject catalogObj = document.getCatalog();
        if (catalogObj != null && catalogObj.getObject() instanceof COSDictionary)
        {
            parseDictObjects((COSDictionary) catalogObj.getObject(), (COSName[]) null);
            document.setDecrypted();
        }
        initialParseDone = true;
    }

    /**
     * This will parse the stream and populate the COSDocument object. The key store stream
     * passed to the constructor is closed when parsing ends, whether it succeeded or not. If
     * parsing fails, the partially built document is closed and discarded.
     *
     * @throws IOException If there is an error reading from the stream or corrupt data
     * is found.
     */
    public void parse() throws IOException
    {
        // set to false if all is processed
        boolean exceptionOccurred = true;
        try
        {
            // PDFBOX-1922 read the version header and rewind
            if (!parsePDFHeader() && !parseFDFHeader())
            {
                throw new IOException( "Error: Header doesn't contain versioninfo" );
            }

            if (!initialParseDone)
            {
                initialParse();
            }
            exceptionOccurred = false;
        }
        finally
        {
            IOUtils.closeQuietly(keyStoreInputStream);

            if (exceptionOccurred && document != null)
            {
                IOUtils.closeQuietly(document);
                document = null;
            }
        }
    }

    /**
     * Prepare for decryption. Does nothing unless the trailer has an {@code /Encrypt} entry
     * that is not the null object. Uses public key decryption material when a key store stream
     * was supplied, and standard (password) decryption material otherwise.
     *
     * @throws IOException if something went wrong
     */
    private void prepareDecryption() throws IOException
    {
        COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT);
        if (trailerEncryptItem != null && !(trailerEncryptItem instanceof COSNull))
        {
            if (trailerEncryptItem instanceof COSObject)
            {
                // the encryption dictionary is an indirect object: load it before it is used
                COSObject trailerEncryptObj = (COSObject) trailerEncryptItem;
                parseDictionaryRecursive(trailerEncryptObj);
            }
            try
            {
                PDEncryption encryption = new PDEncryption(document.getEncryptionDictionary());

                DecryptionMaterial decryptionMaterial;
                if (keyStoreInputStream != null)
                {
                    KeyStore ks = KeyStore.getInstance("PKCS12");
                    ks.load(keyStoreInputStream, password.toCharArray());

                    decryptionMaterial = new PublicKeyDecryptionMaterial(ks, keyAlias, password);
                }
                else
                {
                    decryptionMaterial = new StandardDecryptionMaterial(password);
                }

                securityHandler = encryption.getSecurityHandler();
                securityHandler.prepareForDecryption(encryption, document.getDocumentID(),
                        decryptionMaterial);
                accessPermission = securityHandler.getCurrentAccessPermission();
            }
            catch (IOException e)
            {
                throw e;
            }
            catch (Exception e)
            {
                throw new IOException("Error (" + e.getClass().getSimpleName()
                        + ") while creating security handler for decryption", e);
            }
        }
    }

    /**
     * Resolves all not already parsed objects of a dictionary recursively.
     *
     * @param dictionaryObject dictionary to be parsed
     * @throws IOException if something went wrong, including the case where the object does not
     *         resolve to a dictionary
     */
    private void parseDictionaryRecursive(COSObject dictionaryObject) throws IOException
    {
        parseObjectDynamically(dictionaryObject, true);
        COSBase resolved = dictionaryObject.getObject();
        if (!(resolved instanceof COSDictionary))
        {
            // A malformed file can point here at something that is not a dictionary (or at
            // nothing at all). Report it as a parse error instead of letting a
            // ClassCastException / NullPointerException escape from parse().
            throw new IOException("Dictionary object expected, but got: "
                    + (resolved == null ? "null" : resolved.getClass().getSimpleName()));
        }
        COSDictionary dictionary = (COSDictionary) resolved;
        for (COSBase value : dictionary.getValues())
        {
            if (value instanceof COSObject)
            {
                COSObject object = (COSObject) value;
                // only descend into references that have not been loaded yet
                if (object.getObject() == null)
                {
                    parseDictionaryRecursive(object);
                }
            }
        }
    }
}