package com.tom_roush.pdfbox.pdfparser;
import android.util.Log;
import com.tom_roush.pdfbox.cos.COSBase;
import com.tom_roush.pdfbox.cos.COSDictionary;
import com.tom_roush.pdfbox.cos.COSDocument;
import com.tom_roush.pdfbox.cos.COSName;
import com.tom_roush.pdfbox.cos.COSNull;
import com.tom_roush.pdfbox.cos.COSObject;
import com.tom_roush.pdfbox.io.IOUtils;
import com.tom_roush.pdfbox.io.RandomAccessRead;
import com.tom_roush.pdfbox.io.ScratchFile;
import com.tom_roush.pdfbox.pdmodel.PDDocument;
import com.tom_roush.pdfbox.pdmodel.encryption.AccessPermission;
import com.tom_roush.pdfbox.pdmodel.encryption.DecryptionMaterial;
import com.tom_roush.pdfbox.pdmodel.encryption.PDEncryption;
import com.tom_roush.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial;
import com.tom_roush.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
import java.io.IOException;
import java.io.InputStream;
import java.security.KeyStore;
/**
 * Parser that populates a {@link COSDocument} from a {@link RandomAccessRead} source and, if the
 * trailer carries an {@code /Encrypt} entry, prepares a security handler for decryption using
 * either a password or a PKCS12 key store.
 *
 * <p>Typical use: construct, call {@link #parse()}, then obtain the result with
 * {@link #getPDDocument()}.
 */
public class PDFParser extends COSParser
{
    /** Decryption password; never {@code null} ({@code null} input is normalised to {@code ""}). */
    private final String password;
    /** Optional PKCS12 key store stream for public key security; closed at the end of {@link #parse()}. */
    private final InputStream keyStoreInputStream;
    /** Optional alias of the key store entry used for public key security. */
    private final String keyAlias;
    /** Access permission reported by the security handler; stays {@code null} if no decryption was prepared. */
    private AccessPermission accessPermission;

    /**
     * Creates a parser with an empty decryption password and {@code useScratchFiles} set to
     * {@code false}.
     *
     * @param source input representing the pdf.
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source) throws IOException
    {
        this(source, "", false);
    }

    /**
     * Creates a parser with an empty decryption password.
     *
     * @param source input representing the pdf.
     * @param useScratchFiles use a file based buffer for temporary storage.
     *
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source, boolean useScratchFiles) throws IOException
    {
        this(source, "", useScratchFiles);
    }

    /**
     * Creates a parser with the given decryption password and {@code useScratchFiles} set to
     * {@code false}.
     *
     * @param source input representing the pdf.
     * @param decryptionPassword password to be used for decryption; {@code null} is treated as
     * the empty password.
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source, String decryptionPassword) throws IOException
    {
        this(source, decryptionPassword, false);
    }

    /**
     * Creates a parser with the given decryption password and no key store.
     *
     * @param source input representing the pdf.
     * @param decryptionPassword password to be used for decryption; {@code null} is treated as
     * the empty password.
     * @param useScratchFiles use a buffer for temporary storage.
     *
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source, String decryptionPassword, boolean useScratchFiles)
            throws IOException
    {
        this(source, decryptionPassword, null, null, useScratchFiles);
    }

    /**
     * Creates a parser that can decrypt with a key store, with {@code useScratchFiles} set to
     * {@code false}.
     *
     * @param source input representing the pdf.
     * @param decryptionPassword password to be used for decryption; {@code null} is treated as
     * the empty password.
     * @param keyStore key store to be used for decryption when using public key security
     * @param alias alias to be used for decryption when using public key security
     *
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream keyStore,
            String alias) throws IOException
    {
        this(source, decryptionPassword, keyStore, alias, false);
    }

    /**
     * Creates a parser that can decrypt with a key store and lets the caller choose whether
     * scratch files are used.
     *
     * @param source input representing the pdf.
     * @param decryptionPassword password to be used for decryption; {@code null} is treated as
     * the empty password.
     * @param keyStore key store to be used for decryption when using public key security
     * @param alias alias to be used for decryption when using public key security
     * @param useScratchFiles use a buffer for temporary storage.
     *
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream keyStore,
            String alias, boolean useScratchFiles) throws IOException
    {
        super(source);
        fileLen = source.length();
        // Normalise null so that prepareDecryption() can call password.toCharArray() safely.
        password = decryptionPassword != null ? decryptionPassword : "";
        keyStoreInputStream = keyStore;
        keyAlias = alias;
        init(useScratchFiles);
    }

    /**
     * Creates a parser that can decrypt with a key store and uses the supplied scratch file
     * handler.
     *
     * @param source input representing the pdf.
     * @param decryptionPassword password to be used for decryption; {@code null} is treated as
     * the empty password.
     * @param keyStore key store to be used for decryption when using public key security
     * @param alias alias to be used for decryption when using public key security
     * @param scratchFile buffer handler for temporary storage; it will be closed on
     * {@link COSDocument#close()}
     * @throws IOException If something went wrong.
     */
    public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream keyStore,
            String alias, ScratchFile scratchFile) throws IOException
    {
        super(source);
        fileLen = source.length();
        // Normalise null so that prepareDecryption() can call password.toCharArray() safely.
        password = decryptionPassword != null ? decryptionPassword : "";
        keyStoreInputStream = keyStore;
        keyAlias = alias;
        init(scratchFile);
    }

    /**
     * Applies the EOF lookup range system property and creates the document backed by the given
     * scratch file handler.
     *
     * @param scratchFile buffer handler handed to the new {@link COSDocument}.
     * @throws IOException If something went wrong.
     */
    private void init(ScratchFile scratchFile) throws IOException
    {
        applyEofLookupRangeProperty();
        document = new COSDocument(scratchFile);
    }

    /**
     * Applies the EOF lookup range system property and creates the document.
     *
     * @param useScratchFiles handed to the new {@link COSDocument}.
     * @throws IOException If something went wrong.
     */
    private void init(boolean useScratchFiles) throws IOException
    {
        applyEofLookupRangeProperty();
        document = new COSDocument(useScratchFiles);
    }

    /**
     * Reads the system property named by {@code SYSPROP_EOFLOOKUPRANGE} and, if it holds an
     * integer, passes it to {@code setEOFLookupRange}. A non-integer value is logged as a warning
     * and otherwise ignored; an unset property changes nothing.
     */
    private void applyEofLookupRangeProperty()
    {
        String eofLookupRangeStr = System.getProperty(SYSPROP_EOFLOOKUPRANGE);
        if (eofLookupRangeStr == null)
        {
            return;
        }
        try
        {
            setEOFLookupRange(Integer.parseInt(eofLookupRangeStr));
        }
        catch (NumberFormatException nfe)
        {
            Log.w("PdfBox-Android", "System property " + SYSPROP_EOFLOOKUPRANGE
                    + " does not contain an integer value, but: '" + eofLookupRangeStr + "'");
        }
    }

    /**
     * This will get the PD document that was parsed. When you are done with
     * this document you must call close() on it to release resources.
     *
     * <p>Each call wraps the parsed COS document, the source and the access permission
     * determined during decryption preparation in a new {@link PDDocument} instance.
     *
     * @return The document at the PD layer.
     *
     * @throws IOException If there is an error getting the document.
     */
    public PDDocument getPDDocument() throws IOException
    {
        return new PDDocument(getDocument(), source, accessPermission);
    }

    /**
     * The initial parse will first parse only the trailer, the xrefstart and all xref tables to have a pointer (offset)
     * to all the pdf's objects. It can handle linearized pdfs, which will have an xref at the end pointing to an xref
     * at the beginning of the file. Last the root object is parsed.
     *
     * <p>If no startxref offset is found and the parser is lenient, the trailer is rebuilt
     * instead of being read from the xref.
     *
     * @throws IOException If something went wrong.
     */
    protected void initialParse() throws IOException
    {
        COSDictionary trailer = null;
        // parse startxref
        long startXRefOffset = getStartxrefOffset();
        if (startXRefOffset > -1)
        {
            trailer = parseXref(startXRefOffset);
        }
        else if (isLenient())
        {
            trailer = rebuildTrailer();
        }
        // prepare decryption if necessary; done before any further objects are parsed
        prepareDecryption();

        parseTrailerValuesDynamically(trailer);

        COSObject catalogObj = document.getCatalog();
        if (catalogObj != null && catalogObj.getObject() instanceof COSDictionary)
        {
            parseDictObjects((COSDictionary) catalogObj.getObject(), (COSName[]) null);
            document.setDecrypted();
        }
        initialParseDone = true;
    }

    /**
     * This will parse the source and populate the COSDocument object. The key store input
     * stream passed to the constructor, if any, is closed when parsing finishes, whether it
     * succeeded or not. If parsing fails, the partially built document is closed and discarded.
     *
     * @throws IOException If there is an error reading from the stream or corrupt data
     * is found.
     */
    public void parse() throws IOException
    {
        // set to false if all is processed
        boolean exceptionOccurred = true;
        try
        {
            // PDFBOX-1922 read the version header and rewind
            if (!parsePDFHeader() && !parseFDFHeader())
            {
                throw new IOException( "Error: Header doesn't contain versioninfo" );
            }

            if (!initialParseDone)
            {
                initialParse();
            }
            exceptionOccurred = false;
        }
        finally
        {
            IOUtils.closeQuietly(keyStoreInputStream);

            // on failure release the half-built document so its resources are not leaked
            if (exceptionOccurred && document != null)
            {
                IOUtils.closeQuietly(document);
                document = null;
            }
        }
    }

    /**
     * Prepare for decryption.
     *
     * <p>Does nothing unless the trailer has an {@code /Encrypt} entry that is not a COS null.
     * An indirect entry is resolved first. The decryption material is a
     * {@link PublicKeyDecryptionMaterial} backed by a PKCS12 key store when a key store stream
     * was supplied, otherwise a {@link StandardDecryptionMaterial} built from the password.
     *
     * @throws IOException if something went wrong; any non-IO exception raised while setting up
     * the security handler is wrapped in an {@link IOException}.
     */
    private void prepareDecryption() throws IOException
    {
        COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT);
        if (trailerEncryptItem == null || trailerEncryptItem instanceof COSNull)
        {
            return;
        }

        if (trailerEncryptItem instanceof COSObject)
        {
            parseDictionaryRecursive((COSObject) trailerEncryptItem);
        }

        try
        {
            PDEncryption encryption = new PDEncryption(document.getEncryptionDictionary());

            DecryptionMaterial decryptionMaterial;
            if (keyStoreInputStream != null)
            {
                KeyStore ks = KeyStore.getInstance("PKCS12");
                ks.load(keyStoreInputStream, password.toCharArray());

                decryptionMaterial = new PublicKeyDecryptionMaterial(ks, keyAlias, password);
            }
            else
            {
                decryptionMaterial = new StandardDecryptionMaterial(password);
            }

            securityHandler = encryption.getSecurityHandler();
            securityHandler.prepareForDecryption(encryption, document.getDocumentID(),
                    decryptionMaterial);
            accessPermission = securityHandler.getCurrentAccessPermission();
        }
        catch (IOException e)
        {
            // IOExceptions are passed through unwrapped
            throw e;
        }
        catch (Exception e)
        {
            throw new IOException("Error (" + e.getClass().getSimpleName()
                    + ") while creating security handler for decryption", e);
        }
    }

    /**
     * Resolves all not already parsed objects of a dictionary recursively.
     *
     * @param dictionaryObject dictionary to be parsed
     * @throws IOException if something went wrong, or if the object does not resolve to a
     * dictionary
     *
     */
    private void parseDictionaryRecursive(COSObject dictionaryObject) throws IOException
    {
        parseObjectDynamically(dictionaryObject, true);
        COSBase resolved = dictionaryObject.getObject();
        if (!(resolved instanceof COSDictionary))
        {
            // A corrupt file must surface as IOException, not as a ClassCastException or
            // NullPointerException from a blind cast.
            throw new IOException("Expected a dictionary object, but got: " + resolved);
        }
        COSDictionary dictionary = (COSDictionary) resolved;
        for (COSBase value : dictionary.getValues())
        {
            if (value instanceof COSObject)
            {
                COSObject object = (COSObject) value;
                // only descend into references that have not been resolved yet
                if (object.getObject() == null)
                {
                    parseDictionaryRecursive(object);
                }
            }
        }
    }
}