Document.java example

Explorer
lenient-pdf-compare-master
- src
/*
 * Copyright 2006-2012 ICEsoft Technologies Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the
 * License. You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */
package org.icepdf.core.pobjects;

import org.icepdf.core.SecurityCallback;
import org.icepdf.core.application.ProductInfo;
import org.icepdf.core.exceptions.PDFException;
import org.icepdf.core.exceptions.PDFSecurityException;
import org.icepdf.core.io.*;
import org.icepdf.core.pobjects.graphics.text.PageText;
import org.icepdf.core.pobjects.security.SecurityManager;
import org.icepdf.core.util.Defs;
import org.icepdf.core.util.LazyObjectLoader;
import org.icepdf.core.util.Library;
import org.icepdf.core.util.Parser;
import org.icepdf.core.util.IncrementalUpdater;

import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.util.Hashtable;
import java.util.Vector;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * <p>The <code>Document</code> class represents a PDF document and provides
 * access to the hierarchy of objects contained in the body section of the
 * PDF document.  Most of the objects in the hierarchy are dictionaries which
 * contain references to page content and other objects such as annotations.
 * For more information on the document object hierarchy, see the <i>ICEpdf
 * Developer's Guide</i>.</p>
 * <p/>
 * <p>The <code>Document</code> class also provides access to methods responsible
 * for rendering PDF document content.  Methods are available to capture page
 * content to a graphics context or extract image and text data on a page-by-page
 * basis.</p>
 * <p/>
 * <p>If your PDF rendering application will be accessing encrypted documents,
 * it is important to implement the SecurityCallback.  This interface provides
 * methods for getting password data from a user if needed.</p>
 *
 * @since 1.0
 */
public class Document {

    // Class-wide logger used for load diagnostics and error reporting.
    // NOTE(review): the logger name is taken from Document.class.toString(),
    // which yields "class org.icepdf.core.pobjects.Document" (with the
    // "class " prefix) rather than the plain name returned by getName() --
    // confirm this is the intended, project-wide convention before changing it.
    private static final Logger logger =
            Logger.getLogger(Document.class.toString());

    /**
     * Reports the version of the ICEpdf rendering core, assembled from the
     * {@link ProductInfo} constants as "PRIMARY.SECONDARY.TERTIARY RELEASE_TYPE".
     * This is not the version of the PDF format used to encode a document.
     *
     * @return version number of ICEpdf's rendering core.
     */
    public static String getLibraryVersion() {
        StringBuilder version = new StringBuilder();
        // numeric part: primary.secondary.tertiary
        version.append(ProductInfo.PRIMARY);
        version.append(".");
        version.append(ProductInfo.SECONDARY);
        version.append(".");
        version.append(ProductInfo.TERTIARY);
        // release type is separated from the numbers by a single space
        version.append(" ");
        version.append(ProductInfo.RELEASE_TYPE);
        return version.toString();
    }

    // Core catalog, root of the document hierarchy.
    private Catalog catalog;

    // We used to keep the document main PTrailer's PInfo,
    //  but now that's lazily loaded, so instead we keep the
    //  PTrailer itself, which can get us the PInfo whenever it is needed.
    private PTrailer pTrailer;

    // State manager for tracking objects that have been touched in some way
    // for editing purposes.
    private StateManager stateManager;

    // This is the original file or URL path from which the PDF document was
    // loaded.
    private String origin;

    // This is the location of the file when it is saved to the hard drive.  This
    // is usually only different from the origin if the PDF document
    // was loaded from a URL.
    private String cachedFilePath;

    // Callback for password dialogs, or command line access.
    private SecurityCallback securityCallback;

    // Disable/enable file caching, overrides fileCachingSize.
    private static boolean isCachingEnabled;

    // Repository of all PDF objects associated with this document.
    private Library library = null;

    // Seekable input the document is read from; assigned when a document is
    // loaded through one of the set methods.
    private SeekableInput documentSeekableInput;

    static {
        // sets if file caching is enabled or disabled.
        isCachingEnabled =
                Defs.sysPropertyBoolean("org.icepdf.core.streamcache.enabled",
                        true);
    }

    /**
     * Creates a new instance of a Document.  A Document class represents
     * one PDF document.  The constructor itself loads nothing; content is
     * loaded by a later call to one of the set methods, for example
     * {@link #setFile(String)} or {@link #setUrl(URL)}.
     */
    public Document() {
    }

    /**
     * Records the origin (filepath or URL) of this Document and, when CONFIG
     * level logging is enabled, logs the JVM's free/total memory followed by
     * the origin being loaded.
     *
     * @param o new origin value
     * @see #getDocumentOrigin()
     */
    private void setDocumentOrigin(String o) {
        origin = o;
        if (!logger.isLoggable(Level.CONFIG)) {
            return;
        }
        Runtime runtime = Runtime.getRuntime();
        String memoryReport =
                "MEMFREE: " + runtime.freeMemory() + " of " + runtime.totalMemory();
        logger.config(memoryReport);
        logger.config("LOADING: " + o);
    }

    /**
     * Stores the path of the local cache file that backs this document, used
     * when the document was not opened directly from a file (e.g. a URL).
     *
     * @param o new cached file path value
     * @see #getDocumentCachedFilePath
     */
    private void setDocumentCachedFilePath(String o) {
        this.cachedFilePath = o;
    }

    /**
     * Gets the path of the local cache file that backs this document, if one
     * was recorded while loading; otherwise <code>null</code>.
     *
     * @return cached file path, or null when no cache file has been recorded
     */
    private String getDocumentCachedFilePath() {
        return this.cachedFilePath;
    }

    /**
     * Loads a PDF file from the given path and initiates the document's
     * Catalog.  The path is recorded as the document origin before the file
     * is opened.
     *
     * @param filepath path of PDF document.
     * @throws PDFException         if an invalid file encoding.
     * @throws PDFSecurityException if a security provider cannot be found
     *                              or there is an error decrypting the file.
     * @throws IOException          if a problem setting up, or parsing the file.
     */
    public void setFile(String filepath)
            throws PDFException, PDFSecurityException, IOException {
        setDocumentOrigin(filepath);

        // open the file for random access and hand it to the common loader
        final File pdfFile = new File(filepath);
        final RandomAccessFileInputStream fileInput =
                RandomAccessFileInputStream.build(pdfFile);
        setInputStream(fileInput);
    }

    /**
     * Loads a PDF file from the given URL and initiates the document's Catalog.
     * If the system property org.icepdf.core.streamcache.enabled=true, the file
     * will be cached to a temp file; otherwise, the complete document stream will
     * be stored in memory.  The URL's stream is always closed before this
     * method returns, whether or not loading succeeded.
     *
     * @param url location of file.
     * @throws PDFException         an invalid file encoding.
     * @throws PDFSecurityException if a security provider can not be found
     *                              or there is an error decrypting the file.
     * @throws IOException          if a problem downloading, setting up, or parsing the file.
     */
    public void setUrl(URL url)
            throws PDFException, PDFSecurityException, IOException {
        InputStream urlStream = null;
        try {
            // connect and open a stream on the connection
            urlStream = url.openConnection().getInputStream();
            // the URL's string form doubles as the document origin
            setInputStream(urlStream, url.toString());
        }
        finally {
            if (urlStream != null) {
                urlStream.close();
            }
        }
    }

    /**
     * Load a PDF file from the given input stream and initiates the document's Catalog.
     * If the system property org.icepdf.core.streamcache.enabled=true, the file
     * will be cached to a temp file; otherwise, the complete document stream will
     * be stored in memory.  The supplied stream is read to its end but is not
     * closed by this method.
     *
     * @param in        input stream containing PDF data
     * @param pathOrURL value assigned to document origin
     * @throws PDFException         an invalid stream or file encoding
     * @throws PDFSecurityException if a security provider can not be found
     *                              or there is an error decrypting the file.
     * @throws IOException          if a problem setting up, or parsing the SeekableInput.
     */
    public void setInputStream(InputStream in, String pathOrURL)
            throws PDFException, PDFSecurityException, IOException {
        setDocumentOrigin(pathOrURL);

        if (!isCachingEnabled) {
            // read into memory first
            ConservativeSizingByteArrayOutputStream byteArrayOutputStream =
                    new ConservativeSizingByteArrayOutputStream(100 * 1024, null);

            // copy the bytes; in.read blocks until data arrives or the
            // end of the stream is reached.
            byte[] buffer = new byte[4096];
            int length;
            while ((length = in.read(buffer, 0, buffer.length)) > 0) {
                byteArrayOutputStream.write(buffer, 0, length);
            }
            byteArrayOutputStream.flush();
            byteArrayOutputStream.close();
            int size = byteArrayOutputStream.size();
            byteArrayOutputStream.trim();
            byte[] data = byteArrayOutputStream.relinquishByteArray();

            // finally read the in-memory copy
            SeekableByteArrayInputStream byteArrayInputStream =
                    new SeekableByteArrayInputStream(data, 0, size);
            setInputStream(byteArrayInputStream);
        }
        // if caching is allowed cache the stream to a file
        else {
            // create tmp file and write bytes to it.
            File tempFile = File.createTempFile(
                    "ICEpdfTempFile" + getClass().hashCode(),
                    ".tmp");
            // Delete temp file on exit
            tempFile.deleteOnExit();

            // Write the data to the temp file.
            FileOutputStream fileOutputStream =
                    new FileOutputStream(tempFile.getAbsolutePath(), true);
            boolean copied = false;
            try {
                byte[] buffer = new byte[4096];
                int length;
                while ((length = in.read(buffer, 0, buffer.length)) > 0) {
                    fileOutputStream.write(buffer, 0, length);
                }
                fileOutputStream.flush();
                copied = true;
            }
            finally {
                // Always release the file handle, even when the copy fails.
                try {
                    fileOutputStream.close();
                }
                catch (IOException closeFailure) {
                    // Only surface a close failure when the copy itself
                    // succeeded, so it never masks the exception that
                    // aborted the copy.
                    if (copied) {
                        throw closeFailure;
                    }
                }
            }

            setDocumentCachedFilePath(tempFile.getAbsolutePath());

            // finally read the cached file
            RandomAccessFileInputStream rafis =
                    RandomAccessFileInputStream.build(tempFile);
            setInputStream(rafis);
        }
    }

    /**
     * Load a PDF file from the given byte array and initiates the document's Catalog.
     * If the system property org.icepdf.core.streamcache.enabled=true, the file
     * will be cached to a temp file; otherwise, the complete document stream will
     * be stored in memory.
     * The given byte array is not necessarily copied, and will try to be directly
     * used, so do not modify it after passing it to this method.
     *
     * @param data      byte array containing PDF data
     * @param offset    the index into the byte array where the PDF data begins
     * @param length    the number of bytes in the byte array belonging to the PDF data
     * @param pathOrURL value assigned to document origin
     * @throws PDFException         an invalid stream or file encoding
     * @throws PDFSecurityException if a security provider can not be found
     *                              or there is an error decrypting the file.
     * @throws IOException          if a problem setting up, or parsing the SeekableInput.
     */
    public void setByteArray(byte[] data, int offset, int length, String pathOrURL)
            throws PDFException, PDFSecurityException, IOException {
        setDocumentOrigin(pathOrURL);

        if (!isCachingEnabled) {
            // read directly from the supplied array
            SeekableByteArrayInputStream byteArrayInputStream =
                    new SeekableByteArrayInputStream(data, offset, length);
            setInputStream(byteArrayInputStream);
        }
        // if caching is allowed cache the bytes to a file
        else {
            // create tmp file and write bytes to it.
            File tempFile = File.createTempFile(
                    "ICEpdfTempFile" + getClass().hashCode(),
                    ".tmp");
            // Delete temp file on exit
            tempFile.deleteOnExit();

            // Write the data to the temp file.
            FileOutputStream fileOutputStream =
                    new FileOutputStream(tempFile.getAbsolutePath(), true);
            boolean written = false;
            try {
                fileOutputStream.write(data, offset, length);
                fileOutputStream.flush();
                written = true;
            }
            finally {
                // Always release the file handle, even when the write fails.
                try {
                    fileOutputStream.close();
                }
                catch (IOException closeFailure) {
                    // Only surface a close failure when the write itself
                    // succeeded, so it never masks the exception that
                    // aborted the write.
                    if (written) {
                        throw closeFailure;
                    }
                }
            }

            setDocumentCachedFilePath(tempFile.getAbsolutePath());

            // finally read the cached file
            RandomAccessFileInputStream rafis =
                    RandomAccessFileInputStream.build(tempFile);
            setInputStream(rafis);
        }
    }

    /**
     * Loads a PDF document from an already seekable input and initiates the
     * document's Catalog.  The origin is recorded first, then the input is
     * passed to the common loading routine.
     *
     * @param in        input stream containing PDF data
     * @param pathOrURL value assigned to document origin
     * @throws PDFException         an invalid stream or file encoding
     * @throws PDFSecurityException if a security provider can not be found
     *                              or there is an error decrypting the file.
     * @throws IOException          if a problem setting up, or parsing the SeekableInput.
     */
    public void setInputStream(SeekableInput in, String pathOrURL)
            throws PDFException, PDFSecurityException, IOException {
        this.setDocumentOrigin(pathOrURL);
        this.setInputStream(in);
    }

    /**
     * Sets the input stream of the PDF file to be rendered.  Loading is first
     * attempted through the cross reference table; if that fails with anything
     * other than a PDFException or PDFSecurityException, the partially loaded
     * state is discarded and the stream is re-read from the start by linear
     * traversal.  On any failure the document is disposed before the exception
     * is propagated.
     *
     * @param in inputstream containing PDF data stream
     * @throws PDFException         if error occurs
     * @throws PDFSecurityException security error
     * @throws IOException          io error during stream handling; unexpected
     *                              exceptions are wrapped in an IOException that
     *                              carries the original exception as its cause.
     */
    private void setInputStream(final SeekableInput in)
            throws PDFException, PDFSecurityException, IOException {
        try {
            documentSeekableInput = in;

            // create library to hold all document objects
            library = new Library();

            boolean loaded = false;
            try {
                loadDocumentViaXRefs(in);
                loaded = true;
            }
            catch (PDFException e) {
                throw e;
            }
            catch (PDFSecurityException e) {
                throw e;
            }
            catch (Exception e) {
                if (logger.isLoggable(Level.WARNING)) {
                    logger.warning("Cross reference deferred loading failed, will fall back to linear reading.");
                }
                // Keep the reason for the fallback available for diagnosis
                // without adding stack traces to normal WARNING output.
                logger.log(Level.FINE, "Cross reference loading failure.", e);
            }

            if (!loaded) {
                // Cleanup any bits left behind by the failed xref loading
                if (catalog != null) {
                    catalog.dispose(false);
                    catalog = null;
                }
                if (library != null) {
                    library.dispose();
                    library = null;
                }
                library = new Library();
                pTrailer = null;

                // rewind and parse every object in file order
                in.seekAbsolute(0L);
                loadDocumentViaLinearTraversal(in.getInputStream());
            }

            // initiate the catalog, build the outline for the document
            catalog.init();

            // create new instance of state manager and add it to the library
            stateManager = new StateManager(pTrailer);
            library.setStateManager(stateManager);
        }
        catch (PDFException e) {
            logger.log(Level.FINE, "Error loading PDF file during linear parse.", e);
            dispose();
            throw e;
        }
        catch (PDFSecurityException e) {
            dispose();
            throw e;
        }
        catch (IOException e) {
            dispose();
            throw e;
        }
        catch (Exception e) {
            dispose();
            logger.log(Level.SEVERE, "Error loading PDF Document.", e);
            // Preserve the original exception as the cause so callers can
            // see what actually went wrong.
            IOException wrapped = new IOException(e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    /**
     * Utility method for loading the document's objects from the xref table.
     * Only the trailer located via the final "startxref" entry is parsed here;
     * any earlier trailers are not followed by this method.
     *
     * @param in input stream to parse
     * @throws IOException          an i/o problem
     * @throws PDFException         an invalid stream or file encoding
     * @throws PDFSecurityException if a security provider can not be found
     *                              or there is an error decrypting the file.
     */
    private void loadDocumentViaXRefs(SeekableInput in)
            throws PDFException, PDFSecurityException, IOException {
        final long xrefPosition = getInitialCrossReferencePosition(in);
        if (xrefPosition <= 0L) {
            throw new RuntimeException("Could not find document trailer");
        }

        // parse the object found at the cross reference position
        in.seekAbsolute(xrefPosition);
        Object parsed = new Parser(in).getObject(library);
        if (parsed instanceof PObject) {
            parsed = ((PObject) parsed).getObject();
        }
        final PTrailer documentTrailer = (PTrailer) parsed;
        if (documentTrailer == null) {
            throw new RuntimeException("Could not find trailer");
        }
        if (documentTrailer.getPrimaryCrossReference() == null) {
            throw new RuntimeException("Could not find cross reference");
        }
        documentTrailer.setPosition(xrefPosition);

        // objects are resolved on demand through the cross reference
        library.setLazyObjectLoader(new LazyObjectLoader(
                library, in, documentTrailer.getPrimaryCrossReference()));

        pTrailer = documentTrailer;
        catalog = documentTrailer.getRootCatalog();
        library.setCatalog(catalog);

        if (catalog == null) {
            throw new NullPointerException("Loading via xref failed to find catalog");
        }

        // encrypted documents need an authorized security manager
        if (makeSecurityManager(documentTrailer)) {
            attemptAuthorizeSecurityManager();
        }
    }

    /**
     * Finds the byte offset of the document's last cross reference section by
     * scanning backwards from the end of the input for the "startxref" keyword
     * and parsing the number that follows it.  Fewer than 2048 bytes from the
     * end of the input are examined.
     *
     * @param in seekable input holding the whole PDF document
     * @return the offset read after the "startxref" keyword
     * @throws IOException      if seeking or reading fails
     * @throws EOFException     if "startxref" cannot be found near the end of
     *                          the input
     * @throws RuntimeException if the token following "startxref" is not a number
     */
    private long getInitialCrossReferencePosition(SeekableInput in) throws IOException {
        in.seekEnd();

        final String startxref = "startxref";
        final int lastIndex = startxref.length() - 1;
        long endOfFile = in.getAbsolutePosition();
        long currentPosition = endOfFile - 1;
        long afterStartxref = -1;
        // the keyword is matched from its last character back to its first
        int startxrefIndexToMatch = lastIndex;

        while (currentPosition >= 0 && (endOfFile - currentPosition) < 2048) {
            in.seekAbsolute(currentPosition);
            int curr = in.read();
            if (curr < 0)
                throw new EOFException("Could not find startxref at end of file");
            if (curr != startxref.charAt(startxrefIndexToMatch)) {
                // Partial match broken: start over, but fall through so the
                // current byte is re-tested as the first character of a new
                // match (otherwise input such as "startxreff" is missed).
                startxrefIndexToMatch = lastIndex;
            }
            if (curr == startxref.charAt(startxrefIndexToMatch)) {
                // If we've matched the whole string
                if (startxrefIndexToMatch == 0) {
                    afterStartxref = currentPosition + startxref.length();
                    break;
                }
                startxrefIndexToMatch--;
            }
            currentPosition--;
        }
        if (afterStartxref < 0)
            throw new EOFException("Could not find startxref near end of file");

        in.seekAbsolute(afterStartxref);
        Parser parser = new Parser(in);
        Object token = parser.getToken();
        // covers both a missing token and a token that is not numeric
        if (!(token instanceof Number))
            throw new RuntimeException("Could not find ending cross reference position");
        return ((Number) token).longValue();
    }

    /**
     * Utility method for parsing a PDF document's objects in file order.  This
     * should only be called when the xref lookup fails.  Every parsed object is
     * added to the library by the parser; the last Catalog and the last
     * PTrailer encountered are kept for the document.
     *
     * @param in stream representing whole pdf document
     * @throws PDFException         an invalid stream or file encoding
     * @throws PDFSecurityException if a security provider can not be found
     *                              or there is an error decrypting the file.
     */
    private void loadDocumentViaLinearTraversal(InputStream in)
            throws PDFException, PDFSecurityException {
        skipPastAnyPrefixJunk(in);

        library.setLinearTraversal();
        Parser parser = new Parser(in);

        // document Trailer, holds encryption info
        PTrailer documentTrailer = null;

        // A null object signals eof or an io error and ends the traversal.
        for (Object pdfObject = parser.getObject(library);
             pdfObject != null;
             pdfObject = parser.getObject(library)) {

            // display object information in debug mode
            if (logger.isLoggable(Level.FINER)) {
                logger.finer(pdfObject.getClass().getName() + " " + pdfObject);
            }

            // Look inside PObject wrappers when they carry an object
            Object candidate = pdfObject;
            if (candidate instanceof PObject) {
                Object wrapped = ((PObject) candidate).getObject();
                if (wrapped != null) {
                    candidate = wrapped;
                }
            }

            // the catalog has information on outlines, needed by the gui
            if (candidate instanceof Catalog) {
                catalog = (Catalog) candidate;
            }

            // trailers carry the encryption information; chain each new
            // trailer onto the previous one and continue from the newest
            if (candidate instanceof PTrailer) {
                PTrailer foundTrailer = (PTrailer) candidate;
                if (documentTrailer != null) {
                    documentTrailer.addNextTrailer(foundTrailer);
                }
                documentTrailer = foundTrailer;
            }
        }

        // The LazyObjectLoader is used for both reading from a SeekableInput,
        //  and also accessing ObjectStreams.
        // So, even with linear traversal, we still need it for PDF 1.5 documents
        if (documentTrailer != null) {
            library.setLazyObjectLoader(new LazyObjectLoader(
                    library, null, documentTrailer.getPrimaryCrossReference()));
        }

        pTrailer = documentTrailer;
        library.setCatalog(catalog);

        // set up and authorize security only when a trailer was found
        if (documentTrailer != null && makeSecurityManager(documentTrailer)) {
            attemptAuthorizeSecurityManager();
        }
    }

    /**
     * Typically, if we're doing a linear traversal, it's because the PDF file
     * is corrupted, usually by junk being appended to it, or the ending
     * being truncated, or, in this case, from junk being inserted into the
     * beginning of the file, skewing all the xref object offsets.
     * <p/>
     * We're going to look for the "%PDF-1." string that most PDF files start
     * with. If we do find it, then leave the InputStream after the next
     * whitespace, else rewind back to the beginning, in case the file was
     * never encoded with the PDF version comment.  At most 2048 bytes are
     * examined, and streams that do not support mark/reset are left untouched.
     * This is a best-effort step: I/O errors are logged at FINE level and
     * never propagated.
     *
     * @param in InputStream derived from SeekableInput.getInputStream()
     */
    private void skipPastAnyPrefixJunk(InputStream in) {
        if (!in.markSupported())
            return;
        try {
            final int scanLength = 2048;
            final String scanFor = "%PDF-1.";
            int scanForIndex = 0;
            boolean scanForWhiteSpace = false;
            in.mark(scanLength);
            for (int i = 0; i < scanLength; i++) {
                int data = in.read();
                if (data < 0) {
                    in.reset();
                    return;
                }
                if (scanForWhiteSpace) {
                    if (Parser.isWhitespace((char) data)) {
                        return;
                    }
                } else {
                    if (data != scanFor.charAt(scanForIndex)) {
                        // Partial match broken: start over, but fall through
                        // so this byte is re-tested as the start of a new
                        // match (otherwise "%%PDF-1." is never recognised).
                        scanForIndex = 0;
                    }
                    if (data == scanFor.charAt(scanForIndex)) {
                        scanForIndex++;
                        if (scanForIndex == scanFor.length()) {
                            // Now read until we find white space
                            scanForWhiteSpace = true;
                        }
                    }
                }
            }
            // Searched through scanLength number of bytes and didn't find it,
            //  so reset, in case it was never there to find
            in.reset();
        }
        catch (IOException e) {
            logger.log(Level.FINE, "Error scanning for PDF header, rewinding stream.", e);
            try {
                in.reset();
            }
            catch (IOException e2) {
                // Nothing more can be done here; parsing continues from
                // wherever the stream currently is.
                logger.log(Level.FINE, "Could not rewind stream after failed header scan.", e2);
            }
        }
    }

    /**
     * Utility method for building the SecurityManager if the document
     * contains a crypt entry in the PTrailer.  A manager is only created when
     * the trailer supplies both an encrypt entry and an ID entry; it is then
     * stored on the library.
     *
     * @param documentTrailer document trailer
     * @return Whether or not a SecurityManager was made, and set in the Library
     * @throws PDFSecurityException if there is an issue finding encryption libraries.
     */
    private boolean makeSecurityManager(PTrailer documentTrailer) throws PDFSecurityException {
        Hashtable encryptDictionary = documentTrailer.getEncrypt();
        Vector fileID = documentTrailer.getID();

        // both entries are required before a security manager is built
        if (encryptDictionary == null || fileID == null) {
            return false;
        }

        library.securityManager = new SecurityManager(
                library, encryptDictionary, fileID);
        return true;
    }

    /**
     * If the document has a SecurityManager it is encrypted and as a result the
     * following method is used with the SecurityCallback to prompt a user for
     * a password if needed.  The blank password is tried first; if it is
     * rejected, the callback is asked for a password up to three times.
     *
     * @throws PDFSecurityException if no callback is set, the callback returns
     *                              no password, or three passwords are rejected
     */
    private void attemptAuthorizeSecurityManager() throws PDFSecurityException {
        // check if pdf is password protected, by passing in a blank password
        if (!library.securityManager.isAuthorized("")) {
            boolean authorized = false;

            // Give user 3 chances to type the correct password
            for (int attempt = 1; attempt <= 3 && !authorized; attempt++) {
                // a callback is required to obtain a password
                if (securityCallback == null) {
                    throw new PDFSecurityException("Encryption error");
                }
                String password = securityCallback.requestPassword(this);
                if (password == null) {
                    throw new PDFSecurityException("Encryption error");
                }
                authorized = library.securityManager.isAuthorized(password);
            }

            // out of tries without a valid password
            if (!authorized) {
                throw new PDFSecurityException("Encryption error");
            }
        }

        // set the encryption flag on the library
        library.setEncrypted(true);
    }

    /**
     * Gets the page dimension of the indicated page number using the specified
     * rotation factor.  If the page does not exist then a zero dimension is
     * returned, matching {@link #getPageDimension(int, float, float)}.
     *
     * @param pageNumber   Page number for the given dimension.  The page
     *                     number is zero-based.
     * @param userRotation Rotation, in degrees, that has been applied to page
     *                     when calculating the dimension.
     * @return page dimension for the specified page number
     * @see #getPageDimension(int, float, float)
     */
    public PDimension getPageDimension(int pageNumber, float userRotation) {
        Page page = catalog.getPageTree().getPage(pageNumber, this);
        // The page tree may not have a page for this number; avoid the
        // NullPointerException the unguarded call would raise.
        if (page == null) {
            return new PDimension(0, 0);
        }
        PDimension pd = page.getSize(userRotation);
        catalog.getPageTree().releasePage(page, this);
        return pd;
    }

    /**
     * Gets the page dimension of the indicated page number using the specified
     * rotation and zoom settings.  A zero dimension is returned when the page
     * does not exist.
     *
     * @param pageNumber   Page number for the given dimension.  The page
     *                     number is zero-based.
     * @param userRotation Rotation, in degrees, that has been applied to page
     *                     when calculating the dimension.
     * @param userZoom     Any deviation from the page's actual size, by zooming in or out.
     * @return page dimension for the specified page number.
     * @see #getPageDimension(int, float)
     */
    public PDimension getPageDimension(int pageNumber, float userRotation, float userZoom) {
        final Page page = catalog.getPageTree().getPage(pageNumber, this);
        if (page == null) {
            // no such page
            return new PDimension(0, 0);
        }
        final PDimension dimension = page.getSize(userRotation, userZoom);
        catalog.getPageTree().releasePage(page, this);
        return dimension;
    }

    /**
     * Returns the origin (filepath or URL) of this Document.  This is the original
     * location of the file, whereas the method getDocumentLocation returns the actual
     * location of the file.  The origin and location of the document will only
     * be different if it was loaded from a URL or an input stream.
     *
     * @return file path or URL
     * @see #getDocumentLocation
     */
    public String getDocumentOrigin() {
        // Returned exactly as stored; no normalisation is applied here.
        return this.origin;
    }

    /**
     * Returns the file location or URL of this Document. This location may be different
     * from the file origin if the document was loaded from a URL or input stream.
     * If the file was loaded from a URL or input stream the file location is
     * the path to where the document content is cached.
     *
     * @return file path
     * @see #getDocumentOrigin()
     */
    public String getDocumentLocation() {
        // A cached copy, when one exists, takes precedence over the origin.
        return (cachedFilePath == null) ? origin : cachedFilePath;
    }

    /**
     * Gets an instance of the document state manager, which stores references
     * to objects that need to be written to file.
     * @return stateManager instance for this document. 
     */
    public StateManager getStateManager() {
        // Plain accessor: the manager is handed out as-is, not copied.
        return this.stateManager;
    }

    /**
     * Returns the total number of pages in this document.
     *
     * @return number of pages in the document
     */
    public int getNumberOfPages() {
        // Zero is the answer whenever the page tree cannot be queried.
        int pageCount = 0;
        try {
            pageCount = catalog.getPageTree().getNumberOfPages();
        } catch (Exception e) {
            // Any failure is logged at FINE and reported to the caller as zero pages.
            logger.log(Level.FINE, "Error getting number of pages.", e);
        }
        return pageCount;
    }

    /**
     * Paints the contents of the given page number to the graphics context using
     * the specified rotation, zoom, rendering hints and page boundary.
     *
     * @param pageNumber     Page number to paint.  The page number is zero-based.
     * @param g              graphics context to which the page content will be painted.
     * @param renderHintType Constant specified by the GraphicsRenderingHints class.
     *                       There are two possible entries, SCREEN and PRINT, each with configurable
     *                       rendering hints settings.
     * @param pageBoundary   Constant specifying the page boundary to use when
     *                       painting the page content.
     * @param userRotation   Rotation factor, in degrees, to be applied to the rendered page.
     * @param userZoom       Zoom factor to be applied to the rendered page.
     */
    public void paintPage(int pageNumber, Graphics g, final int renderHintType,
                          final int pageBoundary, float userRotation, float userZoom) {
        Page page = catalog.getPageTree().getPage(pageNumber, this);
        // getPage can hand back null (getPageDimension(int, float, float) guards
        // against the same case); with no page there is nothing to paint.
        if (page == null) {
            return;
        }
        try {
            PDimension sz = page.getSize(userRotation, userZoom);
            int pageWidth = (int) sz.getWidth();
            int pageHeight = (int) sz.getHeight();

            // Paint into a derived context clipped to the page area so the
            // caller's graphics state is left untouched.
            Graphics gg = g.create(0, 0, pageWidth, pageHeight);
            try {
                page.paint(gg, renderHintType, pageBoundary, userRotation, userZoom);
            } finally {
                // Always free the derived context, even if painting fails.
                gg.dispose();
            }
        } finally {
            // Always give the page back to the tree, even if painting fails.
            catalog.getPageTree().releasePage(page, this);
        }
    }

    /**
     * Dispose of Document, freeing up all used resources.
     */
    public void dispose() {
        // Tear down the object hierarchy first: catalog, then library.
        if (catalog != null) {
            catalog.dispose(false);
            catalog = null;
        }
        if (library != null) {
            library.dispose();
            library = null;
        }
        pTrailer = null;

        // Close the underlying input; a failure to close is only logged.
        if (documentSeekableInput != null) {
            try {
                documentSeekableInput.close();
            } catch (IOException closeFailure) {
                logger.log(Level.FINE, "Error closing document input stream.", closeFailure);
            }
            documentSeekableInput = null;
        }

        // Finally remove the cached copy of the document, if there is one.
        final String cachedPath = getDocumentCachedFilePath();
        if (cachedPath == null) {
            return;
        }
        final boolean deleted = new File(cachedPath).delete();
        if (!deleted && logger.isLoggable(Level.WARNING)) {
            logger.warning("Error deleting URL cached to file " + cachedPath);
        }
    }

    /**
     * Takes the internal PDF data, which may be in a file or in RAM,
     * and write it to the provided OutputStream.
     * The OutputStream is not flushed or closed, in case this method's
     * caller requires otherwise.
     *
     * @param out OutputStream to which the PDF file bytes are written.
     * @throws IOException if there is some problem reading or writing the PDF data
     * @return The length of the PDF file copied
     */
    public long writeToOutputStream(OutputStream out) throws IOException {
        long documentLength = documentSeekableInput.getLength();
        // Read through a wrapper constrained to [0, documentLength) of the
        // document input rather than reading the shared input directly.
        SeekableInputConstrainedWrapper wrapper = new SeekableInputConstrainedWrapper(
                documentSeekableInput, 0L, documentLength, false);
        try {
            wrapper.prepareForCurrentUse();

            // Copy in 4 KB chunks until the wrapper reports no more data.
            byte[] buffer = new byte[4096];
            int length;
            while ((length = wrapper.read(buffer, 0, buffer.length)) > 0) {
                out.write(buffer, 0, length);
            }
        }
        catch (Throwable e) {
            logger.log(Level.FINE, "Error writting PDF output stream.", e);
            // Callers only see IOException, but keep the original failure
            // attached as the cause so its stack trace is not lost.  initCause
            // is used instead of the (String, Throwable) constructor, which
            // is only available from Java 6 onwards.
            IOException wrapped = new IOException(e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }
        finally {
            try {
                wrapper.close();
            }
            catch (IOException e) {
                // Closing is best-effort and must not mask the outcome of the
                // copy, but the failure should still be visible in the log.
                logger.log(Level.FINE, "Error closing PDF input wrapper.", e);
            }
        }
        return documentLength;
    }

    /**
     * Copies the pre-existing PDF file, and appends an incremental update for
     * any edits, to the specified OutputStream. For the pre-existing PDF
     * content copying, writeToOutputStream(OutputStream out) is used.
     *
     * @param out OutputStream to which the PDF file bytes are written.
     * @throws IOException if there is some problem reading or writing the PDF data
     * @return The length of the PDF file saved
     */
    public long saveToOutputStream(OutputStream out) throws IOException {
        // The pre-existing bytes go out first; their length is the offset at
        // which the incremental update is then appended.
        final long originalBytes = writeToOutputStream(out);
        return originalBytes + appendIncrementalUpdate(out, originalBytes);
    }

    /**
     * If ICEpdf Pro, then append an incremental update of any edits.
     *
     * @param out OutputStream to which the incremental update bytes are written.
     * @param documentLength Length of the PDF file so far, before the incremental update.
     * @return The number of bytes written for the incremental update.
     * @throws IOException
     */
    protected long appendIncrementalUpdate(OutputStream out, long documentLength)
            throws IOException {
        // All of the work is delegated to IncrementalUpdater.
        final long appendedBytes =
                IncrementalUpdater.appendIncrementalUpdate(this, out, documentLength);
        return appendedBytes;
    }

    /**
     * Gets an Image of the specified page.  The image size is automatically
     * calculated given the page boundary, user rotation and zoom.  The rendering
     * quality is defined by GraphicsRenderingHints.SCREEN.
     *
     * @param pageNumber     Page number of the page to capture the image rendering.
     *                       The page number is zero-based.
     * @param renderHintType Constant specified by the GraphicsRenderingHints class.
     *                       There are two possible entries, SCREEN and PRINT each with configurable
     *                       rendering hints settings.
     * @param pageBoundary   Constant specifying the page boundary to use when
     *                       painting the page content. Typically use Page.BOUNDARY_CROPBOX.
     * @param userRotation   Rotation factor, in degrees, to be applied to the rendered page.
     *                       Arbitrary rotations are not currently supported for this method,
     *                       so only the following values are valid: 0.0f, 90.0f, 180.0f, 270.0f.
     * @param userZoom       Zoom factor to be applied to the rendered page.
     * @return an Image object of the current page.
     */
    public Image getPageImage(int pageNumber,
                              final int renderHintType, final int pageBoundary,
                              float userRotation, float userZoom) {
        Page page = catalog.getPageTree().getPage(pageNumber, this);
        // getPage can hand back null (getPageDimension(int, float, float) guards
        // against the same case); return null, as getPageText does for a page
        // that does not exist, rather than failing with a NullPointerException.
        if (page == null) {
            return null;
        }
        try {
            PDimension sz = page.getSize(pageBoundary, userRotation, userZoom);

            int pageWidth = (int) sz.getWidth();
            int pageHeight = (int) sz.getHeight();

            BufferedImage image = new BufferedImage(pageWidth,
                    pageHeight,
                    BufferedImage.TYPE_INT_RGB);
            Graphics g = image.createGraphics();
            try {
                page.paint(g, renderHintType,
                        pageBoundary, userRotation, userZoom);
            } finally {
                // Always free the image's graphics context, even if painting fails.
                g.dispose();
            }
            return image;
        } finally {
            // Always give the page back to the tree, even if sizing or painting fails.
            catalog.getPageTree().releasePage(page, this);
        }
    }

    /**
     * Exposes a page's PageText object which can be used to get text
     * within the PDF document.  The PageText.toString() is the simplest way to
     * get a page's text.  This utility call does not parse the whole stream
     * and is best suited for text extraction functionality as it is faster than
     * #getPageViewText(int).
     *
     * @param pageNumber Page number of page in which text extraction will act on.
     *                   The page number is zero-based.
     * @return page PageText data Structure.
     * @see #getPageViewText(int)
     */
    public PageText getPageText(int pageNumber) {
        final PageTree tree = catalog.getPageTree();
        // Page numbers outside [0, numberOfPages) have no text to offer.
        if (pageNumber < 0 || pageNumber >= tree.getNumberOfPages()) {
            return null;
        }
        final Page target = tree.getPage(pageNumber, this);
        final PageText extracted = target.getText();
        // The text has been obtained, so the page can go back to the tree.
        catalog.getPageTree().releasePage(target, this);
        return extracted;
    }

    /**
     * Exposes a page's PageText object which can be used to get text
     * within the PDF document.  The PageText.toString() is the simplest way to
     * get a page's text.  The pageText hierarchy can be used to search for
     * selected text or used to set text as highlighted.
     *
     * @param pageNumber Page number of page in which text extraction will act on.
     *                   The page number is zero-based.
     * @return page PageText data Structure.
     */
    public PageText getPageViewText(int pageNumber) {
        final PageTree tree = catalog.getPageTree();
        // Page numbers outside [0, numberOfPages) have no text to offer.
        if (pageNumber < 0 || pageNumber >= tree.getNumberOfPages()) {
            return null;
        }
        final Page target = tree.getPage(pageNumber, this);
        final PageText viewText = target.getViewText();
        // The text has been obtained, so the page can go back to the tree.
        catalog.getPageTree().releasePage(target, this);
        return viewText;
    }

    /**
     * Gets the security manager for this document. If the document has no
     * security manager null is returned.
     *
     * @return security manager for document if available.
     */
    public SecurityManager getSecurityManager() {
        // dispose() clears the library reference.  Honour the documented
        // "null when no security manager is available" contract in that state
        // instead of failing with a NullPointerException.
        if (library == null) {
            return null;
        }
        return library.securityManager;
    }

    /**
     * Sets the security callback to be used for this document.  The security
     * callback allows a mechanism for prompting a user for a password if the
     * document is password protected.
     *
     * @param securityCallback a class which implements the SecurityCallback
     *                         interface.
     */
    public void setSecurityCallback(SecurityCallback securityCallback) {
        // Only records the callback on this document; nothing is invoked here.
        this.securityCallback = securityCallback;
    }

    /**
     * Gets the document's information as specified in the PTrailer in the document
     * hierarchy.
     *
     * @return document information
     * @see org.icepdf.core.pobjects.PInfo for more information.
     */
    public PInfo getInfo() {
        // Without a trailer there is no information dictionary to report.
        return (pTrailer == null) ? null : pTrailer.getInfo();
    }

    /**
     * Gets a vector of Images where each index represents an image inside
     * the specified page.  The images are returned in the size in which they
     * were embedded in the PDF document, which may be different than the
     * size displayed when the complete PDF page is rendered.
     *
     * @param pageNumber page number to act on.  Zero-based page number.
     * @return vector of Images inside the current page
     */
    public Vector getPageImages(int pageNumber) {
        Page pg = catalog.getPageTree().getPage(pageNumber, this);
        // getPage can hand back null (getPageDimension(int, float, float) guards
        // against the same case); a page that cannot be obtained contributes
        // no images, so return an empty vector rather than throwing.
        if (pg == null) {
            return new Vector();
        }
        Vector images = pg.getImages();
        // Hand the page back to the page tree once its images have been read.
        catalog.getPageTree().releasePage(pg, this);
        return images;
    }

    /**
     * Gets the Document Catalog's PageTree entry as specified by the Document
     * hierarchy.  The PageTree can be used to obtain detailed information about
     * the Page object which makes up the document.
     *
     * @return PageTree specified by the document hierarchy.
     */
    public PageTree getPageTree() {
        // The page tree is owned by the catalog; simply pass it through.
        final PageTree pages = catalog.getPageTree();
        return pages;
    }

    /**
     * Gets the Document's Catalog as specified by the Document hierarchy. The
     * Catalog can be used to traverse the Document's hierarchy.
     *
     * @return document's Catalog object; null, if one does not exist.
     */
    public Catalog getCatalog() {
        // May be null, e.g. once dispose() has cleared the reference.
        return this.catalog;
    }
}