/* * Copyright 2006-2017 ICEsoft Technologies Canada Corp. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the * License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.icepdf.core.pobjects; import org.icepdf.core.SecurityCallback; import org.icepdf.core.application.ProductInfo; import org.icepdf.core.exceptions.PDFException; import org.icepdf.core.exceptions.PDFSecurityException; import org.icepdf.core.io.*; import org.icepdf.core.pobjects.acroform.FieldDictionary; import org.icepdf.core.pobjects.acroform.InteractiveForm; import org.icepdf.core.pobjects.annotations.AbstractWidgetAnnotation; import org.icepdf.core.pobjects.graphics.WatermarkCallback; import org.icepdf.core.pobjects.graphics.text.PageText; import org.icepdf.core.pobjects.security.SecurityManager; import org.icepdf.core.util.Defs; import org.icepdf.core.util.LazyObjectLoader; import org.icepdf.core.util.Library; import org.icepdf.core.util.Parser; import java.awt.*; import java.awt.image.BufferedImage; import java.io.*; import java.lang.reflect.Method; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; /** * <p>The <code>Document</code> class represents a PDF document and provides * access to the hierarchy of objects contained in the body section of the * PDF document. Most of the objects in the hierarchy are dictionaries which * contain references to page content and other objects such such as annotations. * For more information on the document object hierarchy, see the <i>ICEpdf * Developer's Guide</i>.</p> * <br> * <p>The <code>Document</code> class also provides access to methods responsible * for rendering PDF document content. Methods are available to capture page * content to a graphics context or extract image and text data on a page-by-page * basis.</p> * <br> * <p>If your PDF rendering application will be accessing encrypted documents, * it is important to implement the SecurityCallback. This interface provides * methods for getting password data from a user if needed.<p> * * @since 1.0 */ public class Document { private static final Logger logger = Logger.getLogger(Document.class.toString()); /** * Gets the version number of ICEpdf rendering core. This is not the version * number of the PDF format used to encode this document. * * @return version number of ICEpdf's rendering core. */ public static String getLibraryVersion() { return ProductInfo.VERSION + (ProductInfo.RELEASE_TYPE != null ? "-" + ProductInfo.RELEASE_TYPE : ""); } private static final String INCREMENTAL_UPDATER = "org.icepdf.core.util.IncrementalUpdater"; public static boolean foundIncrementalUpdater; static { // check class bath for NFont library, and declare results. try { Class.forName(INCREMENTAL_UPDATER); foundIncrementalUpdater = true; } catch (ClassNotFoundException e) { logger.log(Level.WARNING, "PDF write support was not found on the class path"); } } // optional watermark callback private WatermarkCallback watermarkCallback; // core catalog, root of the document hierarchy. private Catalog catalog; // We used to keep the document main PTrailer's PInfo, // but now that's lazily loaded, so instead we keep the // PTrailer itself, which can get us the PInfo whenever private PTrailer pTrailer; // state manager for tracking object that have been touched in some way // for editing purposes, private StateManager stateManager; // This is the original file or url path of where the PDF document was load // from private String origin; // This is the location of the file when it is saved to the hard drive. This // is usually only different from the origin if the the PDF document // was loaded from a URL private String cachedFilePath; // callback for password dialogs, or command line access. private SecurityCallback securityCallback; // disable/enable file caching, overrides fileCachingSize. private static boolean isCachingEnabled; private static boolean isFileCachingEnabled; private static int fileCacheMaxSize; // repository of all PDF object associated with this document. private Library library = null; private SeekableInput documentSeekableInput; static { // sets if file caching is enabled or disabled. isCachingEnabled = Defs.sysPropertyBoolean("org.icepdf.core.streamcache.enabled", false); isFileCachingEnabled = Defs.sysPropertyBoolean("org.icepdf.core.filecache.enabled", true); fileCacheMaxSize = Defs.intProperty("org.icepdf.core.filecache.size", 200000000); } /** * Creates a new instance of a Document. A Document class represents * one PDF document. */ public Document() { } /** * Sets a page watermark implementation to be painted on top of the page * content. Watermark can be specified for each page or once by calling * document.setWatermark(). * * @param watermarkCallback watermark implementation. */ public void setWatermarkCallback(WatermarkCallback watermarkCallback) { this.watermarkCallback = watermarkCallback; } /** * Utility method for setting the origin (filepath or URL) of this Document * * @param o new origin value * {@link #getDocumentOrigin} */ private void setDocumentOrigin(String o) { origin = o; if (logger.isLoggable(Level.CONFIG)) { logger.config( "MEMFREE: " + Runtime.getRuntime().freeMemory() + " of " + Runtime.getRuntime().totalMemory()); logger.config("LOADING: " + o); } } /** * Sets the cached file path in the case of opening a file from a URL. * * @param o new cached file path value * {@link #getDocumentCachedFilePath} */ private void setDocumentCachedFilePath(String o) { cachedFilePath = o; } /** * Returns the cached file path in the case of opening a file from a URL. * * @return file path */ private String getDocumentCachedFilePath() { return cachedFilePath; } /** * Load a PDF file from the given path and initiates the document's Catalog. * * @param filepath path of PDF document. * @throws PDFException if an invalid file encoding. * @throws PDFSecurityException if a security provider cannot be found * or there is an error decrypting the file. * @throws IOException if a problem setting up, or parsing the file. */ public void setFile(String filepath) throws PDFException, PDFSecurityException, IOException { setDocumentOrigin(filepath); File file = new File(filepath); FileInputStream inputStream = new FileInputStream(file); int fileLength = inputStream.available(); if (isFileCachingEnabled && file.length() > 0 && fileLength <= fileCacheMaxSize) { // copy the file contents into byte[], for direct memory mapping. byte[] data = new byte[fileLength]; inputStream.read(data); setByteArray(data, 0, fileLength, filepath); } else { RandomAccessFileInputStream rafis = RandomAccessFileInputStream.build(new File(filepath)); setInputStream(rafis); } if (inputStream != null) { inputStream.close(); } } /** * Load a PDF file from the given URL and initiates the document's Catalog. * If the system property org.icepdf.core.streamcache.enabled=true, the file * will be cached to a temp file; otherwise, the complete document stream will * be stored in memory. * * @param url location of file. * @throws PDFException an invalid file encoding. * @throws PDFSecurityException if a security provider can not be found * or there is an error decrypting the file. * @throws IOException if a problem downloading, setting up, or parsing the file. */ public void setUrl(URL url) throws PDFException, PDFSecurityException, IOException { InputStream in = null; try { // make a connection URLConnection urlConnection = url.openConnection(); // Create a stream on the URL connection in = urlConnection.getInputStream(); String pathOrURL = url.toString(); setInputStream(in, pathOrURL); } finally { if (in != null) { in.close(); } } } /** * Load a PDF file from the given input stream and initiates the document's Catalog. * If the system property org.icepdf.core.streamcache.enabled=true, the file * will be cached to a temp file; otherwise, the complete document stream will * be stored in memory. * * @param in input stream containing PDF data * @param pathOrURL value assigned to document origin * @throws PDFException an invalid stream or file encoding * @throws PDFSecurityException if a security provider can not be found * or there is an error decrypting the file. * @throws IOException if a problem setting up, or parsing the SeekableInput. */ public void setInputStream(InputStream in, String pathOrURL) throws PDFException, PDFSecurityException, IOException { setDocumentOrigin(pathOrURL); if (!isCachingEnabled) { //System.out.println("Started downloading PDF to memory : " + (new java.util.Date())); // read into memory first ConservativeSizingByteArrayOutputStream byteArrayOutputStream = new ConservativeSizingByteArrayOutputStream(100 * 1024); // write the bytes. byte[] buffer = new byte[4096]; int length; // int pdfFileSize = 0; // in.read will block until the end of the file is read. while ((length = in.read(buffer, 0, buffer.length)) > 0) { byteArrayOutputStream.write(buffer, 0, length); // pdfFileSize += length; } byteArrayOutputStream.flush(); byteArrayOutputStream.close(); int size = byteArrayOutputStream.size(); byteArrayOutputStream.trim(); byte[] data = byteArrayOutputStream.relinquishByteArray(); //System.out.println("Finished downloading PDF to memory : " + (new java.util.Date()) + " pdfFileSize: " + pdfFileSize); // finally read the cached file SeekableByteArrayInputStream byteArrayInputStream = new SeekableByteArrayInputStream(data, 0, size); setInputStream(byteArrayInputStream); } // if caching is allowed cache the url to file else { //System.out.println("Started downloading PDF to disk : " + (new java.util.Date())); // create tmp file and write bytes to it. File tempFile = File.createTempFile( "ICEpdfTempFile" + getClass().hashCode(), ".tmp"); // Delete temp file on exit tempFile.deleteOnExit(); // Write the data to the temp file. FileOutputStream fileOutputStream = new FileOutputStream(tempFile.getAbsolutePath(), true); // write the bytes. byte[] buffer = new byte[4096]; int length; // int pdfFileSize = 0; while ((length = in.read(buffer, 0, buffer.length)) > 0) { fileOutputStream.write(buffer, 0, length); // pdfFileSize += length; } fileOutputStream.flush(); fileOutputStream.close(); //System.out.println("Finished downloading PDF to disk : " + (new java.util.Date()) + " pdfFileSize: " + pdfFileSize); setDocumentCachedFilePath(tempFile.getAbsolutePath()); // finally read the cached file RandomAccessFileInputStream rafis = RandomAccessFileInputStream.build(tempFile); setInputStream(rafis); } } /** * Load a PDF file from the given byte array and initiates the document's Catalog. * If the system propertyorg.icepdf.core.streamcache.enabled=true, the file * will be cached to a temp file; otherwise, the complete document stream will * be stored in memory. * The given byte array is not necessarily copied, and will try to be directly * used, so do not modify it after passing it to this method. * * @param data byte array containing PDF data * @param offset the index into the byte array where the PDF data begins * @param length the number of bytes in the byte array belonging to the PDF data * @param pathOrURL value assigned to document origin * @throws PDFException an invalid stream or file encoding * @throws PDFSecurityException if a security provider can not be found * or there is an error decrypting the file. * @throws IOException if a problem setting up, or parsing the SeekableInput. */ public void setByteArray(byte[] data, int offset, int length, String pathOrURL) throws PDFException, PDFSecurityException, IOException { setDocumentOrigin(pathOrURL); if (!isCachingEnabled) { // finally read the cached file SeekableByteArrayInputStream byteArrayInputStream = new SeekableByteArrayInputStream(data, offset, length); setInputStream(byteArrayInputStream); } // if caching is allowed cache the url to file else { //System.out.println("Started downloading PDF to disk : " + (new java.util.Date())); // create tmp file and write bytes to it. File tempFile = File.createTempFile( "ICEpdfTempFile" + getClass().hashCode(), ".tmp"); // Delete temp file on exit tempFile.deleteOnExit(); // Write the data to the temp file. FileOutputStream fileOutputStream = new FileOutputStream(tempFile.getAbsolutePath(), true); // write the bytes. // int pdfFileSize = 0; fileOutputStream.write(data, offset, length); // pdfFileSize += length; fileOutputStream.flush(); fileOutputStream.close(); //System.out.println("Finished downloading PDF to disk : " + (new java.util.Date()) + " pdfFileSize: " + pdfFileSize); setDocumentCachedFilePath(tempFile.getAbsolutePath()); // finally read the cached file RandomAccessFileInputStream rafis = RandomAccessFileInputStream.build(tempFile); setInputStream(rafis); } } /** * Load a PDF file from the given SeekableInput stream and initiates the * document's Catalog. * * @param in input stream containing PDF data * @param pathOrURL value assigned to document origin * @throws PDFException an invalid stream or file encoding * @throws PDFSecurityException if a security provider can not be found * or there is an error decrypting the file. * @throws IOException if a problem setting up, or parsing the SeekableInput. */ public void setInputStream(SeekableInput in, String pathOrURL) throws PDFException, PDFSecurityException, IOException { setDocumentOrigin(pathOrURL); setInputStream(in); } /** * Sets the input stream of the PDF file to be rendered. * * @param in inputStream containing PDF data stream * @throws PDFException if error occurs * @throws PDFSecurityException security error * @throws IOException io error during stream handling */ private void setInputStream(final SeekableInput in) throws PDFException, PDFSecurityException, IOException { try { documentSeekableInput = in; // create library to hold all document objects library = new Library(); // reference the stream and origin with library so we can handle verification and writing of signatures. library.setDocumentInput(documentSeekableInput); // if interactive show visual progress bar //ProgressMonitorInputStream monitor = null; boolean loaded = false; try { loadDocumentViaXRefs(in); // initiate the catalog, build the outline for the document // this is the best test to see if everything is in order. if (catalog != null) { catalog.init(); } loaded = true; } catch (PDFException e) { throw e; } catch (PDFSecurityException e) { throw e; } catch (Exception e) { if (logger.isLoggable(Level.WARNING)) { logger.warning("Cross reference deferred loading failed, will fall back to linear reading."); } } if (!loaded) { // Cleanup any bits left behind by the failed xref loading if (catalog != null) { catalog = null; } if (library != null) { library = null; } library = new Library(); pTrailer = null; in.seekAbsolute(0L); loadDocumentViaLinearTraversal(in); // initiate the catalog, build the outline for the document if (catalog != null) { catalog.init(); } } // create new instance of state manager and add it to the library stateManager = new StateManager(pTrailer); library.setStateManager(stateManager); } catch (PDFException e) { logger.log(Level.FINE, "Error loading PDF file during linear parse.", e); dispose(); throw e; } catch (PDFSecurityException e) { dispose(); throw e; } catch (IOException e) { dispose(); throw e; } catch (Exception e) { dispose(); logger.log(Level.SEVERE, "Error loading PDF Document.", e); throw new IOException(e.getMessage()); } } /** * Uitility method for loading the documents objects from the Xref table. * * @param in input stream to parse * @throws IOException an i/o problem * @throws PDFException an invalid stream or file encoding * @throws PDFSecurityException if a security provider can not be found * or there is an error decrypting the file. */ private void loadDocumentViaXRefs(SeekableInput in) throws PDFException, PDFSecurityException, IOException { //if( true ) throw new RuntimeException("Fallback to linear traversal"); int offset = skipPastAnyPrefixJunk(in); long xrefPosition = getInitialCrossReferencePosition(in) + offset; PTrailer documentTrailer = null; if (xrefPosition > 0L) { in.seekAbsolute(xrefPosition); Parser parser = new Parser(in); Object obj = parser.getObject(library); if (obj instanceof PObject) obj = ((PObject) obj).getObject(); PTrailer trailer = (PTrailer) obj; //PTrailer trailer = (PTrailer) parser.getObject( library ); if (trailer == null) throw new RuntimeException("Could not find trailer"); if (trailer.getPrimaryCrossReference() == null) throw new RuntimeException("Could not find cross reference"); trailer.setPosition(xrefPosition); documentTrailer = trailer; // any prev/next trails are loaded lazily } if (documentTrailer == null) throw new RuntimeException("Could not find document trailer"); if (offset > 0) { // mark the offset, so that it can be correct for later during // object retrieval. documentTrailer.getCrossReferenceTable().setOffset(offset); } LazyObjectLoader lol = new LazyObjectLoader( library, in, documentTrailer.getPrimaryCrossReference()); library.setLazyObjectLoader(lol); pTrailer = documentTrailer; catalog = documentTrailer.getRootCatalog(); library.setCatalog(catalog); if (catalog == null) throw new NullPointerException("Loading via xref failed to find catalog"); boolean madeSecurityManager = makeSecurityManager(documentTrailer); if (madeSecurityManager) { attemptAuthorizeSecurityManager(); } // setup a signature permission dictionary configurePermissions(); } private long getInitialCrossReferencePosition(SeekableInput in) throws IOException { in.seekEnd(); long endOfFile = in.getAbsolutePosition(); long currentPosition = endOfFile - 1; long afterStartxref = -1; String startxref = "startxref"; int startxrefIndexToMatch = startxref.length() - 1; while (currentPosition >= 0 && (endOfFile - currentPosition) < 2048) { in.seekAbsolute(currentPosition); int curr = in.read(); if (curr < 0) throw new EOFException("Could not find startxref at end of file"); if (curr == startxref.charAt(startxrefIndexToMatch)) { // If we've matched the whole string if (startxrefIndexToMatch == 0) { afterStartxref = currentPosition + startxref.length(); break; } startxrefIndexToMatch--; } else startxrefIndexToMatch = startxref.length() - 1; currentPosition--; } if (afterStartxref < 0) throw new EOFException("Could not find startxref near end of file"); in.seekAbsolute(afterStartxref); Parser parser = new Parser(in); Number xrefPositionObj = (Number) parser.getToken(); if (xrefPositionObj == null) throw new RuntimeException("Could not find ending cross reference position"); return xrefPositionObj.longValue(); } /** * Uitily method for parsing a PDF documents object. This should only be * called when the xref lookup fails or the file is being loaded * via byte input because file caching is not enabled. * * @param seekableInput stream representing whole pdf document * @throws PDFException an invalid stream or file encoding * @throws PDFSecurityException if a security provider can not be found * or there is an error decrypting the file. */ private void loadDocumentViaLinearTraversal(SeekableInput seekableInput) throws PDFException, PDFSecurityException, IOException { InputStream in = seekableInput.getInputStream(); int objectsOffset = skipPastAnyPrefixJunk(in); library.setLinearTraversal(); // NOTE: when we implement linerized document we should be able to // rework this method. Parser parser = new Parser(in); // document Trailer, holds encryption info PTrailer documentTrailer = null; // Loop through all objects that where parsed from the data stream List<PObject> documentObjects = new ArrayList<PObject>(); Object pdfObject; while (true) { // parse all of the objects in the stream, objects are added // to the library object. pdfObject = parser.getObject(library); // eof or io error result in break if (pdfObject == null) { break; } // unwrap pObject for catalog and ptrailer lookups. if (pdfObject instanceof PObject) { PObject tmp = (PObject) pdfObject; // apply the offset value of the object. tmp.setLinearTraversalOffset(objectsOffset + parser.getLinearTraversalOffset()); // store reference so we can rebuild the xref table. documentObjects.add(tmp); Object obj = tmp.getObject(); if (obj != null) pdfObject = obj; } // find the catalog which has information on outlines // which is need by the gui if (pdfObject instanceof Catalog) { catalog = (Catalog) pdfObject; } // Find the trailer object so that we can get the encryption information // trailer information is not a PObject and thus there should if (pdfObject instanceof PTrailer) { if (documentTrailer == null) { documentTrailer = (PTrailer) pdfObject; } else { // add more trailer data to the original PTrailer nextTrailer = (PTrailer) pdfObject; if (nextTrailer.getPrev() > 0) { documentTrailer.addNextTrailer(nextTrailer); documentTrailer = nextTrailer; } } } } // apply the new object offset values so that the object can be retrieved // using the actual index in the file CrossReference refs = documentTrailer.getPrimaryCrossReference(); Object entry; for (PObject pObject : documentObjects) { entry = refs.getEntryForObject(pObject.getReference().getObjectNumber()); if (entry != null && entry instanceof CrossReference.UsedEntry) { ((CrossReference.UsedEntry) entry).setFilePositionOfObject( pObject.getLinearTraversalOffset()); } else { refs.addUsedEntry(pObject.getReference().getObjectNumber(), pObject.getLinearTraversalOffset(), pObject.getReference().getGenerationNumber()); } } if (logger.isLoggable(Level.FINER)) { for (PObject pobjects : documentObjects) { // display object information in debug mode logger.finer(pobjects.getClass().getName() + " " + pobjects.getLinearTraversalOffset() + " " + pobjects); } } // The LazyObjectLoader is used for both reading from a SeekableInput, // and also accessing ObjectStreams. // So, even with linear traversal, we still need it for PDF 1.5 documents if (documentTrailer != null) { LazyObjectLoader lol = new LazyObjectLoader( library, seekableInput, documentTrailer.getPrimaryCrossReference()); library.setLazyObjectLoader(lol); } pTrailer = documentTrailer; library.setCatalog(catalog); // Add Document information object to catalog if (documentTrailer != null) { boolean madeSecurityManager = makeSecurityManager(documentTrailer); if (madeSecurityManager) attemptAuthorizeSecurityManager(); } // setup a signature handler configurePermissions(); } /** * Typically, if we're doing a linear traversal, it's because the PDF file * is corrupted, usually by junk being appended to it, or the ending * being truncated, or, in this case, from junk being inserted into the * beginning of the file, skewing all the xref object offsets. * <br> * We're going to look for the "%PDF-1." string that most PDF files start * with. If we do find it, then leave the InputStream after the next * whitespace, else rewind back to the beginning, in case the file was * never encoded with the PDF version comment. * * @param in InputStream derived from SeekableInput.getInputStream() */ private int skipPastAnyPrefixJunk(InputStream in) { if (!in.markSupported()) return 0; try { final int scanLength = 2048; final String scanFor = "%PDF-"; final int scanForLength = scanFor.length(); int scanForIndex = 0; boolean scanForWhiteSpace = false; in.mark(scanLength); for (int i = 0; i < scanLength; i++) { int data = in.read(); if (data < 0) { in.reset(); return 0; } // scan to the end of the comment line and return the offset if (scanForWhiteSpace) { scanForIndex++; if (Parser.isWhitespace((char) data)) { return scanForIndex; } } else { if (data == scanFor.charAt(scanForIndex)) { scanForIndex++; if (scanForIndex == scanForLength) { // Now read until we find white space scanForWhiteSpace = true; } } else scanForIndex = 0; } } // Searched through scanLength number of bytes and didn't find it, // so reset, in case it was never there to find in.reset(); } catch (IOException e) { try { in.reset(); } catch (IOException e2) { // forget about it. } } return 0; } /** * Skips junk and keeps track of the offset so that later corrections can * be made for object seeks. * * @param in input stream to parse. * @return 0 if file header is well formed, otherwise the offset to where * the document header starts. */ private int skipPastAnyPrefixJunk(SeekableInput in) { if (!in.markSupported()) return 0; try { final int scanLength = 2048; final String scanFor = "%PDF-1."; int scanForIndex = 0; in.mark(scanLength); for (int i = 0; i < scanLength; i++) { int data = in.read(); if (data < 0) { in.reset(); return 0; } if (data == scanFor.charAt(scanForIndex)) { return i; } else { scanForIndex = 0; } } // Searched through scanLength number of bytes and didn't find it, // so reset, in case it was never there to find in.reset(); } catch (IOException e) { try { in.reset(); } catch (IOException e2) { // forget about it. } } return 0; } /** * Utility method for building the SecurityManager if the document * contains a crypt entry in the PTrailer. * * @param documentTrailer document trailer * @return Whether or not a SecurityManager was made, and set in the Library * @throws PDFSecurityException if there is an issue finding encryption libraries. */ private boolean makeSecurityManager(PTrailer documentTrailer) throws PDFSecurityException { /** * Before a security manager can be created or needs to be created * we need the following * 1. The trailer object must have an encrypt entry * 2. The trailer object must have an ID entry */ boolean madeSecurityManager = false; HashMap<Object, Object> encryptDictionary = documentTrailer.getEncrypt(); List fileID = documentTrailer.getID(); // check for a missing file ID. if (fileID == null) { // we have a couple malformed documents that don't specify a FILE ID. // but proving two empty string allows the document to be decrypted. fileID = new ArrayList(2); fileID.add(new LiteralStringObject("")); fileID.add(new LiteralStringObject("")); } if (encryptDictionary != null && fileID != null) { // create new security manager library.setSecurityManager(new SecurityManager( library, encryptDictionary, fileID)); madeSecurityManager = true; } return madeSecurityManager; } /** * Initializes permission object as it is uses with encrypt permission to define * document characteristics at load time. * * @return true if permissions where found, false otherwise. */ private boolean configurePermissions() { if (catalog != null) { Permissions permissions = catalog.getPermissions(); if (permissions != null) { library.setPermissions(permissions); if (logger.isLoggable(Level.FINER)) { logger.finer("Document perms dictionary found and configured. "); } return true; } } return false; } /** * If the document has a SecurityManager it is encrypted and as a result the * following method is used with the SecurityCallback to prompt a user for * a password if needed. * * @throws PDFSecurityException error during authorization manager setup */ private void attemptAuthorizeSecurityManager() throws PDFSecurityException { // check if pdf is password protected, by passing in black // password if (!library.getSecurityManager().isAuthorized("")) { // count password tries int count = 1; // store temporary password String password; // Give user 3 chances to type the correct password // before throwing security exceptions while (true) { // Display password dialog // make sure a callback has been set. if (securityCallback != null) { password = securityCallback.requestPassword(this); if (password == null) { throw new PDFSecurityException("Encryption error"); } } else { throw new PDFSecurityException("Encryption error"); } // Verify new password, proceed if authorized, // fatal exception otherwise. if (library.getSecurityManager().isAuthorized(password)) { break; } count++; // after 3 tries throw the the error. if (count > 3) { throw new PDFSecurityException("Encryption error"); } } } // set the encryption flag on catalog library.setEncrypted(true); } /** * Gets the page dimension of the indicated page number using the specified * rotation factor. * * @param pageNumber Page number for the given dimension. The page * number is zero-based. * @param userRotation Rotation, in degrees, that has been applied to page * when calculating the dimension. * @return page dimension for the specified page number * {@link #getPageDimension(int, float, float)} */ public PDimension getPageDimension(int pageNumber, float userRotation) { Page page = catalog.getPageTree().getPage(pageNumber); return page.getSize(userRotation); } /** * Gets the page dimension of the indicated page number using the specified * rotation and zoom settings. If the page does not exist then a zero * dimension is returned. * * @param pageNumber Page number for the given dimension. The page * number is zero-based. * @param userRotation Rotation, in degrees, that has been applied to page * when calculating the dimension. * @param userZoom Any deviation from the page's actual size, by zooming in or out. * @return page dimension for the specified page number. * {@link #getPageDimension(int, float)} */ public PDimension getPageDimension(int pageNumber, float userRotation, float userZoom){ Page page = catalog.getPageTree().getPage(pageNumber); if (page != null) { return page.getSize(userRotation, userZoom); } else { return new PDimension(0, 0); } } /** * Returns the origin (filepath or URL) of this Document. This is the original * location of the file where the method getDocumentLocation returns the actual * location of the file. The origin and location of the document will only * be different if it was loaded from a URL or an input stream. * * @return file path or URL * {@link #getDocumentLocation} */ public String getDocumentOrigin() { return origin; } /** * Returns the file location or URL of this Document. This location may be different * from the file origin if the document was loaded from a URL or input stream. * If the file was loaded from a URL or input stream the file location is * the path to where the document content is cached. * * @return file path * {@link #getDocumentOrigin()} */ public String getDocumentLocation() { if (cachedFilePath != null) return cachedFilePath; return origin; } /** * Gets an instance of the the document state manager which stores references * of object that need to be written to file. * * @return stateManager instance for this document. */ public StateManager getStateManager() { return stateManager; } /** * Returns the total number of pages in this document. * * @return number of pages in the document */ public int getNumberOfPages() { try { return catalog.getPageTree().getNumberOfPages(); } catch (Exception e) { logger.log(Level.FINE, "Error getting number of pages.", e); } return 0; } /** * Paints the contents of the given page number to the graphics context using * the specified rotation, zoom, rendering hints and page boundary. * * @param pageNumber Page number to paint. The page number is zero-based. * @param g graphics context to which the page content will be painted. * @param renderHintType Constant specified by the GraphicsRenderingHints class. * There are two possible entries, SCREEN and PRINT, each with configurable * rendering hints settings. * @param pageBoundary Constant specifying the page boundary to use when * painting the page content. * @param userRotation Rotation factor, in degrees, to be applied to the rendered page. * @param userZoom Zoom factor to be applied to the rendered page. */ public void paintPage(int pageNumber, Graphics g, final int renderHintType, final int pageBoundary, float userRotation, float userZoom) throws InterruptedException { Page page = catalog.getPageTree().getPage(pageNumber); page.init(); PDimension sz = page.getSize(userRotation, userZoom); int pageWidth = (int) sz.getWidth(); int pageHeight = (int) sz.getHeight(); Graphics gg = g.create(0, 0, pageWidth, pageHeight); page.paint(gg, renderHintType, pageBoundary, userRotation, userZoom); gg.dispose(); } /** * Dispose of Document, freeing up all used resources. */ public void dispose() { if (documentSeekableInput != null) { try { documentSeekableInput.close(); } catch (IOException e) { logger.log(Level.FINE, "Error closing document input stream.", e); } documentSeekableInput = null; } String fileToDelete = getDocumentCachedFilePath(); if (fileToDelete != null) { File file = new File(fileToDelete); boolean success = file.delete(); if (!success && logger.isLoggable(Level.WARNING)) { logger.warning("Error deleting URL cached to file " + fileToDelete); } } } /** * Takes the internal PDF data, which may be in a file or in RAM, * and write it to the provided OutputStream. * The OutputStream is not flushed or closed, in case this method's * caller requires otherwise. * * @param out OutputStream to which the PDF file bytes are written. * @return The length of the PDF file copied * @throws IOException if there is some problem reading or writing the PDF data */ public long writeToOutputStream(OutputStream out) throws IOException { long documentLength = documentSeekableInput.getLength(); SeekableInputConstrainedWrapper wrapper = new SeekableInputConstrainedWrapper( documentSeekableInput, 0L, documentLength); try { byte[] buffer = new byte[4096]; int length; while ((length = wrapper.read(buffer, 0, buffer.length)) > 0) { out.write(buffer, 0, length); } } catch (Throwable e) { logger.log(Level.FINE, "Error writing PDF output stream.", e); throw new IOException(e.getMessage()); } finally { try { wrapper.close(); } catch (IOException e) { // forget about it. } } return documentLength; } /** * Copies the pre-existing PDF file, and appends an incremental update for * any edits, to the specified OutputStream. For the pre-existing PDF * content copying, writeToOutputStream(OutputStream out) is used. * * @param out OutputStream to which the PDF file bytes are written. * @return The length of the PDF file saved * @throws IOException if there is some problem reading or writing the PDF data */ public long saveToOutputStream(OutputStream out) throws IOException { long documentLength = writeToOutputStream(out); if (foundIncrementalUpdater) { try { Class<?> incrementalUpdaterClass = Class.forName(INCREMENTAL_UPDATER); Object[] argValues = {this, out, documentLength}; Method method = incrementalUpdaterClass.getDeclaredMethod( "appendIncrementalUpdate", new Class[]{Document.class, OutputStream.class, Long.TYPE}); long appendedLength = (Long) method.invoke(null, argValues); return documentLength + appendedLength; } catch (Throwable e) { logger.log(Level.FINE, "Could not call incremental updater.", e); } } return documentLength; } /** * Gets an Image of the specified page. The image size is automatically * calculated given the page boundary, user rotation and zoom. The rendering * quality is defined by GraphicsRenderingHints.SCREEN. * * @param pageNumber Page number of the page to capture the image rendering. * The page number is zero-based. * @param renderHintType Constant specified by the GraphicsRenderingHints class. * There are two possible entries, SCREEN and PRINT each with configurable * rendering hints settings. * @param pageBoundary Constant specifying the page boundary to use when * painting the page content. Typically use Page.BOUNDARY_CROPBOX. * @param userRotation Rotation factor, in degrees, to be applied to the rendered page. * Arbitrary rotations are not currently supported for this method, * so only the following values are valid: 0.0f, 90.0f, 180.0f, 270.0f. * @param userZoom Zoom factor to be applied to the rendered page. * @return an Image object of the current page. */ public Image getPageImage(int pageNumber, final int renderHintType, final int pageBoundary, float userRotation, float userZoom) throws InterruptedException { Page page = catalog.getPageTree().getPage(pageNumber); page.init(); PDimension sz = page.getSize(pageBoundary, userRotation, userZoom); int pageWidth = (int) sz.getWidth(); int pageHeight = (int) sz.getHeight(); BufferedImage image = ImageUtility.createCompatibleImage(pageWidth, pageHeight); Graphics g = image.createGraphics(); page.paint(g, renderHintType, pageBoundary, userRotation, userZoom); g.dispose(); return image; } /** * Exposes a page's PageText object which can be used to get text with * in the PDF document. The PageText.toString() is the simplest way to * get a pages text. This utility call does not parse the whole stream * and is best suited for text extraction functionality as it faster then * #getPageViewText(int). * * @param pageNumber Page number of page in which text extraction will act on. * The page number is zero-based. * @return page PageText data Structure. * {@link #getPageViewText(int)} */ public PageText getPageText(int pageNumber) throws InterruptedException { PageTree pageTree = catalog.getPageTree(); if (pageNumber >= 0 && pageNumber < pageTree.getNumberOfPages()) { Page pg = pageTree.getPage(pageNumber); return pg.getText(); } else { return null; } } /** * Exposes a page's PageText object which can be used to get text with * in the PDF document. The PageText.toString() is the simplest way to * get a pages text. The pageText hierarchy can be used to search for * selected text or used to set text as highlighted. * * @param pageNumber Page number of page in which text extraction will act on. * The page number is zero-based. * @return page PageText data Structure. */ public PageText getPageViewText(int pageNumber) throws InterruptedException{ PageTree pageTree = catalog.getPageTree(); if (pageNumber >= 0 && pageNumber < pageTree.getNumberOfPages()) { Page pg = pageTree.getPage(pageNumber); return pg.getViewText(); } else { return null; } } /** * Gets the security manager for this document. If the document has no * security manager null is returned. * * @return security manager for document if available. */ public SecurityManager getSecurityManager() { return library.getSecurityManager(); } /** * Sets the security callback to be used for this document. The security * callback allows a mechanism for prompting a user for a password if the * document is password protected. * * @param securityCallback a class which implements the SecurityCallback * interface. */ public void setSecurityCallback(SecurityCallback securityCallback) { this.securityCallback = securityCallback; } /** * Gets the document's information as specified in the PTrailer in the document * hierarchy. * * @return document information * {@link PInfo} */ public PInfo getInfo() { if (pTrailer == null) return null; return pTrailer.getInfo(); } /** * Enables or disables the form widget annotation highlighting. Generally not use for print but can be very * useful for highlight input fields in a Viewer application. * * @param highlight true to enable highlight mode, otherwise; false. */ public void setFormHighlight(boolean highlight) { // iterate over the document annotations and set the appropriate highlight value. if (catalog != null && catalog.getInteractiveForm() != null) { InteractiveForm interactiveForm = catalog.getInteractiveForm(); ArrayList<Object> widgets = interactiveForm.getFields(); if (widgets != null) { for (Object widget : widgets) { descendFormTree(widget, highlight); } } } } /** * Recursively set highlight on all the form fields. * * @param formNode root form node. */ private void descendFormTree(Object formNode, boolean highLight) { if (formNode instanceof AbstractWidgetAnnotation) { ((AbstractWidgetAnnotation) formNode).setEnableHighlightedWidget(highLight); } else if (formNode instanceof FieldDictionary) { // iterate over the kid's array. FieldDictionary child = (FieldDictionary) formNode; formNode = child.getKids(); if (formNode != null) { ArrayList kidsArray = (ArrayList) formNode; for (Object kid : kidsArray) { if (kid instanceof Reference) { kid = library.getObject((Reference) kid); } if (kid instanceof AbstractWidgetAnnotation) { ((AbstractWidgetAnnotation) kid).setEnableHighlightedWidget(highLight); } else if (kid instanceof FieldDictionary) { descendFormTree(kid, highLight); } } } } } /** * Gets a vector of Images where each index represents an image inside * the specified page. The images are returned in the size in which they * where embedded in the PDF document, which may be different than the * size displayed when the complete PDF page is rendered. * * @param pageNumber page number to act on. Zero-based page number. * @return vector of Images inside the current page */ public List<Image> getPageImages(int pageNumber) throws InterruptedException { Page pg = catalog.getPageTree().getPage(pageNumber); pg.init(); return pg.getImages(); } /** * Gets the Document Catalog's PageTree entry as specified by the Document * hierarchy. The PageTree can be used to obtain detailed information about * the Page object which makes up the document. * * @return PageTree specified by the document hierarchy. Null if the document * has not yet loaded or the catalog can not be found. */ public PageTree getPageTree() { if (catalog != null) { PageTree pageTree = catalog.getPageTree(); if (pageTree != null) { pageTree.setWatermarkCallback(watermarkCallback); } return pageTree; } else { return null; } } /** * Gets the Document's Catalog as specified by the Document hierarchy. The * Catalog can be used to traverse the Document's hierarchy. * * @return document's Catalog object; null, if one does not exist. */ public Catalog getCatalog() { return catalog; } /** * Sets the caching mode when handling file loaded by an URI. If enabled * URI streams will be cached to disk, otherwise they will be stored in * memory. This method must be set before a call to setByteArray() or * setInputStream() is called. * * @param cachingEnabled true to enable, otherwise false. */ public static void setCachingEnabled(boolean cachingEnabled) { isCachingEnabled = cachingEnabled; } }