/* * Copyright 2006-2012 ICEsoft Technologies Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the * License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.icepdf.core.pobjects; import org.icepdf.core.SecurityCallback; import org.icepdf.core.application.ProductInfo; import org.icepdf.core.exceptions.PDFException; import org.icepdf.core.exceptions.PDFSecurityException; import org.icepdf.core.io.*; import org.icepdf.core.pobjects.graphics.text.PageText; import org.icepdf.core.pobjects.security.SecurityManager; import org.icepdf.core.util.Defs; import org.icepdf.core.util.LazyObjectLoader; import org.icepdf.core.util.Library; import org.icepdf.core.util.Parser; import org.icepdf.core.util.IncrementalUpdater; import java.awt.*; import java.awt.image.BufferedImage; import java.io.*; import java.net.URL; import java.net.URLConnection; import java.util.Hashtable; import java.util.Vector; import java.util.logging.Level; import java.util.logging.Logger; /** * <p>The <code>Document</code> class represents a PDF document and provides * access to the hierarchy of objects contained in the body section of the * PDF document. Most of the objects in the hierarchy are dictionaries which * contain references to page content and other objects such such as annotations. * For more information on the document object hierarchy, see the <i>ICEpdf * Developer's Guide</i>.</p> * <p/> * <p>The <code>Document</code> class also provides access to methods responsible * for rendering PDF document content. Methods are available to capture page * content to a graphics context or extract image and text data on a page-by-page * basis.</p> * <p/> * <p>If your PDF rendering application will be accessing encrypted documents, * it is important to implement the SecurityCallback. This interface provides * methods for getting password data from a user if needed.<p> * * @since 1.0 */ public class Document { private static final Logger logger = Logger.getLogger(Document.class.toString()); /** * Gets the version number of ICEpdf rendering core. This is not the version * number of the PDF format used to encode this document. * * @return version number of ICEpdf's rendering core. */ public static String getLibraryVersion() { return new StringBuilder().append(ProductInfo.PRIMARY).append("."). append(ProductInfo.SECONDARY).append("."). append(ProductInfo.TERTIARY).append(" "). append(ProductInfo.RELEASE_TYPE).toString(); } // core catalog, root of the document hierarchy. private Catalog catalog; // We used to keep the document main PTrailer's PInfo, // but now that's lazily loaded, so instead we keep the // PTrailer itself, which can get us the PInfo whenever private PTrailer pTrailer; // state manager for tracking object that have been touched in some way // for editing purposes, private StateManager stateManager; // This is the original file or url path of where the PDF document was load // from private String origin; // This is the location of the file when it is saved to the hard drive. This // is usually only different from the origin if the the PDF document // was loaded from a URL private String cachedFilePath; // callback for password dialogs, or command line access. private SecurityCallback securityCallback; // disable/enable file caching, overrides fileCachingSize. private static boolean isCachingEnabled; // repository of all PDF object associated with this document. private Library library = null; private SeekableInput documentSeekableInput; static { // sets if file caching is enabled or disabled. isCachingEnabled = Defs.sysPropertyBoolean("org.icepdf.core.streamcache.enabled", true); } /** * Creates a new instance of a Document. A Document class represents * one PDF document. */ public Document() { } /** * Utility method for setting the origin (filepath or URL) of this Document * * @param o new origin value * @see #getDocumentOrigin() */ private void setDocumentOrigin(String o) { origin = o; if (logger.isLoggable(Level.CONFIG)) { logger.config( "MEMFREE: " + Runtime.getRuntime().freeMemory() + " of " + Runtime.getRuntime().totalMemory()); logger.config("LOADING: " + o); } } /** * Sets the cached file path in the case of opening a file from a URL. * * @param o new cached file path value * @see #getDocumentCachedFilePath */ private void setDocumentCachedFilePath(String o) { cachedFilePath = o; } /** * Returns the cached file path in the case of opening a file from a URL. * * @return file path */ private String getDocumentCachedFilePath() { return cachedFilePath; } /** * Load a PDF file from the given path and initiates the document's Catalog. * * @param filepath path of PDF document. * @throws PDFException if an invalid file encoding. * @throws PDFSecurityException if a security provider cannot be found * or there is an error decrypting the file. * @throws IOException if a problem setting up, or parsing the file. */ public void setFile(String filepath) throws PDFException, PDFSecurityException, IOException { setDocumentOrigin(filepath); RandomAccessFileInputStream rafis = RandomAccessFileInputStream.build(new File(filepath)); /* // Test code for setByteArray(-) if( true ) { byte[] buffer = new byte[4096]; int read = buffer.length; ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream( 40960 ); while ((read = rafis.read(buffer, 0, buffer.length)) > 0){ byteArrayOutputStream.write(buffer, 0, read); } byteArrayOutputStream.flush(); byteArrayOutputStream.close(); rafis.close(); int length = byteArrayOutputStream.size(); byte[] data = byteArrayOutputStream.toByteArray(); setByteArray( data, 0, length, null ); return; } */ setInputStream(rafis); } /** * Load a PDF file from the given URL and initiates the document's Catalog. * If the system property org.icepdf.core.streamcache.enabled=true, the file * will be cached to a temp file; otherwise, the complete document stream will * be stored in memory. * * @param url location of file. * @throws PDFException an invalid file encoding. * @throws PDFSecurityException if a security provider can not be found * or there is an error decrypting the file. * @throws IOException if a problem downloading, setting up, or parsing the file. */ public void setUrl(URL url) throws PDFException, PDFSecurityException, IOException { InputStream in = null; try { // make a connection URLConnection urlConnection = url.openConnection(); // Create a stream on the URL connection in = urlConnection.getInputStream(); String pathOrURL = url.toString(); setInputStream(in, pathOrURL); } finally { if (in != null) { in.close(); } } } /** * Load a PDF file from the given input stream and initiates the document's Catalog. * If the system property org.icepdf.core.streamcache.enabled=true, the file * will be cached to a temp file; otherwise, the complete document stream will * be stored in memory. * * @param in input stream containing PDF data * @param pathOrURL value assigned to document origin * @throws PDFException an invalid stream or file encoding * @throws PDFSecurityException if a security provider can not be found * or there is an error decrypting the file. * @throws IOException if a problem setting up, or parsing the SeekableInput. */ public void setInputStream(InputStream in, String pathOrURL) throws PDFException, PDFSecurityException, IOException { setDocumentOrigin(pathOrURL); if (!isCachingEnabled) { //System.out.println("Started downloading PDF to memory : " + (new java.util.Date())); // read into memory first ConservativeSizingByteArrayOutputStream byteArrayOutputStream = new ConservativeSizingByteArrayOutputStream(100 * 1024, null); // write the bytes. byte[] buffer = new byte[4096]; int length; // int pdfFileSize = 0; // in.read will block until the end of the file is read. while ((length = in.read(buffer, 0, buffer.length)) > 0) { byteArrayOutputStream.write(buffer, 0, length); // pdfFileSize += length; } byteArrayOutputStream.flush(); byteArrayOutputStream.close(); int size = byteArrayOutputStream.size(); byteArrayOutputStream.trim(); byte[] data = byteArrayOutputStream.relinquishByteArray(); //System.out.println("Finished downloading PDF to memory : " + (new java.util.Date()) + " pdfFileSize: " + pdfFileSize); // finally read the cached file SeekableByteArrayInputStream byteArrayInputStream = new SeekableByteArrayInputStream(data, 0, size); setInputStream(byteArrayInputStream); } // if caching is allowed cache the url to file else { //System.out.println("Started downloading PDF to disk : " + (new java.util.Date())); // create tmp file and write bytes to it. File tempFile = File.createTempFile( "ICEpdfTempFile" + getClass().hashCode(), ".tmp"); // Delete temp file on exit tempFile.deleteOnExit(); // Write the data to the temp file. FileOutputStream fileOutputStream = new FileOutputStream(tempFile.getAbsolutePath(), true); // write the bytes. byte[] buffer = new byte[4096]; int length; // int pdfFileSize = 0; while ((length = in.read(buffer, 0, buffer.length)) > 0) { fileOutputStream.write(buffer, 0, length); // pdfFileSize += length; } fileOutputStream.flush(); fileOutputStream.close(); //System.out.println("Finished downloading PDF to disk : " + (new java.util.Date()) + " pdfFileSize: " + pdfFileSize); setDocumentCachedFilePath(tempFile.getAbsolutePath()); // finally read the cached file RandomAccessFileInputStream rafis = RandomAccessFileInputStream.build(tempFile); setInputStream(rafis); } } /** * Load a PDF file from the given byte array and initiates the document's Catalog. * If the system propertyorg.icepdf.core.streamcache.enabled=true, the file * will be cached to a temp file; otherwise, the complete document stream will * be stored in memory. * The given byte array is not necessarily copied, and will try to be directly * used, so do not modify it after passing it to this method. * * @param data byte array containing PDF data * @param offset the index into the byte array where the PDF data begins * @param length the number of bytes in the byte array belonging to the PDF data * @param pathOrURL value assigned to document origin * @throws PDFException an invalid stream or file encoding * @throws PDFSecurityException if a security provider can not be found * or there is an error decrypting the file. * @throws IOException if a problem setting up, or parsing the SeekableInput. */ public void setByteArray(byte[] data, int offset, int length, String pathOrURL) throws PDFException, PDFSecurityException, IOException { setDocumentOrigin(pathOrURL); if (!isCachingEnabled) { // finally read the cached file SeekableByteArrayInputStream byteArrayInputStream = new SeekableByteArrayInputStream(data, offset, length); setInputStream(byteArrayInputStream); } // if caching is allowed cache the url to file else { //System.out.println("Started downloading PDF to disk : " + (new java.util.Date())); // create tmp file and write bytes to it. File tempFile = File.createTempFile( "ICEpdfTempFile" + getClass().hashCode(), ".tmp"); // Delete temp file on exit tempFile.deleteOnExit(); // Write the data to the temp file. FileOutputStream fileOutputStream = new FileOutputStream(tempFile.getAbsolutePath(), true); // write the bytes. // int pdfFileSize = 0; fileOutputStream.write(data, offset, length); // pdfFileSize += length; fileOutputStream.flush(); fileOutputStream.close(); //System.out.println("Finished downloading PDF to disk : " + (new java.util.Date()) + " pdfFileSize: " + pdfFileSize); setDocumentCachedFilePath(tempFile.getAbsolutePath()); // finally read the cached file RandomAccessFileInputStream rafis = RandomAccessFileInputStream.build(tempFile); setInputStream(rafis); } } /** * Load a PDF file from the given SeekableInput stream and initiates the * document's Catalog. * * @param in input stream containing PDF data * @param pathOrURL value assigned to document origin * @throws PDFException an invalid stream or file encoding * @throws PDFSecurityException if a security provider can not be found * or there is an error decrypting the file. * @throws IOException if a problem setting up, or parsing the SeekableInput. */ public void setInputStream(SeekableInput in, String pathOrURL) throws PDFException, PDFSecurityException, IOException { setDocumentOrigin(pathOrURL); setInputStream(in); } /** * Sets the input stream of the PDF file to be rendered. * * @param in inputstream containing PDF data stream * @throws PDFException if error occurs * @throws PDFSecurityException security error * @throws IOException io error during stream handling */ private void setInputStream(final SeekableInput in) throws PDFException, PDFSecurityException, IOException { try { documentSeekableInput = in; // create library to hold all document objects library = new Library(); // if interactive show visual progress bar //ProgressMonitorInputStream monitor = null; boolean loaded = false; try { loadDocumentViaXRefs(in); loaded = true; } catch (PDFException e) { throw e; } catch (PDFSecurityException e) { throw e; } catch (Exception e) { if (logger.isLoggable(Level.WARNING)) { logger.warning("Cross reference deferred loading failed, will fall back to linear reading."); } } if (!loaded) { // Cleanup any bits left behind by the failed xref loading if (catalog != null) { catalog.dispose(false); catalog = null; } if (library != null) { library.dispose(); library = null; } library = new Library(); pTrailer = null; in.seekAbsolute(0L); loadDocumentViaLinearTraversal(in.getInputStream()); } // initiate the catalog, build the outline for the document catalog.init(); // create new instance of state manager and add it to the library stateManager = new StateManager(pTrailer); library.setStateManager(stateManager); } catch (PDFException e) { logger.log(Level.FINE, "Error loading PDF file during linear parse.", e); dispose(); throw e; } catch (PDFSecurityException e) { dispose(); throw e; } catch (IOException e) { dispose(); throw e; } catch (Exception e) { dispose(); logger.log(Level.SEVERE, "Error loading PDF Document.", e); throw new IOException(e.getMessage()); } } /** * Uitility method for loading the documents objects from the Xref table. * * @param in input stream to parse * @throws IOException an i/o problem * @throws PDFException an invalid stream or file encoding * @throws PDFSecurityException if a security provider can not be found * or there is an error decrypting the file. */ private void loadDocumentViaXRefs(SeekableInput in) throws PDFException, PDFSecurityException, IOException { //if( true ) throw new RuntimeException("Fallback to linear traversal"); long xrefPosition = getInitialCrossReferencePosition(in); PTrailer documentTrailer = null; while (xrefPosition > 0L) { in.seekAbsolute(xrefPosition); Parser parser = new Parser(in); Object obj = parser.getObject(library); if (obj instanceof PObject) obj = ((PObject) obj).getObject(); PTrailer trailer = (PTrailer) obj; //PTrailer trailer = (PTrailer) parser.getObject( library ); if (trailer == null) throw new RuntimeException("Could not find trailer"); if (trailer.getPrimaryCrossReference() == null) throw new RuntimeException("Could not find cross reference"); trailer.setPosition(xrefPosition); if (documentTrailer == null) documentTrailer = trailer; else documentTrailer.addPreviousTrailer(trailer); // If this trailer has everything we need to get started, // then we can lazily load other trailers later if (true) //TODO So should we make this not be a loop then? break; xrefPosition = trailer.getPrev(); } if (documentTrailer == null) throw new RuntimeException("Could not find document trailer"); LazyObjectLoader lol = new LazyObjectLoader( library, in, documentTrailer.getPrimaryCrossReference()); library.setLazyObjectLoader(lol); pTrailer = documentTrailer; catalog = documentTrailer.getRootCatalog(); library.setCatalog(catalog); if (catalog == null) throw new NullPointerException("Loading via xref failed to find catalog"); boolean madeSecurityManager = makeSecurityManager(documentTrailer); if (madeSecurityManager) attemptAuthorizeSecurityManager(); } private long getInitialCrossReferencePosition(SeekableInput in) throws IOException { in.seekEnd(); long endOfFile = in.getAbsolutePosition(); long currentPosition = endOfFile - 1; long afterStartxref = -1; String startxref = "startxref"; int startxrefIndexToMatch = startxref.length() - 1; while (currentPosition >= 0 && (endOfFile - currentPosition) < 2048) { in.seekAbsolute(currentPosition); int curr = in.read(); if (curr < 0) throw new EOFException("Could not find startxref at end of file"); if (curr == startxref.charAt(startxrefIndexToMatch)) { // If we've matched the whole string if (startxrefIndexToMatch == 0) { afterStartxref = currentPosition + startxref.length(); break; } startxrefIndexToMatch--; } else startxrefIndexToMatch = startxref.length() - 1; currentPosition--; } if (afterStartxref < 0) throw new EOFException("Could not find startxref near end of file"); in.seekAbsolute(afterStartxref); Parser parser = new Parser(in); Number xrefPositionObj = (Number) parser.getToken(); if (xrefPositionObj == null) throw new RuntimeException("Could not find ending cross reference position"); return xrefPositionObj.longValue(); } /** * Uitily method for parsing a PDF documents object. This should only be * called when the xref lookup fails. * * @param in stream representing whole pdf document * @throws PDFException an invalid stream or file encoding * @throws PDFSecurityException if a security provider can not be found * or there is an error decrypting the file. */ private void loadDocumentViaLinearTraversal(InputStream in) throws PDFException, PDFSecurityException { skipPastAnyPrefixJunk(in); library.setLinearTraversal(); Parser parser = new Parser(in); // document Trailer, holds encryption info PTrailer documentTrailer = null; // Loop through all objects that where parsed from the data stream Object pdfObject; while (true) { // parse all of the objects in the stream, objects are added // to the library object. pdfObject = parser.getObject(library); // eof or io error result in break if (pdfObject == null) { break; } // display object information in debug mode if (logger.isLoggable(Level.FINER)) { logger.finer(pdfObject.getClass().getName() + " " + pdfObject); } // Add any PObjects to the vector, so they can be decrypted if (pdfObject instanceof PObject) { PObject tmp = (PObject) pdfObject; Object obj = tmp.getObject(); if (obj != null) pdfObject = obj; } // find the catalog which has information on outlines // which is need by the gui if (pdfObject instanceof Catalog) { catalog = (Catalog) pdfObject; } // Find the trailer object so that we can get the encryption information // trailer information is not a PObject and thus there should if (pdfObject instanceof PTrailer) { if (documentTrailer == null) { documentTrailer = (PTrailer) pdfObject; } else { // add more trailer data to the original PTrailer nextTrailer = (PTrailer) pdfObject; documentTrailer.addNextTrailer(nextTrailer); documentTrailer = nextTrailer; } } } // The LazyObjectLoader is used for both reading from a SeekableInput, // and also accessing ObjectStreams. // So, even with linear traversal, we still need it for PDF 1.5 documents if (documentTrailer != null) { LazyObjectLoader lol = new LazyObjectLoader( library, null, documentTrailer.getPrimaryCrossReference()); library.setLazyObjectLoader(lol); } pTrailer = documentTrailer; library.setCatalog(catalog); // Add Document information object to catalog if (documentTrailer != null) { boolean madeSecurityManager = makeSecurityManager(documentTrailer); if (madeSecurityManager) attemptAuthorizeSecurityManager(); } } /** * Typically, if we're doing a linear traversal, it's because the PDF file * is corrupted, usually by junk being appended to it, or the ending * being truncated, or, in this case, from junk being inserted into the * beginning of the file, skewing all the xref object offsets. * <p/> * We're going to look for the "%PDF-1." string that most PDF files start * with. If we do find it, then leave the InputStream after the next * whitespace, else rewind back to the beginning, in case the file was * never encoded with the PDF version comment. * * @param in InputStream derived from SeekableInput.getInputStream() */ private void skipPastAnyPrefixJunk(InputStream in) { if (!in.markSupported()) return; try { final int scanLength = 2048; final String scanFor = "%PDF-1."; int scanForIndex = 0; boolean scanForWhiteSpace = false; in.mark(scanLength); for (int i = 0; i < scanLength; i++) { int data = in.read(); if (data < 0) { in.reset(); return; } if (scanForWhiteSpace) { if (Parser.isWhitespace((char) data)) { return; } } else { if (data == scanFor.charAt(scanForIndex)) { scanForIndex++; if (scanForIndex == scanFor.length()) { // Now read until we find white space scanForWhiteSpace = true; } } else scanForIndex = 0; } } // Searched through scanLength number of bytes and didn't find it, // so reset, in case it was never there to find in.reset(); } catch (IOException e) { try { in.reset(); } catch (IOException e2) { } } } /** * Utility method for building the SecurityManager if the document * contains a crypt entry in the PTrailer. * * @param documentTrailer document trailer * @return Whether or not a SecurityManager was made, and set in the Library * @throws PDFSecurityException if there is an issue finding encryption libraries. */ private boolean makeSecurityManager(PTrailer documentTrailer) throws PDFSecurityException { /** * Before a securtiy manager can be created or needs to be created * we need the following * 1. The trailer object must have an encrypt entry * 2. The trailer object must have an ID entry */ boolean madeSecurityManager = false; Hashtable encryptDictionary = documentTrailer.getEncrypt(); Vector fileID = documentTrailer.getID(); if (encryptDictionary != null && fileID != null) { // create new security manager library.securityManager = new SecurityManager( library, encryptDictionary, fileID); madeSecurityManager = true; } return madeSecurityManager; } /** * If the document has a SecurityManager it is encrypted and as a result the * following method is used with the SecurityCallback to prompt a user for * a password if needed. * * @throws PDFSecurityException error during authorization manager setup */ private void attemptAuthorizeSecurityManager() throws PDFSecurityException { // check if pdf is password protected, by passing in black // password if (!library.securityManager.isAuthorized("")) { // count password tries int count = 1; // store temporary password String password; // Give user 3 chances to type the correct password // before throwing security exceptions while (true) { // Display password dialog // make sure a callback has been set. if (securityCallback != null) { password = securityCallback.requestPassword(this); if (password == null) { throw new PDFSecurityException("Encryption error"); } } else { throw new PDFSecurityException("Encryption error"); } // Verify new password, proceed if authorized, // fatal exception otherwise. if (library.securityManager.isAuthorized(password)) { break; } count++; // after 3 tries throw the the error. if (count > 3) { throw new PDFSecurityException("Encryption error"); } } } // set the encryption flag on catalog library.setEncrypted(true); } /** * Gets the page dimension of the indicated page number using the specified * rotation factor. * * @param pageNumber Page number for the given dimension. The page * number is zero-based. * @param userRotation Rotation, in degrees, that has been applied to page * when calculating the dimension. * @return page dimension for the specified page number * @see #getPageDimension(int, float, float) */ public PDimension getPageDimension(int pageNumber, float userRotation) { Page page = catalog.getPageTree().getPage(pageNumber, this); PDimension pd = page.getSize(userRotation); catalog.getPageTree().releasePage(page, this); return pd; } /** * Gets the page dimension of the indicated page number using the specified * rotation and zoom settings. If the page does not exist then a zero * dimension is returned. * * @param pageNumber Page number for the given dimension. The page * number is zero-based. * @param userRotation Rotation, in degrees, that has been applied to page * when calculating the dimension. * @param userZoom Any deviation from the page's actual size, by zooming in or out. * @return page dimension for the specified page number. * @see #getPageDimension(int, float) */ public PDimension getPageDimension(int pageNumber, float userRotation, float userZoom) { Page page = catalog.getPageTree().getPage(pageNumber, this); if (page != null) { PDimension pd = page.getSize(userRotation, userZoom); catalog.getPageTree().releasePage(page, this); return pd; } else { return new PDimension(0, 0); } } /** * Returns the origin (filepath or URL) of this Document. This is the original * location of the file where the method getDocumentLocation returns the actual * location of the file. The origin and location of the document will only * be different if it was loaded from a URL or an input stream. * * @return file path or URL * @see #getDocumentLocation */ public String getDocumentOrigin() { return origin; } /** * Returns the file location or URL of this Document. This location may be different * from the file origin if the document was loaded from a URL or input stream. * If the file was loaded from a URL or input stream the file location is * the path to where the document content is cached. * * @return file path * @see #getDocumentOrigin() */ public String getDocumentLocation() { if (cachedFilePath != null) return cachedFilePath; return origin; } /** * Gets an instance of the the document state manager which stores references * of object that need to be written to file. * @return stateManager instance for this document. */ public StateManager getStateManager(){ return stateManager; } /** * Returns the total number of pages in this document. * * @return number of pages in the document */ public int getNumberOfPages() { try { return catalog.getPageTree().getNumberOfPages(); } catch (Exception e) { logger.log(Level.FINE, "Error getting number of pages.", e); } return 0; } /** * Paints the contents of the given page number to the graphics context using * the specified rotation, zoom, rendering hints and page boundary. * * @param pageNumber Page number to paint. The page number is zero-based. * @param g graphics context to which the page content will be painted. * @param renderHintType Constant specified by the GraphicsRenderingHints class. * There are two possible entries, SCREEN and PRINT, each with configurable * rendering hints settings. * @param pageBoundary Constant specifying the page boundary to use when * painting the page content. * @param userRotation Rotation factor, in degrees, to be applied to the rendered page. * @param userZoom Zoom factor to be applied to the rendered page. */ public void paintPage(int pageNumber, Graphics g, final int renderHintType, final int pageBoundary, float userRotation, float userZoom) { Page page = catalog.getPageTree().getPage(pageNumber, this); PDimension sz = page.getSize(userRotation, userZoom); int pageWidth = (int) sz.getWidth(); int pageHeight = (int) sz.getHeight(); Graphics gg = g.create(0, 0, pageWidth, pageHeight); page.paint(gg, renderHintType, pageBoundary, userRotation, userZoom); gg.dispose(); catalog.getPageTree().releasePage(page, this); } /** * Dispose of Document, freeing up all used resources. */ public void dispose() { if (catalog != null) { catalog.dispose(false); catalog = null; } if (library != null) { library.dispose(); library = null; } pTrailer = null; if (documentSeekableInput != null) { try { documentSeekableInput.close(); } catch (IOException e) { logger.log(Level.FINE, "Error closing document input stream.", e); } documentSeekableInput = null; } String fileToDelete = getDocumentCachedFilePath(); if (fileToDelete != null) { File file = new File(fileToDelete); boolean success = file.delete(); if (!success && logger.isLoggable(Level.WARNING)) { logger.warning("Error deleting URL cached to file " + fileToDelete); } } } /** * Takes the internal PDF data, which may be in a file or in RAM, * and write it to the provided OutputStream. * The OutputStream is not flushed or closed, in case this method's * caller requires otherwise. * * @param out OutputStream to which the PDF file bytes are written. * @throws IOException if there is some problem reading or writing the PDF data * @return The length of the PDF file copied */ public long writeToOutputStream(OutputStream out) throws IOException { long documentLength = documentSeekableInput.getLength(); SeekableInputConstrainedWrapper wrapper = new SeekableInputConstrainedWrapper( documentSeekableInput, 0L, documentLength, false); try { wrapper.prepareForCurrentUse(); byte[] buffer = new byte[4096]; int length; while ((length = wrapper.read(buffer, 0, buffer.length)) > 0) { out.write(buffer, 0, length); } } catch (Throwable e) { logger.log(Level.FINE, "Error writting PDF output stream.", e); throw new IOException(e.getMessage()); } finally { try { wrapper.close(); } catch (IOException e) { } } return documentLength; } /** * Copies the pre-existing PDF file, and appends an incremental update for * any edits, to the specified OutputStream. For the pre-existing PDF * content copying, writeToOutputStream(OutputStream out) is used. * * @param out OutputStream to which the PDF file bytes are written. * @throws IOException if there is some problem reading or writing the PDF data * @return The length of the PDF file saved */ public long saveToOutputStream(OutputStream out) throws IOException { long documentLength = writeToOutputStream(out); long appendedLength = appendIncrementalUpdate(out, documentLength); return documentLength + appendedLength; } /** * If ICEpdf Pro, then use append an incremental update of any edits. * * @param out OutputStream to which the incremental update bytes are written. * @param documentLength Length of the PDF file sp far, before the incremental update. * @return The number of bytes written for the incremental update. * @throws IOException */ protected long appendIncrementalUpdate(OutputStream out, long documentLength) throws IOException { return IncrementalUpdater.appendIncrementalUpdate(this, out, documentLength); } /** * Gets an Image of the specified page. The image size is automatically * calculated given the page boundary, user rotation and zoom. The rendering * quality is defined by GraphicsRenderingHints.SCREEN. * * @param pageNumber Page number of the page to capture the image rendering. * The page number is zero-based. * @param renderHintType Constant specified by the GraphicsRenderingHints class. * There are two possible entries, SCREEN and PRINT each with configurable * rendering hints settings. * @param pageBoundary Constant specifying the page boundary to use when * painting the page content. Typically use Page.BOUNDARY_CROPBOX. * @param userRotation Rotation factor, in degrees, to be applied to the rendered page. * Arbitrary rotations are not currently supported for this method, * so only the following values are valid: 0.0f, 90.0f, 180.0f, 270.0f. * @param userZoom Zoom factor to be applied to the rendered page. * @return an Image object of the current page. */ public Image getPageImage(int pageNumber, final int renderHintType, final int pageBoundary, float userRotation, float userZoom) { Page page = catalog.getPageTree().getPage(pageNumber, this); PDimension sz = page.getSize(pageBoundary, userRotation, userZoom); int pageWidth = (int) sz.getWidth(); int pageHeight = (int) sz.getHeight(); BufferedImage image = new BufferedImage(pageWidth, pageHeight, BufferedImage.TYPE_INT_RGB); Graphics g = image.createGraphics(); page.paint(g, renderHintType, pageBoundary, userRotation, userZoom); g.dispose(); catalog.getPageTree().releasePage(page, this); return image; } /** * Exposes a page's PageText object which can be used to get text with * in the PDF document. The PageText.toString() is the simplest way to * get a pages text. This utility call does not parse the whole stream * and is best suited for text extraction functionality as it faster then * #getPageViewText(int). * * @param pageNumber Page number of page in which text extraction will act on. * The page number is zero-based. * @return page PageText data Structure. * @see #getPageViewText(int). */ public PageText getPageText(int pageNumber) { PageTree pageTree = catalog.getPageTree(); if (pageNumber >= 0 && pageNumber < pageTree.getNumberOfPages()) { Page pg = pageTree.getPage(pageNumber, this); PageText text = pg.getText(); catalog.getPageTree().releasePage(pg, this); return text; } else { return null; } } /** * Exposes a page's PageText object which can be used to get text with * in the PDF document. The PageText.toString() is the simplest way to * get a pages text. The pageText hierarchy can be used to search for * selected text or used to set text as highlighted. * * @param pageNumber Page number of page in which text extraction will act on. * The page number is zero-based. * @return page PageText data Structure. */ public PageText getPageViewText(int pageNumber) { PageTree pageTree = catalog.getPageTree(); if (pageNumber >= 0 && pageNumber < pageTree.getNumberOfPages()) { Page pg = pageTree.getPage(pageNumber, this); PageText text = pg.getViewText(); catalog.getPageTree().releasePage(pg, this); return text; } else { return null; } } /** * Gets the security manager for this document. If the document has no * security manager null is returned. * * @return security manager for document if available. */ public SecurityManager getSecurityManager() { return library.securityManager; } /** * Sets the security callback to be used for this document. The security * callback allows a mechanism for prompting a user for a password if the * document is password protected. * * @param securityCallback a class which implements the SecurityCallback * interface. */ public void setSecurityCallback(SecurityCallback securityCallback) { this.securityCallback = securityCallback; } /** * Gets the document's information as specified in the PTrailer in the document * hierarchy. * * @return document information * @see org.icepdf.core.pobjects.PInfo for more information. */ public PInfo getInfo() { if (pTrailer == null) return null; return pTrailer.getInfo(); } /** * Gets a vector of Images where each index represents an image inside * the specified page. The images are returned in the size in which they * where embedded in the PDF document, which may be different than the * size displayed when the complete PDF page is rendered. * * @param pageNumber page number to act on. Zero-based page number. * @return vector of Images inside the current page */ public Vector getPageImages(int pageNumber) { Page pg = catalog.getPageTree().getPage(pageNumber, this); Vector images = pg.getImages(); catalog.getPageTree().releasePage(pg, this); return images; } /** * Gets the Document Catalog's PageTree entry as specified by the Document * hierarchy. The PageTree can be used to obtain detailed information about * the Page object which makes up the document. * * @return PageTree specified by the document hierarchy. */ public PageTree getPageTree() { return catalog.getPageTree(); } /** * Gets the Document's Catalog as specified by the Document hierarchy. The * Catalog can be used to traverse the Document's hierarchy. * * @return document's Catalog object; null, if one does not exist. */ public Catalog getCatalog() { return catalog; } }