/* * Copyright 2010 adam. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * under the License. */ package org.apache.pdfbox; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdfparser.PDFObjectStreamParser; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.persistence.util.COSObjectKey; /** * This program will just take all of the stream objects in a PDF and dereference * them. The streams will be gone in the resulting file and the objects will be * present. This is very helpful when trying to debug problems as it'll make * it possible to easily look through a PDF using a text editor. It also exposes * problems which stem from objects inside object streams overwriting other * objects. * @author <a href="adam@apache.org">Adam Nichols</a> */ public class PdfDecompressor { /** * This is a very simple program, so everything is in the main method. * @param args arguments to the program */ public static void main(String[] args) { if(args.length < 1) usage(); String inputFilename = args[0]; String outputFilename; if(args.length > 1) { outputFilename = args[1]; } else { if(inputFilename.matches(".*\\.[pP][dD][fF]$")) outputFilename = inputFilename.replaceAll("\\.[pP][dD][fF]$", ".unc.pdf"); else outputFilename = inputFilename + ".unc.pdf"; } PDDocument doc = null; try { doc = PDDocument.load(inputFilename); for(COSObject objStream : doc.getDocument().getObjectsByType("ObjStm")) { COSStream stream = (COSStream)objStream.getObject(); PDFObjectStreamParser sp = new PDFObjectStreamParser(stream, doc.getDocument()); sp.parse(); for(COSObject next : sp.getObjects()) { COSObjectKey key = new COSObjectKey(next); COSObject obj = doc.getDocument().getObjectFromPool(key); obj.setObject(next.getObject()); } doc.getDocument().removeObject(new COSObjectKey(objStream)); } doc.save(outputFilename); } catch(Exception e) { System.out.println("Error processing file: " + e.getMessage()); } finally { if(doc != null) try { doc.close(); } catch(Exception e) { } } } /** * Explains how to use the program. */ private static void usage() { System.err.println( "Usage: java -cp /path/to/pdfbox.jar;/path/to/commons-logging-api.jar " + "org.apache.pdfbox.PdfDecompressor <input PDF File> [<Output PDF File>]\n" + " <input PDF File> The PDF document to decompress\n" + " <output PDF File> The output filename (default is to replace .pdf with .unc.pdf)"); System.exit(1); } }