package pdfainspector;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import nu.xom.Attribute;
import nu.xom.Element;

import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfNumber;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;

/**
 * Convert image data in a PDF to a XOM XML element.
 * @author schiele1
 */
public class ImageExtractor {

    /**
     * Given an iText PDF Reader, extract image data from the PDF and store it
     * in a XOM XML element.
     * <p>
     * The result is an {@code Images} element holding one {@code Image} child
     * per image found, each carrying {@code Width}, {@code Height} and
     * {@code Page} attributes. Pages whose content cannot be processed are
     * skipped, so they contribute no children.
     *
     * @param reader A reader for the given PDF.
     * @return A XOM element containing image data.
     */
    public static Element extractToXML(PdfReader reader){
        Element root = new Element("Images");
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);

        // Go through the PDF one page at a time, pulling images from each page.
        int pageCount = reader.getNumberOfPages();
        for (int page = 1; page <= pageCount; page++) {
            // A fresh listener per page guarantees that a failure on this page
            // can never cause an earlier page's elements to be re-tagged and
            // appended to the root a second time.
            ImageRenderListener listener = new ImageRenderListener();
            try {
                parser.processContent(page, listener);
            } catch (IOException ignored) {
                // Best effort: an unreadable page contributes no images.
                continue;
            }

            // Add the current page number to each image, add it to the root.
            String pageNumber = Integer.toString(page);
            for (Element image : listener.getImageData()) {
                image.addAttribute(new Attribute("Page", pageNumber));
                root.appendChild(image);
            }
        }
        return root;
    }

    /**
     * This class scans the page for images and returns a list of elements,
     * one representing each image it finds.
     */
    private static class ImageRenderListener implements RenderListener{

        /** One {@code Image} element per image rendered so far; never null. */
        private final List<Element> elements = new ArrayList<Element>();

        /** Text blocks are irrelevant to image extraction; does nothing. */
        @Override
        public void beginTextBlock() { }

        /** Text blocks are irrelevant to image extraction; does nothing. */
        @Override
        public void endTextBlock() { }

        /**
         * Record the width and height of a rendered image as an
         * {@code Image} element. Images whose dictionary lacks a numeric
         * {@code Width} or {@code Height}, or which cannot be read at all,
         * are skipped.
         *
         * @param renderInfo Rendering information for the image.
         */
        @Override
        public void renderImage(ImageRenderInfo renderInfo) {
            try {
                Object width = renderInfo.getImage().get(PdfName.WIDTH);
                Object height = renderInfo.getImage().get(PdfName.HEIGHT);

                // instanceof is false for null, so this also covers missing entries.
                if (!(width instanceof PdfNumber) || !(height instanceof PdfNumber)) {
                    return;
                }

                Element element = new Element("Image");
                element.addAttribute(new Attribute("Width", width.toString()));
                element.addAttribute(new Attribute("Height", height.toString()));
                elements.add(element);
            } catch (Exception ignored) {
                // Deliberately best effort: one unreadable or malformed image
                // must not abort extraction of the remaining images.
            }
        }

        /**
         * @return The elements collected so far, in the order the images were
         *         rendered. The list is never null but may be empty.
         */
        public List<Element> getImageData(){
            return elements;
        }

        /** Text is irrelevant to image extraction; does nothing. */
        @Override
        public void renderText(TextRenderInfo renderInfo) { }
    }
}