package edu.isi.bmkeg.lapdf.extraction;

import java.awt.image.BufferedImage;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

import org.jpedal.PdfDecoder;
import org.jpedal.exception.PdfException;
import org.jpedal.fonts.PdfFont;
import org.jpedal.grouping.PdfGroupingAlgorithms;

import edu.isi.bmkeg.lapdf.extraction.exceptions.AccessException;
import edu.isi.bmkeg.lapdf.extraction.exceptions.EncryptionException;

/**
 * Iterates over the pages of a PDF file, rendering each page to a
 * {@link BufferedImage} through a JPedal {@link PdfDecoder}.
 *
 * <p>Typical use: construct once, call {@link #init(String)} to open a file,
 * loop with {@link #hasNext()} / {@link #next()}, then call {@link #close()}.
 * The same instance can be re-used for another file by calling
 * {@link #init(String)} again.
 *
 * <p>Pages are numbered from 1, matching the page index handed to the decoder.
 */
public class JPedalPageImageExtractor implements Iterator<BufferedImage> {

    /** Decoder used to open the PDF and render its pages. */
    private final PdfDecoder decoder;

    /** 1-based number of the page the next call to {@link #next()} renders. */
    int currentPage = 1;

    /** Page count of the currently opened file; 0 until a file is opened. */
    int pageCount;

    /** Currently unused by this class. */
    private List<BufferedImage> pageImages;

    /**
     * Creates the extractor and configures the underlying decoder.
     *
     * <p>Note that this sets the static flag
     * {@code PdfGroupingAlgorithms.useUnrotatedCoords}, which is shared by
     * every user of that class in the JVM.
     */
    public JPedalPageImageExtractor() {
        // NOTE(review): the meaning of the boolean arguments is defined by the
        // JPedal API and is not visible here -- confirm against its Javadoc.
        this.decoder = new PdfDecoder(true);
        decoder.init(true);
        PdfGroupingAlgorithms.useUnrotatedCoords = true;
        // if you do not require XML content, pure text extraction is much
        // faster.
        decoder.useXMLExtraction();
    }

    /**
     * Opens {@code fileName} and positions the iterator on its first page.
     * Any file previously opened through this instance is closed first.
     *
     * <p>The access checks run after the file has been opened and the page
     * count has been read, so when this method throws
     * {@link AccessException} or {@link EncryptionException} the decoder is
     * still open; call {@link #close()} to release it.
     *
     * @param fileName path of the PDF file to open
     * @throws PdfException if the decoder fails to open the file
     * @throws AccessException if the decoder reports that extraction is not
     *         allowed for this file
     * @throws EncryptionException if extraction is allowed but the decoder
     *         reports the file as encrypted
     */
    public void init(String fileName) throws PdfException, AccessException,
            EncryptionException {

        // Release whatever was open before switching to the new file.
        close();

        decoder.openPdfFile(fileName);
        currentPage = 1;
        pageCount = decoder.getPageCount();

        if (!decoder.isExtractionAllowed()) {
            throw new AccessException(fileName);
        } else if (decoder.isEncrypted()) {
            throw new EncryptionException(fileName);
        }
    }

    /**
     * Flushes and closes the currently opened PDF file, if there is one.
     * Does nothing when no file is open.
     */
    public void close() {
        if (decoder.isOpen()) {
            decoder.flushObjectValues(true);
            decoder.closePdfFile();
        }
    }

    /**
     * @return {@code true} while there is at least one page that has not yet
     *         been handed out by {@link #next()}
     */
    @Override
    public boolean hasNext() {
        return currentPage <= pageCount;
    }

    /**
     * Renders the current page and advances to the following one.
     *
     * <p>The page cursor is advanced even when rendering fails, so a page
     * that cannot be rendered is skipped instead of being retried forever by
     * a {@code while (hasNext())} loop.
     *
     * @return the rendered page image, or {@code null} if the decoder threw a
     *         {@link PdfException} for this page (the stack trace is printed)
     * @throws NoSuchElementException if there are no more pages, including
     *         the case where no file has been opened yet
     */
    @Override
    public BufferedImage next() {
        if (!hasNext()) {
            throw new NoSuchElementException("No page " + currentPage
                    + " (page count is " + pageCount + ")");
        }

        // Move the cursor first so a failing page cannot stall the iteration.
        final int page = currentPage;
        currentPage++;

        try {
            return decoder.getPageAsImage(page);
        } catch (PdfException e) {
            // Best effort: report the failure and let the caller carry on
            // with the remaining pages.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Removal is not supported: pages cannot be deleted from the PDF through
     * this iterator.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException(
                "remove() is not supported by JPedalPageImageExtractor");
    }

}