package cc.abstra.trantor.pdfconverter;
/**
*
* @author nando
*/
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDPixelMap;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
import org.apache.pdfbox.preflight.PreflightDocument;
import org.apache.pdfbox.preflight.ValidationResult;
import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
import org.apache.pdfbox.preflight.parser.PreflightParser;
import org.apache.pdfbox.util.PDFOperator;
import javax.activation.FileDataSource;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
public class PdfDoc {
public static final float PDFBOX_DEFAULT_USER_SPACE_UNIT_DPI = 72.0f;
//TODO: isSigned: See http://blog.javabien.net/2009/05/01/pdfbox-to-unit-test-pdf-files/
static boolean isPDFA1bCompliant(String pdf){
ValidationResult result = null;
FileDataSource fd = new FileDataSource(pdf);
try {
PreflightParser parser = new PreflightParser(fd);
parser.parse();
PreflightDocument document = parser.getPreflightDocument();
document.validate();
result = document.getResult();
document.close();
} catch (SyntaxValidationException ex) {
result = ex.getResult();
} catch (IOException ex) {
Logger.getLogger(PdfDoc.class.getName()).log(Level.SEVERE, null, ex);
} catch (NullPointerException ex) {
Logger.getLogger(PdfDoc.class.getName()).log(Level.INFO, "There was an error validating "+pdf+
"\nValidation marked as failing"+
"\nPlease create a ticket at " +
"https://issues.apache.org/jira/browse/PDFBOX/component/12315215 and paste the exception and "+
"the output from http://www.pdf-tools.com/pdf/validate-pdfa-online.aspx", ex);
return false;
}
assert result != null;
if (result.isValid()) {
Logger.getLogger(PdfDoc.class.getName()).log(Level.INFO, "The file " + pdf + " is a valid PDF/A-1b file");
return true;
} else {
Logger.getLogger(PdfDoc.class.getName()).log(Level.INFO, "The file" + pdf + " is not valid, error(s) :");
for (ValidationResult.ValidationError error : result.getErrorsList()) {
Logger.getLogger(PdfDoc.class.getName()).log(Level.INFO, error.getErrorCode() + " : " + error.getDetails());
}
return false;
}
}
public static void pdfToPngPreview(String pdf, String output) throws IOException {
PDDocument pdDoc = null;
try {
pdDoc = PDDocument.load(pdf);
List pdPages = pdDoc.getDocumentCatalog().getAllPages();
ListIterator pageIter = pdPages.listIterator();
PDPage firstPage = (PDPage)pageIter.next();
BufferedImage img = firstPage.convertToImage(BufferedImage.TYPE_INT_RGB, Consts.PREVIEW_DPI);
ImageIO.write(img, Consts.PNG, new File(output));
} catch (Exception ex) {
Logger.getLogger(PdfDoc.class.getName()).log(Level.SEVERE, null, ex);
} finally {
if (null != pdDoc) {
pdDoc.close();
}
}
}
public static void pdfToPdfPreview(String pdf, String output) throws IOException {
PDDocument pdDoc = null;
List<Map<String, Object>> pageList = new ArrayList<>();
try {
pdDoc = PDDocument.load(pdf);
List pdPages = pdDoc.getDocumentCatalog().getAllPages();
for (Object pdPage : pdPages) {
Map<String, Object> pageInfo = new LinkedHashMap<>();
PDPage currentPage = (PDPage)pdPage;
BufferedImage img = currentPage.convertToImage(BufferedImage.TYPE_INT_RGB, Consts.PREVIEW_DPI);
float scaleDownFactor = Consts.PREVIEW_DPI/PDFBOX_DEFAULT_USER_SPACE_UNIT_DPI; //required by PDFBox.convertToImage()
//TODO Subclass org.apache.pdfbox.pdfviewer.PageDrawer if there's too much antialiasing
pageInfo.put(Consts.IMAGE_KEY, ImageHelper.resizeImageToDINA4WithDPI(img, Consts.PREVIEW_DPI, scaleDownFactor));
pageInfo.put(Consts.PAGE_SIZE_KEY, PDPage.PAGE_SIZE_A4); //default preview size
pageInfo.put(Consts.LANDSCAPE_KEY, ImageHelper.hasLandscapeOrientation(img));
pageList.add(pageInfo);
}
writePageListToPdf(pageList, output);
} catch (Exception ex) {
Logger.getLogger(PdfDoc.class.getName()).log(Level.SEVERE, null, ex);
} finally {
if (null != pdDoc) {
pdDoc.close();
}
}
}
static void writePageListToPdf(List<Map<String, Object>> images, String output) throws IOException {
PDDocument pdDoc = null;
try {
pdDoc = new PDDocument();
for (Map<String, Object> image1 : images) {
PDPage page = null;
PDXObjectImage ximage = null;
PDPageContentStream contentStream = null;
Map<String, Object> image = image1;
Object imgKey = image.get(Consts.IMAGE_KEY);
if (imgKey instanceof BufferedImage) {
BufferedImage bi = (BufferedImage) imgKey;
PDRectangle pageSize = (PDRectangle)image.get(Consts.PAGE_SIZE_KEY);
page = new PDPage(pageSize);
page.setMediaBox(pageSize);
ximage = new PDPixelMap(pdDoc, bi); //embeds PNG image
double vOffset, hOffset;
if ((boolean)image.get(Consts.LANDSCAPE_KEY)){
float pageWidth = pageSize.getWidth();
page.setRotation(90);
contentStream = new PDPageContentStream(pdDoc, page);
contentStream.concatenate2CTM(0, 1, -1, 0, pageWidth, 0);
vOffset = 0;
hOffset = ((Consts.A4_W_INCHES * Consts.INCH_TO_POINT) - ximage.getWidth())/2.0;
} else {
vOffset = ((Consts.A4_H_INCHES * Consts.INCH_TO_POINT) - ximage.getHeight())/2.0;
hOffset = 0;
contentStream = new PDPageContentStream(pdDoc, page);
}
contentStream.drawImage(ximage, (float) hOffset, (float) vOffset);
contentStream.close();
bi.flush();
} else {
if (imgKey instanceof PDPage) {
page = (PDPage) imgKey;
} else {
throw new Exception("Unrecognized object found in 'img' value: " + imgKey.getClass().getName());
}
}
pdDoc.addPage(page);
}
pdDoc.save(output);
} catch (Exception ex) {
Logger.getLogger(PdfDoc.class.getName()).log(Level.SEVERE, null, ex);
} finally {
if (null != pdDoc) {
pdDoc.close();
}
}
}
private static boolean hasText(PDPage page) throws IOException {
PDStream contents = page.getContents();
PDFStreamParser parser = new PDFStreamParser(contents.getStream());
parser.parse();
List tokens = parser.getTokens();
for (Object next : tokens) {
if (next instanceof PDFOperator) {
PDFOperator op = (PDFOperator) next;
if (op.getOperation().equalsIgnoreCase(Consts.PDFMetadata.TEXT_OPERATOR)) {
return true;
}
}
}
return false;
}
//TODO: PDF Metadata: http://www.docjar.com/html/api/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java.html
private static boolean checkVersion(String requiredVersion, String file) throws IOException {
PDDocument document = null;
boolean res = false;
try {
document = PDDocument.load(file);
// some pdf-documents are broken and the pdf-version is in the headers and
// not in the metadata section. See: pdfbox/pdfparser/PDFParser.java
String version = Float.toString(document.getDocument().getVersion());
Logger.getLogger(PdfDoc.class.getName()).log(Level.INFO, "The file" + file + " has version: "+ version);
if(version.equals(requiredVersion)) // gte
res = true;
} finally {
if(document != null)
document.close();
}
return res;
}
}