/*
 * PDFVisualComparator
 *
 * Copyright (c) 2012, E&E information consultants AG. All rights reserved.
 * Authors:
 *   Peter Jentsch
 *   Nico Hezel
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301  USA
 */
package de.ee.hezel;

import java.awt.Color;
import java.awt.Graphics;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.text.DecimalFormat;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import javax.imageio.ImageIO;

import org.apache.log4j.Logger;

import com.sun.pdfview.PDFFile;
import com.sun.pdfview.PDFPage;

import de.ee.hezel.logger.ICompareLogger;
import de.ee.hezel.model.PDFInfoHolder;
import de.ee.hezel.model.PDFPageHolder;
import de.ee.hezel.model.PDFInfoHolder.DifferenceType;
import de.ee.hezel.model.pdfelemente.PDFEntryHolder;
import de.ee.hezel.model.pdfelemente.PDFImageHolder;
import de.ee.hezel.model.pdfelemente.PDFTextHolder;

/**
 * Compares the two PDF documents of a {@link PDFInfoHolder} page by page on
 * pixel level. Only the areas covered by the structure elements (texts and
 * images) of a page are inspected. Elements that look different are reported
 * to the difference logger, and pages with differences are optionally written
 * as PNG illustrations below the target folder.
 *
 * @author hezeln
 */
public class PDFVisualComparator extends AbstractPDFCompare {

    static Logger log = Logger.getLogger(PDFVisualComparator.class.getName());

    /** A pixel counts as different if its mean channel difference exceeds this value. */
    private static final double MIN_MEAN_COLOR_DIFF = 5;

    /** Share of (weighted) different pixels above which an element counts as different (10%). */
    private static final double RELATIVE_DIFF_THRESHOLD = 0.1;

    private final PDFInfoHolder pdfInfoHolder;
    private final PDFVisualiseDifference pdfVisualiseDifference;
    private final File targetFolder;

    /**
     * @param outputDir folder which receives the difference images; may be
     *                  {@code null}, in which case no images are written by this class
     * @param diffLog   logger which receives the textual difference reports
     * @param pdfih     holder of the two documents to compare
     */
    public PDFVisualComparator(File outputDir, ICompareLogger diffLog, PDFInfoHolder pdfih) {
        pdfVisualiseDifference = new PDFVisualiseDifference(outputDir, diffLog, pdfih);
        setDifferenceLogger(diffLog);
        pdfInfoHolder = pdfih;
        targetFolder = outputDir;
    }

    /**
     * Start the visual comparison of all pages. If the second document is
     * missing entirely, every page of the first document is rendered as a
     * "missing page" illustration instead.
     */
    public void compare() {
        // could not find the new generated pdf document
        if (pdfInfoHolder.getDifferent() == DifferenceType.MISSINGDOCUMENT) {
            missingDocument(pdfInfoHolder.getPDF1());
            return;
        }

        PDFFile pdf1 = pdfInfoHolder.getPDF1();
        PDFFile pdf2 = pdfInfoHolder.getPDF2();

        // find all differences on all pages (page numbers are 1-based here)
        for (int i = 1; i <= pdf1.getNumPages(); i++) {
            PDFPage pagePDF1 = pdf1.getPage(i);
            PDFPage pagePDF2 = pdf2.getPage(i);

            // missing a page
            if (pagePDF2 == null) {
                missingPage(pagePDF1, i);
                continue;
            }

            // find real visual differences
            findVisualDifferences(i, pagePDF1, pagePDF2, targetFolder);
        }

        // if there is a difference on one of the
        // pages mark the entire pdf as different
        pdfInfoHolder.checkDifference();
    }

    /**
     * Write an illustration for a page that has no counterpart in the second
     * document. Nothing is written when no target folder was configured.
     *
     * @param pagePDF the page of the first document
     * @param pageNum 1-based page number, used for the file name
     */
    private void missingPage(PDFPage pagePDF, int pageNum) {
        // Without a target folder the path would start with the literal
        // text "null/"; findVisualDifferences() applies the same guard.
        if (targetFolder == null) {
            return;
        }

        try {
            BufferedImage diffimg = pdfVisualiseDifference.drawPageInRed(pagePDF);
            saveDifferenceImage(targetFolder, diffimg, pageNum);
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }

    /**
     * Treat every page of the given document as missing.
     *
     * @param pdf the document whose counterpart could not be found
     */
    private void missingDocument(PDFFile pdf) {
        for (int i = 1; i <= pdf.getNumPages(); i++) {
            missingPage(pdf.getPage(i), i);
        }
    }

    /**
     * Compare one pair of pages and, if they differ and a target folder is
     * given, save an illustration of the differences. Any exception is logged
     * and ends the comparison of this page only.
     *
     * @param pageNum      1-based page number
     * @param pagePDF1     page of the first document
     * @param pagePDF2     page of the second document
     * @param targetFolder folder for the difference image, or {@code null} to skip writing
     */
    private void findVisualDifferences(int pageNum, PDFPage pagePDF1, PDFPage pagePDF2, File targetFolder) {
        try {
            // convert the pages into images
            BufferedImage pageImgPDF1 = PDFVisualiseDifference.convertPage(pagePDF1);
            BufferedImage pageImgPDF2 = PDFVisualiseDifference.convertPage(pagePDF2);

            // get the structure elements for this page (page holders are 0-based)
            PDFPageHolder pdfPageHolder1 = pdfInfoHolder.getPDFStructure1().getPageHolder(pageNum - 1);
            PDFPageHolder pdfPageHolder2 = pdfInfoHolder.getPDFStructure2().getPageHolder(pageNum - 1);
            Set<PDFEntryHolder> entryHolders = new HashSet<PDFEntryHolder>(pdfPageHolder1.getElements());
            entryHolders.addAll(pdfPageHolder2.getElements());

            // compare both images only at those places an entry holder points to
            comparePDFEntries(pageImgPDF1, pageImgPDF2, entryHolders, pageNum);

            // check if a difference was found
            pdfPageHolder1.checkDifference();
            pdfPageHolder2.checkDifference();

            // mark the found differences visually
            if (targetFolder != null && (pdfPageHolder1.isDifferent() || pdfPageHolder2.isDifferent())) {
                BufferedImage diffimg = pdfVisualiseDifference.visualiseDifferences(pageImgPDF1, pageImgPDF2,
                        entryHolders);
                saveDifferenceImage(targetFolder, diffimg, pageNum);
            }
        } catch (Exception e) {
            log.error(pdfInfoHolder.getFilename() + ": " + e.getMessage(), e);
        }
    }

    /**
     * Write the image to {@code <folder>/<filename>_pdf/page_<pageNum>.png},
     * creating the directory if necessary.
     */
    private void saveDifferenceImage(File folder, BufferedImage image, int pageNum) throws IOException {
        String pathName = folder + "/" + pdfInfoHolder.getFilename() + "_pdf/";
        File dir = new File(pathName);
        dir.mkdirs();
        ImageIO.write(image, "PNG", new File(pathName + "page_" + pageNum + ".png"));
    }

    /**
     * The given entry holders mark those places in the images which are
     * interesting. Compare the area of each of them and mark the entry
     * holder if the two images differ there.
     *
     * EntryHolder.isDifferent tells afterwards if this place is visually different.
     *
     * @param pageImgPDF1  rendered page of the first document
     * @param pageImgPDF2  rendered page of the second document
     * @param entryHolders structure elements of both pages
     * @param pageNum      1-based page number, used for reporting only
     */
    private void comparePDFEntries(BufferedImage pageImgPDF1, BufferedImage pageImgPDF2,
            Set<PDFEntryHolder> entryHolders, int pageNum) {
        int pageWidth = pageImgPDF1.getWidth();
        int pageHeight = pageImgPDF1.getHeight();

        // 1d pixel arrays
        int[] img1Pixels = pdfVisualiseDifference.getPixelArray(1, pageWidth * pageHeight);
        int[] img2Pixels = pdfVisualiseDifference.getPixelArray(2, pageWidth * pageHeight);

        // copy the pixels, both with the dimension of the first image
        // NOTE(review): if the second page is rendered smaller than the first,
        // getRGB throws; the caller only logs that and the page is not marked
        // as different - confirm whether that is intended.
        pageImgPDF1.getRGB(0, 0, pageWidth, pageHeight, img1Pixels, 0, pageWidth);
        pageImgPDF2.getRGB(0, 0, pageWidth, pageHeight, img2Pixels, 0, pageWidth);

        // search for differences inside the area of all elements
        for (PDFEntryHolder pdfEntryHolder : entryHolders) {
            int diffValue = measureDifference(pdfEntryHolder, img1Pixels, img2Pixels, pageWidth, pageHeight);

            // mark the entry holder as different, if the images
            // at this position differ from each other
            analyseDifference(diffValue, pdfEntryHolder, pageNum);
        }
    }

    /**
     * Count the (weighted) number of pixels inside the area of the entry
     * holder whose colour differs noticeably between both images.
     *
     * @return the weighted pixel count, or {@link Integer#MAX_VALUE} if the
     *         element reaches below the bottom of the page
     */
    private int measureDifference(PDFEntryHolder entryHolder, int[] img1Pixels, int[] img2Pixels,
            int pageWidth, int pageHeight) {
        // dimension of the entry holder for the current zoom factor
        int entryX = (int) (entryHolder.getX() * PDFVisualiseDifference.IMAGE_SCALER);
        int entryY = (int) (entryHolder.getY() * PDFVisualiseDifference.IMAGE_SCALER);
        int entryWidth = (int) (entryHolder.getWidth() * PDFVisualiseDifference.IMAGE_SCALER);
        int entryHeight = (int) (entryHolder.getHeight() * PDFVisualiseDifference.IMAGE_SCALER);

        // for images the pixels at the edge are more important
        boolean weightEdges = entryHolder instanceof PDFImageHolder;
        int halfWidth = entryWidth / 2;
        int halfHeight = entryHeight / 2;
        int centerX = entryX + halfWidth;
        int centerY = entryY + halfHeight;

        int firstRow = Math.max(entryY, 0);
        int endRow = entryY + entryHeight;
        int firstColumn = Math.max(entryX, 0);
        // Clip to the page width: an x beyond the row would otherwise address
        // pixels of the following row in the 1d array.
        int endColumn = Math.min(entryX + entryWidth, pageWidth);

        int diffValue = 0;
        for (int y = firstRow; y < endRow; y++) {
            if (y >= pageHeight) {
                // report once and treat the element as maximally different
                log.error(pdfInfoHolder.getFilename() + ": graphics boundaries exceed page boundaries. y=" + y
                        + ", pageWidth=" + pageWidth);
                return Integer.MAX_VALUE;
            }

            // A zero half extent (element only one pixel high) gets no edge
            // weight instead of a division by zero.
            double yDeviationToBorder = (halfHeight == 0) ? 0 : Math.abs((double) (centerY - y) / halfHeight);
            int rowOffset = y * pageWidth;

            for (int x = firstColumn; x < endColumn; x++) {
                // pixel position in the 1d pixel array
                int pos = rowOffset + x;
                int rgb1 = img1Pixels[pos];
                int rgb2 = img2Pixels[pos];

                // absolute difference per colour channel
                int rDiff = Math.abs(((rgb1 >> 16) & 255) - ((rgb2 >> 16) & 255));
                int gDiff = Math.abs(((rgb1 >> 8) & 255) - ((rgb2 >> 8) & 255));
                int bDiff = Math.abs((rgb1 & 255) - (rgb2 & 255));
                double meanColorDiff = (double) (rDiff + gDiff + bDiff) / 3;

                if (meanColorDiff <= MIN_MEAN_COLOR_DIFF) {
                    continue;
                }

                double pixelImportance = 1;
                if (weightEdges) {
                    // Distance from the centre relative to the half extent,
                    // taken as absolute value so that all four edges are
                    // weighted alike: 0 in the centre, 1 at the border.
                    double xDeviationToBorder = (halfWidth == 0) ? 0
                            : Math.abs((double) (centerX - x) / halfWidth);
                    double maxDeviation = Math.max(xDeviationToBorder, yDeviationToBorder);
                    double squaredDeviation = ((maxDeviation * 10) * (maxDeviation * 10)) / 10;
                    pixelImportance = 1 + squaredDeviation;
                }

                // count the different pixel; the compound assignment narrows
                // the sum back to int, i.e. the fraction of the weight is dropped
                diffValue += pixelImportance;
            }
        }
        return diffValue;
    }

    /**
     * Decide if the difference for the given pdf element is strong enough to
     * count as a difference, report it to the difference logger if so, and
     * store the verdict in the entry holder.
     *
     * @param diffValue   weighted number of different pixels inside the element
     * @param entryHolder the element that was inspected
     * @param pageNum     1-based page number, used for the report
     */
    private void analyseDifference(int diffValue, PDFEntryHolder entryHolder, int pageNum) {
        boolean isDifferent = false;

        int entryWidth = (int) (entryHolder.getWidth() * PDFVisualiseDifference.IMAGE_SCALER);
        int entryHeight = (int) (entryHolder.getHeight() * PDFVisualiseDifference.IMAGE_SCALER);
        int pixelAmount = entryHeight * entryWidth;
        double relativeDiff = (double) diffValue / pixelAmount;

        // does the value exceed the threshold
        if (relativeDiff > RELATIVE_DIFF_THRESHOLD) {
            isDifferent = true;

            DecimalFormat df = new DecimalFormat("####.###");
            if (entryHolder instanceof PDFTextHolder) {
                PDFTextHolder th = (PDFTextHolder) entryHolder;
                diff.log(pdfInfoHolder.getFilename() + ": Text \"" + th.getText() + "\" on page " + pageNum
                        + " at position " + df.format(th.getX()) + " | " + df.format(th.getY())
                        + " with size " + df.format(th.getWidth()) + " width and "
                        + df.format(th.getHeight()) + " height looks different");
            } else {
                diff.log(pdfInfoHolder.getFilename() + ": Image on page " + pageNum
                        + " at position " + df.format(entryHolder.getX()) + " | " + df.format(entryHolder.getY())
                        + " with size " + df.format(entryHolder.getWidth()) + " width and "
                        + df.format(entryHolder.getHeight()) + " height looks different");
            }
        }

        entryHolder.setDifferent(isDifferent);
    }
}