/* * Created on 03/set/2015 * Copyright 2015 by Andrea Vacondio (andrea.vacondio@gmail.com). * This file is part of Sejda. * * Sejda is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Sejda is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with Sejda. If not, see <http://www.gnu.org/licenses/>. */ package org.sejda.impl.sambox; import static java.util.Optional.ofNullable; import static org.sejda.common.ComponentsUtility.nullSafeCloseQuietly; import static org.sejda.core.notification.dsl.ApplicationEventsNotifier.notifyEvent; import static org.sejda.core.support.io.IOUtils.createTemporaryBufferWithName; import static org.sejda.impl.sambox.component.SignatureClipper.clipSignatures; import java.io.Closeable; import java.io.File; import java.util.*; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.StringUtils; import org.sejda.common.LookupTable; import org.sejda.core.support.io.IOUtils; import org.sejda.core.support.io.OutputWriters; import org.sejda.core.support.io.SingleOutputWriter; import org.sejda.impl.sambox.component.AcroFormsMerger; import org.sejda.impl.sambox.component.AnnotationsDistiller; import org.sejda.impl.sambox.component.DefaultPdfSourceOpener; import org.sejda.impl.sambox.component.FilenameFooterWriter; import org.sejda.impl.sambox.component.OutlineMerger; import org.sejda.impl.sambox.component.PDDocumentHandler; import org.sejda.impl.sambox.component.PdfScaler; import org.sejda.impl.sambox.component.TableOfContentsCreator; import org.sejda.impl.sambox.component.image.ImagesToPdfDocumentConverter; import org.sejda.model.exception.TaskException; import org.sejda.model.input.*; import org.sejda.model.parameter.MergeParameters; import org.sejda.model.scale.ScaleType; import org.sejda.model.task.BaseTask; import org.sejda.model.task.TaskExecutionContext; import org.sejda.model.toc.ToCPolicy; import org.sejda.sambox.pdmodel.PDPage; import org.sejda.sambox.pdmodel.PageNotFoundException; import org.sejda.sambox.pdmodel.common.PDRectangle; import org.sejda.sambox.pdmodel.interactive.annotation.PDAnnotation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * SAMBox implementation of the Merge task that merges together a number of documents or part of them. * * @author Andrea Vacondio * */ public class MergeTask extends BaseTask<MergeParameters> { private static final Logger LOG = LoggerFactory.getLogger(MergeTask.class); private SingleOutputWriter outputWriter; private PdfSourceOpener<PDDocumentHandler> sourceOpener; private int totalSteps; private PDDocumentHandler destinationDocument; private Queue<Closeable> toClose = new LinkedList<>(); private OutlineMerger outlineMerger; private AcroFormsMerger acroFormsMerger; private TableOfContentsCreator tocCreator; private FilenameFooterWriter footerWriter; private PDRectangle currentPageSize = PDRectangle.A4; private long pagesCounter = 0; @Override public void before(MergeParameters parameters, TaskExecutionContext executionContext) throws TaskException { super.before(parameters, executionContext); totalSteps = parameters.getInputList().size(); sourceOpener = new DefaultPdfSourceOpener(); outputWriter = OutputWriters.newSingleOutputWriter(parameters.getExistingOutputPolicy(), executionContext); outlineMerger = new OutlineMerger(parameters.getOutlinePolicy()); } @Override public void execute(MergeParameters parameters) throws TaskException { int currentStep = 0; File tmpFile = IOUtils.createTemporaryBuffer(parameters.getOutput()); outputWriter.taskOutput(tmpFile); LOG.debug("Temporary output set to {}", tmpFile); this.destinationDocument = new PDDocumentHandler(); this.destinationDocument.setCreatorOnPDDocument(); this.destinationDocument.setVersionOnPDDocument(parameters.getVersion()); this.destinationDocument.setCompress(parameters.isCompress()); this.acroFormsMerger = new AcroFormsMerger(parameters.getAcroFormPolicy(), this.destinationDocument.getUnderlyingPDDocument()); this.tocCreator = new TableOfContentsCreator(parameters, this.destinationDocument.getUnderlyingPDDocument()); this.footerWriter = new FilenameFooterWriter(parameters.isFilenameFooter(), this.destinationDocument.getUnderlyingPDDocument()); convertImageMergeInputToPdf(parameters); for (PdfMergeInput input : parameters.getPdfInputList()) { LOG.debug("Opening {}", input.getSource()); PDDocumentHandler sourceDocumentHandler = input.getSource().open(sourceOpener); toClose.add(sourceDocumentHandler); LOG.debug("Adding pages"); LookupTable<PDPage> pagesLookup = new LookupTable<>(); long relativeCounter = 0; for (Integer currentPage : input.getPages(sourceDocumentHandler.getNumberOfPages())) { executionContext().assertTaskNotCancelled(); pagesCounter++; relativeCounter++; try { PDPage page = sourceDocumentHandler.getPage(currentPage); // we keep rotation into account currentPageSize = page.getMediaBox().rotate(page.getRotation()); // we don't use the original page because once added to the new tree we loose inheritable attributes // so we use a page duplicate to explicitly assign inheritable resources PDPage importedPage = destinationDocument.importPage(page); pagesLookup.addLookupEntry(page, importedPage); String sourceBaseName = FilenameUtils.getBaseName(input.getSource().getName()); // processing the first page of the source if (tocCreator.shouldGenerateToC() && relativeCounter == 1) { tocCreator.pageSizeIfNotSet(currentPageSize); if (ToCPolicy.DOC_TITLES == parameters.getTableOfContentsPolicy()) { sourceBaseName = ofNullable( sourceDocumentHandler.getUnderlyingPDDocument().getDocumentInformation()) .map(i -> i.getTitle()).filter(StringUtils::isNotBlank) .orElse(sourceBaseName); } tocCreator.appendItem(sourceBaseName, pagesCounter, importedPage); } this.footerWriter.addFooter(importedPage, sourceBaseName, pagesCounter + tocCreator.tocNumberOfPages()); LOG.trace("Added imported page"); } catch (PageNotFoundException e) { executionContext().assertTaskIsLenient(e); notifyEvent(executionContext().notifiableTaskMetadata()) .taskWarning(String.format("Page %d was skipped, could not be processed", currentPage), e); } } relativeCounter = 0; outlineMerger.updateOutline(sourceDocumentHandler.getUnderlyingPDDocument(), input.getSource().getName(), pagesLookup); LookupTable<PDAnnotation> annotationsLookup = new AnnotationsDistiller( sourceDocumentHandler.getUnderlyingPDDocument()).retainRelevantAnnotations(pagesLookup); clipSignatures(annotationsLookup.values()); acroFormsMerger.mergeForm( sourceDocumentHandler.getUnderlyingPDDocument().getDocumentCatalog().getAcroForm(), annotationsLookup); if (parameters.isBlankPageIfOdd()) { ofNullable(destinationDocument.addBlankPageIfOdd(currentPageSize)).ifPresent(p -> pagesCounter++); } notifyEvent(executionContext().notifiableTaskMetadata()).stepsCompleted(++currentStep).outOf(totalSteps); } if (outlineMerger.hasOutline()) { LOG.debug("Adding generated outline"); destinationDocument.setDocumentOutline(outlineMerger.getOutline()); } ofNullable(acroFormsMerger.getForm()).filter(f -> !f.getFields().isEmpty()).ifPresent(f -> { LOG.debug("Adding generated AcroForm"); destinationDocument.setDocumentAcroForm(f); }); if (parameters.isNormalizePageSizes()) { LOG.debug("Normalizing page widths to match width of first page"); // Do this before generating TOC, so the first page is from content. new PdfScaler(ScaleType.PAGE).resizePages(destinationDocument.getUnderlyingPDDocument()); } if (tocCreator.hasToc()) { LOG.debug("Adding generated ToC"); tocCreator.addToC(); } destinationDocument.savePDDocument(tmpFile); closeResources(); parameters.getOutput().accept(outputWriter); LOG.debug("Input documents merged correctly and written to {}", parameters.getOutput()); } private void convertImageMergeInputToPdf(MergeParameters parameters) throws TaskException { // if images were supplied, convert them to PDF List<MergeInput> newInputList = new ArrayList<>(); for (MergeInput input : parameters.getInputList()) { if (input instanceof ImageMergeInput) { // collect all consecutive images and convert them to a PDF document newInputList.add(convertImagesToPdfMergeInput((ImageMergeInput) input)); } else { newInputList.add(input); } } parameters.setInputList(newInputList); } private PdfMergeInput convertImagesToPdfMergeInput(ImageMergeInput image) throws TaskException { List<Source<?>> sources = Collections.singletonList(image.getSource()); PDDocumentHandler converted = new ImagesToPdfDocumentConverter().convert(sources); String basename = FilenameUtils.getBaseName(image.getSource().getName()); String filename = String.format("%s.pdf", basename); File convertedTmpFile = createTemporaryBufferWithName(filename); converted.setDocumentTitle(basename); converted.savePDDocument(convertedTmpFile); return new PdfMergeInput(PdfFileSource.newInstanceNoPassword(convertedTmpFile)); } private void closeResources() { Closeable current; while ((current = toClose.poll()) != null) { nullSafeCloseQuietly(current); } nullSafeCloseQuietly(destinationDocument); } @Override public void after() { closeResources(); outputWriter = null; } }