/* * Created on 24/ago/2015 * Copyright 2015 by Andrea Vacondio (andrea.vacondio@gmail.com). * This file is part of Sejda. * * Sejda is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Sejda is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with Sejda. If not, see <http://www.gnu.org/licenses/>. */ package org.sejda.impl.sambox.component.split; import java.io.IOException; import java.util.function.Supplier; import org.sejda.core.support.prefix.model.NameGenerationRequest; import org.sejda.impl.sambox.component.PagesExtractor; import org.sejda.model.exception.TaskExecutionException; import org.sejda.model.exception.TaskIOException; import org.sejda.model.parameter.SplitBySizeParameters; import org.sejda.model.split.NextOutputStrategy; import org.sejda.sambox.output.ExistingPagesSizePredictor; import org.sejda.sambox.output.WriteOption; import org.sejda.sambox.pdmodel.PDDocument; import org.sejda.util.IOUtils; /** * Splitter implementation that tries to split a document at roughly a given size * * @author Andrea Vacondio */ public class SizePdfSplitter extends AbstractPdfSplitter<SplitBySizeParameters> { private static final WriteOption[] COMPRESSED_OPTS = new WriteOption[] { WriteOption.COMPRESS_STREAMS, WriteOption.XREF_STREAM }; private static final int PDF_HEADER_SIZE = 15; // euristic trailer ID size private static final int ID_VALUE_SIZE = 70; // euristic overhead per page (ex. page ref in the page tree) private static final int PAGE_OVERHEAD = 10; private OutputSizeStrategy nextOutputStrategy; public SizePdfSplitter(PDDocument document, SplitBySizeParameters parameters, boolean optimize) { super(document, parameters, optimize, parameters.discardOutline()); this.nextOutputStrategy = new OutputSizeStrategy(document, parameters, optimize); } @Override NameGenerationRequest enrichNameGenerationRequest(NameGenerationRequest request) { return request; } @Override NextOutputStrategy nextOutputStrategy() { return nextOutputStrategy; } @Override protected void onOpen(int page) throws TaskIOException { nextOutputStrategy.newPredictor(); nextOutputStrategy.addPage(page); } @Override protected void onRetain(int page) throws TaskIOException { nextOutputStrategy.addPage(page + 1); } @Override protected void onClose(int page) { nextOutputStrategy.closePredictor(); } @Override protected PagesExtractor supplyPagesExtractor(PDDocument document) { return new PagesExtractor(document) { @Override public void setCompress(boolean compress) { if (compress) { destinationDocument().addWriteOption(COMPRESSED_OPTS); } else { destinationDocument().removeWriteOption(COMPRESSED_OPTS); } } }; } static class OutputSizeStrategy implements NextOutputStrategy { private long sizeLimit; private PDDocument document; private ExistingPagesSizePredictor predictor; private Supplier<ExistingPagesSizePredictor> predictorSupplier = () -> { return ExistingPagesSizePredictor.instance(); }; private PageCopier copier; OutputSizeStrategy(PDDocument document, SplitBySizeParameters parameters, boolean optimize) { this.sizeLimit = parameters.getSizeToSplitAt(); this.document = document; this.copier = new PageCopier(optimize); if (parameters.isCompress()) { predictorSupplier = () -> { return ExistingPagesSizePredictor.instance(WriteOption.COMPRESS_STREAMS, WriteOption.XREF_STREAM); }; } } public void newPredictor() throws TaskIOException { try { predictor = predictorSupplier.get(); predictor.addIndirectReferenceFor(document.getDocumentInformation()); predictor.addIndirectReferenceFor(document.getDocumentCatalog().getViewerPreferences()); } catch (IOException e) { throw new TaskIOException("Unable to initialize the pages size predictor", e); } } public void addPage(int page) throws TaskIOException { try { if (page <= document.getNumberOfPages()) { predictor.addPage(copier.copyOf(document.getPage(page - 1))); } } catch (IOException e) { throw new TaskIOException("Unable to simulate page " + page + " addition", e); } } public void closePredictor() { IOUtils.closeQuietly(predictor); this.predictor = null; } @Override public void ensureIsValid() throws TaskExecutionException { if (sizeLimit < 1) { throw new TaskExecutionException( String.format("Unable to split at %d, a positive size is required.", sizeLimit)); } } @Override public boolean isOpening(Integer page) { return predictor == null || !predictor.hasPages(); } @Override public boolean isClosing(Integer page) throws TaskIOException { try { long currentPageSize = predictor.predictedPagesSize(); return (PDF_HEADER_SIZE + ID_VALUE_SIZE + currentPageSize + predictor.predictedXrefTableSize() + documentFooterSize(currentPageSize) + (predictor.pages() * PAGE_OVERHEAD)) > sizeLimit; } catch (IOException e) { throw new TaskIOException("Unable to simulate page " + page + " addition", e); } } private int documentFooterSize(long documentSize) { // startxref + %%EOF + few EOL return 17 + Long.toString(documentSize).length(); } } }