/*
* Created on 04/set/2015
* Copyright 2015 by Andrea Vacondio (andrea.vacondio@gmail.com).
* This file is part of Sejda.
*
* Sejda is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Sejda is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Sejda. If not, see <http://www.gnu.org/licenses/>.
*/
package org.sejda.impl.sambox;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.Matchers.containsString;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.awt.Rectangle;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.junit.Test;
import org.sejda.core.TestListenerFactory;
import org.sejda.core.TestListenerFactory.TestListenerFailed;
import org.sejda.core.notification.context.ThreadLocalNotificationContext;
import org.sejda.core.service.BaseTaskTest;
import org.sejda.impl.sambox.component.PdfTextExtractorByArea;
import org.sejda.model.exception.TaskIOException;
import org.sejda.model.input.ImageMergeInput;
import org.sejda.model.input.PdfMergeInput;
import org.sejda.model.outline.OutlinePolicy;
import org.sejda.model.output.ExistingOutputPolicy;
import org.sejda.model.parameter.MergeParameters;
import org.sejda.model.pdf.PdfVersion;
import org.sejda.model.pdf.form.AcroFormPolicy;
import org.sejda.model.pdf.page.PageRange;
import org.sejda.model.task.Task;
import org.sejda.model.toc.ToCPolicy;
import org.sejda.sambox.pdmodel.PDDocument;
import org.sejda.sambox.pdmodel.PDPage;
import org.sejda.sambox.pdmodel.common.PDRectangle;
import org.sejda.sambox.text.PDFTextStripperByArea;
/**
* @author Andrea Vacondio
*
*/
public class MergeSamboxTaskTest extends BaseTaskTest<MergeParameters> {
@Override
public Task<MergeParameters> getTask() {
return new MergeTask();
}
private MergeParameters setUpParameters(List<PdfMergeInput> input) {
MergeParameters parameters = new MergeParameters();
parameters.setExistingOutputPolicy(ExistingOutputPolicy.OVERWRITE);
parameters.setCompress(false);
parameters.setVersion(PdfVersion.VERSION_1_6);
for (PdfMergeInput current : input) {
parameters.addInput(current);
}
parameters.setOutlinePolicy(OutlinePolicy.RETAIN);
return parameters;
}
private List<PdfMergeInput> getInputWithOutline() {
List<PdfMergeInput> input = new ArrayList<PdfMergeInput>();
input.add(new PdfMergeInput(largeOutlineInput()));
input.add(new PdfMergeInput(largeInput()));
return input;
}
private List<PdfMergeInput> getInputWithEncrypted() {
List<PdfMergeInput> input = new ArrayList<PdfMergeInput>();
input.add(new PdfMergeInput(stronglyEncryptedInput()));
input.add(new PdfMergeInput(largeInput()));
return input;
}
private List<PdfMergeInput> getInput() {
List<PdfMergeInput> input = new ArrayList<PdfMergeInput>();
input.add(new PdfMergeInput(regularInput()));
input.add(new PdfMergeInput(customInput("pdf/attachments_as_annots.pdf", "attachments_as_annots.pdf")));
return input;
}
@Test
public void executeMergeAllWithOutlineRetainingOutline() throws IOException {
doExecuteMergeAll(true, 311, setUpParameters(getInputWithOutline()));
}
@Test
public void executeMergeAllWithEncryptedRetainingOutline() throws IOException {
doExecuteMergeAll(true, 310, setUpParameters(getInputWithEncrypted()));
}
@Test
public void executeMergeAllRetainingOutline() throws IOException {
doExecuteMergeAll(false, 14, setUpParameters(getInput()));
}
@Test
public void executeMergeAllRetainingOutlineTocNames() throws IOException {
MergeParameters parameters = setUpParameters(getInput());
parameters.addInput(new PdfMergeInput(customInput("pdf/with_meta.pdf")));
parameters.setTableOfContentsPolicy(ToCPolicy.FILE_NAMES);
doExecuteMergeAll(false, 19, parameters);
}
@Test
public void executeMergeRotatedTocPage() throws IOException {
MergeParameters parameters = new MergeParameters();
parameters.setExistingOutputPolicy(ExistingOutputPolicy.OVERWRITE);
parameters.setCompress(false);
parameters.setVersion(PdfVersion.VERSION_1_6);
parameters.setOutlinePolicy(OutlinePolicy.RETAIN);
parameters.addInput(new PdfMergeInput(customInput("pdf/rotated_pages.pdf", "name.pdf")));
parameters.setTableOfContentsPolicy(ToCPolicy.DOC_TITLES);
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.forPdfOutput(d -> {
assertEquals(new PDRectangle(0, 0, 595, 842).rotate(90).toString(), d.getPage(0).getMediaBox().toString());
});
}
@Test
public void executeMergeAllRetainingOutlineTocTitles() throws IOException {
MergeParameters parameters = setUpParameters(getInput());
parameters.addInput(new PdfMergeInput(customInput("pdf/with_meta.pdf")));
parameters.setTableOfContentsPolicy(ToCPolicy.DOC_TITLES);
doExecuteMergeAll(false, 19, parameters);
}
@Test
public void executeMergeAllRetainingOutlineTocNamesUTF() throws IOException {
MergeParameters parameters = setUpParameters(getInput());
parameters.addInput(new PdfMergeInput(customInput("pdf/with_meta.pdf", "αυτό είναι ένα τεστ.pdf")));
parameters.setTableOfContentsPolicy(ToCPolicy.FILE_NAMES);
doExecuteMergeAll(false, 19, parameters);
}
@Test
public void executeMergeAllRetainingOutlineTocNamesUTFThaiAndHindi() throws IOException {
MergeParameters parameters = setUpParameters(getInput());
parameters.addInput(new PdfMergeInput(customInput("pdf/with_meta.pdf", "นี่คือการทดสอบ.pdf")));
parameters.addInput(new PdfMergeInput(customInput("pdf/with_meta.pdf", "यह एक परीक्षण है.pdf")));
parameters.setTableOfContentsPolicy(ToCPolicy.FILE_NAMES);
parameters.setFilenameFooter(true);
doExecuteMergeAll(false, 23, parameters);
}
@Test
public void executeMergeAllWithOutlineDiscardingOutline() throws IOException {
MergeParameters parameters = setUpParameters(getInputWithOutline());
parameters.setOutlinePolicy(OutlinePolicy.DISCARD);
doExecuteMergeAll(false, 311, parameters);
}
@Test
public void executeMergeAllDiscardingOutline() throws IOException {
MergeParameters parameters = setUpParameters(getInput());
parameters.setOutlinePolicy(OutlinePolicy.DISCARD);
doExecuteMergeAll(false, 14, parameters);
}
@Test
public void executeMergeAllWithEncryptedDiscardingOutline() throws IOException {
MergeParameters parameters = setUpParameters(getInputWithEncrypted());
parameters.setOutlinePolicy(OutlinePolicy.DISCARD);
doExecuteMergeAll(false, 310, parameters);
}
@Test
public void executeMergeAllWithOutlineOnePerDoc() throws IOException {
MergeParameters parameters = setUpParameters(getInputWithOutline());
parameters.setOutlinePolicy(OutlinePolicy.ONE_ENTRY_EACH_DOC);
doExecuteMergeAll(true, 311, parameters);
}
@Test
public void executeMergeAllOnePerDoc() throws IOException {
MergeParameters parameters = setUpParameters(getInput());
parameters.setOutlinePolicy(OutlinePolicy.ONE_ENTRY_EACH_DOC);
doExecuteMergeAll(true, 14, parameters);
}
@Test
public void executeMergeAllWithEncryptedOnePerDoc() throws IOException {
MergeParameters parameters = setUpParameters(getInputWithEncrypted());
parameters.setOutlinePolicy(OutlinePolicy.ONE_ENTRY_EACH_DOC);
doExecuteMergeAll(true, 310, parameters);
}
void doExecuteMergeAll(boolean hasBookmarks, int pages, MergeParameters parameters) throws IOException {
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertCreator().assertVersion(PdfVersion.VERSION_1_6).assertPages(pages)
.assertHasOutline(hasBookmarks);
}
@Test
public void testExecuteMergeAllFields() throws IOException {
MergeParameters parameters = setUpParameters(getInputWithOutline());
parameters.addInput(new PdfMergeInput(customInput("pdf/forms/simple_form.pdf")));
parameters.setOutlinePolicy(OutlinePolicy.DISCARD);
parameters.setAcroFormPolicy(AcroFormPolicy.MERGE);
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertCreator().assertPages(312).assertVersion(PdfVersion.VERSION_1_6).assertHasOutline(false)
.assertHasAcroforms(true);
}
@Test
public void testExecuteMergeDiscardForms() throws IOException {
MergeParameters parameters = setUpParameters(getInputWithOutline());
parameters.addInput(new PdfMergeInput(customInput("pdf/forms/simple_form.pdf")));
parameters.setOutlinePolicy(OutlinePolicy.DISCARD);
parameters.setAcroFormPolicy(AcroFormPolicy.DISCARD);
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertCreator().assertPages(312).assertVersion(PdfVersion.VERSION_1_6).assertHasOutline(false)
.assertHasAcroforms(false);
}
@Test
public void testExecuteMergeFlattenForms() throws IOException {
MergeParameters parameters = setUpParameters(getInputWithOutline());
parameters.addInput(new PdfMergeInput(customInput("pdf/forms/simple_form.pdf")));
parameters.setOutlinePolicy(OutlinePolicy.DISCARD);
parameters.setAcroFormPolicy(AcroFormPolicy.FLATTEN);
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertCreator().assertPages(312).assertVersion(PdfVersion.VERSION_1_6).assertHasOutline(false)
.assertHasAcroforms(false);
}
@Test
public void testExecuteMergeFlattenFormsWithUnicodeValues() throws IOException {
MergeParameters parameters = setUpParameters(
Collections.singletonList(new PdfMergeInput(customInput("pdf/forms/simple_form_unicode_values.pdf"))));
parameters.setAcroFormPolicy(AcroFormPolicy.FLATTEN);
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertPages(1).assertHasAcroforms(false);
testContext.forEachPdfOutput(doc -> {
assertPageTextContains(doc.getPage(0), "ጩ");
});
}
@Test
public void executeMergeRangesMergeForms() throws IOException {
MergeParameters parameters = setUpParameters(getInputWithOutline());
for (PdfMergeInput input : parameters.getPdfInputList()) {
input.addPageRange(new PageRange(3, 10));
input.addPageRange(new PageRange(20, 23));
input.addPageRange(new PageRange(80, 90));
}
parameters.setAcroFormPolicy(AcroFormPolicy.MERGE);
parameters.addInput(new PdfMergeInput(customInput("pdf/forms/simple_form.pdf")));
doExecuteMergeRanges(parameters);
}
@Test
public void executeMergeRanges() throws IOException {
MergeParameters parameters = setUpParameters(getInputWithOutline());
for (PdfMergeInput input : parameters.getPdfInputList()) {
input.addPageRange(new PageRange(3, 10));
input.addPageRange(new PageRange(20, 23));
input.addPageRange(new PageRange(80, 90));
}
parameters.setAcroFormPolicy(AcroFormPolicy.DISCARD);
parameters.addInput(new PdfMergeInput(customInput("pdf/forms/simple_form.pdf")));
doExecuteMergeRanges(parameters);
}
public void doExecuteMergeRanges(MergeParameters parameters) throws IOException {
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertCreator().assertPages(27).assertVersion(PdfVersion.VERSION_1_6)
.assertOutlineContains("Bookmark27").assertOutlineDoesntContain("Bookmark1");
}
@Test
public void testExecuteMergeRangesWithBlankPage() throws IOException {
MergeParameters parameters = setUpParameters(getInputWithOutline());
testContext.pdfOutputTo(parameters);
for (PdfMergeInput input : parameters.getPdfInputList()) {
input.addPageRange(new PageRange(2, 4));
}
parameters.setBlankPageIfOdd(true);
execute(parameters);
PDDocument document = testContext.assertTaskCompleted();
testContext.assertCreator().assertPages(8).assertVersion(PdfVersion.VERSION_1_6);
assertEquals(document.getPage(2).getCropBox().getWidth(), document.getPage(3).getCropBox().getWidth(), 0);
assertEquals(document.getPage(2).getCropBox().getHeight(), document.getPage(3).getCropBox().getHeight(), 0);
assertEquals(document.getPage(6).getCropBox().getWidth(), document.getPage(7).getCropBox().getWidth(), 0);
assertEquals(document.getPage(6).getCropBox().getHeight(), document.getPage(7).getCropBox().getHeight(), 0);
}
@Test
public void testExecuteMergeRangesWithBlankPagesAndToc() throws IOException {
MergeParameters parameters = setUpParameters(getInputWithOutline());
testContext.pdfOutputTo(parameters);
for (PdfMergeInput input : parameters.getPdfInputList()) {
input.addPageRange(new PageRange(2, 4));
}
parameters.setBlankPageIfOdd(true);
parameters.setTableOfContentsPolicy(ToCPolicy.FILE_NAMES);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertCreator().assertPages(10);
}
@Test
public void testExecuteMergeRangesWithFlattenForms() throws IOException {
List<PdfMergeInput> inputs = new ArrayList<PdfMergeInput>();
inputs.add(new PdfMergeInput(customInput("pdf/forms/simple_form_with_values.pdf")));
MergeParameters parameters = setUpParameters(inputs);
parameters.setAcroFormPolicy(AcroFormPolicy.FLATTEN);
testContext.pdfOutputTo(parameters);
execute(parameters);
PDDocument document = testContext.assertTaskCompleted();
PDPage page = document.getPage(0);
PDFTextStripperByArea stripper = new PDFTextStripperByArea();
stripper.addRegion("completePage",
new Rectangle((int) page.getCropBox().getWidth(), (int) page.getCropBox().getHeight()));
stripper.extractRegions(page);
String pageText = stripper.getTextForRegion("completePage");
assertThat(pageText, containsString("TextFieldValue"));
}
@Test
public void executeMergeMissingPageNonLenient() throws IOException {
MergeParameters parameters = new MergeParameters();
parameters.setExistingOutputPolicy(ExistingOutputPolicy.OVERWRITE);
parameters.setCompress(false);
parameters.setVersion(PdfVersion.VERSION_1_6);
parameters.setOutlinePolicy(OutlinePolicy.RETAIN);
parameters.addInput(new PdfMergeInput(customInput("pdf/missing_page_ref.pdf", "name.pdf")));
testContext.pdfOutputTo(parameters);
TestListenerFailed failListener = TestListenerFactory.newFailedListener();
ThreadLocalNotificationContext.getContext().addListener(failListener);
execute(parameters);
assertTrue(failListener.isFailed());
}
@Test
public void executeMergeMissingPageLenient() throws IOException {
MergeParameters parameters = new MergeParameters();
parameters.setExistingOutputPolicy(ExistingOutputPolicy.OVERWRITE);
parameters.setCompress(false);
parameters.setLenient(true);
parameters.setVersion(PdfVersion.VERSION_1_6);
parameters.setOutlinePolicy(OutlinePolicy.RETAIN);
parameters.addInput(new PdfMergeInput(customInput("pdf/missing_page_ref.pdf", "name.pdf")));
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertCreator().assertPages(3).assertVersion(PdfVersion.VERSION_1_6);
}
@Test
public void normalizePageSizes_FirstPagePortrait() throws IOException {
MergeParameters parameters = new MergeParameters();
parameters.setExistingOutputPolicy(ExistingOutputPolicy.OVERWRITE);
parameters.addInput(new PdfMergeInput(customInput("pdf/A4Portrait.pdf")));
parameters.addInput(new PdfMergeInput(customInput("pdf/A3Landscape.pdf")));
parameters.addInput(new PdfMergeInput(customInput("pdf/A3Portrait.pdf")));
parameters.setNormalizePageSizes(true);
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertPages(3).forEachPdfOutput(d -> {
assertEquals(595, widthOfCropBox(d.getPage(0)), 1);
// landscape should be handled in a special case
assertEquals(595, heightOfCropBox(d.getPage(1)), 1);
assertEquals(840, widthOfCropBox(d.getPage(1)), 1);
assertEquals(595, widthOfCropBox(d.getPage(2)), 1);
});
}
@Test
public void normalizePageSizes_FirstPageLandscape() throws IOException {
MergeParameters parameters = new MergeParameters();
parameters.setExistingOutputPolicy(ExistingOutputPolicy.OVERWRITE);
parameters.addInput(new PdfMergeInput(customInput("pdf/A3Landscape.pdf")));
parameters.addInput(new PdfMergeInput(customInput("pdf/A4Portrait.pdf")));
parameters.addInput(new PdfMergeInput(customInput("pdf/A3Portrait.pdf")));
parameters.setNormalizePageSizes(true);
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertPages(3).forEachPdfOutput(d -> {
assertEquals(1190, widthOfCropBox(d.getPage(0)), 1);
// landscape should be handled in a special case
assertEquals(1190, heightOfCropBox(d.getPage(1)), 1);
assertEquals(841, widthOfCropBox(d.getPage(1)), 1);
assertEquals(841, widthOfCropBox(d.getPage(2)), 1);
});
}
@Test
public void pageFooter() throws IOException {
MergeParameters parameters = setUpParameters(getInput());
parameters.setTableOfContentsPolicy(ToCPolicy.NONE);
parameters.setFilenameFooter(true);
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertPages(14).forEachPdfOutput(d -> {
assertFooterHasText(d.getPage(0), "test-file 1");
assertFooterHasText(d.getPage(11), "attachments_as_annots 12");
});
}
@Test
public void pageFooterAndToc() throws IOException {
MergeParameters parameters = setUpParameters(getInput());
parameters.setTableOfContentsPolicy(ToCPolicy.FILE_NAMES);
parameters.setFilenameFooter(true);
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertPages(15).forEachPdfOutput(d -> {
assertFooterHasText(d.getPage(1), "test-file 2");
assertFooterHasText(d.getPage(12), "attachments_as_annots 13");
});
}
@Test
public void pageFooterAndTocAddBlank() throws IOException {
MergeParameters parameters = setUpParameters(getInput());
parameters.setTableOfContentsPolicy(ToCPolicy.FILE_NAMES);
parameters.setFilenameFooter(true);
parameters.setBlankPageIfOdd(true);
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertPages(18).forEachPdfOutput(d -> {
try {
assertFalse(isBlankPage(d.getPage(0)));
assertTrue(isBlankPage(d.getPage(1)));
assertFooterHasText(d.getPage(2), "test-file 3");
assertFooterHasText(d.getPage(15), "attachments_as_annots 16");
} catch (TaskIOException e) {
fail(e.getMessage());
}
});
}
@Test
public void mergeImagesAndPdfs() throws IOException {
MergeParameters parameters = new MergeParameters();
parameters.setExistingOutputPolicy(ExistingOutputPolicy.OVERWRITE);
parameters.addInput(new ImageMergeInput(customNonPdfInput("image/draft.png")));
parameters.addInput(new PdfMergeInput(customInput("pdf/test-pdf.pdf")));
parameters.addInput(new ImageMergeInput(customNonPdfInput("image/draft.png")));
parameters.addInput(new ImageMergeInput(customNonPdfInput("image/large.jpg")));
parameters.addInput(new ImageMergeInput(customNonPdfInput("image/draft.tiff")));
parameters.addInput(new PdfMergeInput(customInput("pdf/test-pdf.pdf")));
parameters.addInput(new ImageMergeInput(customNonPdfInput("image/draft.png")));
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertPages(1 + 11 + 3 + 11 + 1).forEachPdfOutput(d -> {
assertEquals(Arrays.asList(1, 13, 14, 15, 27), getPagesContainingImages(d));
});
}
@Test
public void mergeImagesWithTocAndFooter() throws IOException {
MergeParameters parameters = new MergeParameters();
parameters.setExistingOutputPolicy(ExistingOutputPolicy.OVERWRITE);
parameters.addInput(new ImageMergeInput(customNonPdfInput("image/draft.png", "draft.png")));
parameters.addInput(new ImageMergeInput(customNonPdfInput("image/large.jpg", "large.png")));
parameters.addInput(new ImageMergeInput(customNonPdfInput("image/draft.tiff", "draft.tiff")));
parameters.setTableOfContentsPolicy(ToCPolicy.DOC_TITLES);
parameters.setFilenameFooter(true);
testContext.pdfOutputTo(parameters);
execute(parameters);
testContext.assertTaskCompleted();
testContext.assertPages(1 + 3).forEachPdfOutput(d -> {
assertPageText(d.getPage(0), "draft2large3draft4");
assertFooterHasText(d.getPage(1), "draft 2");
assertFooterHasText(d.getPage(2), "large 3");
assertFooterHasText(d.getPage(3), "draft 4");
});
}
private float widthOfCropBox(PDPage page) {
return page.getCropBox().getWidth();
}
private float heightOfCropBox(PDPage page) {
return page.getCropBox().getHeight();
}
private void assertFooterHasText(PDPage page, String expectedText) {
try {
assertThat(new PdfTextExtractorByArea().extractFooterText(page).trim(), is(expectedText));
} catch (TaskIOException e) {
fail(e.getMessage());
}
}
private boolean isBlankPage(PDPage page) throws TaskIOException {
return StringUtils.isBlank(new PdfTextExtractorByArea()
.extractTextFromArea(page,
new Rectangle(0, 0, (int) page.getTrimBox().getWidth(), (int) page.getTrimBox().getHeight()))
.trim());
}
}