/* * ModeShape (http://www.modeshape.org) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.modeshape.sequencer.pdf; import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertThat; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.ATTACHMENT_NODE; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.AUTHOR; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.CREATION_DATE; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.CREATOR; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.ENCRYPTED; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.KEYWORDS; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.MODIFICATION_DATE; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.NAME; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.ORIENTATION; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.PAGE_COUNT; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.PAGE_NODE; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.PAGE_NUMBER; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.PRODUCER; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.SUBJECT; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.TITLE; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.VERSION; import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.XMP_NODE; import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.BASE_URL; import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.CREATE_DATE; import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.CREATOR_TOOL; import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.IDENTIFIER; import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.LABEL; import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.METADATA_DATE; import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.MODIFY_DATE; import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.NICKNAME; import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.RATING; import java.util.Calendar; import javax.jcr.Node; import org.junit.Test; import org.modeshape.jcr.api.JcrConstants; import org.modeshape.jcr.sequencer.AbstractSequencerTest; /** * Unit test for {@link PdfMetadataSequencer}. */ public class PdfMetadataSequencerTest extends AbstractSequencerTest { @Test public void shouldSequenceBasicMetadata() throws Exception { // GIVEN String filename = "sample.pdf"; // WHEN createNodeWithContentFromFile(filename, filename); // THEN Node sequencedNode = getOutputNode(rootNode, "sequenced/pdf/" + filename); assertThat(sequencedNode.getProperty(JcrConstants.JCR_MIME_TYPE).getString(), is("application/pdf")); assertThat(sequencedNode.getProperty(PAGE_COUNT).getLong(), is(2L)); assertThat(sequencedNode.getProperty(ORIENTATION).getString(), is("portrait")); assertThat(sequencedNode.getProperty(ENCRYPTED).getBoolean(), is(false)); assertThat(sequencedNode.getProperty(VERSION).getString(), is("1.4")); assertThat(sequencedNode.getProperty(AUTHOR).getString(), is("Author")); assertThat(sequencedNode.getProperty(CREATOR).getString(), is("Creator")); assertThat(sequencedNode.getProperty(KEYWORDS).getString(), is("Keywords")); assertThat(sequencedNode.getProperty(PRODUCER).getString(), is("Producer")); assertThat(sequencedNode.getProperty(SUBJECT).getString(), is("Subject")); assertThat(sequencedNode.getProperty(TITLE).getString(), is("Title")); Node pageNode = sequencedNode.getNode(PAGE_NODE); assertThat(pageNode.getProperty(PAGE_NUMBER).getLong(), is(1L)); } @Test public void shouldSequenceXMPMetadata() throws Exception { // GIVEN String filename = "sample.pdf"; // WHEN createNodeWithContentFromFile(filename, filename); // THEN Node sequencedNode = getOutputNode(rootNode, "sequenced/pdf/" + filename); assertThat(sequencedNode.hasNode(XMP_NODE), is(true)); Node xmpNode = sequencedNode.getNode(XMP_NODE); assertThat(xmpNode.getProperty(BASE_URL).getString(), is("BaseURL")); assertThat(xmpNode.getProperty(CREATOR_TOOL).getString(), is("Creator Tool")); assertThat(xmpNode.getProperty(RATING).getLong(), is(0L)); assertThat(xmpNode.getProperty(IDENTIFIER).getValues()[0].getString(), is("Identifier")); assertThat(xmpNode.getProperty(IDENTIFIER).getValues()[1].getString(), is("Second identifier")); assertThat(xmpNode.getProperty(CREATE_DATE).getDate().get(Calendar.YEAR), is(2000)); assertThat(xmpNode.getProperty(METADATA_DATE).getDate().get(Calendar.YEAR), is(2005)); assertThat(xmpNode.getProperty(MODIFY_DATE).getDate().get(Calendar.YEAR), is(2010)); assertThat(xmpNode.getProperty(NICKNAME).getString(), is("Nickname")); assertThat(xmpNode.getProperty(LABEL).getString(), is("Label")); } @Test public void shouldNotSequenceEncryptedPdf() throws Exception { // GIVEN String filename = "sample-encrypted.pdf"; // WHEN createNodeWithContentFromFile(filename, filename); // as of MODE-2648 and PdfBox 2.x encrypted PDFs are not parseable since the API and dependencies have changed // it turns out that prior to 2.x some basic metadata was still available; this is not the case anymore Thread.sleep(100); assertNoNode("/sequenced/pdf/" + filename); } @Test public void shouldSequencePDFWithAttachments() throws Exception { // GIVEN String filename = "attachments.pdf"; // WHEN createNodeWithContentFromFile(filename, filename); // THEN Node sequencedNode = getOutputNode(rootNode, "sequenced/pdf/" + filename); assertThat(sequencedNode.hasNode(PAGE_NODE), is(true)); assertThat(sequencedNode.hasNode(PAGE_NODE + "[2]"), is(true)); Node firstPageNode = sequencedNode.getNode(PAGE_NODE); assertThat(firstPageNode.getProperty(PAGE_NUMBER).getLong(), is(1L)); assertThat(firstPageNode.hasNode(ATTACHMENT_NODE), is(true)); Node firstAttachmentNode = firstPageNode.getNode(ATTACHMENT_NODE); assertThat(firstAttachmentNode.getProperty(NAME).getString(), is("redhat-icon.jpg")); assertThat(firstAttachmentNode.getProperty(SUBJECT).getString(), is("Subject")); assertThat(firstAttachmentNode.getProperty(CREATION_DATE).getDate().get(Calendar.YEAR), is(2016)); assertThat(firstAttachmentNode.getProperty(MODIFICATION_DATE).getDate().get(Calendar.YEAR), is(2016)); assertThat(firstAttachmentNode.getProperty(JcrConstants.JCR_MIME_TYPE).getString(), is("image/jpeg")); assertThat(firstAttachmentNode.getProperty(JcrConstants.JCR_DATA).getBinary().getSize(), is(820L)); Node secondPageNode = sequencedNode.getNode(PAGE_NODE + "[2]"); assertThat(secondPageNode.getProperty(PAGE_NUMBER).getLong(), is(2L)); assertThat(secondPageNode.hasNode(ATTACHMENT_NODE), is(true)); Node secondAttachmentNode = secondPageNode.getNode(ATTACHMENT_NODE); assertThat(secondAttachmentNode.getProperty(NAME).getString(), is("linux.mp3")); assertThat(secondAttachmentNode.getProperty(SUBJECT).getString(), is("Subject")); assertThat(secondAttachmentNode.getProperty(CREATION_DATE).getDate().get(Calendar.YEAR), is(2016)); assertThat(secondAttachmentNode.getProperty(MODIFICATION_DATE).getDate().get(Calendar.YEAR), is(2016)); assertThat(secondAttachmentNode.getProperty(JcrConstants.JCR_MIME_TYPE).getString(), is("audio/mpeg")); assertThat(secondAttachmentNode.getProperty(JcrConstants.JCR_DATA).getBinary().getSize(), is(82969L)); } }