package org.apache.tika.parser.crypto;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;

import java.io.InputStream;
import java.util.Arrays;
import java.util.List;

import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.apache.tika.sax.BodyContentHandler;
import org.junit.Test;
import org.xml.sax.ContentHandler;

/**
 * Tests for {@link TSDParser} run against the {@code .tsd} fixtures under
 * {@code /test-documents/}.
 *
 * <p>Two things are checked: the metadata entries the parser writes when it is
 * invoked directly, and the shape of the metadata list returned by
 * {@code getRecursiveMetadata} for the same fixtures.
 */
public class TSDParserTest extends TikaTest {

    /** Classpath locations of the fixtures parsed directly by {@link #testTSDFileData()}. */
    private static final List<String> TSD_RESOURCES = Arrays.asList(
            "/test-documents/MANIFEST.XML.TSD",
            "/test-documents/Test1.txt.tsd",
            "/test-documents/Test2.txt.tsd",
            "/test-documents/Test3.docx.tsd",
            "/test-documents/Test4.pdf.tsd",
            "/test-documents/Test5.PNG.tsd");

    /**
     * Parses every fixture in {@link #TSD_RESOURCES} with one shared
     * {@link TSDParser} instance and checks the metadata produced for each.
     *
     * @throws Exception if a fixture cannot be read or parsing fails
     */
    @Test
    public void testTSDFileData() throws Exception {
        // A single parser instance is reused on purpose, so that consecutive
        // parses on the same instance stay covered.
        TSDParser tsdParser = new TSDParser();
        for (String resourcePath : TSD_RESOURCES) {
            assertTsdMetadata(tsdParser, resourcePath);
        }
    }

    @Test
    public void testTSDFileDataRecursiveMetadataXML() throws Exception {
        assertRecursiveMetadata("MANIFEST.XML.TSD");
    }

    @Test
    public void testTSDFileDataRecursiveMetadataTxt1() throws Exception {
        assertRecursiveMetadata("Test1.txt.tsd");
    }

    @Test
    public void testTSDFileDataRecursiveMetadataTxt2() throws Exception {
        assertRecursiveMetadata("Test2.txt.tsd");
    }

    @Test
    public void testTSDFileDataRecursiveMetadataDocx() throws Exception {
        assertRecursiveMetadata("Test3.docx.tsd");
    }

    @Test
    public void testTSDFileDataRecursiveMetadataPdf() throws Exception {
        assertRecursiveMetadata("Test4.pdf.tsd");
    }

    // NOTE(review): this test is deliberately left disabled, exactly as it was
    // before; the reason is not recorded in this file -- confirm before
    // re-enabling.
    // @Test
    public void testTSDFileDataRecursiveMetadataPng() throws Exception {
        assertRecursiveMetadata("Test5.PNG.tsd");
    }

    /**
     * Checks that the embedded file still appears in the metadata list and that
     * the exception raised for it is recorded on its metadata entry.
     *
     * @throws Exception if the fixture cannot be processed
     */
    @Test
    public void testBrokenPdf() throws Exception {
        List<Metadata> list = getRecursiveMetadata("testTSD_broken_pdf.tsd");
        assertEquals(2, list.size());

        // The second entry is the embedded document.
        Metadata embedded = list.get(1);
        assertEquals("application/pdf", embedded.get(Metadata.CONTENT_TYPE));

        String embeddedException = embedded.get(RecursiveParserWrapper.EMBEDDED_EXCEPTION);
        assertNotNull(embeddedException);
        assertContains("org.apache.pdfbox.pdmodel.PDDocument.load", embeddedException);
    }

    /**
     * Parses one classpath resource with the given parser and asserts that the
     * expected description, content type and parsed flag appear in the
     * metadata's string form.
     *
     * @param tsdParser    parser under test
     * @param resourcePath absolute classpath location of the {@code .tsd} fixture
     * @throws Exception if the resource cannot be read or parsing fails
     */
    private void assertTsdMetadata(TSDParser tsdParser, String resourcePath) throws Exception {
        try (InputStream input = TSDParserTest.class.getResourceAsStream(resourcePath)) {
            // Fail with a readable message when a fixture is missing instead of
            // handing a null stream to the parser.
            assertNotNull("Missing test resource: " + resourcePath, input);

            ContentHandler handler = new BodyContentHandler();
            Metadata metadata = new Metadata();
            tsdParser.parse(input, handler, metadata, new ParseContext());

            String metadataDump = metadata.toString();
            assertContains("Description=Time Stamped Data Envelope", metadataDump);
            assertContains("Content-Type=application/timestamped-data", metadataDump);
            assertContains("File-Parsed=true", metadataDump);
        }
    }

    /**
     * Fetches the recursive metadata for a fixture and asserts that it holds
     * exactly two entries, the first of which lists {@link TSDParser} among its
     * {@code X-Parsed-By} values.
     *
     * @param fileName fixture name as accepted by {@code getRecursiveMetadata}
     * @throws Exception if the fixture cannot be processed
     */
    private void assertRecursiveMetadata(String fileName) throws Exception {
        List<Metadata> list = getRecursiveMetadata(fileName);
        assertEquals(2, list.size());
        assertContains(TSDParser.class.getName(),
                Arrays.asList(list.get(0).getValues("X-Parsed-By")));
    }
}