/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.media;

import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

/**
 * Unit tests for {@link ExtractMediaMetadata}.
 *
 * <p>Each test drives the processor through a {@link TestRunner}, enqueues either literal bytes or a
 * fixture file from {@code target/test-classes/}, and checks the relationship the flow file was routed
 * to together with the metadata attributes written onto it.</p>
 */
public class TestExtractMediaMetadata {

    /** Directory (relative to the working directory) the fixture files are read from. */
    private static final String TEST_RESOURCE_DIR = "target/test-classes/";

    /** Suffix of the attribute that lists the parsers which handled the content. */
    private static final String PARSED_BY = "X-Parsed-By";

    private static final String DEFAULT_PARSER = "org.apache.tika.parser.DefaultParser";
    private static final String TXT_PARSER = "org.apache.tika.parser.txt.TXTParser";

    /**
     * Creates a runner for a fresh {@link ExtractMediaMetadata} and asserts that its configuration is valid.
     *
     * @param keyFilter value for {@code METADATA_KEY_FILTER}, or {@code null} to leave the property unset
     * @param keyPrefix value for {@code METADATA_KEY_PREFIX}, or {@code null} to leave the property unset
     * @return the configured, validated runner
     */
    private static TestRunner newRunner(final String keyFilter, final String keyPrefix) {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractMediaMetadata());
        if (keyFilter != null) {
            runner.setProperty(ExtractMediaMetadata.METADATA_KEY_FILTER, keyFilter);
        }
        if (keyPrefix != null) {
            runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX, keyPrefix);
        }
        runner.assertValid();
        return runner;
    }

    /**
     * Resolves a fixture file name against {@link #TEST_RESOURCE_DIR}.
     *
     * @param fileName simple file name of the fixture
     * @return the fixture as a {@link File}
     */
    private static File testResource(final String fileName) {
        return new File(TEST_RESOURCE_DIR + fileName);
    }

    /**
     * Asserts that exactly one flow file was routed to {@code SUCCESS} and none to {@code FAILURE}.
     *
     * @param runner the runner that has already been run
     * @return the single flow file routed to {@code SUCCESS}
     */
    private static MockFlowFile assertSingleSuccess(final TestRunner runner) {
        runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS, 1);
        runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
        final List<MockFlowFile> successFiles = runner.getFlowFilesForRelationship(ExtractMediaMetadata.SUCCESS);
        return successFiles.get(0);
    }

    /**
     * Asserts that the {@code filename} attribute exists and has the expected value.
     *
     * @param flowFile the flow file to inspect
     * @param expected the expected file name
     */
    private static void assertFilename(final MockFlowFile flowFile, final String expected) {
        flowFile.assertAttributeExists("filename");
        flowFile.assertAttributeEquals("filename", expected);
    }

    /**
     * Asserts that an attribute exists and that its value starts with the given text.
     *
     * @param flowFile       the flow file to inspect
     * @param attributeName  full attribute name, including any key prefix
     * @param expectedPrefix text the attribute value must start with
     */
    private static void assertAttributeStartsWith(final MockFlowFile flowFile, final String attributeName, final String expectedPrefix) {
        flowFile.assertAttributeExists(attributeName);
        final String value = flowFile.getAttribute(attributeName);
        assertTrue(attributeName + " expected to start with '" + expectedPrefix + "' but was '" + value + "'",
                value.startsWith(expectedPrefix));
    }

    /**
     * Asserts that the {@code X-Parsed-By} attribute exists under the given key prefix and mentions every
     * listed parser class name.
     *
     * @param flowFile         the flow file to inspect
     * @param keyPrefix        metadata key prefix the runner was configured with (may be empty)
     * @param parserClassNames parser class names that must each appear in the attribute value
     */
    private static void assertParsedBy(final MockFlowFile flowFile, final String keyPrefix, final String... parserClassNames) {
        final String attributeName = keyPrefix + PARSED_BY;
        flowFile.assertAttributeExists(attributeName);
        final String parsedBy = flowFile.getAttribute(attributeName);
        for (final String parserClassName : parserClassNames) {
            assertTrue(attributeName + " expected to contain '" + parserClassName + "' but was '" + parsedBy + "'",
                    parsedBy.contains(parserClassName));
        }
    }

    /**
     * Counts the attributes of a flow file whose name starts with the given prefix.
     *
     * @param flowFile  the flow file to inspect
     * @param keyPrefix attribute-name prefix to match
     * @return number of matching attributes
     */
    private static int countAttributesWithPrefix(final MockFlowFile flowFile, final String keyPrefix) {
        int count = 0;
        for (final String attributeName : flowFile.getAttributes().keySet()) {
            if (attributeName.startsWith(keyPrefix)) {
                count++;
            }
        }
        return count;
    }

    /** The processor context of a default runner reports four properties. */
    @Test
    public void testProperties() {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractMediaMetadata());
        final ProcessContext context = runner.getProcessContext();
        final Map<PropertyDescriptor, String> propertyValues = context.getProperties();
        assertEquals(4, propertyValues.size());
    }

    /** The processor exposes exactly the {@code SUCCESS} and {@code FAILURE} relationships. */
    @Test
    public void testRelationships() {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractMediaMetadata());
        final ProcessContext context = runner.getProcessContext();
        final Set<Relationship> relationships = context.getAvailableRelationships();
        assertEquals(2, relationships.size());
        assertTrue(relationships.contains(ExtractMediaMetadata.SUCCESS));
        assertTrue(relationships.contains(ExtractMediaMetadata.FAILURE));
    }

    /** Plain text supplied as raw bytes is routed to success with text metadata and unchanged content. */
    @Test
    public void testTextBytes() throws IOException {
        final TestRunner runner = newRunner("", "txt.");

        final Map<String, String> attrs = new HashMap<>();
        attrs.put("filename", "test1.txt");
        // Encode explicitly so the enqueued bytes and the expected bytes cannot diverge on
        // platforms whose default charset is not ASCII-compatible.
        final byte[] content = "test1".getBytes(StandardCharsets.UTF_8);
        runner.enqueue(content, attrs);
        runner.run();

        final MockFlowFile flowFile = assertSingleSuccess(runner);
        assertFilename(flowFile, "test1.txt");
        assertAttributeStartsWith(flowFile, "txt.Content-Type", "text/plain");
        assertParsedBy(flowFile, "txt.", DEFAULT_PARSER, TXT_PARSER);
        flowFile.assertAttributeExists("txt.Content-Encoding");
        flowFile.assertAttributeEquals("txt.Content-Encoding", "ISO-8859-1");
        flowFile.assertContentEquals(content);
    }

    /** A successful extraction emits a single {@code ATTRIBUTES_MODIFIED} provenance event. */
    @Test
    public void testProvenance() throws IOException {
        final TestRunner runner = newRunner("", "txt.");

        final Map<String, String> attrs = new HashMap<>();
        attrs.put("filename", "test1.txt");
        runner.enqueue("test1".getBytes(StandardCharsets.UTF_8), attrs);
        runner.run();

        runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS, 1);
        runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);

        final List<ProvenanceEventRecord> events = runner.getProvenanceEvents();
        assertEquals(1, events.size());

        final ProvenanceEventRecord event = events.get(0);
        assertEquals(ExtractMediaMetadata.class.getSimpleName(), event.getComponentType());
        assertEquals("media attributes extracted", event.getDetails());
        assertEquals(ProvenanceEventType.ATTRIBUTES_MODIFIED, event.getEventType());
    }

    /** Running with an empty queue transfers nothing to either relationship. */
    @Test
    public void testNoFlowFile() throws IOException {
        final TestRunner runner = newRunner("", "txt.");
        runner.run();

        runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS, 0);
        runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
    }

    /** A small text fixture file is routed to success with text metadata and unchanged content. */
    @Test
    public void testTextFile() throws IOException {
        final TestRunner runner = newRunner("", "txt.");
        final Path fixture = testResource("textFile.txt").toPath();
        runner.enqueue(fixture);
        runner.run();

        final MockFlowFile flowFile = assertSingleSuccess(runner);
        assertFilename(flowFile, "textFile.txt");
        assertAttributeStartsWith(flowFile, "txt.Content-Type", "text/plain");
        assertParsedBy(flowFile, "txt.", DEFAULT_PARSER, TXT_PARSER);
        flowFile.assertAttributeExists("txt.Content-Encoding");
        flowFile.assertAttributeEquals("txt.Content-Encoding", "ISO-8859-1");
        flowFile.assertContentEquals(
                "This file is not an image and is used for testing the image metadata extractor.".getBytes(StandardCharsets.UTF_8));
    }

    /** A large text fixture file is routed to success and keeps its original size. */
    @Test
    public void testBigTextFile() throws IOException {
        final File textFile = testResource("textFileBig.txt");

        final TestRunner runner = newRunner(null, "txt.");
        final Path fixture = textFile.toPath();
        runner.enqueue(fixture);
        runner.run(2);

        final MockFlowFile flowFile = assertSingleSuccess(runner);
        assertFilename(flowFile, "textFileBig.txt");
        assertAttributeStartsWith(flowFile, "txt.Content-Type", "text/plain");
        assertParsedBy(flowFile, "txt.", DEFAULT_PARSER, TXT_PARSER);
        flowFile.assertAttributeExists("txt.Content-Encoding");
        flowFile.assertAttributeEquals("txt.Content-Encoding", "ISO-8859-1");
        // expected first, actual second, so a failure message reads correctly
        assertEquals(textFile.length(), flowFile.getSize());
    }

    /** Random bytes are still routed to success and reported as {@code application/octet-stream}. */
    @Test
    public void testJunkBytes() throws IOException {
        final TestRunner runner = newRunner("", "junk.");

        final Map<String, String> attrs = new HashMap<>();
        attrs.put("filename", "junk");
        // NOTE(review): the generator is unseeded, so the payload differs on every run; if this test
        // ever proves flaky, consider a fixed seed that is known to be detected as octet-stream.
        final Random random = new Random();
        final byte[] bytes = new byte[2048];
        random.nextBytes(bytes);
        runner.enqueue(bytes, attrs);
        runner.run();

        final MockFlowFile flowFile = assertSingleSuccess(runner);
        assertFilename(flowFile, "junk");
        assertAttributeStartsWith(flowFile, "junk.Content-Type", "application/octet-stream");
        assertParsedBy(flowFile, "junk.", "org.apache.tika.parser.EmptyParser");
        flowFile.assertContentEquals(bytes);
    }

    /** With a key filter of {@code (X-Parsed.*)}, matching keys are written and {@code Content-Encoding} is not. */
    @Test
    public void testMetadataKeyFilter() throws IOException {
        final TestRunner runner = newRunner("(X-Parsed.*)", "txt.");
        final Path fixture = testResource("textFile.txt").toPath();
        runner.enqueue(fixture);
        runner.run();

        final MockFlowFile flowFile = assertSingleSuccess(runner);
        assertFilename(flowFile, "textFile.txt");
        assertParsedBy(flowFile, "txt.", DEFAULT_PARSER, TXT_PARSER);
        flowFile.assertAttributeNotExists("txt.Content-Encoding");
    }

    /** Attribute names are unprefixed when no key prefix is set and prefixed when one is configured. */
    @Test
    public void testMetadataKeyPrefix() throws IOException {
        final Path fixture = testResource("textFile.txt").toPath();

        final TestRunner unprefixedRunner = newRunner(null, null);
        unprefixedRunner.enqueue(fixture);
        unprefixedRunner.run();

        final MockFlowFile unprefixed = assertSingleSuccess(unprefixedRunner);
        unprefixed.assertAttributeExists("filename");
        unprefixed.assertAttributeExists("X-Parsed-By");

        final TestRunner prefixedRunner = newRunner(null, "txt.");
        prefixedRunner.enqueue(fixture);
        prefixedRunner.run();

        final MockFlowFile prefixed = assertSingleSuccess(prefixedRunner);
        prefixed.assertAttributeExists("filename");
        prefixed.assertAttributeExists("txt.X-Parsed-By");
    }

    /** Setting the maximum number of attributes to one less than the unlimited count drops exactly one attribute. */
    @Test
    public void testMaxAttributes() throws IOException {
        final Path fixture = testResource("textFile.txt").toPath();

        // First pass: no limit configured, to learn how many prefixed attributes are produced.
        final TestRunner unlimitedRunner = newRunner(null, "txt.");
        unlimitedRunner.enqueue(fixture);
        unlimitedRunner.run();

        final int unlimitedCount = countAttributesWithPrefix(assertSingleSuccess(unlimitedRunner), "txt.");
        assertTrue(unlimitedCount > 1);

        // Second pass: cap the attribute count at one below what the first pass produced.
        final TestRunner limitedRunner = newRunner(null, "txt.");
        limitedRunner.setProperty(ExtractMediaMetadata.MAX_NUMBER_OF_ATTRIBUTES, Integer.toString(unlimitedCount - 1));
        limitedRunner.assertValid();
        limitedRunner.enqueue(fixture);
        limitedRunner.run();

        final int limitedCount = countAttributesWithPrefix(assertSingleSuccess(limitedRunner), "txt.");
        assertEquals(unlimitedCount - 1, limitedCount);
    }

    /** A BMP fixture yields the BMP content type and a 10x10 size. */
    @Test
    public void testBmp() throws IOException {
        final TestRunner runner = newRunner(null, "bmp.");
        final Path fixture = testResource("16color-10x10.bmp").toPath();
        runner.enqueue(fixture);
        runner.run(2);

        final MockFlowFile flowFile = assertSingleSuccess(runner);
        assertFilename(flowFile, "16color-10x10.bmp");
        flowFile.assertAttributeExists("bmp.Content-Type");
        flowFile.assertAttributeEquals("bmp.Content-Type", "image/x-ms-bmp");
        assertParsedBy(flowFile, "bmp.", DEFAULT_PARSER);
        // NOTE(review): the format-specific parser check below was already disabled; reason not recorded here.
        // assertTrue(flowFile0.getAttribute("bmp.X-Parsed-By").contains("org.apache.tika.parser.image.ImageParser"));
        flowFile.assertAttributeExists("bmp.height");
        flowFile.assertAttributeEquals("bmp.height", "10");
        flowFile.assertAttributeExists("bmp.width");
        flowFile.assertAttributeEquals("bmp.width", "10");
    }

    /** A JPEG fixture yields the {@code image/jpeg} content type. */
    @Test
    public void testJpg() throws IOException {
        final TestRunner runner = newRunner(null, "jpg.");
        final Path fixture = testResource("simple.jpg").toPath();
        runner.enqueue(fixture);
        runner.run(2);

        final MockFlowFile flowFile = assertSingleSuccess(runner);
        assertFilename(flowFile, "simple.jpg");
        flowFile.assertAttributeExists("jpg.Content-Type");
        flowFile.assertAttributeEquals("jpg.Content-Type", "image/jpeg");
        assertParsedBy(flowFile, "jpg.", DEFAULT_PARSER);
        // NOTE(review): the format-specific parser check below was already disabled; reason not recorded here.
        // assertTrue(flowFile0.getAttribute("jpg.X-Parsed-By").contains("org.apache.tika.parser.jpeg.JpegParser"));
    }

    /** A WAV fixture yields a WAV content type, the audio parser and a {@code PCM_SIGNED} encoding. */
    @Test
    public void testWav() throws IOException {
        final TestRunner runner = newRunner("", "wav.");
        final Path fixture = testResource("testWAV.wav").toPath();
        runner.enqueue(fixture);
        runner.run();

        final MockFlowFile flowFile = assertSingleSuccess(runner);
        assertFilename(flowFile, "testWAV.wav");
        assertAttributeStartsWith(flowFile, "wav.Content-Type", "audio/x-wav");
        assertParsedBy(flowFile, "wav.", DEFAULT_PARSER, "org.apache.tika.parser.audio.AudioParser");
        flowFile.assertAttributeExists("wav.encoding");
        flowFile.assertAttributeEquals("wav.encoding", "PCM_SIGNED");
    }

    /** An Ogg Vorbis fixture yields the vorbis content type and the Vorbis parser. */
    @Test
    public void testOgg() throws IOException {
        final TestRunner runner = newRunner("", "ogg.");
        final Path fixture = testResource("testVORBIS.ogg").toPath();
        runner.enqueue(fixture);
        runner.run();

        final MockFlowFile flowFile = assertSingleSuccess(runner);
        assertFilename(flowFile, "testVORBIS.ogg");
        assertAttributeStartsWith(flowFile, "ogg.Content-Type", "audio/vorbis");
        assertParsedBy(flowFile, "ogg.", DEFAULT_PARSER, "org.gagravarr.tika.VorbisParser");
    }

    /** A corrupt Ogg fixture is routed to failure without any extracted metadata attributes. */
    @Test
    public void testOggCorruptFails() throws IOException {
        final TestRunner runner = newRunner("", "ogg.");
        final Path fixture = testResource("testVORBIS-corrupt.ogg").toPath();
        runner.enqueue(fixture);
        runner.run(2);

        runner.assertTransferCount(ExtractMediaMetadata.SUCCESS, 0);
        runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 1);

        final List<MockFlowFile> failureFiles = runner.getFlowFilesForRelationship(ExtractMediaMetadata.FAILURE);
        final MockFlowFile flowFile = failureFiles.get(0);
        assertFilename(flowFile, "testVORBIS-corrupt.ogg");
        flowFile.assertAttributeNotExists("ogg.Content-Type");
        flowFile.assertAttributeNotExists("ogg.X-Parsed-By");
    }

    /** An MP3 fixture yields the MPEG audio content type, the MP3 parser and the expected title. */
    @Test
    public void testMp3() throws IOException {
        final TestRunner runner = newRunner("", "mp3.");
        final Path fixture = testResource("testMP3id3v1.mp3").toPath();
        runner.enqueue(fixture);
        runner.run();

        final MockFlowFile flowFile = assertSingleSuccess(runner);
        assertFilename(flowFile, "testMP3id3v1.mp3");
        assertAttributeStartsWith(flowFile, "mp3.Content-Type", "audio/mpeg");
        assertParsedBy(flowFile, "mp3.", DEFAULT_PARSER, "org.apache.tika.parser.mp3.Mp3Parser");
        flowFile.assertAttributeExists("mp3.title");
        flowFile.assertAttributeEquals("mp3.title", "Test Title");
    }
}