/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.avro;

import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.apache.nifi.stream.io.ByteArrayOutputStream;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;

public class TestExtractAvroMetadata {

    static final String AVRO_SCHEMA_ATTR = "avro.schema";
    static final String AVRO_CODEC_ATTR = "avro.codec";

    @Test
    public void testDefaultExtraction() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());

        final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));
        final ByteArrayOutputStream out = getOutputStreamWithOneUser(schema);
        runner.enqueue(out.toByteArray());
        runner.run();

        runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_FINGERPRINT_ATTR, "b2d1d8d3de2833ce");
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_TYPE_ATTR, Schema.Type.RECORD.getName());
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_NAME_ATTR, "User");
        flowFile.assertAttributeNotExists(AVRO_SCHEMA_ATTR);
        flowFile.assertAttributeNotExists(ExtractAvroMetadata.ITEM_COUNT_ATTR);
    }

    @Test
    public void testExtractionWithItemCount() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
        runner.setProperty(ExtractAvroMetadata.COUNT_ITEMS, "true");

        final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));
        final ByteArrayOutputStream out = getOutputStreamWithMultipleUsers(schema, 6000); // creates 2 blocks
        runner.enqueue(out.toByteArray());
        runner.run();

        runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
        flowFile.assertAttributeEquals(ExtractAvroMetadata.ITEM_COUNT_ATTR, "6000");
    }

    @Test
    public void testExtractionWithZeroUsers() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
        runner.setProperty(ExtractAvroMetadata.COUNT_ITEMS, "true");

        final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));
        final ByteArrayOutputStream out = getOutputStreamWithMultipleUsers(schema, 0);
        runner.enqueue(out.toByteArray());
        runner.run();

        runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_FINGERPRINT_ATTR, "b2d1d8d3de2833ce");
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_TYPE_ATTR, Schema.Type.RECORD.getName());
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_NAME_ATTR, "User");
        flowFile.assertAttributeEquals(ExtractAvroMetadata.ITEM_COUNT_ATTR, "0");
    }

    @Test
    public void testExtractionWithMD5() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
        runner.setProperty(ExtractAvroMetadata.FINGERPRINT_ALGORITHM, ExtractAvroMetadata.MD5);

        final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));
        final ByteArrayOutputStream out = getOutputStreamWithOneUser(schema);
        runner.enqueue(out.toByteArray());
        runner.run();

        runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_FINGERPRINT_ATTR, "3c6a7bee8994be20314dd28c6a3af4f2");
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_TYPE_ATTR, Schema.Type.RECORD.getName());
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_NAME_ATTR, "User");
        flowFile.assertAttributeNotExists(AVRO_SCHEMA_ATTR);
    }

    @Test
    public void testExtractionWithSHA256() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
        runner.setProperty(ExtractAvroMetadata.FINGERPRINT_ALGORITHM, ExtractAvroMetadata.SHA_256);

        final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));
        final ByteArrayOutputStream out = getOutputStreamWithOneUser(schema);
        runner.enqueue(out.toByteArray());
        runner.run();

        runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_FINGERPRINT_ATTR, "683f8f51ecd208038f4f0d39820ee9dd0ef3e32a3bee9371de0a2016d501b113");
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_TYPE_ATTR, Schema.Type.RECORD.getName());
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_NAME_ATTR, "User");
        flowFile.assertAttributeNotExists(AVRO_SCHEMA_ATTR);
    }

    @Test
    public void testExtractionWithMetadataKey() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
        runner.setProperty(ExtractAvroMetadata.METADATA_KEYS, AVRO_SCHEMA_ATTR); // test dynamic attribute avro.schema

        final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));
        final ByteArrayOutputStream out = getOutputStreamWithOneUser(schema);
        runner.enqueue(out.toByteArray());
        runner.run();

        runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
        flowFile.assertAttributeExists(ExtractAvroMetadata.SCHEMA_FINGERPRINT_ATTR);
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_TYPE_ATTR, Schema.Type.RECORD.getName());
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_NAME_ATTR, "User");
        flowFile.assertAttributeEquals(AVRO_SCHEMA_ATTR, schema.toString());
    }

    @Test
    public void testExtractionWithMetadataKeysWhitespace() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
        runner.setProperty(ExtractAvroMetadata.METADATA_KEYS, "foo, bar, " + AVRO_SCHEMA_ATTR); // test dynamic attribute avro.schema

        final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));
        final ByteArrayOutputStream out = getOutputStreamWithOneUser(schema);
        runner.enqueue(out.toByteArray());
        runner.run();

        runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
        flowFile.assertAttributeExists(ExtractAvroMetadata.SCHEMA_FINGERPRINT_ATTR);
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_TYPE_ATTR, Schema.Type.RECORD.getName());
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_NAME_ATTR, "User");
        flowFile.assertAttributeEquals(AVRO_SCHEMA_ATTR, schema.toString());
    }

    @Test
    public void testExtractionWithNonRecordSchema() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
        runner.setProperty(ExtractAvroMetadata.COUNT_ITEMS, "true");

        final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array.avsc"));

        final GenericData.Array<String> data = new GenericData.Array<>(schema, Arrays.asList("one", "two", "three"));
        final DatumWriter<GenericData.Array<String>> datumWriter = new GenericDatumWriter<>(schema);

        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final DataFileWriter<GenericData.Array<String>> dataFileWriter = new DataFileWriter<>(datumWriter);
        dataFileWriter.create(schema, out);
        dataFileWriter.append(data);
        dataFileWriter.append(data);
        dataFileWriter.close();

        runner.enqueue(out.toByteArray());
        runner.run();

        runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
        flowFile.assertAttributeExists(ExtractAvroMetadata.SCHEMA_FINGERPRINT_ATTR);
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_TYPE_ATTR, Schema.Type.ARRAY.getName());
        flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_NAME_ATTR, "array");
        flowFile.assertAttributeEquals(ExtractAvroMetadata.ITEM_COUNT_ATTR, "2"); // number of arrays, not elements
    }

    @Test
    public void testExtractionWithCodec() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
        runner.setProperty(ExtractAvroMetadata.METADATA_KEYS, AVRO_CODEC_ATTR); // test dynamic attribute avro.codec

        final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array.avsc"));

        final GenericData.Array<String> data = new GenericData.Array<>(schema, Arrays.asList("one", "two", "three"));
        final DatumWriter<GenericData.Array<String>> datumWriter = new GenericDatumWriter<>(schema);

        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final DataFileWriter<GenericData.Array<String>> dataFileWriter = new DataFileWriter<>(datumWriter);
        dataFileWriter.setCodec(CodecFactory.deflateCodec(1));
        dataFileWriter.create(schema, out);
        dataFileWriter.append(data);
        dataFileWriter.close();

        runner.enqueue(out.toByteArray());
        runner.run();

        runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
        flowFile.assertAttributeEquals("avro.codec", "deflate");
    }

    @Test
    public void testExtractionWithBadInput() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());

        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        out.write("not avro".getBytes("UTF-8"));
        out.flush();

        runner.enqueue(out.toByteArray());
        runner.run();
        runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_FAILURE, 1);
    }

    private ByteArrayOutputStream getOutputStreamWithOneUser(Schema schema) throws IOException {
        final GenericRecord user = new GenericData.Record(schema);
        user.put("name", "Alyssa");
        user.put("favorite_number", 256);

        final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
        return AvroTestUtil.serializeAvroRecord(schema, datumWriter, user);
    }

    private ByteArrayOutputStream getOutputStreamWithMultipleUsers(Schema schema, int numUsers) throws IOException {
        final GenericRecord[] users = new GenericRecord[numUsers];
        for (int i = 0; i < numUsers; i++) {
            final GenericRecord user = new GenericData.Record(schema);
            user.put("name", "user" + i);
            user.put("favorite_number", i);
            users[i] = user;
        }

        final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
        return AvroTestUtil.serializeAvroRecord(schema, datumWriter, users);
    }
}