package com.thinkbiganalytics.nifi.v2.hdfs; /*- * #%L * thinkbig-nifi-hadoop-processors * %% * Copyright (C) 2017 ThinkBig Analytics * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import com.google.gson.Gson; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.MD5Hash; import org.apache.nifi.components.ValidationResult; import org.apache.nifi.processor.ProcessContext; import org.apache.nifi.util.MockProcessContext; import org.apache.nifi.util.TestRunner; import org.apache.nifi.util.TestRunners; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.InOrder; import org.mockito.Mockito; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Collection; import javax.annotation.Nonnull; import javax.annotation.Nullable; import static org.mockito.Matchers.any; public class ComputeHDFSChecksumsTest { /** * Mock file system */ private final FileSystem fileSystem = Mockito.mock(FileSystem.class); /** * Test runner */ private final TestRunner runner = TestRunners.newTestRunner(new TestableComputeHDFSChecksums()); private final String fileEntry = "{\n" + "\"name\": \"%s\",\n" + "\"size\": 131665,\n" + "\"checksum\": {\n" + "\"length\": 28,\n" + "\"value\": \"%s\",\n" + "\"algorithm\": \"MD5-of-0MD5-of-512CRC32C\"\n" + "}\n" + "}"; /** * Initialize instance variables */ @Before public void setUp() throws Exception { // Setup mock file system Mockito.when(fileSystem.delete(any(Path.class), Mockito.eq(true))).thenReturn(true); // Setup test runner runner.setValidateExpressionUsage(false); } /** * Verify required properties. */ @Test public void testValidators() { String files = ComputeHDFSChecksums.FILES.getName(); // Test with no properties Collection<ValidationResult> results = validate(runner); Assert.assertEquals(1, results.size()); results.forEach((ValidationResult result) -> Assert.assertEquals( String.format("'%s' is invalid because %s is required", files, files), result.toString())); // Test with required properties present runner.setProperty(ComputeHDFSChecksums.FILES, "[]"); results = validate(runner); Assert.assertEquals(0, results.size()); // Test with additional property DIRECTORY set runner.setProperty(ComputeHDFSChecksums.DIRECTORY, "/dropzone"); results = validate(runner); Assert.assertEquals(0, results.size()); } @Test public void testFilesListAttributeNotJSON() { runner.setProperty(ComputeHDFSChecksums.FILES, "a"); runner.enqueue(new byte[0]); runner.run(); // Check relationships Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size()); Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size()); // Check file system calls InOrder inOrder = Mockito.inOrder(fileSystem); inOrder.verifyNoMoreInteractions(); } @Test public void testFileListAttributeNotArray() { runner.setProperty(ComputeHDFSChecksums.FILES, "{}"); runner.enqueue(new byte[0]); runner.run(); // Check relationships Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size()); Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size()); // Check file system calls InOrder inOrder = Mockito.inOrder(fileSystem); inOrder.verifyNoMoreInteractions(); } @Test public void testFileListAttributeEmtpy() { runner.setProperty(ComputeHDFSChecksums.FILES, ""); runner.enqueue(new byte[0]); runner.run(); // Check relationships Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size()); Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size()); // Check file system calls InOrder inOrder = Mockito.inOrder(fileSystem); inOrder.verifyNoMoreInteractions(); } @Test public void testFileListAttributeEmptyArray() { runner.setProperty(ComputeHDFSChecksums.FILES, "[]"); runner.enqueue(new byte[0]); runner.run(); // Check relationships Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size()); Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size()); // Check file system calls InOrder inOrder = Mockito.inOrder(fileSystem); inOrder.verifyNoMoreInteractions(); } @Test public void testSingleFileInListDontFailOnWrongChecksum() throws Exception { String fileName = "000000_0"; Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00"))). when(fileSystem).getFileChecksum(any(Path.class)); runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "False"); runner.setProperty(ComputeHDFSChecksums.FILES, String.format("[" + fileEntry + "]", fileName, "AAACAAAAAAAAAAAArRnBpxcZ9ze14XqfLMB4yA==")); runner.enqueue(new byte[0]); runner.run(); // Check relationships Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size()); Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size()); // Check whether checksum was passed correctly to attributes String filesJSON = runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).get(0). getAttribute("files"); Gson jsonParser = new Gson(); ComputeHDFSChecksums.File[] files = jsonParser.fromJson(filesJSON, ComputeHDFSChecksums.File[].class); Assert.assertEquals(files[0].getComputedChecksum().getValue(), "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA="); // Check file system calls verifyGetFileChecksumCall(fileName); } @Test public void testSingleFileInListFailOnWrongChecksum() throws Exception { String fileName = "000000_0"; Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00"))). when(fileSystem).getFileChecksum(any(Path.class)); runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True"); runner.setProperty(ComputeHDFSChecksums.FILES, String.format("[" + fileEntry + "]", fileName, "AAACAAAAAAAAAAAArRnBpxcZ9ze14XqfLMB4yA==")); runner.enqueue(new byte[0]); runner.run(); // Check relationships Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size()); Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size()); // Check file system calls verifyGetFileChecksumCall(fileName); } @Test public void testSingleFileProperChecksum() throws Exception { String fileName = "000000_0"; Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00"))). when(fileSystem).getFileChecksum(any(Path.class)); runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True"); runner.setProperty(ComputeHDFSChecksums.FILES, String.format("[" + fileEntry + "]", fileName, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=")); runner.enqueue(new byte[0]); runner.run(); // Check relationships Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size()); Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size()); // Check file system calls verifyGetFileChecksumCall(fileName); } @Test public void testMultipleFilesFailOnSingleWrongChecksum() throws Exception { String fileName = "000000_0"; String fileName2 = "000000_1"; String fileName3 = "000000_2"; Mockito.when(fileSystem.getFileChecksum(any(Path.class))). thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00"))) .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff01"))) .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff02"))); runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True"); runner.setProperty(ComputeHDFSChecksums.FILES, String.format("[" + fileEntry + "," + fileEntry + "," + fileEntry + "]", fileName, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=", fileName2, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=", fileName3, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AgAAAAA=")); runner.enqueue(new byte[0]); runner.run(); // Check relationships Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size()); Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size()); // Check file system calls InOrder inOrder = Mockito.inOrder(fileSystem); inOrder.verify(fileSystem).getFileChecksum(new Path(fileName)); inOrder.verify(fileSystem).getFileChecksum(new Path(fileName2)); inOrder.verifyNoMoreInteractions(); } @Test public void testMultipleFilesWithDirectoryDefined() throws Exception { String fileName = "000000_0"; String fileName2 = "000000_1"; String fileName3 = "000000_2"; String directory = "/dropzone"; Mockito.when(fileSystem.getFileChecksum(any(Path.class))). thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00"))) .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff01"))) .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff02"))); runner.setProperty(ComputeHDFSChecksums.DIRECTORY, directory); runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True"); runner.setProperty(ComputeHDFSChecksums.FILES, String.format("[" + fileEntry + "," + fileEntry + "," + fileEntry + "]", fileName, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=", fileName2, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AQAAAAA=", fileName3, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AgAAAAA=")); runner.enqueue(new byte[0]); runner.run(); // Check relationships Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size()); Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size()); // Check file system calls InOrder inOrder = Mockito.inOrder(fileSystem); inOrder.verify(fileSystem).getFileChecksum(new Path(directory, fileName)); inOrder.verify(fileSystem).getFileChecksum(new Path(directory, fileName2)); inOrder.verify(fileSystem).getFileChecksum(new Path(directory, fileName3)); inOrder.verifyNoMoreInteractions(); } @Test public void testFileNotFoundException() throws Exception { String fileName = "000000_0"; Mockito.doThrow(new FileNotFoundException()).when(fileSystem).getFileChecksum(any(Path.class)); runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True"); runner.setProperty(ComputeHDFSChecksums.FILES, String.format("[" + fileEntry + "]", fileName, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=")); runner.enqueue(new byte[0]); runner.run(); // Check relationships Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size()); Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size()); // Check file system calls verifyGetFileChecksumCall(fileName); } /** * Enqueues a {@code FlowFile} and validates its properties. * * @param runner the test runner * @return the validation results */ @Nonnull private Collection<ValidationResult> validate(@Nonnull final TestRunner runner) { runner.enqueue(new byte[0]); return ((MockProcessContext) runner.getProcessContext()).validate(); } private void verifyGetFileChecksumCall(String fileName) throws Exception { InOrder inOrder = Mockito.inOrder(fileSystem); inOrder.verify(fileSystem).getFileChecksum(new Path(fileName)); inOrder.verifyNoMoreInteractions(); } /** * A mock {@code ComputeHDFSChecksums} for testing. */ private class TestableComputeHDFSChecksums extends ComputeHDFSChecksums { @Nullable @Override protected FileSystem getFileSystem(@Nonnull ProcessContext context) { return fileSystem; } @Override HdfsResources resetHDFSResources(String configResources, String dir, ProcessContext context) throws IOException { return null; } } }