/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.hadoop.inotify;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DFSInotifyEventInputStream;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.client.HdfsAdmin;
import org.apache.hadoop.hdfs.inotify.Event;
import org.apache.hadoop.hdfs.inotify.EventBatch;
import org.apache.nifi.components.state.Scope;
import org.apache.nifi.hadoop.KerberosProperties;
import org.apache.nifi.processors.hadoop.inotify.util.EventTestUtils;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.NiFiProperties;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;

import java.io.File;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
 * Unit tests for {@link GetHDFSEvents}. All HDFS interactions are mocked so
 * that polling, path filtering, event-type filtering, and transaction-id
 * state management can be verified without a live cluster.
 */
public class TestGetHDFSEvents {

    NiFiProperties mockNiFiProperties;
    KerberosProperties kerberosProperties;
    DFSInotifyEventInputStream inotifyEventInputStream;
    HdfsAdmin hdfsAdmin;

    @Rule
    public ExpectedException exception = ExpectedException.none();

    @Before
    public void setup() {
        mockNiFiProperties = mock(NiFiProperties.class);
        when(mockNiFiProperties.getKerberosConfigurationFile()).thenReturn(null);
        kerberosProperties = new KerberosProperties(null);
        inotifyEventInputStream = mock(DFSInotifyEventInputStream.class);
        hdfsAdmin = mock(HdfsAdmin.class);
    }

    @Test
    public void notSettingHdfsPathToWatchShouldThrowError() throws Exception {
        exception.expect(AssertionError.class);
        exception.expectMessage("'HDFS Path to Watch' is invalid because HDFS Path to Watch is required");

        GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
        TestRunner runner = TestRunners.newTestRunner(processor);

        runner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
        runner.run();
    }

    @Test
    public void onTriggerShouldProperlyHandleAnEmptyEventBatch() throws Exception {
        // An empty batch should produce no flow files but still record the batch's transaction id.
        EventBatch eventBatch = mock(EventBatch.class);
        when(eventBatch.getEvents()).thenReturn(new Event[]{});

        // 1,000,000 microseconds = the 1 second poll duration configured below.
        when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
        when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
        when(eventBatch.getTxid()).thenReturn(100L);

        GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
        TestRunner runner = TestRunners.newTestRunner(processor);

        runner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
        runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path");
        runner.setProperty(GetHDFSEvents.NUMBER_OF_RETRIES_FOR_POLL, "5");
        runner.run();

        List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
        assertEquals(0, successfulFlowFiles.size());
        verify(eventBatch).getTxid();
        assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
    }

    @Test
    public void onTriggerShouldProperlyHandleANullEventBatch() throws Exception {
        // A null batch leaves the stored transaction id at its initial value of -1.
        when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(null);
        when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);

        GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
        TestRunner runner = TestRunners.newTestRunner(processor);

        runner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
        runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path${now()}");
        runner.run();

        List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
        assertEquals(0, successfulFlowFiles.size());
        assertEquals("-1", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
    }

    @Test
    public void makeSureHappyPathForProcessingEventsSendsFlowFilesToCorrectRelationship() throws Exception {
        // All three mocked events match the watch pattern, so all three become flow files.
        Event[] events = getEvents();

        EventBatch eventBatch = mock(EventBatch.class);
        when(eventBatch.getEvents()).thenReturn(events);

        when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
        when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
        when(eventBatch.getTxid()).thenReturn(100L);

        GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
        TestRunner runner = TestRunners.newTestRunner(processor);

        runner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
        runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/)?.*");
        runner.run();

        List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
        assertEquals(3, successfulFlowFiles.size());
        verify(eventBatch).getTxid();
        assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
    }

    @Test
    public void onTriggerShouldOnlyProcessEventsWithSpecificPath() throws Exception {
        // Only one of the three mocked events falls under the narrower watch path.
        Event[] events = getEvents();

        EventBatch eventBatch = mock(EventBatch.class);
        when(eventBatch.getEvents()).thenReturn(events);

        when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
        when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
        when(eventBatch.getTxid()).thenReturn(100L);

        GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
        TestRunner runner = TestRunners.newTestRunner(processor);

        runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path/create(/)?");
        runner.run();

        List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
        assertEquals(1, successfulFlowFiles.size());
        verify(eventBatch).getTxid();
        assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
    }

    @Test
    public void eventsProcessorShouldProperlyFilterEventTypes() throws Exception {
        Event[] events = getEvents();

        EventBatch eventBatch = mock(EventBatch.class);
        when(eventBatch.getEvents()).thenReturn(events);

        when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
        when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
        when(eventBatch.getTxid()).thenReturn(100L);

        GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
        TestRunner runner = TestRunners.newTestRunner(processor);

        runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/.*)?");
        runner.setProperty(GetHDFSEvents.EVENT_TYPES, "create, metadata");
        runner.run();

        // Only the create and metadata events should pass the event-type filter.
        List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
        assertEquals(2, successfulFlowFiles.size());

        List<String> expectedEventTypes = Arrays.asList("CREATE", "METADATA");
        for (MockFlowFile f : successfulFlowFiles) {
            String eventType = f.getAttribute(EventAttributes.EVENT_TYPE);
            assertTrue(expectedEventTypes.contains(eventType));
        }

        verify(eventBatch).getTxid();
        assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
    }

    @Test
    public void makeSureExpressionLanguageIsWorkingProperlyWithinTheHdfsPathToWatch() throws Exception {
        Event[] events = new Event[] {
                new Event.CreateEvent.Builder().path("/some/path/1/2/3/t.txt").build(),
                new Event.CreateEvent.Builder().path("/some/path/1/2/4/t.txt").build(),
                new Event.CreateEvent.Builder().path("/some/path/1/2/3/.t.txt").build()
        };

        EventBatch eventBatch = mock(EventBatch.class);
        when(eventBatch.getEvents()).thenReturn(events);

        when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
        when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
        when(eventBatch.getTxid()).thenReturn(100L);

        GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
        TestRunner runner = TestRunners.newTestRunner(processor);

        // The Expression Language path resolves to /some/path/1/2/3/.*.txt; with hidden
        // files ignored, only the first event (t.txt under .../1/2/3) should match.
        runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path/${literal(1)}/${literal(2)}/${literal(3)}/.*.txt");
        runner.setProperty(GetHDFSEvents.EVENT_TYPES, "create");
        runner.setProperty(GetHDFSEvents.IGNORE_HIDDEN_FILES, "true");
        runner.run();

        List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
        assertEquals(1, successfulFlowFiles.size());

        for (MockFlowFile f : successfulFlowFiles) {
            String eventType = f.getAttribute(EventAttributes.EVENT_TYPE);
            assertTrue(eventType.equals("CREATE"));
        }

        verify(eventBatch).getTxid();
        assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
    }

    private Event[] getEvents() {
        return new Event[]{
                EventTestUtils.createCreateEvent(),
                EventTestUtils.createCloseEvent(),
                EventTestUtils.createMetadataUpdateEvent()
        };
    }

    /**
     * Test subclass of {@link GetHDFSEvents} that swaps in mocked HDFS
     * dependencies so no real cluster or Kerberos configuration is needed.
     */
    private class TestableGetHDFSEvents extends GetHDFSEvents {

        private final KerberosProperties testKerberosProperties;
        private final FileSystem fileSystem = new DistributedFileSystem();
        private final HdfsAdmin hdfsAdmin;

        TestableGetHDFSEvents(KerberosProperties testKerberosProperties, HdfsAdmin hdfsAdmin) {
            this.testKerberosProperties = testKerberosProperties;
            this.hdfsAdmin = hdfsAdmin;
        }

        @Override
        protected FileSystem getFileSystem() {
            return fileSystem;
        }

        @Override
        protected KerberosProperties getKerberosProperties(File kerberosConfigFile) {
            return testKerberosProperties;
        }

        @Override
        protected HdfsAdmin getHdfsAdmin() {
            return hdfsAdmin;
        }
    }
}