package com.thinkbiganalytics.nifi.v2.ingest; /*- * #%L * thinkbig-nifi-core-processors * %% * Copyright (C) 2017 ThinkBig Analytics * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import com.thinkbiganalytics.ingest.GetTableDataSupport; import com.thinkbiganalytics.nifi.core.api.metadata.KyloNiFiFlowProvider; import com.thinkbiganalytics.nifi.core.api.metadata.MetadataProvider; import com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService; import com.thinkbiganalytics.nifi.core.api.metadata.MetadataRecorder; import com.thinkbiganalytics.nifi.v2.common.CommonProperties; import com.thinkbiganalytics.util.ComponentAttributes; import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.SeekableByteArrayInput; import org.apache.avro.file.SeekableInput; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.DatumReader; import org.apache.avro.util.Utf8; import org.apache.nifi.components.ValidationResult; import org.apache.nifi.controller.AbstractControllerService; import org.apache.nifi.dbcp.DBCPService; import org.apache.nifi.util.MockFlowFile; import org.apache.nifi.util.MockProcessContext; import org.apache.nifi.util.TestRunner; import org.apache.nifi.util.TestRunners; import org.joda.time.DateTimeUtils; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; import java.io.IOException; import java.sql.Connection; import java.sql.Date; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Statement; import java.sql.Time; import java.sql.Timestamp; import java.sql.Types; import java.util.Arrays; import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; import javax.annotation.Nonnull; public class GetTableDataTest { /** * Identifier for JDBC service */ private static final String JDBC_SERVICE_IDENTIFIER = "MockDBCPService"; /** * Identifier for Metadata service */ private static final String METADATA_SERVICE_IDENTIFIER = "MockMetadataProviderService"; /** * Test runner */ private final TestRunner runner = TestRunners.newTestRunner(GetTableData.class); /** * Initialize instance variables */ @Before public void setUp() throws Exception { // Setup services final DBCPService jdbcService = new MockDBCPService(); final MetadataProviderService metadataService = new MockMetadataService(); // Setup test runner runner.addControllerService(JDBC_SERVICE_IDENTIFIER, jdbcService); runner.addControllerService(METADATA_SERVICE_IDENTIFIER, metadataService); runner.enableControllerService(jdbcService); runner.enableControllerService(metadataService); runner.setProperty(GetTableData.JDBC_SERVICE, JDBC_SERVICE_IDENTIFIER); runner.setProperty(CommonProperties.METADATA_SERVICE, METADATA_SERVICE_IDENTIFIER); runner.setProperty(GetTableData.TABLE_NAME, "mytable"); runner.setProperty(GetTableData.TABLE_SPECS, "id\nfirst_name\nlast_name\nemail \n last_updated \n\n"); } /** * Reset current time */ @After public void tearDown() { DateTimeUtils.setCurrentMillisSystem(); } /** * Verify property validators */ @Test public void testValidators() { // Test with no properties runner.removeProperty(GetTableData.JDBC_SERVICE); runner.removeProperty(CommonProperties.METADATA_SERVICE); List<ValidationResult> results = (List<ValidationResult>) ((MockProcessContext) runner.getProcessContext()).validate(); if (results.size() != 2) { Assert.fail("Expected 2 validation errors but found: " + results); } Assert.assertEquals("'Source Database Connection' is invalid because Source Database Connection is required", results.get(0).toString()); Assert.assertEquals("'Metadata Service' is invalid because Metadata Service is required", results.get(1).toString()); // Test with valid properties runner.setProperty(GetTableData.JDBC_SERVICE, JDBC_SERVICE_IDENTIFIER); runner.setProperty(CommonProperties.METADATA_SERVICE, METADATA_SERVICE_IDENTIFIER); results = (List<ValidationResult>) ((MockProcessContext) runner.getProcessContext()).validate(); Assert.assertEquals(0, results.size()); } /** * Verify Avro output. */ @Test public void testAvro() throws IOException { // Trigger processor runner.setProperty(GetTableData.OUTPUT_TYPE, GetTableDataSupport.OutputType.AVRO.toString()); runner.enqueue(new byte[0]); runner.run(); List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(CommonProperties.REL_SUCCESS); Assert.assertEquals(0, runner.getFlowFilesForRelationship(CommonProperties.REL_FAILURE).size()); Assert.assertEquals(0, runner.getFlowFilesForRelationship(GetTableData.REL_NO_DATA).size()); Assert.assertEquals(1, flowFiles.size()); Assert.assertEquals("2", flowFiles.get(0).getAttribute(GetTableData.RESULT_ROW_COUNT)); Assert.assertEquals("2", flowFiles.get(0).getAttribute(ComponentAttributes.NUM_SOURCE_RECORDS.key())); // Build Avro record reader final SeekableInput avroInput = new SeekableByteArrayInput(flowFiles.get(0).toByteArray()); final Schema schema = SchemaBuilder .record("NiFi_ExecuteSQL_Record").namespace("any.data") .fields() .name("id").type().nullable().intType().noDefault() .name("first_name").type().nullable().stringType().noDefault() .name("last_name").type().nullable().stringType().noDefault() .name("email").type().nullable().stringType().noDefault() .name("last_updated").type().nullable().stringType().noDefault() .endRecord(); final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema); final DataFileReader<GenericRecord> dataReader = new DataFileReader<>(avroInput, datumReader); // Verify Avro records List<GenericRecord> records = StreamSupport.stream(dataReader.spliterator(), false).collect(Collectors.toList()); Assert.assertEquals(2, records.size()); Assert.assertEquals(1, records.get(0).get(0)); Assert.assertEquals(new Utf8("Mike"), records.get(0).get(1)); Assert.assertEquals(new Utf8("Hillyer"), records.get(0).get(2)); Assert.assertEquals(new Utf8("Mike.Hillyer@sakilastaff.com"), records.get(0).get(3)); Assert.assertEquals(new Utf8("2006-02-15T03:57:16.000Z"), records.get(0).get(4)); Assert.assertEquals(2, records.get(1).get(0)); Assert.assertEquals(new Utf8("Jon"), records.get(1).get(1)); Assert.assertEquals(new Utf8("Stephens"), records.get(1).get(2)); Assert.assertEquals(new Utf8("Jon.Stephens@sakilastaff.com"), records.get(1).get(3)); Assert.assertEquals(new Utf8("2006-02-15T03:57:16.000Z"), records.get(1).get(4)); } /** * Verify an incremental load from the database. */ @Test public void testIncremental() { // Test first load DateTimeUtils.setCurrentMillisFixed(1139979600000L); runner.setProperty(GetTableData.LOAD_STRATEGY, GetTableData.LoadStrategy.INCREMENTAL.toString()); runner.setProperty(GetTableData.DATE_FIELD, "last_updated"); runner.setProperty(GetTableData.OVERLAP_TIME, "5 minutes"); runner.enqueue(new byte[0], Collections.singletonMap(ComponentAttributes.HIGH_WATER_DATE.key(), null)); runner.run(); List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(CommonProperties.REL_SUCCESS); Assert.assertEquals(0, runner.getFlowFilesForRelationship(CommonProperties.REL_FAILURE).size()); Assert.assertEquals(0, runner.getFlowFilesForRelationship(GetTableData.REL_NO_DATA).size()); Assert.assertEquals(1, flowFiles.size()); Assert.assertEquals("3", flowFiles.get(0).getAttribute(GetTableData.RESULT_ROW_COUNT)); Assert.assertEquals("3", flowFiles.get(0).getAttribute(ComponentAttributes.NUM_SOURCE_RECORDS.key())); Assert.assertEquals("2006-02-15T04:47:30", flowFiles.get(0).getAttribute(ComponentAttributes.HIGH_WATER_DATE.key())); flowFiles.get(0).assertContentEquals("id,first_name,last_name,email,last_updated\n" + "1,MARY,SMITH,MARY.SMITH@sakilacustomer.org,2006-02-15T04:32:21.000Z\n" + "2,PATRICIA,JOHNSON,PATRICIA.JOHNSON@sakilacustomer.org,2006-02-15T04:25:50.000Z\n" + "3,LINDA,WILLIAMS,LINDA.WILLIAMS@sakilacustomer.org,2006-02-15T04:47:30.000Z\n"); // Test second load DateTimeUtils.setCurrentMillisFixed(1139983200000L); runner.clearTransferState(); runner.enqueue(new byte[0], Collections.singletonMap(ComponentAttributes.HIGH_WATER_DATE.key(), "2006-02-15T04:47:30.000Z")); runner.run(); flowFiles = runner.getFlowFilesForRelationship(CommonProperties.REL_SUCCESS); Assert.assertEquals(0, runner.getFlowFilesForRelationship(CommonProperties.REL_FAILURE).size()); Assert.assertEquals(0, runner.getFlowFilesForRelationship(GetTableData.REL_NO_DATA).size()); Assert.assertEquals(1, flowFiles.size()); Assert.assertEquals("3", flowFiles.get(0).getAttribute(GetTableData.RESULT_ROW_COUNT)); Assert.assertEquals("3", flowFiles.get(0).getAttribute(ComponentAttributes.NUM_SOURCE_RECORDS.key())); Assert.assertEquals("2006-02-15T05:15:33", flowFiles.get(0).getAttribute(ComponentAttributes.HIGH_WATER_DATE.key())); flowFiles.get(0).assertContentEquals("id,first_name,last_name,email,last_updated\n" + "3,LINDA,WILLIAMS,LINDA.WILLIAMS@sakilacustomer.org,2006-02-15T04:47:30.000Z\n" + "4,BARBARA,JONES,BARBARA.JONES@sakilacustomer.org,2006-02-15T04:57:14.000Z\n" + "5,ELIZABETH,BROWN,ELIZABETH.BROWN@sakilacustomer.org,2006-02-15T05:15:33.000Z\n"); } /** * Verify a full load from the database. */ @Test public void testFullLoad() { runner.enqueue(new byte[0]); runner.run(); List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(CommonProperties.REL_SUCCESS); Assert.assertEquals(0, runner.getFlowFilesForRelationship(CommonProperties.REL_FAILURE).size()); Assert.assertEquals(0, runner.getFlowFilesForRelationship(GetTableData.REL_NO_DATA).size()); Assert.assertEquals(1, flowFiles.size()); Assert.assertEquals("2", flowFiles.get(0).getAttribute(GetTableData.RESULT_ROW_COUNT)); Assert.assertEquals("2", flowFiles.get(0).getAttribute(ComponentAttributes.NUM_SOURCE_RECORDS.key())); flowFiles.get(0).assertContentEquals("id,first_name,last_name,email,last_updated\n" + "1,Mike,Hillyer,Mike.Hillyer@sakilastaff.com,2006-02-15T03:57:16.000Z\n" + "2,Jon,Stephens,Jon.Stephens@sakilastaff.com,2006-02-15T03:57:16.000Z\n"); } /** * Verify output for no data. */ @Test public void testNoData() { runner.setProperty(GetTableData.TABLE_NAME, "empty"); runner.setProperty(GetTableData.TABLE_SPECS, "id\nemail"); runner.enqueue(new byte[0]); runner.run(); List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(GetTableData.REL_NO_DATA); Assert.assertEquals(0, runner.getFlowFilesForRelationship(CommonProperties.REL_FAILURE).size()); Assert.assertEquals(0, runner.getFlowFilesForRelationship(CommonProperties.REL_SUCCESS).size()); Assert.assertEquals(1, flowFiles.size()); Assert.assertEquals("0", flowFiles.get(0).getAttribute(GetTableData.RESULT_ROW_COUNT)); Assert.assertEquals("0", flowFiles.get(0).getAttribute(ComponentAttributes.NUM_SOURCE_RECORDS.key())); flowFiles.get(0).assertContentEquals("id,email\n"); } /** * Verify using a custom output delimiter. */ @Test public void testOutputDelimiter() { runner.setProperty(GetTableData.OUTPUT_DELIMITER, "|"); runner.enqueue(new byte[0]); runner.run(); List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(CommonProperties.REL_SUCCESS); Assert.assertEquals(0, runner.getFlowFilesForRelationship(CommonProperties.REL_FAILURE).size()); Assert.assertEquals(0, runner.getFlowFilesForRelationship(GetTableData.REL_NO_DATA).size()); Assert.assertEquals(1, flowFiles.size()); Assert.assertEquals("2", flowFiles.get(0).getAttribute(GetTableData.RESULT_ROW_COUNT)); Assert.assertEquals("2", flowFiles.get(0).getAttribute(ComponentAttributes.NUM_SOURCE_RECORDS.key())); flowFiles.get(0).assertContentEquals("id|first_name|last_name|email|last_updated\n" + "1|Mike|Hillyer|Mike.Hillyer@sakilastaff.com|2006-02-15T03:57:16.000Z\n" + "2|Jon|Stephens|Jon.Stephens@sakilastaff.com|2006-02-15T03:57:16.000Z\n"); } /** * A mock implementation of {@link DBCPService} for unit testing. */ private class MockDBCPService extends AbstractControllerService implements DBCPService { /** * A SQL connection */ final Connection connection = Mockito.mock(Connection.class); /** * A SQL statement */ final Statement statement = Mockito.mock(Statement.class); /** * Constructs a {@code MockDBCPService}. */ MockDBCPService() throws Exception { Mockito.when(connection.createStatement()).thenReturn(statement); Mockito.when(connection.prepareStatement("select id,first_name,last_name,email,last_updated from mytable WHERE last_updated > ? and last_updated < ?")) .then(invocation -> getIncrementalResults()); Mockito.when(statement.executeQuery("SELECT id,email FROM empty")).then(invocation -> getEmptyResults()); Mockito.when(statement.executeQuery("SELECT id,first_name,last_name,email,last_updated FROM mytable")).then(invocation -> getSimpleResults()); } @Override public Connection getConnection() { return connection; } /** * Creates a {@link ResultSet} for the specified metadata and rows. * * @param metadata the result set metadata * @param rows the rows in the result set * @return a new result set * @throws SQLException never */ ResultSet getResultSet(@Nonnull final ResultSetMetaData metadata, @Nonnull final Object[][] rows) throws SQLException { final LinkedList<Object[]> queue = new LinkedList<>(Arrays.asList(rows)); queue.push(new Object[0]); final ResultSet results = Mockito.mock(ResultSet.class); Mockito.when(results.getMetaData()).thenReturn(metadata); Mockito.when(results.next()).then(invocation -> { queue.poll(); return !queue.isEmpty(); }); Mockito.when(results.getDate(Mockito.anyInt())).then(invocation -> { final int index = invocation.getArgumentAt(0, Integer.class) - 1; final Object object = queue.peek()[index]; if (object instanceof Date) { return object; } else { throw new SQLException("Not a Date: " + object); } }); Mockito.when(results.getObject(Mockito.anyInt())).then(invocation -> { final int index = invocation.getArgumentAt(0, Integer.class) - 1; return queue.peek()[index]; }); Mockito.when(results.getString(Mockito.anyInt())).then(invocation -> { final int index = invocation.getArgumentAt(0, Integer.class) - 1; return queue.peek()[index].toString(); }); Mockito.when(results.getTime(Mockito.anyInt())).then(invocation -> { final int index = invocation.getArgumentAt(0, Integer.class) - 1; final Object object = queue.peek()[index]; if (object instanceof Time) { return object; } else { throw new SQLException("Not a Time: " + object); } }); Mockito.when(results.getTimestamp(Mockito.anyInt())).then(invocation -> { final int index = invocation.getArgumentAt(0, Integer.class) - 1; final Object object = queue.peek()[index]; if (object instanceof Timestamp) { return object; } else { throw new SQLException("Not a Timestamp: " + object); } }); return results; } /** * Creates an empty result set. * * @return a new result set * @throws SQLException never */ ResultSet getEmptyResults() throws SQLException { final ResultSetMetaData metadata = Mockito.mock(ResultSetMetaData.class); Mockito.when(metadata.getColumnCount()).thenReturn(2); Mockito.when(metadata.getColumnName(1)).thenReturn("id"); Mockito.when(metadata.getColumnName(2)).thenReturn("email"); Mockito.when(metadata.getColumnType(1)).thenReturn(Types.TINYINT); Mockito.when(metadata.getColumnType(2)).thenReturn(Types.VARCHAR); return getResultSet(metadata, new Object[0][]); } /** * Creates a prepared statement for incremental results. * * @return a new prepared statement * @throws SQLException never */ PreparedStatement getIncrementalResults() throws SQLException { final ResultSetMetaData metadata = Mockito.mock(ResultSetMetaData.class); Mockito.when(metadata.getColumnCount()).thenReturn(5); Mockito.when(metadata.getColumnName(1)).thenReturn("id"); Mockito.when(metadata.getColumnName(2)).thenReturn("first_name"); Mockito.when(metadata.getColumnName(3)).thenReturn("last_name"); Mockito.when(metadata.getColumnName(4)).thenReturn("email"); Mockito.when(metadata.getColumnName(5)).thenReturn("last_updated"); Mockito.when(metadata.getColumnType(1)).thenReturn(Types.TINYINT); Mockito.when(metadata.getColumnType(2)).thenReturn(Types.VARCHAR); Mockito.when(metadata.getColumnType(3)).thenReturn(Types.VARCHAR); Mockito.when(metadata.getColumnType(4)).thenReturn(Types.VARCHAR); Mockito.when(metadata.getColumnType(5)).thenReturn(Types.TIMESTAMP); final Object[][] rows = new Object[][]{ new Object[]{1, "MARY", "SMITH", "MARY.SMITH@sakilacustomer.org", new Timestamp(1139977941000L)}, new Object[]{2, "PATRICIA", "JOHNSON", "PATRICIA.JOHNSON@sakilacustomer.org", new Timestamp(1139977550000L)}, new Object[]{3, "LINDA", "WILLIAMS", "LINDA.WILLIAMS@sakilacustomer.org", new Timestamp(1139978850000L)}, new Object[]{4, "BARBARA", "JONES", "BARBARA.JONES@sakilacustomer.org", new Timestamp(1139979434000L)}, new Object[]{5, "ELIZABETH", "BROWN", "ELIZABETH.BROWN@sakilacustomer.org", new Timestamp(1139980533000L)}, }; final AtomicReference<Timestamp> before = new AtomicReference<>(); final AtomicReference<Timestamp> after = new AtomicReference<>(); final PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class); Mockito.when(preparedStatement.executeQuery()).then(invocation -> { final Object[][] selectedRows = Stream.of(rows).filter(row -> ((Timestamp) row[4]).after(after.get()) && ((Timestamp) row[4]).before(before.get())).toArray(Object[][]::new); return getResultSet(metadata, selectedRows); }); Mockito.doAnswer(invocation -> { after.set(invocation.getArgumentAt(1, Timestamp.class)); return after.get(); }).when(preparedStatement).setTimestamp(Mockito.eq(1), Mockito.any()); Mockito.doAnswer(invocation -> { before.set(invocation.getArgumentAt(1, Timestamp.class)); return before.get(); }).when(preparedStatement).setTimestamp(Mockito.eq(2), Mockito.any()); return preparedStatement; } /** * Creates a simple result set. * * @return a new result set * @throws SQLException never */ ResultSet getSimpleResults() throws SQLException { final ResultSetMetaData metadata = Mockito.mock(ResultSetMetaData.class); Mockito.when(metadata.getColumnCount()).thenReturn(5); Mockito.when(metadata.getColumnName(1)).thenReturn("id"); Mockito.when(metadata.getColumnName(2)).thenReturn("first_name"); Mockito.when(metadata.getColumnName(3)).thenReturn("last_name"); Mockito.when(metadata.getColumnName(4)).thenReturn("email"); Mockito.when(metadata.getColumnName(5)).thenReturn("last_updated"); Mockito.when(metadata.getColumnType(1)).thenReturn(Types.TINYINT); Mockito.when(metadata.getColumnType(2)).thenReturn(Types.VARCHAR); Mockito.when(metadata.getColumnType(3)).thenReturn(Types.VARCHAR); Mockito.when(metadata.getColumnType(4)).thenReturn(Types.VARCHAR); Mockito.when(metadata.getColumnType(5)).thenReturn(Types.TIMESTAMP); Mockito.when(metadata.getTableName(Mockito.anyInt())).thenReturn("mytable"); final Object[][] rows = new Object[][]{ new Object[]{1, "Mike", "Hillyer", "Mike.Hillyer@sakilastaff.com", new Timestamp(1139975836000L)}, new Object[]{2, "Jon", "Stephens", "Jon.Stephens@sakilastaff.com", new Timestamp(1139975836000L)} }; return getResultSet(metadata, rows); } } /** * A mock implementation of {@link MetadataProviderService} for unit testing. */ private class MockMetadataService extends AbstractControllerService implements MetadataProviderService { @Override public KyloNiFiFlowProvider getKyloNiFiFlowProvider() { throw new UnsupportedOperationException(); } @Override public MetadataProvider getProvider() { throw new UnsupportedOperationException(); } @Override public MetadataRecorder getRecorder() { throw new UnsupportedOperationException(); } } }