package com.thinkbiganalytics.nifi.v2.ingest;
/*-
* #%L
* thinkbig-nifi-core-processors
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.google.common.collect.ImmutableMap;
import com.thinkbiganalytics.nifi.v2.thrift.ThriftService;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.util.MockProcessContext;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mockito.InOrder;
import org.mockito.Mockito;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Collection;
public class RegisterFeedTablesTest {
/**
 * Identifier under which the mock thrift service is registered with the test runner
 * and referenced from the processor's thrift service property.
 */
private static final String THRIFT_SERVICE_IDENTIFIER = "MockThriftService";
/**
 * Test runner wrapping a {@link RegisterFeedTables} processor instance.
 */
private final TestRunner runner = TestRunners.newTestRunner(RegisterFeedTables.class);
/**
 * Mock thrift service; a fresh instance is assigned in {@link #setUp()} before each test.
 */
private MockThriftService thriftService;
/**
 * Creates a fresh mock thrift service and wires it into the test runner before each test.
 *
 * @throws Exception if the mock service cannot be created or registered
 */
@Before
public void setUp() throws Exception {
    // Build the mock service and keep a handle on it so tests can inspect its JDBC mocks
    final MockThriftService mockService = new MockThriftService();
    thriftService = mockService;

    // Register and enable the service, then point the processor property at it
    runner.addControllerService(THRIFT_SERVICE_IDENTIFIER, mockService);
    runner.enableControllerService(mockService);
    runner.setProperty(IngestProperties.THRIFT_SERVICE, THRIFT_SERVICE_IDENTIFIER);
}
/**
 * Verify that validation reports no problems when only the thrift service is configured.
 */
@Test
public void testValidators() {
    runner.enqueue(new byte[0]);

    final MockProcessContext context = (MockProcessContext) runner.getProcessContext();
    final Collection<ValidationResult> problems = context.validate();

    Assert.assertEquals(0, problems.size());
}
/**
 * Verify registering tables, first with only the required properties and then with all
 * properties set. Both passes share one {@link InOrder} verifier, so the statements of the
 * second run are checked as a continuation of the first.
 */
@Test
public void testRegisterTables() throws Exception {
    final Statement stmt = thriftService.statement;
    final ImmutableMap<String, String> attributes = ImmutableMap.of("metadata.category.systemName", "movies", "metadata.systemFeedName", "artists");

    // Statements that are identical in both passes
    final String createDatabase = "CREATE DATABASE IF NOT EXISTS `movies`";
    final String useDatabase = "use `movies`";
    final String showTables = "show tables like 'artists*'";

    // ---- Pass 1: only required properties ----
    final String defaultFeedSql = "CREATE EXTERNAL TABLE IF NOT EXISTS `movies`.`artists_feed` (`id` string, `first_name` string, `last_name` string) "
                                  + "PARTITIONED BY (`processing_dttm` string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' STORED AS TEXTFILE "
                                  + "LOCATION '/model.db/movies/artists/feed'";
    final String defaultInvalidSql = "CREATE TABLE IF NOT EXISTS `movies`.`artists_invalid` (`id` string, `first_name` string, `last_name` string, dlp_reject_reason string "
                                     + ") PARTITIONED BY (`processing_dttm` string) STORED AS ORC LOCATION '/model.db/movies/artists/invalid'";
    final String defaultValidSql = "CREATE TABLE IF NOT EXISTS `movies`.`artists_valid` (`id` int, `first_name` string, `last_name` string) "
                                   + "PARTITIONED BY (`processing_dttm` string) STORED AS ORC LOCATION '/model.db/movies/artists/valid'";
    final String defaultMasterSql = "CREATE TABLE IF NOT EXISTS `movies`.`artists` (`id` int, `first_name` string, `last_name` string, processing_dttm string) STORED AS ORC "
                                    + "LOCATION '/app/warehouse/movies/artists'";
    final String defaultProfileSql = "CREATE TABLE IF NOT EXISTS `movies`.`artists_profile` ( `columnname` string,`metrictype` string,`metricvalue` string) "
                                     + "PARTITIONED BY (`processing_dttm` string) STORED AS ORC LOCATION '/model.db/movies/artists/profile'";

    runner.setProperty(IngestProperties.FIELD_SPECIFICATION, "id|int\nfirst_name|string\nlast_name|string");
    runner.enqueue(new byte[0], attributes);
    runner.run();

    Assert.assertEquals(0, runner.getFlowFilesForRelationship(IngestProperties.REL_FAILURE).size());
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(IngestProperties.REL_SUCCESS).size());

    final InOrder inOrder = Mockito.inOrder(stmt);
    inOrder.verify(stmt).execute(createDatabase);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(useDatabase);
    inOrder.verify(stmt).executeQuery(showTables);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(defaultFeedSql);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(defaultInvalidSql);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(defaultValidSql);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(defaultMasterSql);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(defaultProfileSql);
    inOrder.verify(stmt).close();
    inOrder.verifyNoMoreInteractions();

    // ---- Pass 2: all properties ----
    final String customFeedSql = "CREATE EXTERNAL TABLE IF NOT EXISTS `movies`.`artists_feed` (`id` string, `first_name` string, `last_name` string) "
                                 + "PARTITIONED BY (`processing_dttm` string) ROW FORMAT DELIMITED LINES TERMINATED BY '\n' STORED AS TEXTFILE "
                                 + "LOCATION '/model.db/movies/artists/feed'";
    final String customInvalidSql = "CREATE TABLE IF NOT EXISTS `movies`.`artists_invalid` (`id` string, `first_name` string, `last_name` string, dlp_reject_reason string "
                                    + ") PARTITIONED BY (`processing_dttm` string) STORED AS PARQUET LOCATION '/model.db/movies/artists/invalid' "
                                    + "TBLPROPERTIES (\"comment\"=\"Movie Actors\")";
    final String customValidSql = "CREATE TABLE IF NOT EXISTS `movies`.`artists_valid` (`id` int, `first_name` string, `last_name` string) "
                                  + "PARTITIONED BY (`processing_dttm` string) STORED AS PARQUET LOCATION '/model.db/movies/artists/valid' "
                                  + "TBLPROPERTIES (\"comment\"=\"Movie Actors\")";
    final String customMasterSql = "CREATE TABLE IF NOT EXISTS `movies`.`artists` (`id` int, `first_name` string, `last_name` string, processing_dttm string) PARTITIONED BY (`year` int) "
                                   + "STORED AS PARQUET LOCATION '/app/warehouse/movies/artists' TBLPROPERTIES (\"comment\"=\"Movie Actors\")";
    final String customProfileSql = "CREATE TABLE IF NOT EXISTS `movies`.`artists_profile` ( `columnname` string,`metrictype` string,`metricvalue` string) "
                                    + "PARTITIONED BY (`processing_dttm` string) STORED AS PARQUET LOCATION '/model.db/movies/artists/profile'";

    runner.setProperty(IngestProperties.PARTITION_SPECS, "year|int");
    runner.setProperty(IngestProperties.FEED_FORMAT_SPECS, "ROW FORMAT DELIMITED LINES TERMINATED BY '\n' STORED AS TEXTFILE");
    runner.setProperty(IngestProperties.TARGET_FORMAT_SPECS, "STORED AS PARQUET");
    runner.setProperty(IngestProperties.TARGET_TBLPROPERTIES, "TBLPROPERTIES (\"comment\"=\"Movie Actors\")");
    runner.enqueue(new byte[0], attributes);
    runner.run();

    // The success relationship now also holds the flow file from the first pass
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(IngestProperties.REL_FAILURE).size());
    Assert.assertEquals(2, runner.getFlowFilesForRelationship(IngestProperties.REL_SUCCESS).size());

    inOrder.verify(stmt).execute(createDatabase);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(useDatabase);
    inOrder.verify(stmt).executeQuery(showTables);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(customFeedSql);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(customInvalidSql);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(customValidSql);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(customMasterSql);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(customProfileSql);
    inOrder.verify(stmt).close();
    inOrder.verifyNoMoreInteractions();
}
/**
 * Verify registering tables when some of them already exist.
 *
 * <p>The mocked {@code show tables} result reports {@code artists}, {@code artists_valid} and
 * {@code artists_invalid}; the test then expects only the feed and profile tables to be created.</p>
 */
@Test
public void testRegisterTablesWithExisting() throws Exception {
    // Mock 'show tables' results. Consecutive return values must be supplied in a single
    // stubbing: repeating when(...).thenReturn(...) for the same call overrides the earlier
    // stubbing, which would leave next() permanently returning false (an empty result set).
    final ResultSet existingTables = thriftService.artistsTablesResults;
    Mockito.when(existingTables.next()).thenReturn(true, true, true, false);
    Mockito.when(existingTables.getString(1)).thenReturn("artists", "artists_valid", "artists_invalid");

    // Run flow
    runner.setProperty(IngestProperties.FIELD_SPECIFICATION, "id|int\nfirst_name|string\nlast_name|string");
    runner.enqueue(new byte[0], ImmutableMap.of("metadata.category.systemName", "movies", "metadata.systemFeedName", "artists"));
    runner.run();

    Assert.assertEquals(0, runner.getFlowFilesForRelationship(IngestProperties.REL_FAILURE).size());
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(IngestProperties.REL_SUCCESS).size());

    // Verify SQL
    final InOrder inOrder = Mockito.inOrder(thriftService.statement);
    inOrder.verify(thriftService.statement).execute("CREATE DATABASE IF NOT EXISTS `movies`");
    inOrder.verify(thriftService.statement).close();
    inOrder.verify(thriftService.statement).execute("use `movies`");
    inOrder.verify(thriftService.statement).executeQuery("show tables like 'artists*'");
    inOrder.verify(thriftService.statement).close();
    inOrder.verify(thriftService.statement).execute("CREATE EXTERNAL TABLE IF NOT EXISTS `movies`.`artists_feed` (`id` string, `first_name` string, `last_name` string) "
                                                    + "PARTITIONED BY (`processing_dttm` string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' STORED AS TEXTFILE "
                                                    + "LOCATION '/model.db/movies/artists/feed'");
    inOrder.verify(thriftService.statement).close();
    inOrder.verify(thriftService.statement).execute("CREATE TABLE IF NOT EXISTS `movies`.`artists_profile` ( `columnname` string,`metrictype` string,`metricvalue` string) "
                                                    + "PARTITIONED BY (`processing_dttm` string) STORED AS ORC LOCATION '/model.db/movies/artists/profile'");
    inOrder.verify(thriftService.statement).close();
    inOrder.verifyNoMoreInteractions();
}
/**
 * Verify that a flow file without the category system name attribute is routed to failure.
 */
@Test
public void testRegisterTablesWithMissingCategory() {
    // Only the feed name is supplied; the category attribute is deliberately absent
    final ImmutableMap<String, String> attributes = ImmutableMap.of("metadata.systemFeedName", "artists");

    runner.setProperty(IngestProperties.FIELD_SPECIFICATION, "data|string");
    runner.enqueue(new byte[0], attributes);
    runner.run();

    final int failures = runner.getFlowFilesForRelationship(IngestProperties.REL_FAILURE).size();
    final int successes = runner.getFlowFilesForRelationship(IngestProperties.REL_SUCCESS).size();
    Assert.assertEquals(1, failures);
    Assert.assertEquals(0, successes);
}
/**
 * Verify that a flow file without the feed system name attribute is routed to failure.
 */
@Test
public void testRegisterTablesWithMissingFeed() {
    // Neither flow file carries the "metadata.systemFeedName" attribute
    final ImmutableMap<String, String> categoryOnly = ImmutableMap.of("metadata.category.systemName", "movies");
    final ImmutableMap<String, String> unrelatedFeedKey = ImmutableMap.of("feed", "artists");

    runner.setProperty(IngestProperties.FIELD_SPECIFICATION, "data|string");
    runner.enqueue(new byte[0], categoryOnly);
    runner.enqueue(new byte[0], unrelatedFeedKey);
    // NOTE(review): two flow files are enqueued but run() is invoked once and the assertions
    // below account for a single routed flow file - confirm the second enqueue is intentional.
    runner.run();

    final int failures = runner.getFlowFilesForRelationship(IngestProperties.REL_FAILURE).size();
    final int successes = runner.getFlowFilesForRelationship(IngestProperties.REL_SUCCESS).size();
    Assert.assertEquals(1, failures);
    Assert.assertEquals(0, successes);
}
/**
 * Verify that a flow file is routed to failure when no field specification property is set.
 */
@Test
public void testRegisterTablesWithMissingFieldSpecification() {
    // Category and feed are present; FIELD_SPECIFICATION is intentionally left unset
    final ImmutableMap<String, String> attributes = ImmutableMap.of("metadata.category.systemName", "movies", "metadata.systemFeedName", "artists");

    runner.enqueue(new byte[0], attributes);
    runner.run();

    final int failures = runner.getFlowFilesForRelationship(IngestProperties.REL_FAILURE).size();
    final int successes = runner.getFlowFilesForRelationship(IngestProperties.REL_SUCCESS).size();
    Assert.assertEquals(1, failures);
    Assert.assertEquals(0, successes);
}
/**
 * Verify that only the master table is registered when the table type is {@code MASTER}.
 */
@Test
public void testRegisterTablesWithTableType() throws Exception {
    final Statement stmt = thriftService.statement;
    final ImmutableMap<String, String> attributes = ImmutableMap.of("metadata.category.systemName", "movies", "metadata.systemFeedName", "artists");
    final String createDatabase = "CREATE DATABASE IF NOT EXISTS `movies`";
    final String createMaster = "CREATE TABLE IF NOT EXISTS `movies`.`artists` (`id` int, `first_name` string, `last_name` string, processing_dttm string) STORED AS ORC "
                                + "LOCATION '/app/warehouse/movies/artists'";

    // Run flow
    runner.setProperty(IngestProperties.FIELD_SPECIFICATION, "id|int\nfirst_name|string\nlast_name|string");
    runner.setProperty(RegisterFeedTables.TABLE_TYPE, "MASTER");
    runner.enqueue(new byte[0], attributes);
    runner.run();

    Assert.assertEquals(0, runner.getFlowFilesForRelationship(IngestProperties.REL_FAILURE).size());
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(IngestProperties.REL_SUCCESS).size());

    // Verify SQL: the database is created, then the master table, and nothing else
    final InOrder inOrder = Mockito.inOrder(stmt);
    inOrder.verify(stmt).execute(createDatabase);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(createMaster);
    inOrder.verify(stmt).close();
    inOrder.verifyNoMoreInteractions();
}
/**
 * Verify registering all tables when the flow file overrides the table root locations through
 * the {@code hive.ingest.root}, {@code hive.profile.root} and {@code hive.master.root} attributes.
 */
@Test
public void testRegisterTablesWithConfig() throws Exception {
    final Statement stmt = thriftService.statement;
    final ImmutableMap<String, String> attributes = ImmutableMap
        .of("metadata.category.systemName", "movies", "metadata.systemFeedName", "artists", "hive.ingest.root", "/var/ingest", "hive.profile.root", "/var/profile/",
            "hive.master.root", "/master");

    final String createFeed = "CREATE EXTERNAL TABLE IF NOT EXISTS `movies`.`artists_feed` (`id` string, `first_name` string, `last_name` string) PARTITIONED BY (`processing_dttm` string) "
                              + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' STORED AS TEXTFILE LOCATION '/var/ingest/movies/artists/feed'";
    final String createInvalid = "CREATE TABLE IF NOT EXISTS `movies`.`artists_invalid` (`id` string, `first_name` string, `last_name` string, dlp_reject_reason string"
                                 + " ) PARTITIONED BY (`processing_dttm` string) STORED AS ORC LOCATION '/var/ingest/movies/artists/invalid'";
    final String createValid = "CREATE TABLE IF NOT EXISTS `movies`.`artists_valid` (`id` int, `first_name` string, `last_name` string) "
                               + "PARTITIONED BY (`processing_dttm` string) STORED AS ORC LOCATION '/var/ingest/movies/artists/valid'";
    final String createMaster = "CREATE TABLE IF NOT EXISTS `movies`.`artists` (`id` int, `first_name` string, `last_name` string, processing_dttm string) STORED AS ORC "
                                + "LOCATION '/master/movies/artists'";
    final String createProfile = "CREATE TABLE IF NOT EXISTS `movies`.`artists_profile` ( `columnname` string,`metrictype` string,`metricvalue` string) "
                                 + "PARTITIONED BY (`processing_dttm` string) STORED AS ORC LOCATION '/var/profile/movies/artists/profile'";

    // Run flow
    runner.setProperty(IngestProperties.FIELD_SPECIFICATION, "id|int\nfirst_name|string\nlast_name|string");
    runner.setProperty(RegisterFeedTables.TABLE_TYPE, "ALL");
    runner.enqueue(new byte[0], attributes);
    runner.run();

    Assert.assertEquals(0, runner.getFlowFilesForRelationship(IngestProperties.REL_FAILURE).size());
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(IngestProperties.REL_SUCCESS).size());

    // Verify SQL, starting at the first table creation statement
    final InOrder inOrder = Mockito.inOrder(stmt);
    inOrder.verify(stmt).execute(createFeed);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(createInvalid);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(createValid);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(createMaster);
    inOrder.verify(stmt).close();
    inOrder.verify(stmt).execute(createProfile);
    inOrder.verify(stmt).close();
    inOrder.verifyNoMoreInteractions();
}
/**
* A mock implementation of {@link ThriftService} for unit testing.
*/
private class MockThriftService extends AbstractControllerService implements ThriftService {
/**
* Query results for {@code SHOW TABLES}
*/
public final ResultSet artistsTablesResults = Mockito.mock(ResultSet.class);
/**
* Mock connection for unit testing
*/
public final Connection connection = Mockito.mock(Connection.class);
/**
* Mock statement for unit testing
*/
public final Statement statement = Mockito.mock(Statement.class);
/**
* Constructs a {@code MockThriftService}.
*
* @throws Exception never
*/
public MockThriftService() throws Exception {
Mockito.when(connection.createStatement()).thenReturn(statement);
Mockito.when(statement.executeQuery("show tables like 'artists*'")).thenReturn(artistsTablesResults);
}
@Override
public Connection getConnection() throws ProcessException {
return connection;
}
}
}