/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.hive;

import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.dbcp.DBCPService;
import org.apache.nifi.dbcp.hive.HiveDBCPService;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import static org.apache.nifi.processors.hive.SelectHiveQL.HIVEQL_OUTPUT_FORMAT;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

public class TestSelectHiveQL {

    private static final Logger LOGGER;

    static {
        System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info");
        System.setProperty("org.slf4j.simpleLogger.showDateTime", "true");
        System.setProperty("org.slf4j.simpleLogger.log.nifi.io.nio", "debug");
        System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.SelectHiveQL", "debug");
        System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.TestSelectHiveQL", "debug");
        LOGGER = LoggerFactory.getLogger(TestSelectHiveQL.class);
    }

    final static String DB_LOCATION = "target/db";

    final static String QUERY_WITH_EL = "select "
            + " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode"
            + " from persons PER"
            + " where PER.ID > ${person.id}";

    final static String QUERY_WITHOUT_EL = "select "
            + " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode"
            + " from persons PER"
            + " where PER.ID > 10";

    @BeforeClass
    public static void setupClass() {
        System.setProperty("derby.stream.error.file", "target/derby.log");
    }

    private TestRunner runner;

    @Before
    public void setup() throws InitializationException {
        final DBCPService dbcp = new DBCPServiceSimpleImpl();
        final Map<String, String> dbcpProperties = new HashMap<>();

        runner = TestRunners.newTestRunner(SelectHiveQL.class);
        runner.addControllerService("dbcp", dbcp, dbcpProperties);
        runner.enableControllerService(dbcp);
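        // Point the processor at the Derby-backed mock Hive connection pool service registered above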
        runner.setProperty(SelectHiveQL.HIVE_DBCP_SERVICE, "dbcp");
    }

    @Test
    public void testIncomingConnectionWithNoFlowFile() throws InitializationException {
        runner.setIncomingConnection(true);
        runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, "SELECT * FROM persons");
        runner.run();
        runner.assertTransferCount(SelectHiveQL.REL_SUCCESS, 0);
        runner.assertTransferCount(SelectHiveQL.REL_FAILURE, 0);
    }

    @Test
    public void testNoIncomingConnection() throws ClassNotFoundException, SQLException, InitializationException, IOException {
        runner.setIncomingConnection(false);
        invokeOnTrigger(QUERY_WITHOUT_EL, false, "Avro");
    }

    @Test
    public void testNoTimeLimit() throws InitializationException, ClassNotFoundException, SQLException, IOException {
        invokeOnTrigger(QUERY_WITH_EL, true, "Avro");
    }

    @Test
    public void testWithNullIntColumn() throws SQLException {
        // remove previous test database, if any
        final File dbLocation = new File(DB_LOCATION);
        dbLocation.delete();

        // load test data to database
        final Connection con = ((HiveDBCPService) runner.getControllerService("dbcp")).getConnection();
        Statement stmt = con.createStatement();

        try {
            stmt.execute("drop table TEST_NULL_INT");
        } catch (final SQLException sqle) {
            // Nothing to do here, the table didn't exist
        }

        stmt.execute("create table TEST_NULL_INT (id integer not null, val1 integer, val2 integer, constraint my_pk primary key (id))");

        stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (0, NULL, 1)");
        stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (1, 1, 1)");

        runner.setIncomingConnection(false);
        runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_NULL_INT");
        runner.run();

        runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, 1);
        runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(0).assertAttributeEquals(SelectHiveQL.RESULT_ROW_COUNT, "2");
    }

    @Test
    public void testWithSqlException() throws SQLException {
        // remove previous test database, if any
        final File dbLocation = new File(DB_LOCATION);
        dbLocation.delete();

        // load test data to database
        final Connection con = ((HiveDBCPService) runner.getControllerService("dbcp")).getConnection();
        Statement stmt = con.createStatement();

        try {
            stmt.execute("drop table TEST_NO_ROWS");
        } catch (final SQLException sqle) {
            // Nothing to do here, the table didn't exist
        }

        stmt.execute("create table TEST_NO_ROWS (id integer)");

        runner.setIncomingConnection(false);
        // Try a valid SQL statement that will generate an error (val1 does not exist, e.g.)
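        // Derby rejects the statement at execution time because TEST_NO_ROWS has no val1 column,
        // which the test expects to route the generated FlowFile to the failure relationship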
        runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, "SELECT val1 FROM TEST_NO_ROWS");
        runner.run();

        runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1);
    }

    @Test
    public void invokeOnTriggerWithCsv() throws InitializationException, ClassNotFoundException, SQLException, IOException {
        invokeOnTrigger(QUERY_WITHOUT_EL, false, SelectHiveQL.CSV);
    }

    @Test
    public void invokeOnTriggerWithAvro() throws InitializationException, ClassNotFoundException, SQLException, IOException {
        invokeOnTrigger(QUERY_WITHOUT_EL, false, SelectHiveQL.AVRO);
    }

    public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat)
            throws InitializationException, ClassNotFoundException, SQLException, IOException {

        // remove previous test database, if any
        final File dbLocation = new File(DB_LOCATION);
        dbLocation.delete();

        // load test data to database
        final Connection con = ((HiveDBCPService) runner.getControllerService("dbcp")).getConnection();
        final Statement stmt = con.createStatement();
        try {
            stmt.execute("drop table persons");
        } catch (final SQLException sqle) {
            // Nothing to do here, the table didn't exist
        }
        stmt.execute("create table persons (id integer, name varchar(100), code integer)");
        Random rng = new Random(53496);
        final int nrOfRows = 100;
        stmt.executeUpdate("insert into persons values (1, 'Joe Smith', " + rng.nextInt(469947) + ")");
        for (int i = 2; i < nrOfRows; i++) {
            stmt.executeUpdate("insert into persons values (" + i + ", 'Someone Else', " + rng.nextInt(469947) + ")");
        }
        stmt.executeUpdate("insert into persons values (" + nrOfRows + ", 'Last Person', NULL)");

        LOGGER.info("test data loaded");

        runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, query);
        runner.setProperty(HIVEQL_OUTPUT_FORMAT, outputFormat);

        if (incomingFlowFile) {
            // incoming FlowFile content is not used, but attributes are used
            final Map<String, String> attributes = new HashMap<>();
            attributes.put("person.id", "10");
            runner.enqueue("Hello".getBytes(), attributes);
        }

        runner.setIncomingConnection(incomingFlowFile);
        runner.run();
        runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, 1);

        final List<MockFlowFile> flowfiles = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS);
        MockFlowFile flowFile = flowfiles.get(0);
        final InputStream in = new ByteArrayInputStream(flowFile.toByteArray());
        long recordsFromStream = 0;
        if (SelectHiveQL.AVRO.equals(outputFormat)) {
            assertEquals(SelectHiveQL.AVRO_MIME_TYPE, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key()));
            final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
            try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
                GenericRecord record = null;
                while (dataFileReader.hasNext()) {
                    // Reuse record object by passing it to next(). This saves us from
                    // allocating and garbage collecting many objects for files with
                    // many items.
                    record = dataFileReader.next(record);
                    recordsFromStream++;
                }
            }
        } else {
            assertEquals(SelectHiveQL.CSV_MIME_TYPE, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key()));
            BufferedReader br = new BufferedReader(new InputStreamReader(in));

            String headerRow = br.readLine();
            // Derby capitalizes column names
            assertEquals("PERSONID,PERSONNAME,PERSONCODE", headerRow);

            // Validate rows
            String line;
            while ((line = br.readLine()) != null) {
                recordsFromStream++;
                String[] values = line.split(",");
                if (recordsFromStream < (nrOfRows - 10)) {
                    assertEquals(3, values.length);
                    // The name column is quoted in the CSV output
                    assertTrue(values[1].startsWith("\""));
                    assertTrue(values[1].endsWith("\""));
                } else {
                    // The last row's code column is NULL, so the trailing empty field is dropped by split()
                    assertEquals(2, values.length);
                }
            }
        }
        assertEquals(nrOfRows - 10, recordsFromStream);
        assertEquals(recordsFromStream, Integer.parseInt(flowFile.getAttribute(SelectHiveQL.RESULT_ROW_COUNT)));
    }

    /**
     * Simple implementation only for SelectHiveQL processor testing.
     */
    private class DBCPServiceSimpleImpl extends AbstractControllerService implements HiveDBCPService {

        @Override
        public String getIdentifier() {
            return "dbcp";
        }

        @Override
        public Connection getConnection() throws ProcessException {
            try {
                Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
                final Connection con = DriverManager.getConnection("jdbc:derby:" + DB_LOCATION + ";create=true");
                return con;
            } catch (final Exception e) {
                throw new ProcessException("getConnection failed: " + e);
            }
        }

        @Override
        public String getConnectionURL() {
            return "jdbc:derby:" + DB_LOCATION + ";create=true";
        }
    }
}