/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.drill.exec.hive; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; import org.apache.drill.common.exceptions.UserRemoteException; import org.apache.drill.exec.ExecConstants; import org.apache.drill.exec.planner.physical.PlannerSettings; import org.apache.drill.exec.proto.UserProtos; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.joda.time.DateTime; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import java.math.BigDecimal; import java.sql.Date; import java.sql.Timestamp; import java.util.List; import java.util.Map; import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; public class TestHiveStorage extends HiveTestBase { @BeforeClass public static void setupOptions() throws Exception { test(String.format("alter session set `%s` = true", PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY)); } @Test // DRILL-4083 public void testNativeScanWhenNoColumnIsRead() throws Exception { try { test(String.format("alter session set `%s` = true", ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS)); String query = "SELECT count(*) as col FROM hive.countStar_Parquet"; testPhysicalPlan(query, "hive-drill-native-parquet-scan"); testBuilder() .sqlQuery(query) .unOrdered() .baselineColumns("col") .baselineValues(200l) .go(); } finally { test(String.format("alter session set `%s` = %s", ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS, ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS_VALIDATOR.getDefault().bool_val ? "true" : "false")); } } @Test public void hiveReadWithDb() throws Exception { test("select * from hive.kv"); } @Test public void queryEmptyHiveTable() throws Exception { testBuilder() .sqlQuery("SELECT * FROM hive.empty_table") .expectsEmptyResultSet() .go(); } @Test // DRILL-3328 public void convertFromOnHiveBinaryType() throws Exception { testBuilder() .sqlQuery("SELECT convert_from(binary_field, 'UTF8') col1 from hive.readtest") .unOrdered() .baselineColumns("col1") .baselineValues("binaryfield") .baselineValues(new Object[]{null}) .go(); } /** * Test to ensure Drill reads the all supported types correctly both normal fields (converted to Nullable types) and * partition fields (converted to Required types). * @throws Exception */ @Test public void readAllSupportedHiveDataTypes() throws Exception { testBuilder().sqlQuery("SELECT * FROM hive.readtest") .unOrdered() .baselineColumns( "binary_field", "boolean_field", "tinyint_field", "decimal0_field", "decimal9_field", "decimal18_field", "decimal28_field", "decimal38_field", "double_field", "float_field", "int_field", "bigint_field", "smallint_field", "string_field", "varchar_field", "timestamp_field", "date_field", "char_field", // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now. //"binary_part", "boolean_part", "tinyint_part", "decimal0_part", "decimal9_part", "decimal18_part", "decimal28_part", "decimal38_part", "double_part", "float_part", "int_part", "bigint_part", "smallint_part", "string_part", "varchar_part", "timestamp_part", "date_part", "char_part") .baselineValues( "binaryfield".getBytes(), false, 34, new BigDecimal("66"), new BigDecimal("2347.92"), new BigDecimal("2758725827.99990"), new BigDecimal("29375892739852.8"), new BigDecimal("89853749534593985.783"), 8.345d, 4.67f, 123456, 234235L, 3455, "stringfield", "varcharfield", new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()), new DateTime(Date.valueOf("2013-07-05").getTime()), "charfield", // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now. //"binary", true, 64, new BigDecimal("37"), new BigDecimal("36.90"), new BigDecimal("3289379872.94565"), new BigDecimal("39579334534534.4"), new BigDecimal("363945093845093890.900"), 8.345d, 4.67f, 123456, 234235L, 3455, "string", "varchar", new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()), new DateTime(Date.valueOf("2013-07-05").getTime()), "char") .baselineValues( // All fields are null, but partition fields have non-null values null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now. //"binary", true, 64, new BigDecimal("37"), new BigDecimal("36.90"), new BigDecimal("3289379872.94565"), new BigDecimal("39579334534534.4"), new BigDecimal("363945093845093890.900"), 8.345d, 4.67f, 123456, 234235L, 3455, "string", "varchar", new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()), new DateTime(Date.valueOf("2013-07-05").getTime()), "char") .go(); } /** * Test to ensure Drill reads the all supported types through native Parquet readers. * NOTE: As part of Hive 1.2 upgrade, make sure this test and {@link #readAllSupportedHiveDataTypes()} are merged * into one test. */ @Test public void readAllSupportedHiveDataTypesNativeParquet() throws Exception { try { test(String.format("alter session set `%s` = true", ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS)); final String query = "SELECT * FROM hive.readtest_parquet"; // Make sure the plan has Hive scan with native parquet reader testPhysicalPlan(query, "hive-drill-native-parquet-scan"); testBuilder().sqlQuery(query) .unOrdered() .baselineColumns( "binary_field", "boolean_field", "tinyint_field", "decimal0_field", "decimal9_field", "decimal18_field", "decimal28_field", "decimal38_field", "double_field", "float_field", "int_field", "bigint_field", "smallint_field", "string_field", "varchar_field", "timestamp_field", "char_field", // There is a regression in Hive 1.2.1 in binary and boolean partition columns. Disable for now. //"binary_part", "boolean_part", "tinyint_part", "decimal0_part", "decimal9_part", "decimal18_part", "decimal28_part", "decimal38_part", "double_part", "float_part", "int_part", "bigint_part", "smallint_part", "string_part", "varchar_part", "timestamp_part", "date_part", "char_part") .baselineValues( "binaryfield".getBytes(), false, 34, new BigDecimal("66"), new BigDecimal("2347.92"), new BigDecimal("2758725827.99990"), new BigDecimal("29375892739852.8"), new BigDecimal("89853749534593985.783"), 8.345d, 4.67f, 123456, 234235L, 3455, "stringfield", "varcharfield", new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()), "charfield", // There is a regression in Hive 1.2.1 in binary and boolean partition columns. Disable for now. //"binary", true, 64, new BigDecimal("37"), new BigDecimal("36.90"), new BigDecimal("3289379872.94565"), new BigDecimal("39579334534534.4"), new BigDecimal("363945093845093890.900"), 8.345d, 4.67f, 123456, 234235L, 3455, "string", "varchar", new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()), new DateTime(Date.valueOf("2013-07-05").getTime()), "char") .baselineValues( // All fields are null, but partition fields have non-null values null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, // There is a regression in Hive 1.2.1 in binary and boolean partition columns. Disable for now. //"binary", true, 64, new BigDecimal("37"), new BigDecimal("36.90"), new BigDecimal("3289379872.94565"), new BigDecimal("39579334534534.4"), new BigDecimal("363945093845093890.900"), 8.345d, 4.67f, 123456, 234235L, 3455, "string", "varchar", new DateTime(Timestamp.valueOf("2013-07-05 17:01:00").getTime()), new DateTime(Date.valueOf("2013-07-05").getTime()), "char") .go(); } finally { test(String.format("alter session set `%s` = false", ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS)); } } @Test public void orderByOnHiveTable() throws Exception { testBuilder() .sqlQuery("SELECT * FROM hive.kv ORDER BY `value` DESC") .ordered() .baselineColumns("key", "value") .baselineValues(5, " key_5") .baselineValues(4, " key_4") .baselineValues(3, " key_3") .baselineValues(2, " key_2") .baselineValues(1, " key_1") .go(); } @Test public void queryingTablesInNonDefaultFS() throws Exception { // Update the default FS settings in Hive test storage plugin to non-local FS hiveTest.updatePluginConfig(getDrillbitContext().getStorage(), ImmutableMap.of(FileSystem.FS_DEFAULT_NAME_KEY, "hdfs://localhost:9001")); testBuilder() .sqlQuery("SELECT * FROM hive.`default`.kv LIMIT 1") .unOrdered() .baselineColumns("key", "value") .baselineValues(1, " key_1") .go(); } @Test // DRILL-745 public void queryingHiveAvroTable() throws Exception { testBuilder() .sqlQuery("SELECT * FROM hive.db1.avro ORDER BY key DESC LIMIT 1") .unOrdered() .baselineColumns("key", "value") .baselineValues(5, " key_5") .go(); } @Test // DRILL-3266 public void queryingTableWithSerDeInHiveContribJar() throws Exception { testBuilder() .sqlQuery("SELECT * FROM hive.db1.kv_db1 ORDER BY key DESC LIMIT 1") .unOrdered() .baselineColumns("key", "value") .baselineValues("5", " key_5") .go(); } @Test // DRILL-3746 public void readFromPartitionWithCustomLocation() throws Exception { testBuilder() .sqlQuery("SELECT count(*) as cnt FROM hive.partition_pruning_test WHERE c=99 AND d=98 AND e=97") .unOrdered() .baselineColumns("cnt") .baselineValues(1L) .go(); } @Test // DRILL-3938 public void readFromAlteredPartitionedTable() throws Exception { testBuilder() .sqlQuery("SELECT key, `value`, newcol FROM hive.kv_parquet ORDER BY key LIMIT 1") .unOrdered() .baselineColumns("key", "value", "newcol") .baselineValues(1, " key_1", null) .go(); } @Test // DRILL-3938 public void nativeReaderIsDisabledForAlteredPartitionedTable() throws Exception { try { test(String.format("alter session set `%s` = true", ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS)); final String query = "EXPLAIN PLAN FOR SELECT key, `value`, newcol FROM hive.kv_parquet ORDER BY key LIMIT 1"; // Make sure the HiveScan in plan has no native parquet reader final String planStr = getPlanInString(query, JSON_FORMAT); assertFalse("Hive native is not expected in the plan", planStr.contains("hive-drill-native-parquet-scan")); } finally { test(String.format("alter session set `%s` = false", ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS)); } } @Test // DRILL-3739 public void readingFromStorageHandleBasedTable() throws Exception { testBuilder() .sqlQuery("SELECT * FROM hive.kv_sh ORDER BY key LIMIT 2") .ordered() .baselineColumns("key", "value") .expectsEmptyResultSet() .go(); } @Test // DRILL-3739 public void readingFromStorageHandleBasedTable2() throws Exception { try { test(String.format("alter session set `%s` = true", ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS)); testBuilder() .sqlQuery("SELECT * FROM hive.kv_sh ORDER BY key LIMIT 2") .ordered() .baselineColumns("key", "value") .expectsEmptyResultSet() .go(); } finally { test(String.format("alter session set `%s` = false", ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS)); } } @Test // DRILL-3688 public void readingFromSmallTableWithSkipHeaderAndFooter() throws Exception { testBuilder() .sqlQuery("select key, `value` from hive.skipper.kv_text_small order by key asc") .ordered() .baselineColumns("key", "value") .baselineValues(1, "key_1") .baselineValues(2, "key_2") .baselineValues(3, "key_3") .baselineValues(4, "key_4") .baselineValues(5, "key_5") .go(); testBuilder() .sqlQuery("select count(1) as cnt from hive.skipper.kv_text_small") .unOrdered() .baselineColumns("cnt") .baselineValues(5L) .go(); } @Test // DRILL-3688 public void readingFromLargeTableWithSkipHeaderAndFooter() throws Exception { testBuilder() .sqlQuery("select sum(key) as sum_keys from hive.skipper.kv_text_large") .unOrdered() .baselineColumns("sum_keys") .baselineValues((long)(5000*(5000 + 1)/2)) .go(); testBuilder() .sqlQuery("select count(1) as cnt from hive.skipper.kv_text_large") .unOrdered() .baselineColumns("cnt") .baselineValues(5000L) .go(); } @Test // DRILL-3688 public void testIncorrectHeaderFooterProperty() throws Exception { Map<String, String> testData = ImmutableMap.<String, String>builder() .put("hive.skipper.kv_incorrect_skip_header","skip.header.line.count") .put("hive.skipper.kv_incorrect_skip_footer", "skip.footer.line.count") .build(); String query = "select * from %s"; String exceptionMessage = "Hive table property %s value 'A' is non-numeric"; for (Map.Entry<String, String> entry : testData.entrySet()) { try { test(String.format(query, entry.getKey())); } catch (UserRemoteException e) { assertThat(e.getMessage(), containsString(String.format(exceptionMessage, entry.getValue()))); } } } @Test // DRILL-3688 public void testIgnoreSkipHeaderFooterForRcfile() throws Exception { testBuilder() .sqlQuery("select count(1) as cnt from hive.skipper.kv_rcfile_large") .unOrdered() .baselineColumns("cnt") .baselineValues(5000L) .go(); } @Test // DRILL-3688 public void testIgnoreSkipHeaderFooterForParquet() throws Exception { testBuilder() .sqlQuery("select count(1) as cnt from hive.skipper.kv_parquet_large") .unOrdered() .baselineColumns("cnt") .baselineValues(5000L) .go(); } @Test // DRILL-3688 public void testIgnoreSkipHeaderFooterForSequencefile() throws Exception { testBuilder() .sqlQuery("select count(1) as cnt from hive.skipper.kv_sequencefile_large") .unOrdered() .baselineColumns("cnt") .baselineValues(5000L) .go(); } @Test public void testStringColumnsMetadata() throws Exception { String query = "select varchar_field, char_field, string_field from hive.readtest"; Map<String, Integer> expectedResult = Maps.newHashMap(); expectedResult.put("varchar_field", 50); expectedResult.put("char_field", 10); expectedResult.put("string_field", HiveVarchar.MAX_VARCHAR_LENGTH); verifyColumnsMetadata(client.createPreparedStatement(query).get() .getPreparedStatement().getColumnsList(), expectedResult); try { test("alter session set `%s` = true", ExecConstants.EARLY_LIMIT0_OPT_KEY); verifyColumnsMetadata(client.createPreparedStatement(String.format("select * from (%s) t limit 0", query)).get() .getPreparedStatement().getColumnsList(), expectedResult); } finally { test("alter session reset `%s`", ExecConstants.EARLY_LIMIT0_OPT_KEY); } } private void verifyColumnsMetadata(List<UserProtos.ResultColumnMetadata> columnsList, Map<String, Integer> expectedResult) { for (UserProtos.ResultColumnMetadata columnMetadata : columnsList) { assertTrue("Column should be present in result set", expectedResult.containsKey(columnMetadata.getColumnName())); Integer expectedSize = expectedResult.get(columnMetadata.getColumnName()); assertNotNull("Expected size should not be null", expectedSize); assertEquals("Display size should match", expectedSize.intValue(), columnMetadata.getDisplaySize()); assertEquals("Precision should match", expectedSize.intValue(), columnMetadata.getPrecision()); assertTrue("Column should be nullable", columnMetadata.getIsNullable()); } } @AfterClass public static void shutdownOptions() throws Exception { test(String.format("alter session set `%s` = false", PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY)); } }