/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.store.parquet;

import com.google.common.base.Joiner;
import org.apache.drill.PlanTestBase;
import org.apache.drill.common.util.TestTools;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.fs.Path;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.File;
import java.nio.file.Files;

import static org.junit.Assert.assertEquals;

/**
 * Tests for the Parquet metadata cache: creation of the cache file through
 * {@code REFRESH TABLE METADATA}, use of the cache in query plans, and the
 * interaction between the cache and directory-based partition pruning.
 *
 * <p>Two multi-level Parquet tables are copied from the test resources into the
 * {@code dfs_test} temporary location once per class (see {@link #copyData()});
 * the remaining tables are created by the individual tests with CTAS.
 */
public class TestParquetMetadataCache extends PlanTestBase {
  private static final String WORKING_PATH = TestTools.getWorkingPath();
  private static final String TEST_RES_PATH = WORKING_PATH + "/src/test/resources";
  private static final String tableName1 = "parquetTable1";
  private static final String tableName2 = "parquetTable2";

  /** Plan pattern that is present when the scan was planned from the metadata cache file. */
  private static final String USED_METADATA_PATTERN = "usedMetadataFile=true";

  /**
   * Copies the multi-level Parquet test data into the temporary location so that
   * metadata cache files can be written next to the data.
   *
   * @throws Exception if the test data cannot be copied
   */
  @BeforeClass
  public static void copyData() throws Exception {
    copyTestTable("multilevel/parquet", tableName1);
    copyTestTable("multilevel/parquet2", tableName2);
  }

  @Test
  public void testPartitionPruningWithMetadataCache_1() throws Exception {
    refreshCopiedTableMetadata(tableName1);
    String query = selectOrdersWhere(tableName1, "dir0=1994 and dir1 in ('Q1', 'Q2')");
    verifyRowCount(query, 20);
    verifyPrunedPlan(query, 2, tablePath(tableName1) + "/1994");
  }

  /** DRILL-3917, positive test case for DRILL-4530. */
  @Test
  public void testPartitionPruningWithMetadataCache_2() throws Exception {
    refreshCopiedTableMetadata(tableName1);
    String query = selectOrdersWhere(tableName1, "dir0=1994");
    verifyRowCount(query, 40);
    verifyPrunedPlan(query, 4, tablePath(tableName1) + "/1994", "Filter");
  }

  /** DRILL-3937 (partitioning column is varchar). */
  @Test
  public void testPartitionPruningWithMetadataCache_3() throws Exception {
    String tableName = "orders_ctas_varchar";
    test("use dfs_test.tmp");
    test(String.format("create table %s (o_orderdate, o_orderpriority) partition by (o_orderpriority) "
        + "as select o_orderdate, o_orderpriority from dfs_test.`%s/multilevel/parquet/1994/Q1`",
        tableName, TEST_RES_PATH));
    refreshMetadata(tableName, tableName);
    String query = String.format("select * from %s where o_orderpriority = '1-URGENT'", tableName);
    verifyRowCount(query, 3);
    verifyCachedPlan(query, 1);
  }

  /** DRILL-3937 (partitioning column is binary using convert_to). */
  @Test
  public void testPartitionPruningWithMetadataCache_4() throws Exception {
    String tableName = "orders_ctas_binary";
    test("use dfs_test.tmp");
    test(String.format("create table %s (o_orderdate, o_orderpriority) partition by (o_orderpriority) "
        + "as select o_orderdate, convert_to(o_orderpriority, 'UTF8') as o_orderpriority "
        + "from dfs_test.`%s/multilevel/parquet/1994/Q1`",
        tableName, TEST_RES_PATH));
    refreshMetadata(tableName, tableName);
    String query = String.format("select * from %s where o_orderpriority = '1-URGENT'", tableName);
    verifyRowCount(query, 3);
    verifyCachedPlan(query, 1);
  }

  @Test
  public void testCache() throws Exception {
    String tableName = "nation_ctas";
    test("use dfs_test.tmp");
    test(String.format("create table `%s/t1` as select * from cp.`tpch/nation.parquet`", tableName));
    test(String.format("create table `%s/t2` as select * from cp.`tpch/nation.parquet`", tableName));
    refreshMetadata(tableName, tableName);
    String query = String.format("select * from %s", tableName);
    // Two sub-tables are read, each created from the same nation.parquet source.
    verifyRowCount(query, 50);
    testPlanMatchingPatterns(query, new String[] {USED_METADATA_PATTERN}, new String[] {});
  }

  @Test
  public void testUpdate() throws Exception {
    String tableName = "nation_ctas_update";
    test("use dfs_test.tmp");
    test(String.format("create table `%s/t1` as select * from cp.`tpch/nation.parquet`", tableName));
    refreshMetadata(tableName, tableName);
    // NOTE(review): presumably this pause guarantees that the sub-table created below gets a
    // modification time later than the cache file, so the cache is seen as stale - confirm.
    Thread.sleep(1000);
    test(String.format("create table `%s/t2` as select * from cp.`tpch/nation.parquet`", tableName));
    // The row count covers both sub-tables, including the one added after the refresh.
    verifyRowCount(String.format("select * from %s", tableName), 50);
  }

  @Test
  public void testCacheWithSubschema() throws Exception {
    String tableName = "nation_ctas_subschema";
    test(String.format("create table dfs_test.tmp.`%s/t1` as select * from cp.`tpch/nation.parquet`", tableName));
    refreshMetadata("dfs_test.tmp." + tableName, tableName);
    verifyRowCount(String.format("select * from dfs_test.tmp.%s", tableName), 25);
  }

  @Test
  public void testFix4449() throws Exception {
    runSQL("CREATE TABLE dfs_test.tmp.`4449` PARTITION BY(l_discount) AS SELECT l_orderkey, l_discount FROM cp.`tpch/lineitem.parquet`");
    runSQL("REFRESH TABLE METADATA dfs_test.tmp.`4449`");

    testBuilder()
        .sqlQuery("SELECT COUNT(*) cnt FROM ("
            + "SELECT l_orderkey FROM dfs_test.tmp.`4449` WHERE l_discount < 0.05"
            + " UNION ALL"
            + " SELECT l_orderkey FROM dfs_test.tmp.`4449` WHERE l_discount > 0.02)")
        .unOrdered()
        .baselineColumns("cnt")
        .baselineValues(71159L)
        .go();
  }

  @Test
  public void testAbsentPluginOrWorkspaceError() throws Exception {
    testBuilder()
        .sqlQuery("refresh table metadata dfs_test.incorrect.table_name")
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(false, "Storage plugin or workspace does not exist [dfs_test.incorrect]")
        .go();

    testBuilder()
        .sqlQuery("refresh table metadata incorrect.table_name")
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(false, "Storage plugin or workspace does not exist [incorrect]")
        .go();
  }

  @Test
  public void testNoSupportedError() throws Exception {
    testBuilder()
        .sqlQuery("refresh table metadata cp.`tpch/nation.parquet`")
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(false, "Table tpch/nation.parquet does not support metadata refresh. "
            + "Support is currently limited to directory-based Parquet tables.")
        .go();
  }

  /** DRILL-4530: single leaf level partition. */
  @Test
  public void testDrill4530_1() throws Exception {
    refreshCopiedTableMetadata(tableName2);
    String query1 = selectOrdersWhere(tableName2, "dir0=1995 and dir1='Q3'");
    verifyRowCount(query1, 20);
    verifyPrunedPlan(query1, 2, tablePath(tableName2) + "/1995/Q3", "Filter");
  }

  /** DRILL-4530: single non-leaf level partition. */
  @Test
  public void testDrill4530_2() throws Exception {
    refreshCopiedTableMetadata(tableName2);
    String query1 = selectOrdersWhere(tableName2, "dir0=1995");
    verifyRowCount(query1, 80);
    verifyPrunedPlan(query1, 8, tablePath(tableName2) + "/1995", "Filter");
  }

  /** DRILL-4530: only dir1 filter is present, no dir0, hence this maps to multiple partitions. */
  @Test
  public void testDrill4530_3() throws Exception {
    refreshCopiedTableMetadata(tableName2);
    String query1 = selectOrdersWhere(tableName2, "dir1='Q3'");
    verifyRowCount(query1, 40);
    verifyPrunedPlan(query1, 4, tablePath(tableName2));
  }

  /** DRILL-4530: non-existent partition (1 subdirectory's cache file will still be read for schema). */
  @Test
  public void testDrill4530_4() throws Exception {
    refreshCopiedTableMetadata(tableName2);
    String query1 = selectOrdersWhere(tableName2, "dir0=1995 and dir1='Q6'");
    verifyRowCount(query1, 0);
    verifyPrunedPlan(query1, 1, tablePath(tableName2) + "/*/*");
  }

  /** DRILL-4794. */
  @Test
  public void testDrill4794() throws Exception {
    refreshCopiedTableMetadata(tableName1);
    String query = selectOrdersWhere(tableName1, "dir0=1994 or dir1='Q3'");
    verifyRowCount(query, 60);
    verifyPrunedPlan(query, 6, tablePath(tableName1));
  }

  /** DRILL-4786. */
  @Test
  public void testDrill4786_1() throws Exception {
    refreshCopiedTableMetadata(tableName2);
    String query1 = selectOrdersWhere(tableName2, "dir0=1995 and dir1 in ('Q1', 'Q2')");
    verifyRowCount(query1, 40);
    verifyPrunedPlan(query1, 4, tablePath(tableName2) + "/1995");
  }

  /** DRILL-4786. */
  @Test
  public void testDrill4786_2() throws Exception {
    refreshCopiedTableMetadata(tableName2);
    String query1 = selectOrdersWhere(tableName2, "dir0 in (1994, 1995) and dir1 = 'Q3'");
    verifyRowCount(query1, 40);
    verifyPrunedPlan(query1, 4, tablePath(tableName2));
  }

  /** DRILL-4877. */
  @Test
  public void testDrill4877() throws Exception {
    refreshCopiedTableMetadata(tableName2);
    String query1 = String.format("select max(dir0) as max0, max(dir1) as max1 from dfs_test.`%s/%s` ",
        getDfsTestTmpSchemaLocation(), tableName2);

    testBuilder()
        .sqlQuery(query1)
        .unOrdered()
        .baselineColumns("max0", "max1")
        .baselineValues("1995", "Q4")
        .go();

    // point to selectionRoot since no pruning is done in this query
    verifyPrunedPlan(query1, 1, tablePath(tableName2));
  }

  /**
   * Copies one directory of Parquet test resources into the temporary location.
   *
   * <p>The source path is built exactly once; it is never re-used as a format string,
   * so a {@code %} in the working path cannot be misread as a format specifier.
   * {@code FileUtils.copyDirectory} creates the destination directory itself, so no
   * separate {@code mkdir()} call (whose result would otherwise go unchecked) is needed.
   *
   * @param resourceSubPath directory below the test resources root to copy from
   * @param tableName       name of the directory to create in the temporary location
   * @throws Exception if the copy fails
   */
  private static void copyTestTable(String resourceSubPath, String tableName) throws Exception {
    File source = new File(TEST_RES_PATH + "/" + resourceSubPath);
    File target = new File(getDfsTestTmpSchemaLocation() + Path.SEPARATOR + tableName);
    FileUtils.copyDirectory(source, target);
  }

  /**
   * Returns the path of a table below the temporary location, in the
   * {@code <tmp location>/<table>} form used by the queries and the plan patterns.
   *
   * @param tableName table directory name
   * @return the table path
   */
  private static String tablePath(String tableName) {
    return getDfsTestTmpSchemaLocation() + "/" + tableName;
  }

  /**
   * Runs {@code refresh table metadata} and asserts that the cache file was written.
   *
   * @param tableReference the table as it should appear in the SQL statement
   * @param tableName      the table directory name below the temporary location
   * @throws Exception if the statement fails or the cache file is missing
   */
  private void refreshMetadata(String tableReference, String tableName) throws Exception {
    test("refresh table metadata " + tableReference);
    checkForMetadataFile(tableName);
  }

  /**
   * Refreshes the metadata cache of one of the tables copied by {@link #copyData()},
   * addressing it by its path in the {@code dfs_test} plugin.
   *
   * @param tableName the table directory name below the temporary location
   * @throws Exception if the statement fails or the cache file is missing
   */
  private void refreshCopiedTableMetadata(String tableName) throws Exception {
    refreshMetadata("dfs_test.`" + tablePath(tableName) + "`", tableName);
  }

  /**
   * Builds the query shared by the partition pruning tests: the directory columns and
   * two order columns of a copied table, restricted by the given filter.
   *
   * @param tableName   the table directory name below the temporary location
   * @param whereClause the filter condition, without the {@code where} keyword
   * @return the SQL text
   */
  private static String selectOrdersWhere(String tableName, String whereClause) {
    return String.format("select dir0, dir1, o_custkey, o_orderdate from dfs_test.`%s/%s` "
        + " where %s", getDfsTestTmpSchemaLocation(), tableName, whereClause);
  }

  /**
   * Runs the query and asserts the number of rows it returns.
   *
   * @param query            SQL text to execute
   * @param expectedRowCount number of rows the query must return
   * @throws Exception if the query fails
   */
  private void verifyRowCount(String query, int expectedRowCount) throws Exception {
    int actualRowCount = testSql(query);
    assertEquals(expectedRowCount, actualRowCount);
  }

  /**
   * Asserts that the plan reads the expected number of files and used the metadata cache.
   *
   * @param query            SQL text to plan
   * @param expectedNumFiles value expected in the plan's {@code numFiles} attribute
   * @throws Exception if planning fails
   */
  private void verifyCachedPlan(String query, int expectedNumFiles) throws Exception {
    String numFilesPattern = "numFiles=" + expectedNumFiles;
    PlanTestBase.testPlanMatchingPatterns(query,
        new String[] {numFilesPattern, USED_METADATA_PATTERN}, new String[] {});
  }

  /**
   * Asserts that the plan reads the expected number of files, used the metadata cache,
   * and reports the expected cache file root; optionally asserts that some patterns are
   * absent from the plan.
   *
   * @param query                 SQL text to plan
   * @param expectedNumFiles      value expected in the plan's {@code numFiles} attribute
   * @param expectedCacheFileRoot value expected in the plan's {@code cacheFileRoot} attribute
   * @param excludedPatterns      patterns that must not occur in the plan; may be empty
   * @throws Exception if planning fails
   */
  private void verifyPrunedPlan(String query, int expectedNumFiles, String expectedCacheFileRoot,
      String... excludedPatterns) throws Exception {
    String numFilesPattern = "numFiles=" + expectedNumFiles;
    String cacheFileRootPattern = "cacheFileRoot=" + expectedCacheFileRoot;
    PlanTestBase.testPlanMatchingPatterns(query,
        new String[] {numFilesPattern, USED_METADATA_PATTERN, cacheFileRootPattern}, excludedPatterns);
  }

  /**
   * Asserts that the metadata cache file exists in the given table's directory.
   *
   * @param table the table directory name below the temporary location
   * @throws Exception declared for symmetry with the test methods
   */
  private void checkForMetadataFile(String table) throws Exception {
    String tmpDir = getDfsTestTmpSchemaLocation();
    String metaFile = Joiner.on("/").join(tmpDir, table, Metadata.METADATA_FILENAME);
    Assert.assertTrue("Metadata cache file was not created: " + metaFile,
        Files.exists(new File(metaFile).toPath()));
  }
}