/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.impl.writer;

import org.apache.drill.BaseTestQuery;
import org.apache.drill.exec.ExecConstants;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestParquetWriterEmptyFiles extends BaseTestQuery {

  private static FileSystem fs;

  @BeforeClass
  public static void initFs() throws Exception {
    Configuration conf = new Configuration();
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, FileSystem.DEFAULT_FS);
    fs = FileSystem.get(conf);

    updateTestCluster(3, null);
  }

  @Test // see DRILL-2408
  public void testWriteEmptyFile() throws Exception {
    final String outputFile = "testparquetwriteremptyfiles_testwriteemptyfile";

    try {
      test("CREATE TABLE dfs_test.tmp.%s AS SELECT * FROM cp.`employee.json` WHERE 1=0", outputFile);

      final Path path = new Path(getDfsTestTmpSchemaLocation(), outputFile);
      Assert.assertFalse(fs.exists(path));
    } finally {
      deleteTableIfExists(outputFile);
    }
  }

  @Test
  public void testMultipleWriters() throws Exception {
    final String outputFile = "testparquetwriteremptyfiles_testmultiplewriters";

    runSQL("alter session set `planner.slice_target` = 1");
    try {
      final String query = "SELECT position_id FROM cp.`employee.json` WHERE position_id IN (15, 16) GROUP BY position_id";
      test("CREATE TABLE dfs_test.tmp.%s AS %s", outputFile, query);

      // this query will fail if an "empty" file was created
      testBuilder()
        .unOrdered()
        .sqlQuery("SELECT * FROM dfs_test.tmp.%s", outputFile)
        .sqlBaselineQuery(query)
        .go();
    } finally {
      runSQL("alter session set `planner.slice_target` = " + ExecConstants.SLICE_TARGET_DEFAULT);
      deleteTableIfExists(outputFile);
    }
  }

  @Test // see DRILL-2408
  public void testWriteEmptyFileAfterFlush() throws Exception {
    final String outputFile = "testparquetwriteremptyfiles_test_write_empty_file_after_flush";
    deleteTableIfExists(outputFile);

    try {
      // this specific value will force a flush just after the final row is written
      // this may cause the creation of a new "empty" parquet file
      test("ALTER SESSION SET `store.parquet.block-size` = 19926");

      final String query = "SELECT * FROM cp.`employee.json` LIMIT 100";
      test("CREATE TABLE dfs_test.tmp.%s AS %s", outputFile, query);

      // this query will fail if an "empty" file was created
      testBuilder()
        .unOrdered()
        .sqlQuery("SELECT * FROM dfs_test.tmp.%s", outputFile)
        .sqlBaselineQuery(query)
        .go();
    } finally {
      // restore the session option
      test("ALTER SESSION SET `store.parquet.block-size` = %d", ExecConstants.PARQUET_BLOCK_SIZE_VALIDATOR.getDefault().num_val);
      deleteTableIfExists(outputFile);
    }
  }

  private static boolean deleteTableIfExists(String tableName) {
    try {
      Path path = new Path(getDfsTestTmpSchemaLocation(), tableName);
      if (fs.exists(path)) {
        return fs.delete(path, true);
      }
    } catch (Exception e) {
      // ignore exceptions.
      return false;
    }

    return true;
  }
}