/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql;

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;

/**
 * Same as TestTxnCommands2, but tests ACID tables with 'transactional_properties' set to
 * 'default'. This verifies that ACID tables with split-update turned on behave correctly on the
 * same set of tests that run with it turned off. It also adds a few tests for behaviors that are
 * specific to ACID tables with split-update enabled.
 */
public class TestTxnCommands2WithSplitUpdate extends TestTxnCommands2 {

  public TestTxnCommands2WithSplitUpdate() {
    super();
  }

  @Rule
  public ExpectedException expectedException = ExpectedException.none();

  @Override
  @Before
  public void setUp() throws Exception {
    setUpWithTableProperties("'transactional'='true','transactional_properties'='default'");
  }

  @Override
  @Test
  public void testInitiatorWithMultipleFailedCompactions() throws Exception {
    // Test with split-update turned on.
    testInitiatorWithMultipleFailedCompactionsForVariousTblProperties(
        "'transactional'='true','transactional_properties'='default'");
  }

  @Override
  @Test
  public void writeBetweenWorkerAndCleaner() throws Exception {
    writeBetweenWorkerAndCleanerForVariousTblProperties(
        "'transactional'='true','transactional_properties'='default'");
  }

  @Override
  @Test
  public void testACIDwithSchemaEvolutionAndCompaction() throws Exception {
    testACIDwithSchemaEvolutionForVariousTblProperties(
        "'transactional'='true','transactional_properties'='default'");
  }

  /**
   * In the current implementation of ACID, altering the value of transactional_properties, or
   * setting a value for it on an ACID table where it was previously unset, throws an exception.
   * @throws Exception
   */
  @Test
  public void testFailureOnAlteringTransactionalProperties() throws Exception {
    expectedException.expect(RuntimeException.class);
    expectedException.expectMessage(
        "TBLPROPERTIES with 'transactional_properties' cannot be altered after the table is created");
    runStatementOnDriver("create table acidTblLegacy (a int, b int) clustered by (a) into "
        + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')");
    runStatementOnDriver(
        "alter table acidTblLegacy SET TBLPROPERTIES ('transactional_properties' = 'default')");
  }
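
  // NOTE on split-update ('transactional_properties'='default'): an UPDATE is executed as a
  // delete event plus an insert event (U = D + I, as the comments below put it), so in addition
  // to the usual delta_* directories, the conversion tests below also expect delete_delta_*
  // directories on disk.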
  /**
   * Test query correctness and directory layout for ACID table conversion with split-update
   * enabled:
   * 1. Insert a row into a Non-ACID table
   * 2. Convert the Non-ACID table to an ACID table with split-update enabled
   * 3. Insert a row into the ACID table
   * 4. Perform a major compaction
   * 5. Clean
   * @throws Exception
   */
  @Test
  @Override
  public void testNonAcidToAcidConversion1() throws Exception {
    FileSystem fs = FileSystem.get(hiveConf);
    FileStatus[] status;

    // 1. Insert a row to Non-ACID table
    runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)");
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    // There should be 2 original bucket files in the location (000000_0 and 000001_0)
    Assert.assertEquals(BUCKET_COUNT, status.length);
    for (int i = 0; i < status.length; i++) {
      Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
    }
    List<String> rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    int[][] resultData = new int[][] {{1, 2}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    int resultCount = 1;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

    // 2. Convert NONACIDORCTBL to ACID table
    runStatementOnDriver("alter table " + Table.NONACIDORCTBL +
        " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')");
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    // Everything should be the same as before
    Assert.assertEquals(BUCKET_COUNT, status.length);
    for (int i = 0; i < status.length; i++) {
      Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
    }
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    resultData = new int[][] {{1, 2}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    resultCount = 1;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

    // 3. Insert another row to the newly-converted ACID table
    runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(3,4)");
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    // There should be 2 original bucket files (000000_0 and 000001_0), plus a new delta directory.
    // The delta directory should have only 1 bucket file (bucket_00001).
    Assert.assertEquals(3, status.length);
    boolean sawNewDelta = false;
    for (int i = 0; i < status.length; i++) {
      if (status[i].getPath().getName().matches("delta_.*")) {
        sawNewDelta = true;
        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
        Assert.assertEquals(1, buckets.length); // only one bucket file
        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
      } else {
        Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
      }
    }
    Assert.assertTrue(sawNewDelta);
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL + " order by a,b");
    resultData = new int[][] {{1, 2}, {3, 4}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    resultCount = 2;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

    // 4. Perform a major compaction
    runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " compact 'MAJOR'");
    runWorker(hiveConf);
    // There should be 1 new directory: base_xxxxxxx.
    // Original bucket files and the delta directory should stay until the Cleaner kicks in.
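    // (That makes 4 entries: the 2 original bucket files, the delta directory from step 3,
    // and the new base directory written by the compactor.)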
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    Assert.assertEquals(4, status.length);
    boolean sawNewBase = false;
    for (int i = 0; i < status.length; i++) {
      if (status[i].getPath().getName().matches("base_.*")) {
        sawNewBase = true;
        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
        Assert.assertEquals(1, buckets.length);
        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
      }
    }
    Assert.assertTrue(sawNewBase);
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    resultData = new int[][] {{1, 2}, {3, 4}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    resultCount = 2;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

    // 5. Let the Cleaner delete obsolete files/dirs
    // Note: here we create a fake directory along with fake files as original directories/files
    String fakeFile0 = TEST_WAREHOUSE_DIR + "/" + (Table.NONACIDORCTBL).toString().toLowerCase() +
        "/subdir/000000_0";
    String fakeFile1 = TEST_WAREHOUSE_DIR + "/" + (Table.NONACIDORCTBL).toString().toLowerCase() +
        "/subdir/000000_1";
    fs.create(new Path(fakeFile0));
    fs.create(new Path(fakeFile1));
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    // Before the Cleaner runs, there should be 5 items:
    // 2 original files, 1 original directory, 1 base directory and 1 delta directory
    Assert.assertEquals(5, status.length);
    runCleaner(hiveConf);
    // There should be only 1 directory left: base_xxxxxxx.
    // Original bucket files and the delta directory should have been cleaned up.
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    Assert.assertEquals(1, status.length);
    Assert.assertTrue(status[0].getPath().getName().matches("base_.*"));
    FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
    Assert.assertEquals(1, buckets.length);
    Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    resultData = new int[][] {{1, 2}, {3, 4}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    resultCount = 2;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
  }

  /**
   * Test query correctness and directory layout for ACID table conversion with split-update
   * enabled:
   * 1. Insert a row into a Non-ACID table
   * 2. Convert the Non-ACID table to an ACID table with split-update enabled
   * 3. Update the existing row in the ACID table
   * 4. Perform a major compaction
   * 5. Clean
   * @throws Exception
   */
  @Test
  @Override
  public void testNonAcidToAcidConversion2() throws Exception {
    FileSystem fs = FileSystem.get(hiveConf);
    FileStatus[] status;
    // 1. Insert a row to Non-ACID table
    runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)");
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    // There should be 2 original bucket files in the location (000000_0 and 000001_0)
    Assert.assertEquals(BUCKET_COUNT, status.length);
    for (int i = 0; i < status.length; i++) {
      Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
    }
    List<String> rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    int[][] resultData = new int[][] {{1, 2}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    int resultCount = 1;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

    // 2. Convert NONACIDORCTBL to ACID table
    runStatementOnDriver("alter table " + Table.NONACIDORCTBL +
        " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')");
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    // Everything should be the same as before
    Assert.assertEquals(BUCKET_COUNT, status.length);
    for (int i = 0; i < status.length; i++) {
      Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
    }
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    resultData = new int[][] {{1, 2}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    resultCount = 1;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

    // 3. Update the existing row in the newly-converted ACID table
    runStatementOnDriver("update " + Table.NONACIDORCTBL + " set b=3 where a=1");
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    // There should be 2 original bucket files (000000_0 and 000001_0), plus one delta directory
    // and one delete_delta directory. When split-update is enabled, an update event is split into
    // a combination of delete and insert, which is what generates the delete_delta directory.
    // The delta and delete_delta directories should each contain a single bucket file, matching
    // the assertions below.
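    // (BUCKET_COUNT is 2 in the parent test class, so the "BUCKET_COUNT - 1" assertions below
    // expect exactly one bucket file in each delta/delete_delta directory.)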
    Assert.assertEquals(4, status.length);
    boolean sawNewDelta = false;
    boolean sawNewDeleteDelta = false;
    for (int i = 0; i < status.length; i++) {
      if (status[i].getPath().getName().matches("delta_.*")) {
        sawNewDelta = true;
        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
        Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]"));
      } else if (status[i].getPath().getName().matches("delete_delta_.*")) {
        sawNewDeleteDelta = true;
        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
        Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]"));
      } else {
        Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
      }
    }
    Assert.assertTrue(sawNewDelta);
    Assert.assertTrue(sawNewDeleteDelta);
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    resultData = new int[][] {{1, 3}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    resultCount = 1;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

    // 4. Perform a major compaction
    runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " compact 'MAJOR'");
    runWorker(hiveConf);
    // There should be 1 new directory: base_xxxxxxx.
    // Original bucket files, the delta directory and the delete_delta directory should stay
    // until the Cleaner kicks in.
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    Assert.assertEquals(5, status.length);
    boolean sawNewBase = false;
    for (int i = 0; i < status.length; i++) {
      if (status[i].getPath().getName().matches("base_.*")) {
        sawNewBase = true;
        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
        Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
      }
    }
    Assert.assertTrue(sawNewBase);
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    resultData = new int[][] {{1, 3}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    resultCount = 1;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

    // 5. Let the Cleaner delete obsolete files/dirs
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    // Before the Cleaner runs, there should be 5 items:
    // 2 original files, 1 delta directory, 1 delete_delta directory and 1 base directory
    Assert.assertEquals(5, status.length);
    runCleaner(hiveConf);
    // There should be only 1 directory left: base_xxxxxxx.
    // Original bucket files, the delta directory and the delete_delta directory should have
    // been cleaned up.
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    Assert.assertEquals(1, status.length);
    Assert.assertTrue(status[0].getPath().getName().matches("base_.*"));
    FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
    Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
    Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    resultData = new int[][] {{1, 3}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    resultCount = 1;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
  }

  /**
   * Test query correctness and directory layout for ACID table conversion with split-update
   * enabled:
   * 1. Insert a row into a Non-ACID table
   * 2. Convert the Non-ACID table to an ACID table with split-update enabled
   * 3. Perform a major compaction
   * 4. Update the existing row, and insert a new row, in the ACID table
   * 5. Perform another major compaction
   * 6. Clean
   * @throws Exception
   */
  @Test
  @Override
  public void testNonAcidToAcidConversion3() throws Exception {
    FileSystem fs = FileSystem.get(hiveConf);
    FileStatus[] status;

    // 1. Insert a row to Non-ACID table
    runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)");
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    // There should be 2 original bucket files in the location (000000_0 and 000001_0)
    Assert.assertEquals(BUCKET_COUNT, status.length);
    for (int i = 0; i < status.length; i++) {
      Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
    }
    List<String> rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    int[][] resultData = new int[][] {{1, 2}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    int resultCount = 1;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

    // 2. Convert NONACIDORCTBL to ACID table with split-update enabled (txn_props=default)
    runStatementOnDriver("alter table " + Table.NONACIDORCTBL +
        " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')");
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    // Everything should be the same as before
    Assert.assertEquals(BUCKET_COUNT, status.length);
    for (int i = 0; i < status.length; i++) {
      Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
    }
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    resultData = new int[][] {{1, 2}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    resultCount = 1;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

    // 3. Perform a major compaction
    runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " compact 'MAJOR'");
    runWorker(hiveConf);
    // There should be 1 new directory: base_-9223372036854775808
    // Original bucket files should stay until the Cleaner kicks in.
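    // (base_-9223372036854775808 is Long.MIN_VALUE; presumably it is used here because the
    // pre-conversion original files carry no transaction ids from which a base name could
    // be derived.)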
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    Assert.assertEquals(3, status.length);
    boolean sawNewBase = false;
    for (int i = 0; i < status.length; i++) {
      if (status[i].getPath().getName().matches("base_.*")) {
        Assert.assertEquals("base_-9223372036854775808", status[i].getPath().getName());
        sawNewBase = true;
        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
        Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
        Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
      }
    }
    Assert.assertTrue(sawNewBase);
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    resultData = new int[][] {{1, 2}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    resultCount = 1;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

    // 4. Update the existing row, and insert another row to the newly-converted ACID table
    runStatementOnDriver("update " + Table.NONACIDORCTBL + " set b=3 where a=1");
    runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(3,4)");
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    // Make sure delta_0000022_0000022_0000 appears before delta_0000023_0000023_0000
    Arrays.sort(status);
    // There should be 2 original bucket files (000000_0 and 000001_0), a base directory,
    // plus two new delta directories (one from the update, one from the insert) and one
    // delete_delta directory from the update (remember, with split-update U = D + I)!
    Assert.assertEquals(6, status.length);
    int numDelta = 0;
    int numDeleteDelta = 0;
    sawNewBase = false;
    for (int i = 0; i < status.length; i++) {
      if (status[i].getPath().getName().matches("delta_.*")) {
        numDelta++;
        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
        Arrays.sort(buckets);
        if (numDelta == 1) {
          Assert.assertEquals("delta_0000022_0000022_0000", status[i].getPath().getName());
          Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
        } else if (numDelta == 2) {
          Assert.assertEquals("delta_0000023_0000023_0000", status[i].getPath().getName());
          Assert.assertEquals(1, buckets.length);
          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
        }
      } else if (status[i].getPath().getName().matches("delete_delta_.*")) {
        numDeleteDelta++;
        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
        Arrays.sort(buckets);
        if (numDeleteDelta == 1) {
          Assert.assertEquals("delete_delta_0000022_0000022_0000", status[i].getPath().getName());
          Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
        }
      } else if (status[i].getPath().getName().matches("base_.*")) {
        Assert.assertEquals("base_-9223372036854775808", status[i].getPath().getName());
        sawNewBase = true;
        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
        Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
        Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
      } else {
        Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
      }
    }
    Assert.assertEquals(2, numDelta);
    Assert.assertEquals(1, numDeleteDelta);
    Assert.assertTrue(sawNewBase);
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    resultData = new int[][] {{1, 3}, {3, 4}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    resultCount = 2;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

    // 5. Perform another major compaction
    runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " compact 'MAJOR'");
    runWorker(hiveConf);
    // There should be 1 new base directory: base_0000023.
    // Original bucket files, delta directories, the delete_delta directory and the
    // previous base directory should stay until the Cleaner kicks in.
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    Arrays.sort(status);
    Assert.assertEquals(7, status.length);
    int numBase = 0;
    for (int i = 0; i < status.length; i++) {
      if (status[i].getPath().getName().matches("base_.*")) {
        numBase++;
        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
        Arrays.sort(buckets);
        if (numBase == 1) {
          Assert.assertEquals("base_-9223372036854775808", status[i].getPath().getName());
          Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
        } else if (numBase == 2) {
          // The new base directory has a single bucket file (bucket_00001), matching the
          // delta directories it was compacted from.
          Assert.assertEquals("base_0000023", status[i].getPath().getName());
          Assert.assertEquals(1, buckets.length);
          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
        }
      }
    }
    Assert.assertEquals(2, numBase);
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    resultData = new int[][] {{1, 3}, {3, 4}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    resultCount = 2;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

    // 6. Let the Cleaner delete obsolete files/dirs
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    // Before the Cleaner runs, there should be 7 items:
    // 2 original files, 2 delta directories, 1 delete_delta directory and 2 base directories
    Assert.assertEquals(7, status.length);
    runCleaner(hiveConf);
    // There should be only 1 directory left: base_0000023.
    // Original bucket files, delta directories, the delete_delta directory and the previous
    // base directory should have been cleaned up.
    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
    Assert.assertEquals(1, status.length);
    Assert.assertEquals("base_0000023", status[0].getPath().getName());
    FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
    Arrays.sort(buckets);
    Assert.assertEquals(1, buckets.length);
    Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
    resultData = new int[][] {{1, 3}, {3, 4}};
    Assert.assertEquals(stringifyValues(resultData), rs);
    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
    resultCount = 2;
    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
  }

  @Ignore("HIVE-14947")
  @Test
  @Override
  public void testDynamicPartitionsMerge() throws Exception {}

  @Ignore("HIVE-14947")
  @Test
  @Override
  public void testDynamicPartitionsMerge2() throws Exception {}

  @Ignore("HIVE-14947")
  @Test
  @Override
  public void testMerge() throws Exception {}

  /**
   * TODO: remove this test once HIVE-14947 is done (the parent class has a better version).
   */
  @Test
  @Override
  public void testMerge2() throws Exception {
    int[][] baseValsOdd = {{5, 5}, {11, 11}};
    int[][] baseValsEven = {{2, 2}, {4, 44}};
    runStatementOnDriver("insert into " + Table.NONACIDPART2 + " PARTITION(p2='odd') " +
        makeValuesClause(baseValsOdd));
    runStatementOnDriver("insert into " + Table.NONACIDPART2 + " PARTITION(p2='even') " +
        makeValuesClause(baseValsEven));
    int[][] vals = {{2, 1}, {4, 3}, {5, 6}, {7, 8}};
    runStatementOnDriver("insert into " + Table.ACIDTBL + " " + makeValuesClause(vals));
    List<String> r = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
    Assert.assertEquals(stringifyValues(vals), r);
    String query = "merge into " + Table.ACIDTBL +
        " using " + Table.NONACIDPART2 + " source ON " + Table.ACIDTBL + ".a = source.a2 " +
        "WHEN MATCHED THEN UPDATE set b = source.b2 ";
    r = runStatementOnDriver(query);
    r = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
    int[][] rExpected = {{2, 2}, {4, 44}, {5, 5}, {7, 8}};
    Assert.assertEquals(stringifyValues(rExpected), r);
  }

  @Ignore("HIVE-14947")
  @Test
  @Override
  public void testMergeWithPredicate() throws Exception {}
}