/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.exec; import static org.junit.Assert.fail; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.AddPartitionDesc; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.util.StringUtils; import org.apache.hive.common.util.RetryUtilities.RetryException; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.mockito.ArgumentCaptor; import org.mockito.Mockito; public class TestMsckCreatePartitionsInBatches { private static HiveConf hiveConf; private static DDLTask ddlTask; private final String tableName = "test_msck_batch"; private static Hive db; private List<String> repairOutput; private Table table; @BeforeClass public static void setupClass() throws HiveException { hiveConf = new HiveConf(TestMsckCreatePartitionsInBatches.class); hiveConf.setIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE, 5); hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); SessionState.start(hiveConf); db = Hive.get(hiveConf); ddlTask = new DDLTask(); } @Before public void before() throws Exception { createPartitionedTable("default", tableName); table = db.getTable(tableName); repairOutput = new ArrayList<String>(); } @After public void after() throws Exception { cleanUpTableQuietly("default", tableName); } private Table createPartitionedTable(String dbName, String tableName) throws Exception { try { db.dropTable(dbName, tableName); db.createTable(tableName, Arrays.asList("key", "value"), // Data columns. Arrays.asList("city"), // Partition columns. TextInputFormat.class, HiveIgnoreKeyTextOutputFormat.class); return db.getTable(dbName, tableName); } catch (Exception exception) { fail("Unable to drop and create table " + dbName + "." + tableName + " because " + StringUtils.stringifyException(exception)); throw exception; } } private void cleanUpTableQuietly(String dbName, String tableName) { try { db.dropTable(dbName, tableName, true, true, true); } catch (Exception exception) { fail("Unexpected exception: " + StringUtils.stringifyException(exception)); } } private Set<PartitionResult> createPartsNotInMs(int numOfParts) { Set<PartitionResult> partsNotInMs = new HashSet<>(); for (int i = 0; i < numOfParts; i++) { PartitionResult result = new PartitionResult(); result.setPartitionName("city=dummyCity_" + String.valueOf(i)); partsNotInMs.add(result); } return partsNotInMs; } /** * Tests the number of times Hive.createPartitions calls are executed with total number of * partitions to be added are equally divisible by batch size * * @throws Exception */ @Test public void testNumberOfCreatePartitionCalls() throws Exception { // create 10 dummy partitions Set<PartitionResult> partsNotInMs = createPartsNotInMs(10); Hive spyDb = Mockito.spy(db); // batch size of 5 and decaying factor of 2 ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 5, 2, 0); // there should be 2 calls to create partitions with each batch size of 5 ArgumentCaptor<AddPartitionDesc> argument = ArgumentCaptor.forClass(AddPartitionDesc.class); Mockito.verify(spyDb, Mockito.times(2)).createPartitions(argument.capture()); // confirm the batch sizes were 5, 5 in the two calls to create partitions List<AddPartitionDesc> apds = argument.getAllValues(); int retryAttempt = 1; Assert.assertEquals(String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 5, apds.get(0).getPartitionCount()); Assert.assertEquals(String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 5, apds.get(1).getPartitionCount()); } /** * Tests the number of times Hive.createPartitions calls are executed with total number of * partitions to be added are not exactly divisible by batch size * * @throws Exception */ @Test public void testUnevenNumberOfCreatePartitionCalls() throws Exception { // create 9 dummy partitions Set<PartitionResult> partsNotInMs = createPartsNotInMs(9); Hive spyDb = Mockito.spy(db); // batch size of 5 and decaying factor of 2 ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 5, 2, 0); // there should be 2 calls to create partitions with batch sizes of 5, 4 ArgumentCaptor<AddPartitionDesc> argument = ArgumentCaptor.forClass(AddPartitionDesc.class); Mockito.verify(spyDb, Mockito.times(2)).createPartitions(argument.capture()); // confirm the batch sizes were 5, 4 in the two calls to create partitions List<AddPartitionDesc> apds = argument.getAllValues(); int retryAttempt = 1; Assert.assertEquals(String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 5, apds.get(0).getPartitionCount()); Assert.assertEquals(String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 4, apds.get(1).getPartitionCount()); } /** * Tests the number of times Hive.createPartitions calls are executed with total number of * partitions exactly equal to batch size * * @throws Exception */ @Test public void testEqualNumberOfPartitions() throws Exception { // create 13 dummy partitions Set<PartitionResult> partsNotInMs = createPartsNotInMs(13); Hive spyDb = Mockito.spy(db); // batch size of 13 and decaying factor of 2 ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 13, 2, 0); // there should be 1 call to create partitions with batch sizes of 13 ArgumentCaptor<AddPartitionDesc> argument = ArgumentCaptor.forClass(AddPartitionDesc.class); Mockito.verify(spyDb, Mockito.times(1)).createPartitions(argument.capture()); Assert.assertEquals("Unexpected number of batch size", 13, argument.getValue().getPartitionCount()); } /** * Tests the number of times Hive.createPartitions calls are executed with total number of * partitions to is less than batch size * * @throws Exception */ @Test public void testSmallNumberOfPartitions() throws Exception { // create 10 dummy partitions Set<PartitionResult> partsNotInMs = createPartsNotInMs(10); Hive spyDb = Mockito.spy(db); // batch size of 20 and decaying factor of 2 ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 20, 2, 0); // there should be 1 call to create partitions with batch sizes of 10 Mockito.verify(spyDb, Mockito.times(1)).createPartitions(Mockito.anyObject()); ArgumentCaptor<AddPartitionDesc> argument = ArgumentCaptor.forClass(AddPartitionDesc.class); Mockito.verify(spyDb).createPartitions(argument.capture()); Assert.assertEquals("Unexpected number of batch size", 10, argument.getValue().getPartitionCount()); } /** * Tests the number of calls to createPartitions and the respective batch sizes when first call to * createPartitions throws HiveException. The batch size should be reduced by the decayingFactor * * @throws Exception */ @Test public void testBatchingWhenException() throws Exception { // create 13 dummy partitions Set<PartitionResult> partsNotInMs = createPartsNotInMs(23); Hive spyDb = Mockito.spy(db); // first call to createPartitions should throw exception Mockito.doThrow(HiveException.class).doCallRealMethod().doCallRealMethod().when(spyDb) .createPartitions(Mockito.any(AddPartitionDesc.class)); // test with a batch size of 30 and decaying factor of 2 ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 30, 2, 0); // confirm the batch sizes were 23, 15, 8 in the three calls to create partitions ArgumentCaptor<AddPartitionDesc> argument = ArgumentCaptor.forClass(AddPartitionDesc.class); // there should be 3 calls to create partitions with batch sizes of 23, 15, 8 Mockito.verify(spyDb, Mockito.times(3)).createPartitions(argument.capture()); List<AddPartitionDesc> apds = argument.getAllValues(); int retryAttempt = 1; Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 23, apds.get(0).getPartitionCount()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 15, apds.get(1).getPartitionCount()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 8, apds.get(2).getPartitionCount()); } /** * Tests the retries exhausted case when Hive.createPartitions method call always keep throwing * HiveException. The batch sizes should exponentially decreased based on the decaying factor and * ultimately give up when it reaches 0 * * @throws Exception */ @Test public void testRetriesExhaustedBatchSize() throws Exception { Set<PartitionResult> partsNotInMs = createPartsNotInMs(17); Hive spyDb = Mockito.spy(db); Mockito.doThrow(HiveException.class).when(spyDb) .createPartitions(Mockito.any(AddPartitionDesc.class)); // batch size of 5 and decaying factor of 2 Exception ex = null; try { ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 30, 2, 0); } catch (Exception retryEx) { ex = retryEx; } Assert.assertFalse("Exception was expected but was not thrown", ex == null); Assert.assertTrue("Unexpected class of exception thrown", ex instanceof RetryException); // there should be 5 calls to create partitions with batch sizes of 17, 15, 7, 3, 1 ArgumentCaptor<AddPartitionDesc> argument = ArgumentCaptor.forClass(AddPartitionDesc.class); Mockito.verify(spyDb, Mockito.times(5)).createPartitions(argument.capture()); List<AddPartitionDesc> apds = argument.getAllValues(); int retryAttempt = 1; Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 17, apds.get(0).getPartitionCount()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 15, apds.get(1).getPartitionCount()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 7, apds.get(2).getPartitionCount()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 3, apds.get(3).getPartitionCount()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 1, apds.get(4).getPartitionCount()); } /** * Tests the maximum retry attempts provided by configuration * @throws Exception */ @Test public void testMaxRetriesReached() throws Exception { Set<PartitionResult> partsNotInMs = createPartsNotInMs(17); Hive spyDb = Mockito.spy(db); Mockito.doThrow(HiveException.class).when(spyDb) .createPartitions(Mockito.any(AddPartitionDesc.class)); // batch size of 5 and decaying factor of 2 Exception ex = null; try { ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 30, 2, 2); } catch (Exception retryEx) { ex = retryEx; } Assert.assertFalse("Exception was expected but was not thrown", ex == null); Assert.assertTrue("Unexpected class of exception thrown", ex instanceof RetryException); ArgumentCaptor<AddPartitionDesc> argument = ArgumentCaptor.forClass(AddPartitionDesc.class); Mockito.verify(spyDb, Mockito.times(2)).createPartitions(argument.capture()); List<AddPartitionDesc> apds = argument.getAllValues(); int retryAttempt = 1; Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 17, apds.get(0).getPartitionCount()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 15, apds.get(1).getPartitionCount()); } /** * Tests when max number of retries is set to 1. In this case the number of retries should * be specified * @throws Exception */ @Test public void testOneMaxRetries() throws Exception { Set<PartitionResult> partsNotInMs = createPartsNotInMs(17); Hive spyDb = Mockito.spy(db); Mockito.doThrow(HiveException.class).when(spyDb) .createPartitions(Mockito.any(AddPartitionDesc.class)); // batch size of 5 and decaying factor of 2 Exception ex = null; try { ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 30, 2, 1); } catch (Exception retryEx) { ex = retryEx; } Assert.assertFalse("Exception was expected but was not thrown", ex == null); Assert.assertTrue("Unexpected class of exception thrown", ex instanceof RetryException); // there should be 5 calls to create partitions with batch sizes of 17, 15, 7, 3, 1 ArgumentCaptor<AddPartitionDesc> argument = ArgumentCaptor.forClass(AddPartitionDesc.class); Mockito.verify(spyDb, Mockito.times(1)).createPartitions(argument.capture()); List<AddPartitionDesc> apds = argument.getAllValues(); int retryAttempt = 1; Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 17, apds.get(0).getPartitionCount()); } }