/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.master.procedure; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.CategoryBasedTimeout; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.CreateTableState; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.DeleteTableState; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.DisableTableState; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.EnableTableState; import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.TruncateTableState;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.ModifyRegionUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestRule;

/**
 * Exercises the master table procedures (create, delete, truncate, disable, enable)
 * across procedure-executor restarts followed by a master failover.
 *
 * <p>Each test submits one procedure with "kill before store update" enabled, restarts
 * the executor a fixed number of times, triggers a master failover, and finally checks
 * that the procedure did not fail and that the table ended up in the expected state.
 * A fresh mini cluster is started before and shut down after every test method.
 */
@Category({MasterTests.class, LargeTests.class})
public class TestMasterFailoverWithProcedures {
  private static final Log LOG = LogFactory.getLog(TestMasterFailoverWithProcedures.class);

  @ClassRule
  public static final TestRule timeout =
      CategoryBasedTimeout.forClass(TestMasterFailoverWithProcedures.class);

  protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();

  /**
   * Tunes the configuration used by the mini cluster: minimal retry/roll settings for the
   * WAL procedure store and a single master procedure thread.
   *
   * @param conf the configuration to modify in place
   */
  private static void setupConf(Configuration conf) {
    // don't waste time retrying with the roll, the test is already slow enough.
    conf.setInt(WALProcedureStore.MAX_RETRIES_BEFORE_ROLL_CONF_KEY, 1);
    conf.setInt(WALProcedureStore.WAIT_BEFORE_ROLL_CONF_KEY, 0);
    conf.setInt(WALProcedureStore.ROLL_RETRIES_CONF_KEY, 1);
    conf.setInt(WALProcedureStore.MAX_SYNC_FAILURE_ROLL_CONF_KEY, 1);
    conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
  }

  /**
   * Starts the mini cluster and makes sure the kill-before-store-update switches of the
   * master procedure executor are off before the test body runs.
   *
   * @throws Exception if the cluster cannot be started
   */
  @Before
  public void setup() throws Exception {
    setupConf(UTIL.getConfiguration());
    // presumably 2 masters (one to fail over to) and 1 region server -- confirm against
    // HBaseTestingUtility.startMiniCluster(int, int)
    UTIL.startMiniCluster(2, 1);

    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
    ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, false);
    ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, false);
  }

  /**
   * Shuts the mini cluster down. A shutdown failure is logged rather than rethrown, so it
   * does not fail the test that just ran.
   *
   * @throws Exception declared for JUnit; the shutdown exception itself is caught and logged
   */
  @After
  public void tearDown() throws Exception {
    try {
      UTIL.shutdownMiniCluster();
    } catch (Exception e) {
      LOG.warn("failure shutting down cluster", e);
    }
  }

  // ==========================================================================
  //  Test Create Table
  // ==========================================================================

  /**
   * Runs the create-table failover scenario up to the ASSIGN_REGIONS state.
   *
   * @throws Exception on any test failure
   */
  @Test
  public void testCreateWithFailover() throws Exception {
    // TODO: Should we try every step? (master failover takes long time)
    // It is already covered by TestCreateTableProcedure
    // but without the master restart, only the executor/store is restarted.
    // Without Master restart we may not find bug in the procedure code
    // like missing "wait" for resources to be available (e.g. RS)
    testCreateWithFailoverAtStep(CreateTableState.CREATE_TABLE_ASSIGN_REGIONS.ordinal());
  }

  /**
   * Submits a {@code CreateTableProcedure}, drives it through restarts and a master
   * failover, then validates that the table was created with families "f1" and "f2".
   *
   * @param step number of executor restarts to perform before the master failover
   * @throws Exception on any test failure
   */
  private void testCreateWithFailoverAtStep(final int step) throws Exception {
    final TableName tableName = TableName.valueOf("testCreateWithFailoverAtStep" + step);

    // create the table
    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
    ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, true);
    ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, true);

    // Start the Create procedure && kill the executor
    byte[][] splitKeys = null;
    HTableDescriptor htd = MasterProcedureTestingUtility.createHTD(tableName, "f1", "f2");
    HRegionInfo[] regions = ModifyRegionUtils.createHRegionInfos(htd, splitKeys);
    long procId = procExec.submitProcedure(
        new CreateTableProcedure(procExec.getEnvironment(), htd, regions));
    testRecoveryAndDoubleExecution(UTIL, procId, step, CreateTableState.values());

    MasterProcedureTestingUtility.validateTableCreation(
        UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2");
  }

  // ==========================================================================
  //  Test Delete Table
  // ==========================================================================

  /**
   * Runs the delete-table failover scenario up to the UNASSIGN_REGIONS state.
   *
   * @throws Exception on any test failure
   */
  @Test
  public void testDeleteWithFailover() throws Exception {
    // TODO: Should we try every step? (master failover takes long time)
    // It is already covered by TestDeleteTableProcedure
    // but without the master restart, only the executor/store is restarted.
    // Without Master restart we may not find bug in the procedure code
    // like missing "wait" for resources to be available (e.g. RS)
    testDeleteWithFailoverAtStep(DeleteTableState.DELETE_TABLE_UNASSIGN_REGIONS.ordinal());
  }

  /**
   * Creates and disables a table, submits a {@code DeleteTableProcedure}, drives it
   * through restarts and a master failover, then validates that the table is gone.
   *
   * @param step number of executor restarts to perform before the master failover
   * @throws Exception on any test failure
   */
  private void testDeleteWithFailoverAtStep(final int step) throws Exception {
    final TableName tableName = TableName.valueOf("testDeleteWithFailoverAtStep" + step);

    // create the table
    byte[][] splitKeys = null;
    HRegionInfo[] regions = MasterProcedureTestingUtility.createTable(
        getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
    // NOTE(review): tableDir is computed but never read in this method.
    Path tableDir = FSUtils.getTableDir(getRootDir(), tableName);
    MasterProcedureTestingUtility.validateTableCreation(
        UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2");
    UTIL.getAdmin().disableTable(tableName);

    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
    ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, true);
    ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, true);

    // Start the Delete procedure && kill the executor
    long procId = procExec.submitProcedure(
        new DeleteTableProcedure(procExec.getEnvironment(), tableName));
    testRecoveryAndDoubleExecution(UTIL, procId, step, DeleteTableState.values());

    MasterProcedureTestingUtility.validateTableDeletion(
        UTIL.getHBaseCluster().getMaster(), tableName);
  }

  // ==========================================================================
  //  Test Truncate Table
  // ==========================================================================

  /**
   * Runs the truncate-table failover scenario (preserving splits) up to the ADD_TO_META
   * state.
   *
   * @throws Exception on any test failure
   */
  @Test
  public void testTruncateWithFailover() throws Exception {
    // TODO: Should we try every step? (master failover takes long time)
    // It is already covered by TestTruncateTableProcedure
    // but without the master restart, only the executor/store is restarted.
    // Without Master restart we may not find bug in the procedure code
    // like missing "wait" for resources to be available (e.g. RS)
    testTruncateWithFailoverAtStep(true, TruncateTableState.TRUNCATE_TABLE_ADD_TO_META.ordinal());
  }

  /**
   * Creates a pre-split table with 100 rows, disables it, submits a
   * {@code TruncateTableProcedure}, drives it through restarts and a master failover, then
   * verifies region count, table layout, emptiness, and that the table accepts new rows.
   *
   * @param preserveSplits whether the truncate keeps the original split points; decides
   *          the expected region count after the truncate
   * @param step number of executor restarts to perform before the master failover
   * @throws Exception on any test failure
   */
  private void testTruncateWithFailoverAtStep(final boolean preserveSplits, final int step)
      throws Exception {
    final TableName tableName = TableName.valueOf("testTruncateWithFailoverAtStep" + step);

    // create the table
    final String[] families = new String[] { "f1", "f2" };
    final byte[][] splitKeys = new byte[][] {
      Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
    };
    HRegionInfo[] regions = MasterProcedureTestingUtility.createTable(
        getMasterProcedureExecutor(), tableName, splitKeys, families);
    // load and verify that there are rows in the table
    MasterProcedureTestingUtility.loadData(
        UTIL.getConnection(), tableName, 100, splitKeys, families);
    assertEquals(100, UTIL.countRows(tableName));
    // disable the table
    UTIL.getAdmin().disableTable(tableName);

    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);

    // Start the Truncate procedure && kill the executor
    long procId = procExec.submitProcedure(
        new TruncateTableProcedure(procExec.getEnvironment(), tableName, preserveSplits));
    testRecoveryAndDoubleExecution(UTIL, procId, step, TruncateTableState.values());

    // NOTE(review): procExec was fetched before the failover triggered above, so this
    // presumably targets the previous master's executor -- confirm whether that is intended.
    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
    UTIL.waitUntilAllRegionsAssigned(tableName);

    // validate the table regions and layout
    regions = UTIL.getAdmin().getTableRegions(tableName).toArray(new HRegionInfo[0]);
    if (preserveSplits) {
      assertEquals(1 + splitKeys.length, regions.length);
    } else {
      assertEquals(1, regions.length);
    }
    MasterProcedureTestingUtility.validateTableCreation(
        UTIL.getHBaseCluster().getMaster(), tableName, regions, families);

    // verify that there are no rows in the table
    assertEquals(0, UTIL.countRows(tableName));

    // verify that the table is read/writable
    MasterProcedureTestingUtility.loadData(
        UTIL.getConnection(), tableName, 50, splitKeys, families);
    assertEquals(50, UTIL.countRows(tableName));
  }

  // ==========================================================================
  //  Test Disable Table
  // ==========================================================================

  /**
   * Runs the disable-table failover scenario up to the MARK_REGIONS_OFFLINE state.
   *
   * @throws Exception on any test failure
   */
  @Test
  public void testDisableTableWithFailover() throws Exception {
    // TODO: Should we try every step? (master failover takes long time)
    // It is already covered by TestDisableTableProcedure
    // but without the master restart, only the executor/store is restarted.
    // Without Master restart we may not find bug in the procedure code
    // like missing "wait" for resources to be available (e.g. RS)
    testDisableTableWithFailoverAtStep(
        DisableTableState.DISABLE_TABLE_MARK_REGIONS_OFFLINE.ordinal());
  }

  /**
   * Creates a pre-split table, submits a {@code DisableTableProcedure}, drives it through
   * restarts and a master failover, then validates that the table is disabled.
   *
   * @param step number of executor restarts to perform before the master failover
   * @throws Exception on any test failure
   */
  private void testDisableTableWithFailoverAtStep(final int step) throws Exception {
    final TableName tableName = TableName.valueOf("testDisableTableWithFailoverAtStep" + step);

    // create the table
    final byte[][] splitKeys = new byte[][] {
      Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
    };
    MasterProcedureTestingUtility.createTable(
        getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");

    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);

    // Start the Disable procedure && kill the executor
    long procId = procExec.submitProcedure(
        new DisableTableProcedure(procExec.getEnvironment(), tableName, false));
    testRecoveryAndDoubleExecution(UTIL, procId, step, DisableTableState.values());

    MasterProcedureTestingUtility.validateTableIsDisabled(
        UTIL.getHBaseCluster().getMaster(), tableName);
  }

  // ==========================================================================
  //  Test Enable Table
  // ==========================================================================

  /**
   * Runs the enable-table failover scenario up to the MARK_REGIONS_ONLINE state.
   *
   * @throws Exception on any test failure
   */
  @Test
  public void testEnableTableWithFailover() throws Exception {
    // TODO: Should we try every step? (master failover takes long time)
    // It is already covered by TestEnableTableProcedure
    // but without the master restart, only the executor/store is restarted.
    // Without Master restart we may not find bug in the procedure code
    // like missing "wait" for resources to be available (e.g. RS)
    testEnableTableWithFailoverAtStep(
        EnableTableState.ENABLE_TABLE_MARK_REGIONS_ONLINE.ordinal());
  }

  /**
   * Creates a pre-split table, disables it, submits an {@code EnableTableProcedure},
   * drives it through restarts and a master failover, then validates that the table is
   * enabled.
   *
   * @param step number of executor restarts to perform before the master failover
   * @throws Exception on any test failure
   */
  private void testEnableTableWithFailoverAtStep(final int step) throws Exception {
    final TableName tableName = TableName.valueOf("testEnableTableWithFailoverAtStep" + step);

    // create the table
    final byte[][] splitKeys = new byte[][] {
      Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
    };
    MasterProcedureTestingUtility.createTable(
        getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
    UTIL.getAdmin().disableTable(tableName);

    ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);

    // Start the Enable procedure && kill the executor
    long procId = procExec.submitProcedure(
        new EnableTableProcedure(procExec.getEnvironment(), tableName, false));
    testRecoveryAndDoubleExecution(UTIL, procId, step, EnableTableState.values());

    MasterProcedureTestingUtility.validateTableIsEnabled(
        UTIL.getHBaseCluster().getMaster(), tableName);
  }

  // ==========================================================================
  //  Test Helpers
  // ==========================================================================

  /**
   * Drives a submitted procedure through repeated executor restarts and then a master
   * failover.
   *
   * <p>Sequence: wait on the procedure; then, {@code lastStepBeforeFailover} times, assert
   * it has not completed, restart the executor and wait again; assert once more that it has
   * not completed; trigger a master failover; fetch the executor of the now-active master,
   * wait on the procedure and assert that it did not fail.
   *
   * @param testUtil testing utility that owns the mini cluster
   * @param procId id of the already submitted procedure
   * @param lastStepBeforeFailover number of executor restarts before the failover; must not
   *          exceed {@code states.length} because {@code states[i]} is logged per restart
   * @param states procedure states, used only to label the log line of each restart
   * @param <TState> type of the procedure state values
   * @throws Exception on any failure while restarting, failing over, or asserting
   */
  public static <TState> void testRecoveryAndDoubleExecution(final HBaseTestingUtility testUtil,
      final long procId, final int lastStepBeforeFailover, TState[] states) throws Exception {
    ProcedureExecutor<MasterProcedureEnv> procExec =
        testUtil.getHBaseCluster().getMaster().getMasterProcedureExecutor();
    ProcedureTestingUtility.waitProcedure(procExec, procId);

    for (int i = 0; i < lastStepBeforeFailover; ++i) {
      LOG.info("Restart "+ i +" exec state: " + states[i]);
      ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId);
      ProcedureTestingUtility.restart(procExec);
      ProcedureTestingUtility.waitProcedure(procExec, procId);
    }
    ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId);

    LOG.info("Trigger master failover");
    MasterProcedureTestingUtility.masterFailover(testUtil);

    // the executor must be re-read from whichever master the cluster reports now
    procExec = testUtil.getHBaseCluster().getMaster().getMasterProcedureExecutor();
    ProcedureTestingUtility.waitProcedure(procExec, procId);
    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
  }

  // ==========================================================================
  //  Helpers
  // ==========================================================================

  /**
   * @return the procedure executor of the master currently reported by the mini cluster
   */
  private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
    return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
  }

  /**
   * @return the root directory of the current master's file system
   */
  private Path getRootDir() {
    return UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
  }
}