/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.testclassification.FlakeyTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category({FlakeyTests.class, LargeTests.class})
public class TestMasterFailover {
  private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);

  HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
      final HTableDescriptor htd)
      throws IOException {
    HRegion r = HBaseTestingUtility.createRegionAndWAL(hri, rootdir, c, htd);
    // The above call to create a region will create a WAL file. Each WAL file
    // created also starts a running thread to do syncing. We need to close out
    // this WAL or we will have a running thread trying to sync the file system
    // continuously, which is ugly when dfs is taken away at the end of the test.
    HBaseTestingUtility.closeRegionAndWAL(r);
    return r;
  }

  // TODO: Next test to add is with testing permutations of the RIT or the RS
  // killed are hosting ROOT and hbase:meta regions.
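  // Utility used by the tests below to make phase boundaries easy to spot in the
  // test output: logs the given message surrounded by blank lines.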
  private void log(String string) {
    LOG.info("\n\n" + string + " \n\n");
  }

  /**
   * Simple test of master failover.
   * <p>
   * Starts with three masters. Kills a backup master. Then kills the active
   * master. Ensures the final master becomes active and we can still contact
   * the cluster.
   * @throws Exception
   */
  @Test (timeout=240000)
  public void testSimpleMasterFailover() throws Exception {

    final int NUM_MASTERS = 3;
    final int NUM_RS = 3;

    // Start the cluster
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();

    // get all the master threads
    List<MasterThread> masterThreads = cluster.getMasterThreads();

    // wait for each to come online
    for (MasterThread mt : masterThreads) {
      assertTrue(mt.isAlive());
    }

    // verify only one is the active master and we have the right number
    int numActive = 0;
    int activeIndex = -1;
    ServerName activeName = null;
    HMaster active = null;
    for (int i = 0; i < masterThreads.size(); i++) {
      if (masterThreads.get(i).getMaster().isActiveMaster()) {
        numActive++;
        activeIndex = i;
        active = masterThreads.get(activeIndex).getMaster();
        activeName = active.getServerName();
      }
    }
    assertEquals(1, numActive);
    assertEquals(NUM_MASTERS, masterThreads.size());
    LOG.info("Active master " + activeName);

    // Check that ClusterStatus reports the correct active and backup masters
    assertNotNull(active);
    ClusterStatus status = active.getClusterStatus();
    assertTrue(status.getMaster().equals(activeName));
    assertEquals(2, status.getBackupMastersSize());
    assertEquals(2, status.getBackupMasters().size());

    // attempt to stop one of the inactive masters
    int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
    HMaster master = cluster.getMaster(backupIndex);
    LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
    cluster.stopMaster(backupIndex, false);
    cluster.waitOnMaster(backupIndex);

    // Verify still one active master and it's the same
    for (int i = 0; i < masterThreads.size(); i++) {
      if (masterThreads.get(i).getMaster().isActiveMaster()) {
        assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
        activeIndex = i;
        active = masterThreads.get(activeIndex).getMaster();
      }
    }
    assertEquals(1, numActive);
    assertEquals(2, masterThreads.size());
    int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
    LOG.info("Active master " + active.getServerName() + " managing " + rsCount +
        " region servers");
    assertEquals(4, rsCount);

    // Check that ClusterStatus reports the correct active and backup masters
    assertNotNull(active);
    status = active.getClusterStatus();
    assertTrue(status.getMaster().equals(activeName));
    assertEquals(1, status.getBackupMastersSize());
    assertEquals(1, status.getBackupMasters().size());

    // kill the active master
    LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
    cluster.stopMaster(activeIndex, false);
    cluster.waitOnMaster(activeIndex);

    // wait for an active master to show up and be ready
    assertTrue(cluster.waitForActiveAndReadyMaster());

    LOG.debug("\n\nVerifying backup master is now active\n");
    // should only have one master now
    assertEquals(1, masterThreads.size());

    // and it should be active
    active = masterThreads.get(0).getMaster();
    assertNotNull(active);
    status = active.getClusterStatus();
    ServerName mastername = status.getMaster();
    assertTrue(mastername.equals(active.getServerName()));
    assertTrue(active.isActiveMaster());
    assertEquals(0, status.getBackupMastersSize());
    assertEquals(0,
        status.getBackupMasters().size());
    int rss = status.getServersSize();
    LOG.info("Active master " + mastername.getServerName() + " managing " + rss +
        " region servers");
    assertEquals(4, rss);

    // Stop the cluster
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Test regions in pending_open/close when the master fails over
   */
  @Test (timeout=180000)
  public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
    final int NUM_MASTERS = 1;
    final int NUM_RS = 1;

    // Create config to use for this cluster
    Configuration conf = HBaseConfiguration.create();

    // Start the cluster
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Cluster started");

    // get all the master threads
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    assertEquals(1, masterThreads.size());

    // only one master thread, let's wait for it to be initialized
    assertTrue(cluster.waitForActiveAndReadyMaster());
    HMaster master = masterThreads.get(0).getMaster();
    assertTrue(master.isActiveMaster());
    assertTrue(master.isInitialized());

    // Create a table with a region online
    Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
    onlineTable.close();

    // Create a table in META, so it has a region offline
    HTableDescriptor offlineTable = new HTableDescriptor(
        TableName.valueOf(Bytes.toBytes("offlineTable")));
    offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));

    FileSystem filesystem = FileSystem.get(conf);
    Path rootdir = FSUtils.getRootDir(conf);
    FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
    fstd.createTableDescriptor(offlineTable);

    HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
    createRegion(hriOffline, rootdir, conf, offlineTable);
    MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);

    log("Regions in hbase:meta and namespace have been created");

    // at this point we only expect 3 regions to be assigned out
    // (catalogs and namespace, + 1 online region)
    assertEquals(3, cluster.countServedRegions());

    HRegionInfo hriOnline = null;
    try (RegionLocator locator =
        TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
      hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
    }
    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();

    // Put the online region in pending_close. It is actually already opened.
    // This is to simulate that the region close RPC is not sent out before failover
    RegionState oldState = regionStates.getRegionState(hriOnline);
    RegionState newState = new RegionState(
        hriOnline, State.PENDING_CLOSE, oldState.getServerName());
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

    // Put the offline region in pending_open. It is actually not opened yet.
    // This is to simulate that the region open RPC is not sent out before failover
    oldState = new RegionState(hriOffline, State.OFFLINE);
    newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

    HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
    createRegion(failedClose, rootdir, conf, offlineTable);
    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);

    oldState = new RegionState(failedClose, State.PENDING_CLOSE);
    newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

    HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
    createRegion(failedOpen, rootdir, conf, offlineTable);
    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);

    // Simulate a region transitioning to failed open when the region server reports the
    // transition as FAILED_OPEN
    oldState = new RegionState(failedOpen, State.PENDING_OPEN);
    newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

    HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
    LOG.info("Failed open null server " + failedOpenNullServer.getEncodedName());
    createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);

    // Simulate a region transitioning to failed open when the master couldn't find a plan for
    // the region
    oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
    newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

    // Stop the master
    log("Aborting master");
    cluster.abortMaster(0);
    cluster.waitOnMaster(0);
    log("Master has aborted");

    // Start up a new master
    log("Starting up a new master");
    master = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    // Wait till no region is in transition any more
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    // Get new region states since master restarted
    regionStates = master.getAssignmentManager().getRegionStates();

    // Both pending_open regions (RPC sent or not) should now be online
    assertTrue(regionStates.isRegionOnline(hriOffline));
    assertTrue(regionStates.isRegionOnline(hriOnline));
    assertTrue(regionStates.isRegionOnline(failedClose));
    assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
    assertTrue(regionStates.isRegionOnline(failedOpen));

    log("Done with verification, shutting down cluster");

    // Done, shutdown the cluster
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Test hbase:meta in transition when the master fails over
   */
  @Test(timeout = 180000)
  public void testMetaInTransitionWhenMasterFailover() throws Exception {
    final int NUM_MASTERS = 1;
    final int NUM_RS = 1;

    // Start the cluster
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Cluster started");

    log("Moving meta off the master");
    HMaster activeMaster = cluster.getMaster();
    HRegionServer rs = cluster.getRegionServer(0);
    ServerName metaServerName = cluster.getLiveRegionServerThreads()
        .get(0).getRegionServer().getServerName();
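    // Ask the active master to move hbase:meta onto the chosen region server;
    // the destination is passed as the bytes of its server name.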
    activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
        Bytes.toBytes(metaServerName.getServerName()));

    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    assertEquals("Meta should be assigned on expected regionserver",
        metaServerName, activeMaster.getMetaTableLocator()
            .getMetaRegionLocation(activeMaster.getZooKeeper()));

    // Now kill master, meta should remain on rs, where we placed it before.
    log("Aborting master");
    activeMaster.abort("test-kill");
    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
    log("Master has aborted");

    // meta should remain where it was
    RegionState metaState = MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
    assertEquals("hbase:meta should be onlined on RS",
        metaState.getServerName(), rs.getServerName());
    assertEquals("hbase:meta should be onlined on RS",
        metaState.getState(), State.OPEN);

    // Start up a new master
    log("Starting up a new master");
    activeMaster = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    // ensure meta is still deployed on the RS
    metaState = MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
    assertEquals("hbase:meta should be onlined on RS",
        metaState.getServerName(), rs.getServerName());
    assertEquals("hbase:meta should be onlined on RS",
        metaState.getState(), State.OPEN);

    // Update the meta state to PENDING_OPEN, then kill the master.
    // This simulates the RS having successfully deployed the region, but the
    // RPC being lost right before the failure.
    // The region server should expire (how can it be verified?)
    MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
        rs.getServerName(), State.PENDING_OPEN);
    Region meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
    rs.removeFromOnlineRegions(meta, null);
    ((HRegion)meta).close();

    log("Aborting master");
    activeMaster.abort("test-kill");
    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
    log("Master has aborted");

    // Start up a new master
    log("Starting up a new master");
    activeMaster = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Meta was assigned");

    metaState = MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
    assertEquals("hbase:meta should be onlined on RS",
        metaState.getServerName(), rs.getServerName());
    assertEquals("hbase:meta should be onlined on RS",
        metaState.getState(), State.OPEN);

    // Update the meta state to PENDING_CLOSE, then kill the master.
    // This simulates the RS having successfully deployed the region, but the
    // RPC being lost right before the failure.
    // The region server should expire (how can it be verified?)
    MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
        rs.getServerName(), State.PENDING_CLOSE);

    log("Aborting master");
    activeMaster.abort("test-kill");
    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
    log("Master has aborted");

    rs.getRSRpcServices().closeRegion(null, ProtobufUtil.buildCloseRegionRequest(
        rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()));

    // Start up a new master
    log("Starting up a new master");
    activeMaster = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Meta was assigned");
    // Done, shutdown the cluster
    TEST_UTIL.shutdownMiniCluster();
  }
}