/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;

/**
 * Used by the HMaster on startup to split meta logs and assign the meta table.
 */
@InterfaceAudience.Private
public class MasterMetaBootstrap {
  private static final Log LOG = LogFactory.getLog(MasterMetaBootstrap.class);

  private final MonitoredTask status;
  private final HMaster master;

  private Set<ServerName> previouslyFailedServers;
  private Set<ServerName> previouslyFailedMetaRSs;

  public MasterMetaBootstrap(final HMaster master, final MonitoredTask status) {
    this.master = master;
    this.status = status;
  }

  public void splitMetaLogsBeforeAssignment() throws IOException, KeeperException {
    // Get the list of previously failed region servers that need log-splitting work.
    // We recover the hbase:meta region servers inside master initialization and handle
    // other failed servers later in SSH, so that the master node can start up ASAP.
    previouslyFailedServers = master.getMasterWalManager().getFailedServersFromLogFolders();

    // Log splitting for the hbase:meta server.
    ServerName oldMetaServerLocation =
        master.getMetaTableLocator().getMetaRegionLocation(master.getZooKeeper());
    if (oldMetaServerLocation != null && previouslyFailedServers.contains(oldMetaServerLocation)) {
      splitMetaLogBeforeAssignment(oldMetaServerLocation);
      // Note: we can't remove oldMetaServerLocation from the previouslyFailedServers list
      // because it may also host user regions.
    }
    previouslyFailedMetaRSs = getPreviouslyFailedMetaServersFromZK();
    // We need the union of previouslyFailedMetaRSs recorded in ZK and previouslyFailedServers,
    // rather than previouslyFailedMetaRSs alone, to cover two situations:
    // 1) chained failures (recovery failed multiple times in a row);
    // 2) the master got killed right before it could delete the recovering hbase:meta znode,
    //    while the same server still had non-meta WALs to replay, so
    //    removeStaleRecoveringRegionsFromZK could not delete the stale hbase:meta entry.
    // Passing extra servers into splitMetaLog is harmless: if a server has no hbase:meta WAL,
    // the call is a no-op for that server.
    previouslyFailedMetaRSs.addAll(previouslyFailedServers);
  }
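
  // For context, a minimal sketch of how the active master is expected to drive this class
  // during startup. The ordering below reflects the intended use of the methods defined in
  // this file; the surrounding variable names are illustrative, not copied from HMaster:
  //
  //   MasterMetaBootstrap metaBootstrap = new MasterMetaBootstrap(master, startupStatus);
  //   metaBootstrap.splitMetaLogsBeforeAssignment(); // split WALs of dead hbase:meta hosts
  //   metaBootstrap.assignMeta();                    // assign the primary meta replica
  //   metaBootstrap.processDeadServers();            // queue remaining dead servers for SSH
  //   metaBootstrap.assignMetaReplicas();            // assign any configured extra replicas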
  public void assignMeta() throws InterruptedException, IOException, KeeperException {
    assignMeta(previouslyFailedMetaRSs, HRegionInfo.DEFAULT_REPLICA_ID);
  }

  public void processDeadServers() throws IOException {
    // The master has already recovered the hbase:meta region server above; put the other
    // failed region servers into a queue to be handled later by SSH.
    for (ServerName tmpServer : previouslyFailedServers) {
      master.getServerManager().processDeadServer(tmpServer, true);
    }
  }

  public void assignMetaReplicas() throws IOException, InterruptedException, KeeperException {
    int numReplicas = master.getConfiguration().getInt(HConstants.META_REPLICAS_NUM,
        HConstants.DEFAULT_META_REPLICA_NUM);
    final Set<ServerName> EMPTY_SET = new HashSet<>();
    // Replica 0 (the primary) is assigned by assignMeta(); assign the remaining replicas here.
    for (int i = 1; i < numReplicas; i++) {
      assignMeta(EMPTY_SET, i);
    }
    unassignExcessMetaReplica(numReplicas);
  }

  private void splitMetaLogBeforeAssignment(ServerName currentMetaServer) throws IOException {
    if (RecoveryMode.LOG_REPLAY == master.getMasterWalManager().getLogRecoveryMode()) {
      // In log replay mode, we mark the hbase:meta region as recovering in ZK.
      master.getMasterWalManager().prepareLogReplay(currentMetaServer,
          Collections.<HRegionInfo>singleton(HRegionInfo.FIRST_META_REGIONINFO));
    } else {
      // In recovered.edits mode: create recovered edits files for the hbase:meta server.
      master.getMasterWalManager().splitMetaLog(currentMetaServer);
    }
  }

  private void unassignExcessMetaReplica(int numMetaReplicasConfigured) {
    final ZooKeeperWatcher zooKeeper = master.getZooKeeper();
    // Unassign the unneeded replicas. For example, if the previous master was configured
    // with a replica count of 3 and it is now 2, we need to unassign the one extra replica.
    try {
      List<String> metaReplicaZnodes = zooKeeper.getMetaReplicaNodes();
      for (String metaReplicaZnode : metaReplicaZnodes) {
        int replicaId = zooKeeper.znodePaths.getMetaReplicaIdFromZnode(metaReplicaZnode);
        if (replicaId >= numMetaReplicasConfigured) {
          RegionState r = MetaTableLocator.getMetaRegionState(zooKeeper, replicaId);
          LOG.info("Closing excess replica of meta region " + r.getRegion());
          // Send a close and wait for a max of 30 seconds.
          ServerManager.closeRegionSilentlyAndWait(master.getClusterConnection(),
              r.getServerName(), r.getRegion(), 30000);
          ZKUtil.deleteNode(zooKeeper, zooKeeper.znodePaths.getZNodeForReplica(replicaId));
        }
      }
    } catch (Exception ex) {
      // Ignore the exception: we don't want the master to be wedged by potential issues in
      // the cleanup of the extra replicas. That cleanup can be done via hbck or manually.
      LOG.warn("Ignoring exception " + ex);
    }
  }
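
  // Worked example for the cleanup above, assuming the default ZK layout of this HBase
  // version (znode names are illustrative of the defaults). With meta replicas previously
  // configured at 3, ZK holds:
  //
  //   /hbase/meta-region-server      (replicaId 0, the primary)
  //   /hbase/meta-region-server-1    (replicaId 1)
  //   /hbase/meta-region-server-2    (replicaId 2)
  //
  // If hbase.meta.replica.count (HConstants.META_REPLICAS_NUM) is now 2, only replicaId 2
  // satisfies "replicaId >= numMetaReplicasConfigured", so that one replica is closed and
  // its znode deleted.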
  /**
   * Check that <code>hbase:meta</code> is assigned; if not, assign it.
   */
  protected void assignMeta(Set<ServerName> previouslyFailedMetaRSs, int replicaId)
      throws InterruptedException, IOException, KeeperException {
    final AssignmentManager assignmentManager = master.getAssignmentManager();

    // Work on the meta region.
    int assigned = 0;
    long timeout =
        master.getConfiguration().getLong("hbase.catalog.verification.timeout", 1000);
    if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
      status.setStatus("Assigning hbase:meta region");
    } else {
      status.setStatus("Assigning hbase:meta region, replicaId " + replicaId);
    }

    // Get the current meta state from ZK.
    RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper(), replicaId);
    HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
        HRegionInfo.FIRST_META_REGIONINFO, replicaId);
    RegionStates regionStates = assignmentManager.getRegionStates();
    regionStates.createRegionState(hri, metaState.getState(), metaState.getServerName(), null);

    if (!metaState.isOpened() || !master.getMetaTableLocator().verifyMetaRegionLocation(
        master.getClusterConnection(), master.getZooKeeper(), timeout, replicaId)) {
      ServerName currentMetaServer = metaState.getServerName();
      if (master.getServerManager().isServerOnline(currentMetaServer)) {
        if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
          LOG.info("Meta was in transition on " + currentMetaServer);
        } else {
          LOG.info("Meta with replicaId " + replicaId + " was in transition on " +
              currentMetaServer);
        }
        assignmentManager.processRegionsInTransition(Collections.singletonList(metaState));
      } else {
        if (currentMetaServer != null) {
          if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
            splitMetaLogBeforeAssignment(currentMetaServer);
            regionStates.logSplit(HRegionInfo.FIRST_META_REGIONINFO);
            previouslyFailedMetaRSs.add(currentMetaServer);
          }
        }
        LOG.info("Re-assigning hbase:meta with replicaId " + replicaId +
            ", it was on " + currentMetaServer);
        assignmentManager.assignMeta(hri);
      }
      assigned++;
    }

    if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
      // TODO: should we prevent using the state manager before meta is initialized?
      // tableStateManager.start();
      master.getTableStateManager()
          .setTableState(TableName.META_TABLE_NAME, TableState.State.ENABLED);
    }

    if ((RecoveryMode.LOG_REPLAY == master.getMasterWalManager().getLogRecoveryMode())
        && (!previouslyFailedMetaRSs.isEmpty())) {
      // Log replay mode requires that the new hbase:meta region server be assigned first.
      status.setStatus("replaying log for Meta Region");
      master.getMasterWalManager().splitMetaLog(previouslyFailedMetaRSs);
    }

    assignmentManager.setEnabledTable(TableName.META_TABLE_NAME);
    master.getTableStateManager().start();

    // Make sure an hbase:meta location is set. We need to enable SSH here: if the meta region
    // server dies at this point, we need SSH to re-assign it so that system tables can be
    // assigned as well.
    // No need to wait for meta when assigned == 0, i.e. meta was just verified in place.
    if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
      enableCrashedServerProcessing(assigned != 0);
    }
    LOG.info("hbase:meta with replicaId " + replicaId + " assigned=" + assigned + ", location="
        + master.getMetaTableLocator().getMetaRegionLocation(master.getZooKeeper(), replicaId));
    status.setStatus("META assigned.");
  }
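
  // A minimal sketch, outside the bootstrap flow, of how a caller could probe the current
  // hbase:meta state the same way assignMeta() does. The "zk" watcher is an assumption,
  // standing in for a live ZooKeeperWatcher such as master.getZooKeeper():
  //
  //   RegionState state = MetaTableLocator.getMetaRegionState(zk, HRegionInfo.DEFAULT_REPLICA_ID);
  //   ServerName location = state.getServerName(); // may be null if no location was published
  //   boolean opened = state.isOpened();           // true once the region is open at "location"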
  private void enableCrashedServerProcessing(final boolean waitForMeta)
      throws IOException, InterruptedException {
    // If crashed server processing is disabled, enable it and expire those servers that are
    // dead but not yet expired. This is required so that if meta is being assigned to a server
    // that dies after assignMeta starts the assignment, ServerCrashProcedure can re-assign it.
    // Otherwise, we would be stuck here waiting forever when waitForMeta is specified.
    if (!master.isServerCrashProcessingEnabled()) {
      master.setServerCrashProcessingEnabled(true);
      master.getServerManager().processQueuedDeadServers();
    }

    if (waitForMeta) {
      master.getMetaTableLocator().waitMetaRegionLocation(master.getZooKeeper());
    }
  }

  /**
   * Returns the set of region server names recorded under the hbase:meta recovering-region
   * ZK node.
   * @return set of meta server names that were recorded in ZK
   */
  private Set<ServerName> getPreviouslyFailedMetaServersFromZK() throws KeeperException {
    final ZooKeeperWatcher zooKeeper = master.getZooKeeper();
    Set<ServerName> result = new HashSet<>();
    String metaRecoveringZNode = ZKUtil.joinZNode(zooKeeper.znodePaths.recoveringRegionsZNode,
        HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
    List<String> regionFailedServers = ZKUtil.listChildrenNoWatch(zooKeeper, metaRecoveringZNode);
    if (regionFailedServers == null) {
      return result;
    }
    for (String failedServer : regionFailedServers) {
      ServerName server = ServerName.parseServerName(failedServer);
      result.add(server);
    }
    return result;
  }
}