/** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.master; import com.google.common.annotations.VisibleForTesting; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.Random; import java.util.Set; import java.util.TreeMap; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.CoordinatedStateException; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.RegionLocations; import org.apache.hadoop.hbase.RegionStateListener; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.MasterSwitchType; import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.TableState; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.executor.EventType; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.favored.FavoredNodesManager; import org.apache.hadoop.hbase.favored.FavoredNodesPromoter; import org.apache.hadoop.hbase.ipc.FailedServerException; import org.apache.hadoop.hbase.ipc.RpcClient; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan.PlanType; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition; import 
org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; import org.apache.hadoop.hbase.quotas.QuotaExceededException; import org.apache.hadoop.hbase.regionserver.RegionOpeningState; import org.apache.hadoop.hbase.regionserver.RegionServerAbortedException; import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.KeyLocker; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.PairOfSameType; import org.apache.hadoop.hbase.util.RetryCounter; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.wal.AbstractFSWALProvider; import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.util.StringUtils; import org.apache.zookeeper.KeeperException; /** * Manages and performs region assignment. * Related communications with regionserver are all done over RPC. */ @InterfaceAudience.Private public class AssignmentManager { private static final Log LOG = LogFactory.getLog(AssignmentManager.class); protected final MasterServices server; private ServerManager serverManager; private boolean shouldAssignRegionsWithFavoredNodes; private LoadBalancer balancer; private final MetricsAssignmentManager metricsAssignmentManager; private AtomicInteger numRegionsOpened = new AtomicInteger(0); final private KeyLocker<String> locker = new KeyLocker<>(); Set<HRegionInfo> replicasToClose = Collections.synchronizedSet(new HashSet<HRegionInfo>()); /** * Map of regions to reopen after the schema of a table is changed. Key - * encoded region name, value - HRegionInfo */ private final Map <String, HRegionInfo> regionsToReopen; /* * Maximum times we recurse an assignment/unassignment. * See below in {@link #assign()} and {@link #unassign()}. */ private final int maximumAttempts; /** * The sleep time for which the assignment will wait before retrying in case of * hbase:meta assignment failure due to lack of availability of region plan or bad region plan */ private final long sleepTimeBeforeRetryingMetaAssignment; /** Plans for region movement. Key is the encoded version of a region name*/ // TODO: When do plans get cleaned out? Ever? In server open and in server // shutdown processing -- St.Ack // All access to this Map must be synchronized. final NavigableMap<String, RegionPlan> regionPlans = new TreeMap<>(); private final TableStateManager tableStateManager; private final ExecutorService executorService; private java.util.concurrent.ExecutorService threadPoolExecutorService; private ScheduledThreadPoolExecutor scheduledThreadPoolExecutor; private final RegionStates regionStates; // The threshold to use bulk assigning. Using bulk assignment // only if assigning at least this many regions to at least this // many servers. If assigning fewer regions to fewer servers, // bulk assigning may be not as efficient. private final int bulkAssignThresholdRegions; private final int bulkAssignThresholdServers; private final int bulkPerRegionOpenTimeGuesstimate; // Should bulk assignment wait till all regions are assigned, // or it is timed out? This is useful to measure bulk assignment // performance, but not needed in most use cases. 
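  // A hedged tuning sketch, not a recommendation: these are the bulk-assignment
  // switches read in the constructor below, shown with the defaults coded there.
  // Overriding them (for example in a test Configuration) changes when this
  // manager falls back to per-region assignment instead of GeneralBulkAssigner:
  //   conf.setInt("hbase.bulk.assignment.threshold.regions", 7);
  //   conf.setInt("hbase.bulk.assignment.threshold.servers", 3);
  //   conf.setBoolean("hbase.bulk.assignment.waittillallassigned", false);
  //   conf.setInt("hbase.bulk.assignment.perregion.open.time", 10000);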
private final boolean bulkAssignWaitTillAllAssigned; /** * Indicator that AssignmentManager has recovered the region states so * that ServerShutdownHandler can be fully enabled and re-assign regions * of dead servers. So that when re-assignment happens, AssignmentManager * has proper region states. * * Protected to ease testing. */ protected final AtomicBoolean failoverCleanupDone = new AtomicBoolean(false); /** * A map to track the count a region fails to open in a row. * So that we don't try to open a region forever if the failure is * unrecoverable. We don't put this information in region states * because we don't expect this to happen frequently; we don't * want to copy this information over during each state transition either. */ private final ConcurrentHashMap<String, AtomicInteger> failedOpenTracker = new ConcurrentHashMap<>(); // In case not using ZK for region assignment, region states // are persisted in meta with a state store private final RegionStateStore regionStateStore; /** * For testing only! Set to true to skip handling of split. */ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="MS_SHOULD_BE_FINAL") public static boolean TEST_SKIP_SPLIT_HANDLING = false; /** Listeners that are called on assignment events. */ private List<AssignmentListener> listeners = new CopyOnWriteArrayList<>(); private RegionStateListener regionStateListener; private RetryCounter.BackoffPolicy backoffPolicy; private RetryCounter.RetryConfig retryConfig; /** * Constructs a new assignment manager. * * @param server instance of HMaster this AM running inside * @param serverManager serverManager for associated HMaster * @param balancer implementation of {@link LoadBalancer} * @param service Executor service * @param metricsMaster metrics manager * @throws IOException */ public AssignmentManager(MasterServices server, ServerManager serverManager, final LoadBalancer balancer, final ExecutorService service, MetricsMaster metricsMaster, final TableStateManager tableStateManager) throws IOException { this.server = server; this.serverManager = serverManager; this.executorService = service; this.regionStateStore = new RegionStateStore(server); this.regionsToReopen = Collections.synchronizedMap (new HashMap<String, HRegionInfo> ()); Configuration conf = server.getConfiguration(); this.tableStateManager = tableStateManager; // This is the max attempts, not retries, so it should be at least 1. this.maximumAttempts = Math.max(1, this.server.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10)); this.sleepTimeBeforeRetryingMetaAssignment = this.server.getConfiguration().getLong( "hbase.meta.assignment.retry.sleeptime", 1000l); this.balancer = balancer; // Only read favored nodes if using the favored nodes load balancer. 
this.shouldAssignRegionsWithFavoredNodes = this.balancer instanceof FavoredNodesPromoter; int maxThreads = conf.getInt("hbase.assignment.threads.max", 30); this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool( maxThreads, 60L, TimeUnit.SECONDS, Threads.newDaemonThreadFactory("AM.")); this.scheduledThreadPoolExecutor = new ScheduledThreadPoolExecutor(1, Threads.newDaemonThreadFactory("AM.Scheduler")); this.regionStates = new RegionStates( server, tableStateManager, serverManager, regionStateStore); this.bulkAssignWaitTillAllAssigned = conf.getBoolean("hbase.bulk.assignment.waittillallassigned", false); this.bulkAssignThresholdRegions = conf.getInt("hbase.bulk.assignment.threshold.regions", 7); this.bulkAssignThresholdServers = conf.getInt("hbase.bulk.assignment.threshold.servers", 3); this.bulkPerRegionOpenTimeGuesstimate = conf.getInt("hbase.bulk.assignment.perregion.open.time", 10000); this.metricsAssignmentManager = new MetricsAssignmentManager(); // Configurations for retrying opening a region on receiving a FAILED_OPEN this.retryConfig = new RetryCounter.RetryConfig(); this.retryConfig.setSleepInterval(conf.getLong("hbase.assignment.retry.sleep.initial", 0l)); // Set the max time limit to the initial sleep interval so we use a constant time sleep strategy // if the user does not set a max sleep limit this.retryConfig.setMaxSleepTime(conf.getLong("hbase.assignment.retry.sleep.max", retryConfig.getSleepInterval())); this.backoffPolicy = getBackoffPolicy(); } /** * Returns the backoff policy used for Failed Region Open retries * @return the backoff policy used for Failed Region Open retries */ RetryCounter.BackoffPolicy getBackoffPolicy() { return new RetryCounter.ExponentialBackoffPolicyWithLimit(); } MetricsAssignmentManager getAssignmentManagerMetrics() { return this.metricsAssignmentManager; } /** * Add the listener to the notification list. * @param listener The AssignmentListener to register */ public void registerListener(final AssignmentListener listener) { this.listeners.add(listener); } /** * Remove the listener from the notification list. * @param listener The AssignmentListener to unregister */ public boolean unregisterListener(final AssignmentListener listener) { return this.listeners.remove(listener); } /** * @return Instance of ZKTableStateManager. */ public TableStateManager getTableStateManager() { // These are 'expensive' to make involving trip to zk ensemble so allow // sharing. return this.tableStateManager; } /** * This SHOULD not be public. It is public now * because of some unit tests. * * TODO: make it package private and keep RegionStates in the master package */ public RegionStates getRegionStates() { return regionStates; } /** * Used in some tests to mock up region state in meta */ @VisibleForTesting RegionStateStore getRegionStateStore() { return regionStateStore; } public RegionPlan getRegionReopenPlan(HRegionInfo hri) { return new RegionPlan(hri, null, regionStates.getRegionServerOfRegion(hri)); } /** * Add a regionPlan for the specified region. * @param encodedName * @param plan */ public void addPlan(String encodedName, RegionPlan plan) { synchronized (regionPlans) { regionPlans.put(encodedName, plan); } } /** * Add a map of region plans. 
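   * <p>A hypothetical usage sketch (the variable names are illustrative, not part of
   * this class): callers queue one plan per encoded region name so that getRegionPlan(...)
   * later picks the intended destination, which is what the bulk-assign path below does.
   * <pre>{@code
   *   Map<String, RegionPlan> plans = new HashMap<>();
   *   plans.put(hri.getEncodedName(), new RegionPlan(hri, currentServer, destinationServer));
   *   assignmentManager.addPlans(plans);
   * }</pre>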
*/ public void addPlans(Map<String, RegionPlan> plans) { synchronized (regionPlans) { regionPlans.putAll(plans); } } /** * Set the list of regions that will be reopened * because of an update in table schema * * @param regions * list of regions that should be tracked for reopen */ public void setRegionsToReopen(List<HRegionInfo> regions) { for (HRegionInfo hri : regions) { regionsToReopen.put(hri.getEncodedName(), hri); } } /** * Used by the client to identify if all regions have the schema updates * * @param tableName * @return Pair indicating the status of the alter command * @throws IOException */ public Pair<Integer, Integer> getReopenStatus(TableName tableName) throws IOException { List<HRegionInfo> hris; if (TableName.META_TABLE_NAME.equals(tableName)) { hris = new MetaTableLocator().getMetaRegions(server.getZooKeeper()); } else { hris = MetaTableAccessor.getTableRegions(server.getConnection(), tableName, true); } Integer pending = 0; for (HRegionInfo hri : hris) { String name = hri.getEncodedName(); // no lock concurrent access ok: sequential consistency respected. if (regionsToReopen.containsKey(name) || regionStates.isRegionInTransition(name)) { pending++; } } return new Pair<>(pending, hris.size()); } /** * Used by ServerShutdownHandler to make sure AssignmentManager has completed * the failover cleanup before re-assigning regions of dead servers. So that * when re-assignment happens, AssignmentManager has proper region states. */ public boolean isFailoverCleanupDone() { return failoverCleanupDone.get(); } /** * To avoid racing with AM, external entities may need to lock a region, * for example, when SSH checks what regions to skip re-assigning. */ public Lock acquireRegionLock(final String encodedName) { return locker.acquireLock(encodedName); } /** * Now that failover cleanup is completed, notify the server manager to * process queued-up dead servers, if any. */ void failoverCleanupDone() { failoverCleanupDone.set(true); serverManager.processQueuedDeadServers(); } /** * Called on startup. * Figures out whether this is a fresh cluster start or we are joining an extant running cluster. * @throws IOException * @throws KeeperException * @throws InterruptedException * @throws CoordinatedStateException */ void joinCluster() throws IOException, KeeperException, InterruptedException, CoordinatedStateException { long startTime = System.currentTimeMillis(); // Concurrency note: In the below the accesses on regionsInTransition are // outside of a synchronization block where usually all accesses to RIT are // synchronized. The presumption is that in this case it is safe since this // method is run by a single thread on startup. // TODO: Regions that have a null location and are not in regionsInTransitions // need to be handled. // Scan hbase:meta to build list of existing regions, servers, and assignment // Returns servers who have not checked in (assumed dead) that some regions // were assigned to (according to the meta) Set<ServerName> deadServers = rebuildUserRegions(); // This method will assign all user regions if a clean server startup or // it will reconstruct master state and clean up any leftovers from the previous master process. boolean failover = processDeadServersAndRegionsInTransition(deadServers); LOG.info("Joined the cluster in " + (System.currentTimeMillis() - startTime) + "ms, failover=" + failover); } /** * Processes all regions that are in transition in zookeeper and also * processes the list of dead servers. * Used by a master joining a cluster.
If we figure this is a clean cluster * startup, will assign all user regions. * @param deadServers Set of servers that are offline probably legitimately that were carrying * regions according to a scan of hbase:meta. Can be null. * @throws IOException * @throws InterruptedException */ boolean processDeadServersAndRegionsInTransition(final Set<ServerName> deadServers) throws KeeperException, IOException, InterruptedException, CoordinatedStateException { // TODO Needed? List<String> nodes = ZKUtil.listChildrenNoWatch(watcher, watcher.assignmentZNode); boolean failover = !serverManager.getDeadServers().isEmpty(); if (failover) { // This may not be a failover actually, especially if meta is on this master. if (LOG.isDebugEnabled()) { LOG.debug("Found dead servers out on cluster " + serverManager.getDeadServers()); } // Check if there are any regions on these servers failover = false; for (ServerName serverName : serverManager.getDeadServers().copyServerNames()) { if (regionStates.getRegionAssignments().values().contains(serverName)) { LOG.debug("Found regions on dead server: " + serverName); failover = true; break; } } } Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet(); if (!failover) { // If any one region except meta is assigned, it's a failover. for (Map.Entry<HRegionInfo, ServerName> en: regionStates.getRegionAssignments().entrySet()) { HRegionInfo hri = en.getKey(); if (!hri.isMetaTable() && onlineServers.contains(en.getValue())) { LOG.debug("Found region " + hri + " out on cluster"); failover = true; break; } } } if (!failover) { // If any region except meta is in transition on a live server, it's a failover. Set<RegionState> regionsInTransition = regionStates.getRegionsInTransition(); if (!regionsInTransition.isEmpty()) { for (RegionState regionState: regionsInTransition) { ServerName serverName = regionState.getServerName(); if (!regionState.getRegion().isMetaRegion() && serverName != null && onlineServers.contains(serverName)) { LOG.debug("Found " + regionState + " for region " + regionState.getRegion().getRegionNameAsString() + " for server " + serverName + "in RITs"); failover = true; break; } } } } if (!failover) { // If we get here, we have a full cluster restart. It is a failover only // if there are some WALs are not split yet. For meta WALs, they should have // been split already, if any. We can walk through those queued dead servers, // if they don't have any WALs, this restart should be considered as a clean one Set<ServerName> queuedDeadServers = serverManager.getRequeuedDeadServers().keySet(); if (!queuedDeadServers.isEmpty()) { Configuration conf = server.getConfiguration(); Path walRootDir = FSUtils.getWALRootDir(conf); FileSystem walFs = FSUtils.getWALFileSystem(conf); for (ServerName serverName: queuedDeadServers) { // In the case of a clean exit, the shutdown handler would have presplit any WALs and // removed empty directories. Path walDir = new Path(walRootDir, AbstractFSWALProvider.getWALDirectoryName(serverName.toString())); Path splitDir = walDir.suffix(AbstractFSWALProvider.SPLITTING_EXT); if (checkWals(walFs, walDir) || checkWals(walFs, splitDir)) { LOG.debug("Found queued dead server " + serverName); failover = true; break; } } if (!failover) { // We figured that it's not a failover, so no need to // work on these re-queued dead servers any more. 
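      // (Summary of the checks above: the startup is treated as a failover if any
      // known-dead server still has regions assigned to it in meta, if any user
      // region is assigned to a live server, if any user region is in transition
      // on a live server, or if a re-queued dead server left a non-empty WAL or
      // WAL-splitting directory behind. Only when all of those checks come up
      // empty do we fall through here and treat this as a clean startup.)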
LOG.info("AM figured that it's not a failover and cleaned up " + queuedDeadServers.size() + " queued dead servers"); serverManager.removeRequeuedDeadServers(); } } } Set<TableName> disabledOrDisablingOrEnabling = null; Map<HRegionInfo, ServerName> allRegions = null; if (!failover) { disabledOrDisablingOrEnabling = tableStateManager.getTablesInStates( TableState.State.DISABLED, TableState.State.DISABLING, TableState.State.ENABLING); // Clean re/start, mark all user regions closed before reassignment allRegions = regionStates.closeAllUserRegions( disabledOrDisablingOrEnabling); } // Now region states are restored regionStateStore.start(); if (failover) { if (deadServers != null && !deadServers.isEmpty()) { for (ServerName serverName: deadServers) { if (!serverManager.isServerDead(serverName)) { serverManager.expireServer(serverName); // Let SSH do region re-assign } } } processRegionsInTransition(regionStates.getRegionsInTransition()); } // Now we can safely claim failover cleanup completed and enable // ServerShutdownHandler for further processing. The nodes (below) // in transition, if any, are for regions not related to those // dead servers at all, and can be done in parallel to SSH. failoverCleanupDone(); if (!failover) { // Fresh cluster startup. LOG.info("Clean cluster startup. Don't reassign user regions"); assignAllUserRegions(allRegions); } else { LOG.info("Failover! Reassign user regions"); } // unassign replicas of the split parents and the merged regions // the daughter replicas are opened in assignAllUserRegions if it was // not already opened. for (HRegionInfo h : replicasToClose) { unassign(h); } replicasToClose.clear(); return failover; } private boolean checkWals(FileSystem fs, Path dir) throws IOException { if (!fs.exists(dir)) { LOG.debug(dir + " doesn't exist"); return false; } if (!fs.getFileStatus(dir).isDirectory()) { LOG.warn(dir + " is not a directory"); return false; } FileStatus[] files = FSUtils.listStatus(fs, dir); if (files == null || files.length == 0) { LOG.debug(dir + " has no files"); return false; } for (int i = 0; i < files.length; i++) { if (files[i].isFile() && files[i].getLen() > 0) { LOG.debug(dir + " has a non-empty file: " + files[i].getPath()); return true; } else if (files[i].isDirectory() && checkWals(fs, files[i].getPath())) { LOG.debug(dir + " is a directory and has a non-empty file: " + files[i].getPath()); return true; } } LOG.debug("Found 0 non-empty wal files for :" + dir); return false; } /** * When a region is closed, it should be removed from the regionsToReopen * @param hri HRegionInfo of the region which was closed */ public void removeClosedRegion(HRegionInfo hri) { if (regionsToReopen.remove(hri.getEncodedName()) != null) { LOG.debug("Removed region from reopening regions because it was closed"); } } void processFavoredNodesForDaughters(HRegionInfo parent, HRegionInfo regionA, HRegionInfo regionB) throws IOException { if (shouldAssignFavoredNodes(parent)) { List<ServerName> onlineServers = this.serverManager.getOnlineServersList(); ((FavoredNodesPromoter) this.balancer). generateFavoredNodesForDaughter(onlineServers, parent, regionA, regionB); } } void processFavoredNodesForMerge(HRegionInfo merged, HRegionInfo regionA, HRegionInfo regionB) throws IOException { if (shouldAssignFavoredNodes(merged)) { ((FavoredNodesPromoter)this.balancer). generateFavoredNodesForMergedRegion(merged, regionA, regionB); } } /* * Favored nodes should be applied only when FavoredNodes balancer is configured and the region * belongs to a non-system table. 
*/ private boolean shouldAssignFavoredNodes(HRegionInfo region) { return this.shouldAssignRegionsWithFavoredNodes && FavoredNodesManager.isFavoredNodeApplicable(region); } /** * Marks the region as online. Removes it from regions in transition and * updates the in-memory assignment information. * <p> * Used when a region has been successfully opened on a region server. * @param regionInfo * @param sn */ void regionOnline(HRegionInfo regionInfo, ServerName sn) { regionOnline(regionInfo, sn, HConstants.NO_SEQNUM); } void regionOnline(HRegionInfo regionInfo, ServerName sn, long openSeqNum) { numRegionsOpened.incrementAndGet(); regionStates.regionOnline(regionInfo, sn, openSeqNum); // Remove plan if one. clearRegionPlan(regionInfo); balancer.regionOnline(regionInfo, sn); // Tell our listeners that a region was opened sendRegionOpenedNotification(regionInfo, sn); } /** * Marks the region as offline. Removes it from regions in transition and * removes in-memory assignment information. * <p> * Used when a region has been closed and should remain closed. * @param regionInfo */ public void regionOffline(final HRegionInfo regionInfo) { regionOffline(regionInfo, null); } public void offlineDisabledRegion(HRegionInfo regionInfo) { replicasToClose.remove(regionInfo); regionOffline(regionInfo); } // Assignment methods /** * Assigns the specified region. * <p> * If a RegionPlan is available with a valid destination then it will be used * to determine what server region is assigned to. If no RegionPlan is * available, region will be assigned to a random available server. * <p> * Updates the RegionState and sends the OPEN RPC. * <p> * This will only succeed if the region is in transition and in a CLOSED or * OFFLINE state or not in transition, and of course, the * chosen server is up and running (It may have just crashed!). * * @param region server to be assigned */ public void assign(HRegionInfo region) { assign(region, false); } /** * Use care with forceNewPlan. It could cause double assignment. */ public void assign(HRegionInfo region, boolean forceNewPlan) { if (isDisabledorDisablingRegionInRIT(region)) { return; } String encodedName = region.getEncodedName(); Lock lock = locker.acquireLock(encodedName); try { RegionState state = forceRegionStateToOffline(region, forceNewPlan); if (state != null) { if (regionStates.wasRegionOnDeadServer(encodedName)) { LOG.info("Skip assigning " + region.getRegionNameAsString() + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName) + " is dead but not processed yet"); return; } assign(state, forceNewPlan); } } finally { lock.unlock(); } } /** * Bulk assign regions to <code>destination</code>. * @param destination * @param regions Regions to assign. 
* @return true if successful */ boolean assign(final ServerName destination, final List<HRegionInfo> regions) throws InterruptedException { long startTime = EnvironmentEdgeManager.currentTime(); try { int regionCount = regions.size(); if (regionCount == 0) { return true; } LOG.info("Assigning " + regionCount + " region(s) to " + destination.toString()); Set<String> encodedNames = new HashSet<>(regionCount); for (HRegionInfo region : regions) { encodedNames.add(region.getEncodedName()); } List<HRegionInfo> failedToOpenRegions = new ArrayList<>(); Map<String, Lock> locks = locker.acquireLocks(encodedNames); try { Map<String, RegionPlan> plans = new HashMap<>(regionCount); List<RegionState> states = new ArrayList<>(regionCount); for (HRegionInfo region : regions) { String encodedName = region.getEncodedName(); if (!isDisabledorDisablingRegionInRIT(region)) { RegionState state = forceRegionStateToOffline(region, false); boolean onDeadServer = false; if (state != null) { if (regionStates.wasRegionOnDeadServer(encodedName)) { LOG.info("Skip assigning " + region.getRegionNameAsString() + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName) + " is dead but not processed yet"); onDeadServer = true; } else { RegionPlan plan = new RegionPlan(region, state.getServerName(), destination); plans.put(encodedName, plan); states.add(state); continue; } } // Reassign if the region wasn't on a dead server if (!onDeadServer) { LOG.info("failed to force region state to offline, " + "will reassign later: " + region); failedToOpenRegions.add(region); // assign individually later } } // Release the lock, this region is excluded from bulk assign because // we can't update its state, or set its znode to offline. Lock lock = locks.remove(encodedName); lock.unlock(); } if (server.isStopped()) { return false; } // Add region plans, so we can updateTimers when one region is opened so // that unnecessary timeout on RIT is reduced. this.addPlans(plans); List<Pair<HRegionInfo, List<ServerName>>> regionOpenInfos = new ArrayList<>(states.size()); for (RegionState state: states) { HRegionInfo region = state.getRegion(); regionStates.updateRegionState( region, State.PENDING_OPEN, destination); List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST; if (shouldAssignFavoredNodes(region)) { favoredNodes = server.getFavoredNodesManager().getFavoredNodesWithDNPort(region); } regionOpenInfos.add(new Pair<>(region, favoredNodes)); } // Move on to open regions. try { // Send OPEN RPC. If it fails on a IOE or RemoteException, // regions will be assigned individually. 
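        // (How the loop below retries, per the handlers further down: a
        // ServerNotRunningYetException waits, up to "hbase.regionserver.rpc.startup.waittime",
        // without consuming an attempt; a SocketTimeoutException against a still-online
        // destination retries the same server, since the open may have been accepted even
        // though the response was lost; a FailedServerException sleeps for the failed-server
        // expiry before retrying; anything else falls through so the regions are reassigned
        // individually, as the comment above says.)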
Configuration conf = server.getConfiguration(); long maxWaitTime = System.currentTimeMillis() + conf.getLong("hbase.regionserver.rpc.startup.waittime", 60000); for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) { try { List<RegionOpeningState> regionOpeningStateList = serverManager .sendRegionOpen(destination, regionOpenInfos); for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) { RegionOpeningState openingState = regionOpeningStateList.get(k); if (openingState != RegionOpeningState.OPENED) { HRegionInfo region = regionOpenInfos.get(k).getFirst(); LOG.info("Got opening state " + openingState + ", will reassign later: " + region); // Failed opening this region, reassign it later forceRegionStateToOffline(region, true); failedToOpenRegions.add(region); } } break; } catch (IOException e) { if (e instanceof RemoteException) { e = ((RemoteException)e).unwrapRemoteException(); } if (e instanceof RegionServerStoppedException) { LOG.warn("The region server was shut down, ", e); // No need to retry, the region server is a goner. return false; } else if (e instanceof ServerNotRunningYetException) { long now = System.currentTimeMillis(); if (now < maxWaitTime) { if (LOG.isDebugEnabled()) { LOG.debug("Server is not yet up; waiting up to " + (maxWaitTime - now) + "ms", e); } Thread.sleep(100); i--; // reset the try count continue; } } else if (e instanceof java.net.SocketTimeoutException && this.serverManager.isServerOnline(destination)) { // In case socket is timed out and the region server is still online, // the openRegion RPC could have been accepted by the server and // just the response didn't go through. So we will retry to // open the region on the same server. if (LOG.isDebugEnabled()) { LOG.debug("Bulk assigner openRegion() to " + destination + " has timed out, but the regions might" + " already be opened on it.", e); } // wait and reset the re-try count, server might be just busy. Thread.sleep(100); i--; continue; } else if (e instanceof FailedServerException && i < maximumAttempts) { // In case the server is in the failed server list, no point to // retry too soon. 
Retry after the failed_server_expiry time long sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT); if (LOG.isDebugEnabled()) { LOG.debug(destination + " is on failed server list; waiting " + sleepTime + "ms", e); } Thread.sleep(sleepTime); continue; } throw e; } } } catch (IOException e) { // Can be a socket timeout, EOF, NoRouteToHost, etc LOG.info("Unable to communicate with " + destination + " in order to assign regions, ", e); for (RegionState state: states) { HRegionInfo region = state.getRegion(); forceRegionStateToOffline(region, true); } return false; } } finally { for (Lock lock : locks.values()) { lock.unlock(); } } if (!failedToOpenRegions.isEmpty()) { for (HRegionInfo region : failedToOpenRegions) { if (!regionStates.isRegionOnline(region)) { invokeAssign(region); } } } // wait for assignment completion ArrayList<HRegionInfo> userRegionSet = new ArrayList<>(regions.size()); for (HRegionInfo region: regions) { if (!region.getTable().isSystemTable()) { userRegionSet.add(region); } } if (!waitForAssignment(userRegionSet, true, userRegionSet.size(), System.currentTimeMillis())) { LOG.debug("some user regions are still in transition: " + userRegionSet); } LOG.debug("Bulk assigning done for " + destination); return true; } finally { metricsAssignmentManager.updateBulkAssignTime(EnvironmentEdgeManager.currentTime() - startTime); } } /** * Send CLOSE RPC if the server is online, otherwise, offline the region. * * The RPC will be sent only to the region sever found in the region state * if it is passed in, otherwise, to the src server specified. If region * state is not specified, we don't update region state at all, instead * we just send the RPC call. This is useful for some cleanup without * messing around the region states (see handleRegion, on region opened * on an unexpected server scenario, for an example) */ private void unassign(final HRegionInfo region, final ServerName server, final ServerName dest) { for (int i = 1; i <= this.maximumAttempts; i++) { if (this.server.isStopped() || this.server.isAborted()) { LOG.debug("Server stopped/aborted; skipping unassign of " + region); return; } if (!serverManager.isServerOnline(server)) { LOG.debug("Offline " + region.getRegionNameAsString() + ", no need to unassign since it's on a dead server: " + server); regionStates.updateRegionState(region, State.OFFLINE); return; } try { // Send CLOSE RPC if (serverManager.sendRegionClose(server, region, dest)) { LOG.debug("Sent CLOSE to " + server + " for region " + region.getRegionNameAsString()); return; } // This never happens. Currently regionserver close always return true. // Todo; this can now happen (0.96) if there is an exception in a coprocessor LOG.warn("Server " + server + " region CLOSE RPC returned false for " + region.getRegionNameAsString()); } catch (Throwable t) { long sleepTime = 0; Configuration conf = this.server.getConfiguration(); if (t instanceof RemoteException) { t = ((RemoteException)t).unwrapRemoteException(); } if (t instanceof RegionServerAbortedException || t instanceof RegionServerStoppedException || t instanceof ServerNotRunningYetException) { // RS is aborting, we cannot offline the region since the region may need to do WAL // recovery. Until we see the RS expiration, we should retry. 
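        // (The sleep below reuses RpcClient.FAILED_SERVER_EXPIRY_KEY as a convenient
        // wait interval: it is roughly how long the RPC layer remembers a failed
        // server, so retrying earlier than that would most likely fail again.)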
sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT); } else if (t instanceof NotServingRegionException) { LOG.debug("Offline " + region.getRegionNameAsString() + ", it's not any more on " + server, t); regionStates.updateRegionState(region, State.OFFLINE); return; } else if (t instanceof FailedServerException && i < maximumAttempts) { // In case the server is in the failed server list, no point to // retry too soon. Retry after the failed_server_expiry time sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT); if (LOG.isDebugEnabled()) { LOG.debug(server + " is on failed server list; waiting " + sleepTime + "ms", t); } } try { if (sleepTime > 0) { Thread.sleep(sleepTime); } } catch (InterruptedException ie) { LOG.warn("Interrupted unassign " + region.getRegionNameAsString(), ie); Thread.currentThread().interrupt(); regionStates.updateRegionState(region, State.FAILED_CLOSE); return; } LOG.info("Server " + server + " returned " + t + " for " + region.getRegionNameAsString() + ", try=" + i + " of " + this.maximumAttempts, t); } } // Run out of attempts regionStates.updateRegionState(region, State.FAILED_CLOSE); } /** * Set region to OFFLINE unless it is opening and forceNewPlan is false. */ private RegionState forceRegionStateToOffline( final HRegionInfo region, final boolean forceNewPlan) { RegionState state = regionStates.getRegionState(region); if (state == null) { LOG.warn("Assigning but not in region states: " + region); state = regionStates.createRegionState(region); } if (forceNewPlan && LOG.isDebugEnabled()) { LOG.debug("Force region state offline " + state); } switch (state.getState()) { case OPEN: case OPENING: case PENDING_OPEN: case CLOSING: case PENDING_CLOSE: if (!forceNewPlan) { LOG.debug("Skip assigning " + region + ", it is already " + state); return null; } case FAILED_CLOSE: case FAILED_OPEN: regionStates.updateRegionState(region, State.PENDING_CLOSE); unassign(region, state.getServerName(), null); state = regionStates.getRegionState(region); if (!state.isOffline() && !state.isClosed()) { // If the region isn't offline, we can't re-assign // it now. It will be assigned automatically after // the regionserver reports it's closed. return null; } case OFFLINE: case CLOSED: break; default: LOG.error("Trying to assign region " + region + ", which is " + state); return null; } return state; } /** * Caller must hold lock on the passed <code>state</code> object. 
* @param state * @param forceNewPlan */ private void assign(RegionState state, boolean forceNewPlan) { long startTime = EnvironmentEdgeManager.currentTime(); try { Configuration conf = server.getConfiguration(); RegionPlan plan = null; long maxWaitTime = -1; HRegionInfo region = state.getRegion(); Throwable previousException = null; for (int i = 1; i <= maximumAttempts; i++) { if (server.isStopped() || server.isAborted()) { LOG.info("Skip assigning " + region.getRegionNameAsString() + ", the server is stopped/aborted"); return; } if (plan == null) { // Get a server for the region at first try { plan = getRegionPlan(region, forceNewPlan); } catch (HBaseIOException e) { LOG.warn("Failed to get region plan", e); } } if (plan == null) { LOG.warn("Unable to determine a plan to assign " + region); // For meta region, we have to keep retrying until succeeding if (region.isMetaRegion()) { if (i == maximumAttempts) { i = 0; // re-set attempt count to 0 for at least 1 retry LOG.warn("Unable to determine a plan to assign a hbase:meta region " + region + " after maximumAttempts (" + this.maximumAttempts + "). Reset attempts count and continue retrying."); } waitForRetryingMetaAssignment(); continue; } regionStates.updateRegionState(region, State.FAILED_OPEN); return; } LOG.info("Assigning " + region.getRegionNameAsString() + " to " + plan.getDestination()); // Transition RegionState to PENDING_OPEN regionStates.updateRegionState(region, State.PENDING_OPEN, plan.getDestination()); boolean needNewPlan = false; final String assignMsg = "Failed assignment of " + region.getRegionNameAsString() + " to " + plan.getDestination(); try { List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST; if (shouldAssignFavoredNodes(region)) { favoredNodes = server.getFavoredNodesManager().getFavoredNodesWithDNPort(region); } serverManager.sendRegionOpen(plan.getDestination(), region, favoredNodes); return; // we're done } catch (Throwable t) { if (t instanceof RemoteException) { t = ((RemoteException) t).unwrapRemoteException(); } previousException = t; // Should we wait a little before retrying? If the server is starting it's yes. boolean hold = (t instanceof ServerNotRunningYetException); // In case socket is timed out and the region server is still online, // the openRegion RPC could have been accepted by the server and // just the response didn't go through. So we will retry to // open the region on the same server. boolean retry = !hold && (t instanceof java.net.SocketTimeoutException && this.serverManager.isServerOnline(plan.getDestination())); if (hold) { LOG.warn(assignMsg + ", waiting a little before trying on the same region server " + "try=" + i + " of " + this.maximumAttempts, t); if (maxWaitTime < 0) { maxWaitTime = EnvironmentEdgeManager.currentTime() + this.server.getConfiguration().getLong( "hbase.regionserver.rpc.startup.waittime", 60000); } try { long now = EnvironmentEdgeManager.currentTime(); if (now < maxWaitTime) { if (LOG.isDebugEnabled()) { LOG.debug("Server is not yet up; waiting up to " + (maxWaitTime - now) + "ms", t); } Thread.sleep(100); i--; // reset the try count } else { LOG.debug("Server is not up for a while; try a new one", t); needNewPlan = true; } } catch (InterruptedException ie) { LOG.warn("Failed to assign " + region.getRegionNameAsString() + " since interrupted", ie); regionStates.updateRegionState(region, State.FAILED_OPEN); Thread.currentThread().interrupt(); return; } } else if (retry) { i--; // we want to retry as many times as needed as long as the RS is not dead. 
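          // (Recap of the three outcomes computed above: "hold" waits for a server that
          // is still starting up and does not consume an attempt; "retry" re-sends the
          // open to the same, still-online server after a socket timeout; everything
          // else sets needNewPlan so that a different destination is picked further down.)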
if (LOG.isDebugEnabled()) { LOG.debug(assignMsg + ", trying to assign to the same region server due ", t); } } else { needNewPlan = true; LOG.warn(assignMsg + ", trying to assign elsewhere instead;" + " try=" + i + " of " + this.maximumAttempts, t); } } if (i == this.maximumAttempts) { // For meta region, we have to keep retrying until succeeding if (region.isMetaRegion()) { i = 0; // re-set attempt count to 0 for at least 1 retry LOG.warn(assignMsg + ", trying to assign a hbase:meta region reached to maximumAttempts (" + this.maximumAttempts + "). Reset attempt counts and continue retrying."); waitForRetryingMetaAssignment(); } else { // Don't reset the region state or get a new plan any more. // This is the last try. continue; } } // If region opened on destination of present plan, reassigning to new // RS may cause double assignments. In case of RegionAlreadyInTransitionException // reassigning to same RS. if (needNewPlan) { // Force a new plan and reassign. Will return null if no servers. // The new plan could be the same as the existing plan since we don't // exclude the server of the original plan, which should not be // excluded since it could be the only server up now. RegionPlan newPlan = null; try { newPlan = getRegionPlan(region, true); } catch (HBaseIOException e) { LOG.warn("Failed to get region plan", e); } if (newPlan == null) { regionStates.updateRegionState(region, State.FAILED_OPEN); LOG.warn("Unable to find a viable location to assign region " + region.getRegionNameAsString()); return; } if (plan != newPlan && !plan.getDestination().equals(newPlan.getDestination())) { // Clean out plan we failed execute and one that doesn't look like it'll // succeed anyways; we need a new plan! // Transition back to OFFLINE regionStates.updateRegionState(region, State.OFFLINE); plan = newPlan; } else if(plan.getDestination().equals(newPlan.getDestination()) && previousException instanceof FailedServerException) { try { LOG.info("Trying to re-assign " + region.getRegionNameAsString() + " to the same failed server."); Thread.sleep(1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT)); } catch (InterruptedException ie) { LOG.warn("Failed to assign " + region.getRegionNameAsString() + " since interrupted", ie); regionStates.updateRegionState(region, State.FAILED_OPEN); Thread.currentThread().interrupt(); return; } } } } // Run out of attempts regionStates.updateRegionState(region, State.FAILED_OPEN); } finally { metricsAssignmentManager.updateAssignmentTime(EnvironmentEdgeManager.currentTime() - startTime); } } private boolean isDisabledorDisablingRegionInRIT(final HRegionInfo region) { if (this.tableStateManager.isTableState(region.getTable(), TableState.State.DISABLED, TableState.State.DISABLING) || replicasToClose.contains(region)) { LOG.info("Table " + region.getTable() + " is disabled or disabling;" + " skipping assign of " + region.getRegionNameAsString()); offlineDisabledRegion(region); return true; } return false; } /** * @param region the region to assign * @param forceNewPlan If true, then if an existing plan exists, a new plan * will be generated. * @return Plan for passed <code>region</code> (If none currently, it creates one or * if no servers to assign, it returns null). 
*/ private RegionPlan getRegionPlan(final HRegionInfo region, final boolean forceNewPlan) throws HBaseIOException { // Pickup existing plan or make a new one final String encodedName = region.getEncodedName(); final List<ServerName> destServers = serverManager.createDestinationServersList(); if (destServers.isEmpty()){ LOG.warn("Can't move " + encodedName + ", there is no destination server available."); return null; } RegionPlan randomPlan = null; boolean newPlan = false; RegionPlan existingPlan; synchronized (this.regionPlans) { existingPlan = this.regionPlans.get(encodedName); if (existingPlan != null && existingPlan.getDestination() != null) { LOG.debug("Found an existing plan for " + region.getRegionNameAsString() + " destination server is " + existingPlan.getDestination() + " accepted as a dest server = " + destServers.contains(existingPlan.getDestination())); } if (forceNewPlan || existingPlan == null || existingPlan.getDestination() == null || !destServers.contains(existingPlan.getDestination())) { newPlan = true; try { randomPlan = new RegionPlan(region, null, balancer.randomAssignment(region, destServers)); } catch (IOException ex) { LOG.warn("Failed to create new plan.",ex); return null; } this.regionPlans.put(encodedName, randomPlan); } } if (newPlan) { if (randomPlan.getDestination() == null) { LOG.warn("Can't find a destination for " + encodedName); return null; } if (LOG.isDebugEnabled()) { LOG.debug("No previous transition plan found (or ignoring " + "an existing plan) for " + region.getRegionNameAsString() + "; generated random plan=" + randomPlan + "; " + destServers.size() + " (online=" + serverManager.getOnlineServers().size() + ") available servers, forceNewPlan=" + forceNewPlan); } return randomPlan; } if (LOG.isDebugEnabled()) { LOG.debug("Using pre-existing plan for " + region.getRegionNameAsString() + "; plan=" + existingPlan); } return existingPlan; } /** * Wait for some time before retrying meta table region assignment */ private void waitForRetryingMetaAssignment() { try { Thread.sleep(this.sleepTimeBeforeRetryingMetaAssignment); } catch (InterruptedException e) { LOG.error("Got exception while waiting for hbase:meta assignment"); Thread.currentThread().interrupt(); } } /** * Unassigns the specified region. * <p> * Updates the RegionState and sends the CLOSE RPC unless region is being * split by regionserver; then the unassign fails (silently) because we * presume the region being unassigned no longer exists (its been split out * of existence). TODO: What to do if split fails and is rolled back and * parent is revivified? * <p> * If a RegionPlan is already set, it will remain. * * @param region server to be unassigned */ public void unassign(HRegionInfo region) { unassign(region, null); } /** * Unassigns the specified region. * <p> * Updates the RegionState and sends the CLOSE RPC unless region is being * split by regionserver; then the unassign fails (silently) because we * presume the region being unassigned no longer exists (its been split out * of existence). TODO: What to do if split fails and is rolled back and * parent is revivified? * <p> * If a RegionPlan is already set, it will remain. * * @param region server to be unassigned * @param dest the destination server of the region */ public void unassign(HRegionInfo region, ServerName dest) { // TODO: Method needs refactoring. Ugly buried returns throughout. Beware! 
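    // A hedged caller's-eye sketch (variable names are illustrative): one way a caller
    // could steer a region to a specific server is to install a plan and then unassign:
    //   am.addPlan(hri.getEncodedName(), new RegionPlan(hri, currentServer, destinationServer));
    //   am.unassign(hri, destinationServer);
    // The finally block at the end of this method re-invokes assign(hri) once the region
    // settles in OFFLINE, and getRegionPlan(...) then honors the queued plan.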
LOG.debug("Starting unassign of " + region.getRegionNameAsString() + " (offlining), current state: " + regionStates.getRegionState(region)); String encodedName = region.getEncodedName(); // Grab the state of this region and synchronize on it // We need a lock here as we're going to do a put later and we don't want multiple states // creation ReentrantLock lock = locker.acquireLock(encodedName); RegionState state = regionStates.getRegionTransitionState(encodedName); try { if (state == null || state.isFailedClose()) { if (state == null) { // Region is not in transition. // We can unassign it only if it's not SPLIT/MERGED. state = regionStates.getRegionState(encodedName); if (state != null && state.isUnassignable()) { LOG.info("Attempting to unassign " + state + ", ignored"); // Offline region will be reassigned below return; } if (state == null || state.getServerName() == null) { // We don't know where the region is, offline it. // No need to send CLOSE RPC LOG.warn("Attempting to unassign a region not in RegionStates " + region.getRegionNameAsString() + ", offlined"); regionOffline(region); return; } } state = regionStates.updateRegionState( region, State.PENDING_CLOSE); } else if (state.isFailedOpen()) { // The region is not open yet regionOffline(region); return; } else { LOG.debug("Attempting to unassign " + region.getRegionNameAsString() + " but it is " + "already in transition (" + state.getState()); return; } unassign(region, state.getServerName(), dest); } finally { lock.unlock(); // Region is expected to be reassigned afterwards if (!replicasToClose.contains(region) && regionStates.isRegionInState(region, State.OFFLINE)) { assign(region); } } } /** * Used by unit tests. Return the number of regions opened so far in the life * of the master. Increases by one every time the master opens a region * @return the counter value of the number of regions opened so far */ public int getNumRegionsOpened() { return numRegionsOpened.get(); } /** * Waits until the specified region has completed assignment. * <p> * If the region is already assigned, returns immediately. Otherwise, method * blocks until the region is assigned. * @param regionInfo region to wait on assignment for * @return true if the region is assigned false otherwise. * @throws InterruptedException */ public boolean waitForAssignment(HRegionInfo regionInfo) throws InterruptedException { ArrayList<HRegionInfo> regionSet = new ArrayList<>(1); regionSet.add(regionInfo); return waitForAssignment(regionSet, true, Long.MAX_VALUE); } /** * Waits until the specified region has completed assignment, or the deadline is reached. */ protected boolean waitForAssignment(final Collection<HRegionInfo> regionSet, final boolean waitTillAllAssigned, final int reassigningRegions, final long minEndTime) throws InterruptedException { long deadline = minEndTime + bulkPerRegionOpenTimeGuesstimate * (reassigningRegions + 1); if (deadline < 0) { // Overflow deadline = Long.MAX_VALUE; // wait forever } return waitForAssignment(regionSet, waitTillAllAssigned, deadline); } /** * Waits until the specified region has completed assignment, or the deadline is reached. * @param regionSet set of region to wait on. the set is modified and the assigned regions removed * @param waitTillAllAssigned true if we should wait all the regions to be assigned * @param deadline the timestamp after which the wait is aborted * @return true if all the regions are assigned false otherwise. 
* @throws InterruptedException */ protected boolean waitForAssignment(final Collection<HRegionInfo> regionSet, final boolean waitTillAllAssigned, final long deadline) throws InterruptedException { // We're not synchronizing on regionsInTransition now because we don't use any iterator. while (!regionSet.isEmpty() && !server.isStopped() && deadline > System.currentTimeMillis()) { int failedOpenCount = 0; Iterator<HRegionInfo> regionInfoIterator = regionSet.iterator(); while (regionInfoIterator.hasNext()) { HRegionInfo hri = regionInfoIterator.next(); if (regionStates.isRegionOnline(hri) || regionStates.isRegionInState(hri, State.SPLITTING, State.SPLIT, State.MERGING, State.MERGED)) { regionInfoIterator.remove(); } else if (regionStates.isRegionInState(hri, State.FAILED_OPEN)) { failedOpenCount++; } } if (!waitTillAllAssigned) { // No need to wait, let assignment going on asynchronously break; } if (!regionSet.isEmpty()) { if (failedOpenCount == regionSet.size()) { // all the regions we are waiting had an error on open. break; } regionStates.waitForUpdate(100); } } return regionSet.isEmpty(); } /** * Assigns the hbase:meta region or a replica. * <p> * Assumes that hbase:meta is currently closed and is not being actively served by * any RegionServer. * @param hri TODO */ public void assignMeta(HRegionInfo hri) throws KeeperException { regionStates.updateRegionState(hri, State.OFFLINE); assign(hri); } /** * Assigns specified regions retaining assignments, if any. * <p> * This is a synchronous call and will return once every region has been * assigned. If anything fails, an exception is thrown * @throws InterruptedException * @throws IOException */ public void assign(Map<HRegionInfo, ServerName> regions) throws IOException, InterruptedException { if (regions == null || regions.isEmpty()) { return; } List<ServerName> servers = serverManager.createDestinationServersList(); if (servers == null || servers.isEmpty()) { throw new IOException("Found no destination server to assign region(s)"); } // Reuse existing assignment info Map<ServerName, List<HRegionInfo>> bulkPlan = balancer.retainAssignment(regions, servers); if (bulkPlan == null) { throw new IOException("Unable to determine a plan to assign region(s)"); } processBogusAssignments(bulkPlan); assign(regions.size(), servers.size(), "retainAssignment=true", bulkPlan); } /** * Assigns specified regions round robin, if any. * <p> * This is a synchronous call and will return once every region has been * assigned. 
If anything fails, an exception is thrown * @throws InterruptedException * @throws IOException */ public void assign(List<HRegionInfo> regions) throws IOException, InterruptedException { if (regions == null || regions.isEmpty()) { return; } List<ServerName> servers = serverManager.createDestinationServersList(); if (servers == null || servers.isEmpty()) { throw new IOException("Found no destination server to assign region(s)"); } // Generate a round-robin bulk assignment plan Map<ServerName, List<HRegionInfo>> bulkPlan = balancer.roundRobinAssignment(regions, servers); if (bulkPlan == null) { throw new IOException("Unable to determine a plan to assign region(s)"); } processBogusAssignments(bulkPlan); assign(regions.size(), servers.size(), "round-robin=true", bulkPlan); } private void assign(int regions, int totalServers, String message, Map<ServerName, List<HRegionInfo>> bulkPlan) throws InterruptedException, IOException { int servers = bulkPlan.size(); if (servers == 1 || (regions < bulkAssignThresholdRegions && servers < bulkAssignThresholdServers)) { // Not use bulk assignment. This could be more efficient in small // cluster, especially mini cluster for testing, so that tests won't time out if (LOG.isTraceEnabled()) { LOG.trace("Not using bulk assignment since we are assigning only " + regions + " region(s) to " + servers + " server(s)"); } // invoke assignment (async) ArrayList<HRegionInfo> userRegionSet = new ArrayList<>(regions); for (Map.Entry<ServerName, List<HRegionInfo>> plan: bulkPlan.entrySet()) { if (!assign(plan.getKey(), plan.getValue()) && !server.isStopped()) { for (HRegionInfo region: plan.getValue()) { if (!regionStates.isRegionOnline(region)) { invokeAssign(region); if (!region.getTable().isSystemTable()) { userRegionSet.add(region); } } } } } // wait for assignment completion if (!waitForAssignment(userRegionSet, true, userRegionSet.size(), System.currentTimeMillis())) { LOG.debug("some user regions are still in transition: " + userRegionSet); } } else { LOG.info("Bulk assigning " + regions + " region(s) across " + totalServers + " server(s), " + message); // Use fixed count thread pool assigning. BulkAssigner ba = new GeneralBulkAssigner( this.server, bulkPlan, this, bulkAssignWaitTillAllAssigned); ba.bulkAssign(); LOG.info("Bulk assigning done"); } } /** * Assigns all user regions, if any exist. Used during cluster startup. * <p> * This is a synchronous call and will return once every region has been * assigned. If anything fails, an exception is thrown and the cluster * should be shutdown. * @throws InterruptedException * @throws IOException */ private void assignAllUserRegions(Map<HRegionInfo, ServerName> allRegions) throws IOException, InterruptedException { if (allRegions == null || allRegions.isEmpty()) return; // Determine what type of assignment to do on startup boolean retainAssignment = server.getConfiguration(). 
getBoolean("hbase.master.startup.retainassign", true); Set<HRegionInfo> regionsFromMetaScan = allRegions.keySet(); if (retainAssignment) { assign(allRegions); } else { List<HRegionInfo> regions = new ArrayList<>(regionsFromMetaScan); assign(regions); } for (HRegionInfo hri : regionsFromMetaScan) { TableName tableName = hri.getTable(); if (!tableStateManager.isTableState(tableName, TableState.State.ENABLED)) { setEnabledTable(tableName); } } // assign all the replicas that were not recorded in the meta assign(replicaRegionsNotRecordedInMeta(regionsFromMetaScan, (MasterServices)server)); } /** * Get number of replicas of a table */ private static int getNumReplicas(MasterServices master, TableName table) { int numReplica = 1; try { HTableDescriptor htd = master.getTableDescriptors().get(table); if (htd == null) { LOG.warn("master can not get TableDescriptor from table '" + table); } else { numReplica = htd.getRegionReplication(); } } catch (IOException e){ LOG.warn("Couldn't get the replication attribute of the table " + table + " due to " + e.getMessage()); } return numReplica; } /** * Get a list of replica regions that are: * not recorded in meta yet. We might not have recorded the locations * for the replicas since the replicas may not have been online yet, master restarted * in the middle of assigning, ZK erased, etc. * @param regionsRecordedInMeta the list of regions we know are recorded in meta * either as a default, or, as the location of a replica * @param master * @return list of replica regions * @throws IOException */ public static List<HRegionInfo> replicaRegionsNotRecordedInMeta( Set<HRegionInfo> regionsRecordedInMeta, MasterServices master)throws IOException { List<HRegionInfo> regionsNotRecordedInMeta = new ArrayList<>(); for (HRegionInfo hri : regionsRecordedInMeta) { TableName table = hri.getTable(); if(master.getTableDescriptors().get(table) == null) continue; int desiredRegionReplication = getNumReplicas(master, table); for (int i = 0; i < desiredRegionReplication; i++) { HRegionInfo replica = RegionReplicaUtil.getRegionInfoForReplica(hri, i); if (regionsRecordedInMeta.contains(replica)) continue; regionsNotRecordedInMeta.add(replica); } } return regionsNotRecordedInMeta; } /** * Rebuild the list of user regions and assignment information. * Updates regionstates with findings as we go through list of regions. * @return set of servers not online that hosted some regions according to a scan of hbase:meta * @throws IOException */ Set<ServerName> rebuildUserRegions() throws IOException, KeeperException { Set<TableName> disabledOrEnablingTables = tableStateManager.getTablesInStates( TableState.State.DISABLED, TableState.State.ENABLING); Set<TableName> disabledOrDisablingOrEnabling = tableStateManager.getTablesInStates( TableState.State.DISABLED, TableState.State.DISABLING, TableState.State.ENABLING); // Region assignment from META List<Result> results = MetaTableAccessor.fullScanRegions(server.getConnection()); // Get any new but slow to checkin region server that joined the cluster Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet(); // Set of offline servers to be returned Set<ServerName> offlineServers = new HashSet<>(); // Iterate regions in META for (Result result : results) { if (result == null && LOG.isDebugEnabled()){ LOG.debug("null result from meta - ignoring but this is strange."); continue; } // keep a track of replicas to close. These were the replicas of the originally // unmerged regions. 
The master might have closed them before but it mightn't // maybe because it crashed. PairOfSameType<HRegionInfo> p = MetaTableAccessor.getMergeRegions(result); if (p.getFirst() != null && p.getSecond() != null) { int numReplicas = getNumReplicas(server, p.getFirst().getTable()); for (HRegionInfo merge : p) { for (int i = 1; i < numReplicas; i++) { replicasToClose.add(RegionReplicaUtil.getRegionInfoForReplica(merge, i)); } } } RegionLocations rl = MetaTableAccessor.getRegionLocations(result); if (rl == null) { continue; } HRegionLocation[] locations = rl.getRegionLocations(); if (locations == null) { continue; } for (HRegionLocation hrl : locations) { if (hrl == null) continue; HRegionInfo regionInfo = hrl.getRegionInfo(); if (regionInfo == null) continue; int replicaId = regionInfo.getReplicaId(); State state = RegionStateStore.getRegionState(result, replicaId); // keep a track of replicas to close. These were the replicas of the split parents // from the previous life of the master. The master should have closed them before // but it couldn't maybe because it crashed if (replicaId == 0 && state.equals(State.SPLIT)) { for (HRegionLocation h : locations) { replicasToClose.add(h.getRegionInfo()); } } ServerName lastHost = hrl.getServerName(); ServerName regionLocation = RegionStateStore.getRegionServer(result, replicaId); regionStates.createRegionState(regionInfo, state, regionLocation, lastHost); if (!regionStates.isRegionInState(regionInfo, State.OPEN)) { // Region is not open (either offline or in transition), skip continue; } TableName tableName = regionInfo.getTable(); if (!onlineServers.contains(regionLocation)) { // Region is located on a server that isn't online offlineServers.add(regionLocation); } else if (!disabledOrEnablingTables.contains(tableName)) { // Region is being served and on an active server // add only if region not in disabled or enabling table regionStates.regionOnline(regionInfo, regionLocation); balancer.regionOnline(regionInfo, regionLocation); } // need to enable the table if not disabled or disabling or enabling // this will be used in rolling restarts if (!disabledOrDisablingOrEnabling.contains(tableName) && !getTableStateManager().isTableState(tableName, TableState.State.ENABLED)) { setEnabledTable(tableName); } } } return offlineServers; } /** * Processes list of regions in transition at startup */ void processRegionsInTransition(Collection<RegionState> regionsInTransition) { // We need to send RPC call again for PENDING_OPEN/PENDING_CLOSE regions // in case the RPC call is not sent out yet before the master was shut down // since we update the state before we send the RPC call. We can't update // the state after the RPC call. Otherwise, we don't know what's happened // to the region if the master dies right after the RPC call is out. for (RegionState regionState: regionsInTransition) { LOG.info("Processing " + regionState); ServerName serverName = regionState.getServerName(); // Server could be null in case of FAILED_OPEN when master cannot find a region plan. In that // case, try assigning it here. if (serverName != null && !serverManager.getOnlineServers().containsKey(serverName)) { LOG.info("Server " + serverName + " isn't online. 
SSH will handle this"); continue; // SSH will handle it } HRegionInfo regionInfo = regionState.getRegion(); RegionState.State state = regionState.getState(); switch (state) { case CLOSED: invokeAssign(regionState.getRegion()); break; case PENDING_OPEN: retrySendRegionOpen(regionState); break; case PENDING_CLOSE: retrySendRegionClose(regionState); break; case FAILED_CLOSE: case FAILED_OPEN: invokeUnAssign(regionInfo); break; default: // No process for other states break; } } } /** * At master failover, for pending_open region, make sure * sendRegionOpen RPC call is sent to the target regionserver */ private void retrySendRegionOpen(final RegionState regionState) { this.executorService.submit( new EventHandler(server, EventType.M_MASTER_RECOVERY) { @Override public void process() throws IOException { HRegionInfo hri = regionState.getRegion(); ServerName serverName = regionState.getServerName(); ReentrantLock lock = locker.acquireLock(hri.getEncodedName()); try { for (int i = 1; i <= maximumAttempts; i++) { if (!serverManager.isServerOnline(serverName) || server.isStopped() || server.isAborted()) { return; // No need any more } try { if (!regionState.equals(regionStates.getRegionState(hri))) { return; // Region is not in the expected state any more } List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST; if (shouldAssignFavoredNodes(hri)) { FavoredNodesManager fnm = ((MasterServices)server).getFavoredNodesManager(); favoredNodes = fnm.getFavoredNodesWithDNPort(hri); } serverManager.sendRegionOpen(serverName, hri, favoredNodes); return; // we're done } catch (Throwable t) { if (t instanceof RemoteException) { t = ((RemoteException) t).unwrapRemoteException(); } if (t instanceof FailedServerException && i < maximumAttempts) { // In case the server is in the failed server list, no point to // retry too soon. 
Retry after the failed_server_expiry time try { Configuration conf = this.server.getConfiguration(); long sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT); if (LOG.isDebugEnabled()) { LOG.debug(serverName + " is on failed server list; waiting " + sleepTime + "ms", t); } Thread.sleep(sleepTime); continue; } catch (InterruptedException ie) { LOG.warn("Failed to assign " + hri.getRegionNameAsString() + " since interrupted", ie); regionStates.updateRegionState(hri, State.FAILED_OPEN); Thread.currentThread().interrupt(); return; } } if (serverManager.isServerOnline(serverName) && t instanceof java.net.SocketTimeoutException) { i--; // reset the try count } else { LOG.info("Got exception in retrying sendRegionOpen for " + regionState + "; try=" + i + " of " + maximumAttempts, t); } Threads.sleep(100); } } // Run out of attempts regionStates.updateRegionState(hri, State.FAILED_OPEN); } finally { lock.unlock(); } } }); } /** * At master failover, for pending_close region, make sure * sendRegionClose RPC call is sent to the target regionserver */ private void retrySendRegionClose(final RegionState regionState) { this.executorService.submit( new EventHandler(server, EventType.M_MASTER_RECOVERY) { @Override public void process() throws IOException { HRegionInfo hri = regionState.getRegion(); ServerName serverName = regionState.getServerName(); ReentrantLock lock = locker.acquireLock(hri.getEncodedName()); try { for (int i = 1; i <= maximumAttempts; i++) { if (!serverManager.isServerOnline(serverName) || server.isStopped() || server.isAborted()) { return; // No need any more } try { if (!regionState.equals(regionStates.getRegionState(hri))) { return; // Region is not in the expected state any more } serverManager.sendRegionClose(serverName, hri, null); return; // Done. } catch (Throwable t) { if (t instanceof RemoteException) { t = ((RemoteException) t).unwrapRemoteException(); } if (t instanceof FailedServerException && i < maximumAttempts) { // In case the server is in the failed server list, no point to // retry too soon. Retry after the failed_server_expiry time try { Configuration conf = this.server.getConfiguration(); long sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT); if (LOG.isDebugEnabled()) { LOG.debug(serverName + " is on failed server list; waiting " + sleepTime + "ms", t); } Thread.sleep(sleepTime); continue; } catch (InterruptedException ie) { LOG.warn("Failed to unassign " + hri.getRegionNameAsString() + " since interrupted", ie); regionStates.updateRegionState(hri, RegionState.State.FAILED_CLOSE); Thread.currentThread().interrupt(); return; } } if (serverManager.isServerOnline(serverName) && t instanceof java.net.SocketTimeoutException) { i--; // reset the try count } else { LOG.info("Got exception in retrying sendRegionClose for " + regionState + "; try=" + i + " of " + maximumAttempts, t); } Threads.sleep(100); } } // Run out of attempts regionStates.updateRegionState(hri, State.FAILED_CLOSE); } finally { lock.unlock(); } } }); } /** * Set Regions in transitions metrics. * This takes an iterator on the RegionInTransition map (CLSM), and is not synchronized. * This iterator is not fail fast, which may lead to stale read; but that's better than * creating a copy of the map for metrics computation, as this method will be invoked * on a frequent interval. 
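 * <p>The stuck-RIT threshold is read from {@code HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD}
 * (60000 ms by default, per the lookup below). A minimal tuning sketch, assuming only the usual
 * Configuration plumbing rather than any particular deployment:
 * <pre>{@code
 *   Configuration conf = HBaseConfiguration.create();
 *   // Treat a region as "stuck" only after two minutes in transition.
 *   conf.setInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 120000);
 * }</pre>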
*/ public void updateRegionsInTransitionMetrics() { long currentTime = System.currentTimeMillis(); int totalRITs = 0; int totalRITsOverThreshold = 0; long oldestRITTime = 0; int ritThreshold = this.server.getConfiguration(). getInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 60000); for (RegionState state: regionStates.getRegionsInTransition()) { totalRITs++; long ritTime = currentTime - state.getStamp(); if (ritTime > ritThreshold) { // more than the threshold totalRITsOverThreshold++; } if (oldestRITTime < ritTime) { oldestRITTime = ritTime; } } if (this.metricsAssignmentManager != null) { this.metricsAssignmentManager.updateRITOldestAge(oldestRITTime); this.metricsAssignmentManager.updateRITCount(totalRITs); this.metricsAssignmentManager.updateRITCountOverThreshold(totalRITsOverThreshold); } } /** * @param region Region whose plan we are to clear. */ private void clearRegionPlan(final HRegionInfo region) { synchronized (this.regionPlans) { this.regionPlans.remove(region.getEncodedName()); } } /** * Wait on region to clear regions-in-transition. * @param hri Region to wait on. * @throws IOException */ public void waitOnRegionToClearRegionsInTransition(final HRegionInfo hri) throws IOException, InterruptedException { waitOnRegionToClearRegionsInTransition(hri, -1L); } /** * Wait on region to clear regions-in-transition or time out * @param hri * @param timeOut Milliseconds to wait for current region to be out of transition state. * @return True when a region clears regions-in-transition before timeout otherwise false * @throws InterruptedException */ public boolean waitOnRegionToClearRegionsInTransition(final HRegionInfo hri, long timeOut) throws InterruptedException { if (!regionStates.isRegionInTransition(hri)) { return true; } long end = (timeOut <= 0) ? Long.MAX_VALUE : EnvironmentEdgeManager.currentTime() + timeOut; // There is already a timeout monitor on regions in transition so I // should not have to have one here too? LOG.info("Waiting for " + hri.getEncodedName() + " to leave regions-in-transition, timeOut=" + timeOut + " ms."); while (!this.server.isStopped() && regionStates.isRegionInTransition(hri)) { regionStates.waitForUpdate(100); if (EnvironmentEdgeManager.currentTime() > end) { LOG.info("Timed out on waiting for " + hri.getEncodedName() + " to be assigned."); return false; } } if (this.server.isStopped()) { LOG.info("Giving up wait on regions in transition because stoppable.isStopped is set"); return false; } return true; } void invokeAssign(HRegionInfo regionInfo) { threadPoolExecutorService.submit(new AssignCallable(this, regionInfo)); } void invokeAssignLater(HRegionInfo regionInfo, long sleepMillis) { scheduledThreadPoolExecutor.schedule(new DelayedAssignCallable( new AssignCallable(this, regionInfo)), sleepMillis, TimeUnit.MILLISECONDS); } void invokeUnAssign(HRegionInfo regionInfo) { threadPoolExecutorService.submit(new UnAssignCallable(this, regionInfo)); } public boolean isCarryingMeta(ServerName serverName) { return isCarryingRegion(serverName, HRegionInfo.FIRST_META_REGIONINFO); } public boolean isCarryingMetaReplica(ServerName serverName, int replicaId) { return isCarryingRegion(serverName, RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId)); } public boolean isCarryingMetaReplica(ServerName serverName, HRegionInfo metaHri) { return isCarryingRegion(serverName, metaHri); } /** * Check if the shutdown server carries the specific region. 
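 * <p>The in-transition state is consulted first (if the region is moving, the server named in
 * that transition wins); otherwise the current assignment recorded in {@code regionStates} is
 * used. A hedged caller sketch for the public wrappers above, assuming an
 * {@code AssignmentManager} reference named {@code am}:
 * <pre>{@code
 *   if (am.isCarryingMeta(crashedServer)) {
 *     // recover hbase:meta before queueing user-region work for this server
 *   }
 * }</pre>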
* @return whether the serverName currently hosts the region */ private boolean isCarryingRegion(ServerName serverName, HRegionInfo hri) { RegionState regionState = regionStates.getRegionTransitionState(hri); ServerName transitionAddr = regionState != null? regionState.getServerName(): null; if (transitionAddr != null) { boolean matchTransitionAddr = transitionAddr.equals(serverName); LOG.debug("Checking region=" + hri.getRegionNameAsString() + ", transitioning on server=" + matchTransitionAddr + " server being checked: " + serverName + ", matches=" + matchTransitionAddr); return matchTransitionAddr; } ServerName assignedAddr = regionStates.getRegionServerOfRegion(hri); boolean matchAssignedAddr = serverName.equals(assignedAddr); LOG.debug("based on AM, current region=" + hri.getRegionNameAsString() + " is on server=" + assignedAddr + ", server being checked: " + serverName); return matchAssignedAddr; } /** * Clean out crashed server removing any assignments. * @param sn Server that went down. * @return list of regions in transition on this server */ public List<HRegionInfo> cleanOutCrashedServerReferences(final ServerName sn) { // Clean out any existing assignment plans for this server synchronized (this.regionPlans) { for (Iterator <Map.Entry<String, RegionPlan>> i = this.regionPlans.entrySet().iterator(); i.hasNext();) { Map.Entry<String, RegionPlan> e = i.next(); ServerName otherSn = e.getValue().getDestination(); // The name will be null if the region is planned for a random assign. if (otherSn != null && otherSn.equals(sn)) { // Use iterator's remove else we'll get CME i.remove(); } } } List<HRegionInfo> rits = regionStates.serverOffline(sn); for (Iterator<HRegionInfo> it = rits.iterator(); it.hasNext(); ) { HRegionInfo hri = it.next(); String encodedName = hri.getEncodedName(); // We need a lock on the region as we could update it Lock lock = locker.acquireLock(encodedName); try { RegionState regionState = regionStates.getRegionTransitionState(encodedName); if (regionState == null || (regionState.getServerName() != null && !regionState.isOnServer(sn)) || !RegionStates.isOneOfStates(regionState, State.PENDING_OPEN, State.OPENING, State.FAILED_OPEN, State.FAILED_CLOSE, State.OFFLINE)) { LOG.info("Skip " + regionState + " since it is not opening/failed_close" + " on the dead server any more: " + sn); it.remove(); } else { if (tableStateManager.isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING)) { regionStates.regionOffline(hri); it.remove(); continue; } // Mark the region offline and assign it again by SSH regionStates.updateRegionState(hri, State.OFFLINE); } } finally { lock.unlock(); } } return rits; } /** * @param plan Plan to execute. */ public void balance(final RegionPlan plan) { HRegionInfo hri = plan.getRegionInfo(); TableName tableName = hri.getTable(); if (tableStateManager.isTableState(tableName, TableState.State.DISABLED, TableState.State.DISABLING)) { LOG.info("Ignored moving region of disabling/disabled table " + tableName); return; } // Move the region only if it's assigned String encodedName = hri.getEncodedName(); ReentrantLock lock = locker.acquireLock(encodedName); try { if (!regionStates.isRegionOnline(hri)) { RegionState state = regionStates.getRegionState(encodedName); LOG.info("Ignored moving region not assigned: " + hri + ", " + (state == null ? 
"not in region states" : state)); return; } synchronized (this.regionPlans) { this.regionPlans.put(plan.getRegionName(), plan); } unassign(hri, plan.getDestination()); } finally { lock.unlock(); } } public void stop() { // Shutdown the threadpool executor service threadPoolExecutorService.shutdownNow(); regionStateStore.stop(); } protected void setEnabledTable(TableName tableName) { try { this.tableStateManager.setTableState(tableName, TableState.State.ENABLED); } catch (IOException e) { // here we can abort as it is the start up flow String errorMsg = "Unable to ensure that the table " + tableName + " will be" + " enabled because of a ZooKeeper issue"; LOG.error(errorMsg); this.server.abort(errorMsg, e); } } @edu.umd.cs.findbugs.annotations.SuppressWarnings( value="AT_OPERATION_SEQUENCE_ON_CONCURRENT_ABSTRACTION", justification="Worth fixing but not the end of the world.") private String onRegionFailedOpen(final RegionState current, final HRegionInfo hri, final ServerName serverName) { // The region must be opening on this server. // If current state is failed_open on the same server, // it could be a reportRegionTransition RPC retry. if (current == null || !current.isOpeningOrFailedOpenOnServer(serverName)) { return hri.getShortNameToLog() + " is not opening on " + serverName; } // Just return in case of retrying if (current.isFailedOpen()) { return null; } String encodedName = hri.getEncodedName(); // FindBugs: AT_OPERATION_SEQUENCE_ON_CONCURRENT_ABSTRACTION Worth fixing!!! AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName); if (failedOpenCount == null) { failedOpenCount = new AtomicInteger(); // No need to use putIfAbsent, or extra synchronization since // this whole handleRegion block is locked on the encoded region // name, and failedOpenTracker is updated only in this block failedOpenTracker.put(encodedName, failedOpenCount); } if (failedOpenCount.incrementAndGet() >= maximumAttempts && !hri.isMetaRegion()) { regionStates.updateRegionState(hri, State.FAILED_OPEN); // remove the tracking info to save memory, also reset // the count for next open initiative failedOpenTracker.remove(encodedName); } else { if (hri.isMetaRegion() && failedOpenCount.get() >= maximumAttempts) { // Log a warning message if a meta region failedOpenCount exceeds maximumAttempts // so that we are aware of potential problem if it persists for a long time. LOG.warn("Failed to open the hbase:meta region " + hri.getRegionNameAsString() + " after" + failedOpenCount.get() + " retries. Continue retrying."); } // Handle this the same as if it were opened and then closed. RegionState regionState = regionStates.updateRegionState(hri, State.CLOSED); if (regionState != null) { // When there are more than one region server a new RS is selected as the // destination and the same is updated in the region plan. 
(HBASE-5546) if (getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING) || replicasToClose.contains(hri)) { offlineDisabledRegion(hri); return null; } regionStates.updateRegionState(hri, RegionState.State.CLOSED); // This below has to do w/ online enable/disable of a table removeClosedRegion(hri); try { getRegionPlan(hri, true); } catch (HBaseIOException e) { LOG.warn("Failed to get region plan", e); } // Have the current thread sleep a bit before resubmitting the RPC request long sleepTime = backoffPolicy.getBackoffTime(retryConfig, failedOpenTracker.get(encodedName).get()); invokeAssignLater(hri, sleepTime); } } // Null means no error return null; } private String onRegionOpen(final RegionState current, final HRegionInfo hri, final ServerName serverName, final RegionStateTransition transition) { // The region must be opening on this server. // If current state is already opened on the same server, // it could be a reportRegionTransition RPC retry. if (current == null || !current.isOpeningOrOpenedOnServer(serverName)) { return hri.getShortNameToLog() + " is not opening on " + serverName; } // Just return in case of retrying if (current.isOpened()) { return null; } long openSeqNum = transition.hasOpenSeqNum() ? transition.getOpenSeqNum() : HConstants.NO_SEQNUM; if (openSeqNum < 0) { return "Newly opened region has invalid open seq num " + openSeqNum; } regionOnline(hri, serverName, openSeqNum); // reset the count, if any failedOpenTracker.remove(hri.getEncodedName()); if (getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING)) { invokeUnAssign(hri); } return null; } private String onRegionClosed(final RegionState current, final HRegionInfo hri, final ServerName serverName) { // Region will be usually assigned right after closed. When a RPC retry comes // in, the region may already have moved away from closed state. However, on the // region server side, we don't care much about the response for this transition. // We only make sure master has got and processed this report, either // successfully or not. So this is fine, not a problem at all. if (current == null || !current.isClosingOrClosedOnServer(serverName)) { return hri.getShortNameToLog() + " is not closing on " + serverName; } // Just return in case of retrying if (current.isClosed()) { return null; } if (getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING) || replicasToClose.contains(hri)) { offlineDisabledRegion(hri); return null; } regionStates.updateRegionState(hri, RegionState.State.CLOSED); sendRegionClosedNotification(hri); // This below has to do w/ online enable/disable of a table removeClosedRegion(hri); invokeAssign(hri); return null; } private String onRegionReadyToSplit(final RegionState current, final HRegionInfo hri, final ServerName serverName, final RegionStateTransition transition) { // The region must be opened on this server. // If current state is already splitting on the same server, // it could be a reportRegionTransition RPC retry. 
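// Outline of the checks below: reject the request unless the parent is opened (or already
// splitting) on the reporting server, bail out if the split switch is off, treat an
// already-SPLITTING parent as an RPC retry, and refuse the split if either daughter region
// state already exists. Only then is the parent moved to SPLITTING and the daughters are
// created in SPLITTING_NEW on the same server.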
if (current == null || !current.isSplittingOrOpenedOnServer(serverName)) { return hri.getShortNameToLog() + " is not opening on " + serverName; } if (!((HMaster)server).getSplitOrMergeTracker().isSplitOrMergeEnabled( MasterSwitchType.SPLIT)) { return "split switch is off!"; } // Just return in case of retrying if (current.isSplitting()) { return null; } final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1)); final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2)); RegionState rs_a = regionStates.getRegionState(a); RegionState rs_b = regionStates.getRegionState(b); if (rs_a != null || rs_b != null) { return "Some daughter is already existing. " + "a=" + rs_a + ", b=" + rs_b; } // Server holding is not updated at this stage. // It is done after PONR. regionStates.updateRegionState(hri, State.SPLITTING); regionStates.createRegionState( a, State.SPLITTING_NEW, serverName, null); regionStates.createRegionState( b, State.SPLITTING_NEW, serverName, null); return null; } private String onRegionSplitPONR(final RegionState current, final HRegionInfo hri, final ServerName serverName, final RegionStateTransition transition) { // The region must be splitting on this server, and the daughters must be in // splitting_new state. To check RPC retry, we use server holding info. if (current == null || !current.isSplittingOnServer(serverName)) { return hri.getShortNameToLog() + " is not splitting on " + serverName; } final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1)); final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2)); RegionState rs_a = regionStates.getRegionState(a); RegionState rs_b = regionStates.getRegionState(b); // Master could have restarted and lost the new region // states, if so, they must be lost together if (rs_a == null && rs_b == null) { rs_a = regionStates.createRegionState( a, State.SPLITTING_NEW, serverName, null); rs_b = regionStates.createRegionState( b, State.SPLITTING_NEW, serverName, null); } if (rs_a == null || !rs_a.isSplittingNewOnServer(serverName) || rs_b == null || !rs_b.isSplittingNewOnServer(serverName)) { return "Some daughter is not known to be splitting on " + serverName + ", a=" + rs_a + ", b=" + rs_b; } // Just return in case of retrying if (!regionStates.isRegionOnServer(hri, serverName)) { return null; } try { regionStates.splitRegion(hri, a, b, serverName); processFavoredNodesForDaughters(hri, a ,b); } catch (IOException ioe) { LOG.info("Failed to record split region " + hri.getShortNameToLog()); return "Failed to record the splitting in meta"; } return null; } public void assignDaughterRegions( final HRegionInfo parentHRI, final HRegionInfo daughterAHRI, final HRegionInfo daughterBHRI) throws InterruptedException, IOException { //Offline the parent region regionOffline(parentHRI, State.SPLIT); //Set daughter regions to offline regionStates.prepareAssignDaughters(daughterAHRI, daughterBHRI); // Assign daughter regions invokeAssign(daughterAHRI); invokeAssign(daughterBHRI); Callable<Object> splitReplicasCallable = new Callable<Object>() { @Override public Object call() { doSplittingOfReplicas(parentHRI, daughterAHRI, daughterBHRI); return null; } }; threadPoolExecutorService.submit(splitReplicasCallable); // wait for assignment completion ArrayList<HRegionInfo> regionAssignSet = new ArrayList<>(2); regionAssignSet.add(daughterAHRI); regionAssignSet.add(daughterBHRI); while (!waitForAssignment(regionAssignSet, true, regionAssignSet.size(), Long.MAX_VALUE)) { LOG.debug("some user regions are still in 
transition: " + regionAssignSet); } } private String onRegionSplit(final RegionState current, final HRegionInfo hri, final ServerName serverName, final RegionStateTransition transition) { // The region must be splitting on this server, and the daughters must be in // splitting_new state. // If current state is already split on the same server, // it could be a reportRegionTransition RPC retry. if (current == null || !current.isSplittingOrSplitOnServer(serverName)) { return hri.getShortNameToLog() + " is not splitting on " + serverName; } // Just return in case of retrying if (current.isSplit()) { return null; } final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1)); final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2)); RegionState rs_a = regionStates.getRegionState(a); RegionState rs_b = regionStates.getRegionState(b); if (rs_a == null || !rs_a.isSplittingNewOnServer(serverName) || rs_b == null || !rs_b.isSplittingNewOnServer(serverName)) { return "Some daughter is not known to be splitting on " + serverName + ", a=" + rs_a + ", b=" + rs_b; } if (TEST_SKIP_SPLIT_HANDLING) { return "Skipping split message, TEST_SKIP_SPLIT_HANDLING is set"; } regionOffline(hri, State.SPLIT); regionOnline(a, serverName, 1); regionOnline(b, serverName, 1); // User could disable the table before master knows the new region. if (getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING)) { invokeUnAssign(a); invokeUnAssign(b); } else { Callable<Object> splitReplicasCallable = new Callable<Object>() { @Override public Object call() { doSplittingOfReplicas(hri, a, b); return null; } }; threadPoolExecutorService.submit(splitReplicasCallable); } return null; } private String onRegionSplitReverted(final RegionState current, final HRegionInfo hri, final ServerName serverName, final RegionStateTransition transition) { // The region must be splitting on this server, and the daughters must be in // splitting_new state. // If the region is in open state, it could be an RPC retry. if (current == null || !current.isSplittingOrOpenedOnServer(serverName)) { return hri.getShortNameToLog() + " is not splitting on " + serverName; } // Just return in case of retrying if (current.isOpened()) { return null; } final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1)); final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2)); RegionState rs_a = regionStates.getRegionState(a); RegionState rs_b = regionStates.getRegionState(b); if (rs_a == null || !rs_a.isSplittingNewOnServer(serverName) || rs_b == null || !rs_b.isSplittingNewOnServer(serverName)) { return "Some daughter is not known to be splitting on " + serverName + ", a=" + rs_a + ", b=" + rs_b; } regionOnline(hri, serverName); regionOffline(a); regionOffline(b); if (getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING)) { invokeUnAssign(hri); } return null; } private String onRegionReadyToMerge(final RegionState current, final HRegionInfo hri, final ServerName serverName, final RegionStateTransition transition) { // The region must be new, and the daughters must be open on this server. // If the region is in merge_new state, it could be an RPC retry. 
if (current != null && !current.isMergingNewOnServer(serverName)) { return "Merging daughter region already exists, p=" + current; } if (!((HMaster)server).getSplitOrMergeTracker().isSplitOrMergeEnabled( MasterSwitchType.MERGE)) { return "merge switch is off!"; } // Just return in case of retrying if (current != null) { return null; } final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1)); final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2)); Set<String> encodedNames = new HashSet<>(2); encodedNames.add(a.getEncodedName()); encodedNames.add(b.getEncodedName()); Map<String, Lock> locks = locker.acquireLocks(encodedNames); try { RegionState rs_a = regionStates.getRegionState(a); RegionState rs_b = regionStates.getRegionState(b); if (rs_a == null || !rs_a.isOpenedOnServer(serverName) || rs_b == null || !rs_b.isOpenedOnServer(serverName)) { return "Some daughter is not in a state to merge on " + serverName + ", a=" + rs_a + ", b=" + rs_b; } regionStates.updateRegionState(a, State.MERGING); regionStates.updateRegionState(b, State.MERGING); regionStates.createRegionState( hri, State.MERGING_NEW, serverName, null); return null; } finally { for (Lock lock: locks.values()) { lock.unlock(); } } } private String onRegionMergePONR(final RegionState current, final HRegionInfo hri, final ServerName serverName, final RegionStateTransition transition) { // The region must be in merging_new state, and the daughters must be // merging. To check RPC retry, we use server holding info. if (current != null && !current.isMergingNewOnServer(serverName)) { return hri.getShortNameToLog() + " is not merging on " + serverName; } final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1)); final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2)); RegionState rs_a = regionStates.getRegionState(a); RegionState rs_b = regionStates.getRegionState(b); if (rs_a == null || !rs_a.isMergingOnServer(serverName) || rs_b == null || !rs_b.isMergingOnServer(serverName)) { return "Some daughter is not known to be merging on " + serverName + ", a=" + rs_a + ", b=" + rs_b; } // Master could have restarted and lost the new region state if (current == null) { regionStates.createRegionState( hri, State.MERGING_NEW, serverName, null); } // Just return in case of retrying if (regionStates.isRegionOnServer(hri, serverName)) { return null; } try { regionStates.mergeRegions(hri, a, b, serverName); } catch (IOException ioe) { LOG.info("Failed to record merged region " + hri.getShortNameToLog()); return "Failed to record the merging in meta"; } return null; } public void assignMergedRegion( final HRegionInfo mergedRegion, final HRegionInfo daughterAHRI, final HRegionInfo daughterBHRI) throws InterruptedException, IOException { //Offline the daughter regions regionOffline(daughterAHRI, State.MERGED); regionOffline(daughterBHRI, State.MERGED); //Set merged region to offline regionStates.prepareAssignMergedRegion(mergedRegion); // Assign merged region invokeAssign(mergedRegion); Callable<Object> mergeReplicasCallable = new Callable<Object>() { @Override public Object call() { doMergingOfReplicas(mergedRegion, daughterAHRI, daughterBHRI); return null; } }; threadPoolExecutorService.submit(mergeReplicasCallable); // wait for assignment completion ArrayList<HRegionInfo> regionAssignSet = new ArrayList<>(1); regionAssignSet.add(mergedRegion); while (!waitForAssignment(regionAssignSet, true, regionAssignSet.size(), Long.MAX_VALUE)) { LOG.debug("The merged region " + mergedRegion + " is still in 
transition. "); } regionStateListener.onRegionMerged(mergedRegion); } private String onRegionMerged(final RegionState current, final HRegionInfo hri, final ServerName serverName, final RegionStateTransition transition) { // The region must be in merging_new state, and the daughters must be // merging on this server. // If current state is already opened on the same server, // it could be a reportRegionTransition RPC retry. if (current == null || !current.isMergingNewOrOpenedOnServer(serverName)) { return hri.getShortNameToLog() + " is not merging on " + serverName; } // Just return in case of retrying if (current.isOpened()) { return null; } final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1)); final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2)); RegionState rs_a = regionStates.getRegionState(a); RegionState rs_b = regionStates.getRegionState(b); if (rs_a == null || !rs_a.isMergingOnServer(serverName) || rs_b == null || !rs_b.isMergingOnServer(serverName)) { return "Some daughter is not known to be merging on " + serverName + ", a=" + rs_a + ", b=" + rs_b; } regionOffline(a, State.MERGED); regionOffline(b, State.MERGED); regionOnline(hri, serverName, 1); try { processFavoredNodesForMerge(hri, a, b); } catch (IOException e) { LOG.error("Error while processing favored nodes after merge.", e); return StringUtils.stringifyException(e); } // User could disable the table before master knows the new region. if (getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING)) { invokeUnAssign(hri); } else { Callable<Object> mergeReplicasCallable = new Callable<Object>() { @Override public Object call() { doMergingOfReplicas(hri, a, b); return null; } }; threadPoolExecutorService.submit(mergeReplicasCallable); } return null; } private String onRegionMergeReverted(final RegionState current, final HRegionInfo hri, final ServerName serverName, final RegionStateTransition transition) { // The region must be in merging_new state, and the daughters must be // merging on this server. // If the region is in offline state, it could be an RPC retry. if (current == null || !current.isMergingNewOrOfflineOnServer(serverName)) { return hri.getShortNameToLog() + " is not merging on " + serverName; } // Just return in case of retrying if (current.isOffline()) { return null; } final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1)); final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2)); RegionState rs_a = regionStates.getRegionState(a); RegionState rs_b = regionStates.getRegionState(b); if (rs_a == null || !rs_a.isMergingOnServer(serverName) || rs_b == null || !rs_b.isMergingOnServer(serverName)) { return "Some daughter is not known to be merging on " + serverName + ", a=" + rs_a + ", b=" + rs_b; } // Always bring the children back online. Even if they are not offline // there's no harm in making them online again. regionOnline(a, serverName); regionOnline(b, serverName); // Only offline the merging region if it is known to exist. RegionState rs_p = regionStates.getRegionState(hri); if (rs_p != null) { regionOffline(hri); } if (getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING)) { invokeUnAssign(a); invokeUnAssign(b); } return null; } private void doMergingOfReplicas(HRegionInfo mergedHri, final HRegionInfo hri_a, final HRegionInfo hri_b) { // Close replicas for the original unmerged regions. create/assign new replicas // for the merged parent. 
List<HRegionInfo> unmergedRegions = new ArrayList<>(); unmergedRegions.add(hri_a); unmergedRegions.add(hri_b); Map<ServerName, List<HRegionInfo>> map = regionStates.getRegionAssignments(unmergedRegions); Collection<List<HRegionInfo>> c = map.values(); for (List<HRegionInfo> l : c) { for (HRegionInfo h : l) { if (!RegionReplicaUtil.isDefaultReplica(h)) { LOG.debug("Unassigning un-merged replica " + h); unassign(h); } } } int numReplicas = getNumReplicas(server, mergedHri.getTable()); List<HRegionInfo> regions = new ArrayList<>(); for (int i = 1; i < numReplicas; i++) { regions.add(RegionReplicaUtil.getRegionInfoForReplica(mergedHri, i)); } try { assign(regions); } catch (IOException ioe) { LOG.warn("Couldn't assign all replica(s) of region " + mergedHri + " because of " + ioe.getMessage()); } catch (InterruptedException ie) { LOG.warn("Couldn't assign all replica(s) of region " + mergedHri+ " because of " + ie.getMessage()); } } private void doSplittingOfReplicas(final HRegionInfo parentHri, final HRegionInfo hri_a, final HRegionInfo hri_b) { // create new regions for the replica, and assign them to match with the // current replica assignments. If replica1 of parent is assigned to RS1, // the replica1s of daughters will be on the same machine int numReplicas = getNumReplicas(server, parentHri.getTable()); // unassign the old replicas List<HRegionInfo> parentRegion = new ArrayList<>(); parentRegion.add(parentHri); Map<ServerName, List<HRegionInfo>> currentAssign = regionStates.getRegionAssignments(parentRegion); Collection<List<HRegionInfo>> c = currentAssign.values(); for (List<HRegionInfo> l : c) { for (HRegionInfo h : l) { if (!RegionReplicaUtil.isDefaultReplica(h)) { LOG.debug("Unassigning parent's replica " + h); unassign(h); } } } // assign daughter replicas Map<HRegionInfo, ServerName> map = new HashMap<>(); for (int i = 1; i < numReplicas; i++) { prepareDaughterReplicaForAssignment(hri_a, parentHri, i, map); prepareDaughterReplicaForAssignment(hri_b, parentHri, i, map); } try { assign(map); } catch (IOException e) { LOG.warn("Caught exception " + e + " while trying to assign replica(s) of daughter(s)"); } catch (InterruptedException e) { LOG.warn("Caught exception " + e + " while trying to assign replica(s) of daughter(s)"); } } private void prepareDaughterReplicaForAssignment(HRegionInfo daughterHri, HRegionInfo parentHri, int replicaId, Map<HRegionInfo, ServerName> map) { HRegionInfo parentReplica = RegionReplicaUtil.getRegionInfoForReplica(parentHri, replicaId); HRegionInfo daughterReplica = RegionReplicaUtil.getRegionInfoForReplica(daughterHri, replicaId); LOG.debug("Created replica region for daughter " + daughterReplica); ServerName sn; if ((sn = regionStates.getRegionServerOfRegion(parentReplica)) != null) { map.put(daughterReplica, sn); } else { List<ServerName> servers = serverManager.getOnlineServersList(); sn = servers.get((new Random(System.currentTimeMillis())).nextInt(servers.size())); map.put(daughterReplica, sn); } } public Set<HRegionInfo> getReplicasToClose() { return replicasToClose; } public Map<String, AtomicInteger> getFailedOpenTracker() {return failedOpenTracker;} /** * A region is offline. The new state should be the specified one, * if not null. If the specified state is null, the new state is Offline. * The specified state can be Split/Merged/Offline/null only. 
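 * <p>For SPLIT and MERGED the replicas of the region are also queued onto
 * {@code replicasToClose}. A caller sketch taken from the split path above:
 * <pre>{@code
 *   // Offline the parent once its daughters are being assigned.
 *   regionOffline(parentHRI, State.SPLIT);
 * }</pre>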
*/ private void regionOffline(final HRegionInfo regionInfo, final State state) { regionStates.regionOffline(regionInfo, state); removeClosedRegion(regionInfo); // remove the region plan as well just in case. clearRegionPlan(regionInfo); balancer.regionOffline(regionInfo); // Tell our listeners that a region was closed sendRegionClosedNotification(regionInfo); // also note that all the replicas of the primary should be closed if (state != null && state.equals(State.SPLIT)) { Collection<HRegionInfo> c = new ArrayList<>(1); c.add(regionInfo); Map<ServerName, List<HRegionInfo>> map = regionStates.getRegionAssignments(c); Collection<List<HRegionInfo>> allReplicas = map.values(); for (List<HRegionInfo> list : allReplicas) { replicasToClose.addAll(list); } } else if (state != null && state.equals(State.MERGED)) { Collection<HRegionInfo> c = new ArrayList<>(1); c.add(regionInfo); Map<ServerName, List<HRegionInfo>> map = regionStates.getRegionAssignments(c); Collection<List<HRegionInfo>> allReplicas = map.values(); for (List<HRegionInfo> list : allReplicas) { replicasToClose.addAll(list); } } } private void sendRegionOpenedNotification(final HRegionInfo regionInfo, final ServerName serverName) { if (!this.listeners.isEmpty()) { for (AssignmentListener listener : this.listeners) { listener.regionOpened(regionInfo, serverName); } } } private void sendRegionClosedNotification(final HRegionInfo regionInfo) { if (!this.listeners.isEmpty()) { for (AssignmentListener listener : this.listeners) { listener.regionClosed(regionInfo); } } } /** * Try to update some region states. If the state machine prevents * such update, an error message is returned to explain the reason. * * It's expected that in each transition there should have just one * region for opening/closing, 3 regions for splitting/merging. * These regions should be on the server that requested the change. * * Region state machine. Only these transitions * are expected to be triggered by a region server. * * On the state transition: * (1) Open/Close should be initiated by master * (a) Master sets the region to pending_open/pending_close * in memory and hbase:meta after sending the request * to the region server * (b) Region server reports back to the master * after open/close is done (either success/failure) * (c) If region server has problem to report the status * to master, it must be because the master is down or some * temporary network issue. Otherwise, the region server should * abort since it must be a bug. If the master is not accessible, * the region server should keep trying until the server is * stopped or till the status is reported to the (new) master * (d) If region server dies in the middle of opening/closing * a region, SSH picks it up and finishes it * (e) If master dies in the middle, the new master recovers * the state during initialization from hbase:meta. Region server * can report any transition that has not been reported to * the previous active master yet * (2) Split/merge is initiated by region servers * (a) To split a region, a region server sends a request * to master to try to set a region to splitting, together with * two daughters (to be created) to splitting new. If approved * by the master, the splitting can then move ahead * (b) To merge two regions, a region server sends a request to * master to try to set the new merged region (to be created) to * merging_new, together with two regions (to be merged) to merging. 
* If it is ok with the master, the merge can then move ahead * (c) Once the splitting/merging is done, the region server * reports the status back to the master either success/failure. * (d) Other scenarios should be handled similarly as for * region open/close */ public String onRegionTransition(final ServerName serverName, final RegionStateTransition transition) { TransitionCode code = transition.getTransitionCode(); HRegionInfo hri = HRegionInfo.convert(transition.getRegionInfo(0)); Lock lock = locker.acquireLock(hri.getEncodedName()); try { RegionState current = regionStates.getRegionState(hri); if (LOG.isDebugEnabled()) { LOG.debug("Got transition " + code + " for " + (current != null ? current.toString() : hri.getShortNameToLog()) + " from " + serverName); } String errorMsg = null; switch (code) { case OPENED: errorMsg = onRegionOpen(current, hri, serverName, transition); break; case FAILED_OPEN: errorMsg = onRegionFailedOpen(current, hri, serverName); break; case CLOSED: errorMsg = onRegionClosed(current, hri, serverName); break; case READY_TO_SPLIT: try { regionStateListener.onRegionSplit(hri); errorMsg = onRegionReadyToSplit(current, hri, serverName, transition); } catch (IOException exp) { if (exp instanceof QuotaExceededException) { server.getRegionNormalizer().planSkipped(hri, PlanType.SPLIT); } errorMsg = StringUtils.stringifyException(exp); } break; case SPLIT_PONR: errorMsg = onRegionSplitPONR(current, hri, serverName, transition); break; case SPLIT: errorMsg = onRegionSplit(current, hri, serverName, transition); break; case SPLIT_REVERTED: errorMsg = onRegionSplitReverted(current, hri, serverName, transition); if (org.apache.commons.lang.StringUtils.isEmpty(errorMsg)) { try { regionStateListener.onRegionSplitReverted(hri); } catch (IOException exp) { LOG.warn(StringUtils.stringifyException(exp)); } } break; case READY_TO_MERGE: errorMsg = onRegionReadyToMerge(current, hri, serverName, transition); break; case MERGE_PONR: errorMsg = onRegionMergePONR(current, hri, serverName, transition); break; case MERGED: try { errorMsg = onRegionMerged(current, hri, serverName, transition); regionStateListener.onRegionMerged(hri); } catch (IOException exp) { errorMsg = StringUtils.stringifyException(exp); } break; case MERGE_REVERTED: errorMsg = onRegionMergeReverted(current, hri, serverName, transition); break; default: errorMsg = "Unexpected transition code " + code; } if (errorMsg != null) { LOG.info("Could not transition region from " + current + " on " + code + " by " + serverName + ": " + errorMsg); } return errorMsg; } finally { lock.unlock(); } } private void processBogusAssignments(Map<ServerName, List<HRegionInfo>> bulkPlan) { if (bulkPlan.containsKey(LoadBalancer.BOGUS_SERVER_NAME)) { // Found no plan for some regions, put those regions in RIT for (HRegionInfo hri : bulkPlan.get(LoadBalancer.BOGUS_SERVER_NAME)) { regionStates.updateRegionState(hri, State.FAILED_OPEN); } bulkPlan.remove(LoadBalancer.BOGUS_SERVER_NAME); } } /** * @return Instance of load balancer */ public LoadBalancer getBalancer() { return this.balancer; } public Map<ServerName, List<HRegionInfo>> getSnapShotOfAssignment(Collection<HRegionInfo> infos) { return getRegionStates().getRegionAssignments(infos); } void setRegionStateListener(RegionStateListener listener) { this.regionStateListener = listener; } private class DelayedAssignCallable implements Runnable { Callable callable; public DelayedAssignCallable(Callable callable) { this.callable = callable; } @Override public void run() { 
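// Fired by scheduledThreadPoolExecutor after the delay requested in invokeAssignLater;
// hand the wrapped AssignCallable back to the regular assignment pool to do the actual work.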
threadPoolExecutorService.submit(callable); } } }
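/*
 * Hedged usage sketch for the transition entry point above (the report plumbing on the master
 * side is assumed and not shown in this class): a region server's state report is fed to
 * onRegionTransition, and a non-null return value means the master rejected the transition.
 *
 *   String error = assignmentManager.onRegionTransition(serverName, transition);
 *   if (error != null) {
 *     // rejected; the caller typically relays the error back to the reporting region server
 *   }
 */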