/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.RegionTransition;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;

/**
 * Region state accountant. It holds the states of all regions in memory.
 * In the normal scenario, it should match the meta table and the true
 * region states.
 *
 * This map is used by AssignmentManager to track region states.
 *
 * All the internal maps are plain (non-concurrent) collections; they are
 * guarded by this object's monitor. State changes call {@code notifyAll()}
 * on this object so that the {@code waitFor...} methods can re-check their
 * condition.
 */
@InterfaceAudience.Private
public class RegionStates {
  private static final Log LOG = LogFactory.getLog(RegionStates.class);

  /**
   * Regions currently in transition, keyed by encoded region name.
   */
  final HashMap<String, RegionState> regionsInTransition;

  /**
   * Region encoded name to state map.
   * All the regions should be in this map.
   */
  private final Map<String, RegionState> regionStates;

  /**
   * Server to regions assignment map.
   * Contains the set of regions currently assigned to a given server.
   */
  private final Map<ServerName, Set<HRegionInfo>> serverHoldings;

  /**
   * Region to server assignment map.
   * Contains the server a given region is currently assigned to.
   */
  private final TreeMap<HRegionInfo, ServerName> regionAssignments;

  private final ServerManager serverManager;
  private final Server server;

  /**
   * Creates an empty state tracker.
   *
   * @param master the server this tracker belongs to; used for stop checks,
   *   configuration, catalog access and aborting
   * @param serverManager used to find out which region servers are online
   */
  RegionStates(final Server master, final ServerManager serverManager) {
    regionStates = new HashMap<String, RegionState>();
    regionsInTransition = new HashMap<String, RegionState>();
    serverHoldings = new HashMap<ServerName, Set<HRegionInfo>>();
    regionAssignments = new TreeMap<HRegionInfo, ServerName>();
    this.serverManager = serverManager;
    this.server = master;
  }

  /**
   * @return a shallow copy of the region assignment map; modifying the
   *   returned map does not affect this object
   */
  @SuppressWarnings("unchecked")
  public synchronized Map<HRegionInfo, ServerName> getRegionAssignments() {
    return (Map<HRegionInfo, ServerName>)regionAssignments.clone();
  }

  /**
   * @return the server the specified region is assigned to; null if the
   *   region is not in the assignment map
   */
  public synchronized ServerName getRegionServerOfRegion(HRegionInfo hri) {
    return regionAssignments.get(hri);
  }

  /**
   * Get regions in transition and their states.
   *
   * @return a shallow copy of the regions-in-transition map, keyed by
   *   encoded region name
   */
  @SuppressWarnings("unchecked")
  public synchronized Map<String, RegionState> getRegionsInTransition() {
    return (Map<String, RegionState>)regionsInTransition.clone();
  }

  /**
   * @return True if specified region in transition.
   */
  public synchronized boolean isRegionInTransition(final HRegionInfo hri) {
    return regionsInTransition.containsKey(hri.getEncodedName());
  }

  /**
   * @param regionName the encoded region name
   * @return True if specified region in transition.
   */
  public synchronized boolean isRegionInTransition(final String regionName) {
    return regionsInTransition.containsKey(regionName);
  }

  /**
   * @return True if any region in transition.
   */
  public synchronized boolean isRegionsInTransition() {
    return !regionsInTransition.isEmpty();
  }

  /**
   * @return True if specified region assigned.
   */
  public synchronized boolean isRegionAssigned(final HRegionInfo hri) {
    return regionAssignments.containsKey(hri);
  }

  /**
   * @return the server the specified region assigned to; null if not assigned.
   */
  public synchronized ServerName getAssignedServer(final HRegionInfo hri) {
    return regionAssignments.get(hri);
  }

  /**
   * Wait for the state map to be updated by assignment manager.
   * Returns after a notification on this object or once the timeout has
   * elapsed; callers are expected to re-check their condition afterwards.
   *
   * @param timeout maximum time to wait, in milliseconds
   * @throws InterruptedException if the waiting thread is interrupted
   */
  public synchronized void waitForUpdate(
      final long timeout) throws InterruptedException {
    this.wait(timeout);
  }

  /**
   * Get region transition state.
   *
   * @return the state of the region if it is in transition; null otherwise
   */
  public synchronized RegionState getRegionTransitionState(final HRegionInfo hri) {
    return regionsInTransition.get(hri.getEncodedName());
  }

  /**
   * Get region transition state.
   *
   * @param regionName the encoded region name
   * @return the state of the region if it is in transition; null otherwise
   */
  public synchronized RegionState getRegionTransitionState(final String regionName) {
    return regionsInTransition.get(regionName);
  }

  /**
   * Add a list of regions to RegionStates. The initial state is OFFLINE.
   * If any region is already in RegionStates, that region will be skipped.
   */
  public synchronized void createRegionStates(
      final List<HRegionInfo> hris) {
    for (HRegionInfo hri: hris) {
      createRegionState(hri);
    }
  }

  /**
   * Add a region to RegionStates. The initial state is OFFLINE.
   * If it is already in RegionStates, this call has no effect,
   * and the original state is returned.
   *
   * @return the state now held for the region (new or pre-existing)
   */
  public synchronized RegionState createRegionState(final HRegionInfo hri) {
    String regionName = hri.getEncodedName();
    RegionState regionState = regionStates.get(regionName);
    if (regionState != null) {
      LOG.warn("Tried to create a state of a region already in RegionStates "
        + hri + ", used existing state: " + regionState
        + ", ignored new state: state=OFFLINE, server=null");
    } else {
      regionState = new RegionState(hri, State.OFFLINE);
      regionStates.put(regionName, regionState);
    }
    return regionState;
  }

  /**
   * Update a region state. If it is not splitting,
   * it will be put in transition if not already there.
   *
   * The server of the new state is carried over from the current state,
   * except that it is null when there is no current state or when the new
   * state is CLOSED or OFFLINE.
   */
  public synchronized RegionState updateRegionState(
      final HRegionInfo hri, final State state) {
    RegionState regionState = regionStates.get(hri.getEncodedName());
    ServerName serverName = (regionState == null || state == State.CLOSED
      || state == State.OFFLINE) ? null : regionState.getServerName();
    return updateRegionState(hri, state, serverName);
  }

  /**
   * Update a region state. If it is not splitting,
   * it will be put in transition if not already there.
   *
   * If we can't find the region info based on the region name in
   * the transition, log a warning and return null.
   */
  public synchronized RegionState updateRegionState(
      final RegionTransition transition, final State state) {
    byte[] regionName = transition.getRegionName();
    HRegionInfo regionInfo = getRegionInfo(regionName);
    if (regionInfo == null) {
      String prettyRegionName = HRegionInfo.prettyPrint(
        HRegionInfo.encodeRegionName(regionName));
      LOG.warn("Failed to find region " + prettyRegionName
        + " in updating its state to " + state
        + " based on region transition " + transition);
      return null;
    }
    return updateRegionState(regionInfo, state,
      transition.getServerName());
  }

  /**
   * Update a region state. If it is not splitting,
   * it will be put in transition if not already there.
   *
   * A non-null server passed together with CLOSED or OFFLINE is ignored
   * (with a warning) and the new state carries a null server. Waiters on
   * this object are notified after the update.
   *
   * @return the newly recorded state
   */
  public synchronized RegionState updateRegionState(
      final HRegionInfo hri, final State state, final ServerName serverName) {
    ServerName newServerName = serverName;
    if (serverName != null &&
        (state == State.CLOSED || state == State.OFFLINE)) {
      LOG.warn("Closed region " + hri + " still on "
        + serverName + "? Ignored, reset it to null");
      newServerName = null;
    }

    String regionName = hri.getEncodedName();
    RegionState regionState = new RegionState(
      hri, state, System.currentTimeMillis(), newServerName);
    RegionState oldState = regionStates.put(regionName, regionState);
    LOG.info("Region " + hri + " transitioned from " + oldState + " to " + regionState);
    // SPLITTING is never put in transition here; PENDING_CLOSE/CLOSING are
    // only put in transition when a server is known for the region.
    if (state != State.SPLITTING && (newServerName != null
        || (state != State.PENDING_CLOSE && state != State.CLOSING))) {
      regionsInTransition.put(regionName, regionState);
    }

    // notify the change
    this.notifyAll();
    return regionState;
  }

  /**
   * A region is online, won't be in transition any more.
   * We can't confirm it is really online on specified region server
   * because it hasn't been put in region server's online region list yet.
   *
   * @param hri the region that came online
   * @param serverName the server now holding the region; must not be null
   */
  public synchronized void regionOnline(
      final HRegionInfo hri, final ServerName serverName) {
    String regionName = hri.getEncodedName();
    RegionState oldState = regionStates.get(regionName);
    if (oldState == null) {
      LOG.warn("Online a region not in RegionStates: " + hri);
    } else {
      State state = oldState.getState();
      ServerName sn = oldState.getServerName();
      if (state != State.OPEN || sn == null || !sn.equals(serverName)) {
        LOG.debug("Online a region with current state=" + state + ", expected state=OPEN"
          + ", assigned to server: " + sn + " expected " + serverName);
      }
    }
    updateRegionState(hri, State.OPEN, serverName);
    // updateRegionState put the OPEN state in transition; take it out again.
    regionsInTransition.remove(regionName);

    ServerName oldServerName = regionAssignments.put(hri, serverName);
    if (!serverName.equals(oldServerName)) {
      LOG.info("Onlined region " + hri + " on " + serverName);
      Set<HRegionInfo> regions = serverHoldings.get(serverName);
      if (regions == null) {
        regions = new HashSet<HRegionInfo>();
        serverHoldings.put(serverName, regions);
      }
      regions.add(hri);
      if (oldServerName != null) {
        removeRegionFromServer(oldServerName, hri);
      }
    }
  }

  /**
   * A region is offline, won't be in transition any more.
   */
  public synchronized void regionOffline(final HRegionInfo hri) {
    String regionName = hri.getEncodedName();
    RegionState oldState = regionStates.get(regionName);
    if (oldState == null) {
      LOG.warn("Offline a region not in RegionStates: " + hri);
    } else {
      State state = oldState.getState();
      ServerName sn = oldState.getServerName();
      if (state != State.OFFLINE || sn != null) {
        LOG.debug("Offline a region with current state=" + state + ", expected state=OFFLINE"
          + ", assigned to server: " + sn + ", expected null");
      }
    }
    updateRegionState(hri, State.OFFLINE);
    // updateRegionState put the OFFLINE state in transition; take it out again.
    regionsInTransition.remove(regionName);

    ServerName oldServerName = regionAssignments.remove(hri);
    if (oldServerName != null) {
      removeRegionFromServer(oldServerName, hri);
    }
  }

  /**
   * Logs that a region left a server and drops it from that server's
   * holdings. Tolerates a missing holdings entry for the server instead of
   * failing with a NullPointerException. Must be called with this object's
   * monitor held.
   */
  private void removeRegionFromServer(
      final ServerName serverName, final HRegionInfo hri) {
    LOG.info("Offlined region " + hri + " from " + serverName);
    Set<HRegionInfo> regions = serverHoldings.get(serverName);
    if (regions != null) {
      regions.remove(hri);
    }
  }

  /**
   * A server is offline, all regions on it are dead.
   *
   * @param sn the server that went offline
   * @return the in-transition states of regions that were assigned to the
   *   server
   */
  public synchronized List<RegionState> serverOffline(final ServerName sn) {
    // Clean up this server from map of servers to regions, and remove all regions
    // of this server from online map of regions.
    List<RegionState> rits = new ArrayList<RegionState>();
    Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
    if (assignedRegions == null) {
      assignedRegions = new HashSet<HRegionInfo>();
    }

    for (HRegionInfo region : assignedRegions) {
      regionAssignments.remove(region);
    }

    // See if any of the regions that were online on this server were in RIT
    // If they are, normal timeouts will deal with them appropriately so
    // let's skip a manual re-assignment.
    for (RegionState state : regionsInTransition.values()) {
      if (assignedRegions.contains(state.getRegion())) {
        rits.add(state);
      } else if (sn.equals(state.getServerName())) {
        // Region is in transition on this region server, and this
        // region is not open on this server. So the region must be
        // moving to this server from another one (i.e. opening or
        // pending open on this server, was open on another one).
        if (state.isPendingOpen() || state.isOpening()) {
          state.setTimestamp(0); // timeout it, let timeout monitor reassign
        } else {
          LOG.warn("THIS SHOULD NOT HAPPEN: unexpected state "
            + state + " of region in transition on server " + sn);
        }
      }
    }
    assignedRegions.clear();
    this.notifyAll();
    return rits;
  }

  /**
   * Gets the online regions of the specified table.
   * This method looks at the in-memory state. It does not go to <code>.META.</code>.
   * Only returns <em>online</em> regions. If a region on this table has been
   * closed during a disable, etc., it will not be included in the returned list.
   * So, the returned list may not necessarily be ALL regions in this table, it is
   * all the ONLINE regions in the table.
   * @param tableName
   * @return Online regions from <code>tableName</code>
   */
  public synchronized List<HRegionInfo> getRegionsOfTable(byte[] tableName) {
    List<HRegionInfo> tableRegions = new ArrayList<HRegionInfo>();
    // boundary needs to have table's name but regionID 0 so that it is sorted
    // before all table's regions.
    HRegionInfo boundary = new HRegionInfo(tableName, null, null, false, 0L);
    for (HRegionInfo hri: regionAssignments.tailMap(boundary).keySet()) {
      // Keys are sorted, so the first region of another table ends the scan.
      if (!Bytes.equals(hri.getTableName(), tableName)) {
        break;
      }
      tableRegions.add(hri);
    }
    return tableRegions;
  }

  /**
   * Wait on region to clear regions-in-transition.
   * <p>
   * If the region isn't in transition, returns immediately. Otherwise, method
   * blocks until the region is out of transition or the server is stopped.
   *
   * @throws InterruptedException if the waiting thread is interrupted
   */
  public synchronized void waitOnRegionToClearRegionsInTransition(
      final HRegionInfo hri) throws InterruptedException {
    if (!isRegionInTransition(hri)) {
      return;
    }

    while (!server.isStopped() && isRegionInTransition(hri)) {
      RegionState rs = getRegionState(hri);
      LOG.info("Waiting on " + rs + " to clear regions-in-transition");
      waitForUpdate(100);
    }

    if (server.isStopped()) {
      LOG.info("Giving up wait on region in " +
        "transition because stoppable.isStopped is set");
    }
  }

  /**
   * Waits until the specified region has completed assignment.
   * <p>
   * If the region is already assigned, returns immediately. Otherwise, method
   * blocks until the region is assigned or the server is stopped.
   *
   * @throws InterruptedException if the waiting thread is interrupted
   */
  public synchronized void waitForAssignment(
      final HRegionInfo hri) throws InterruptedException {
    // Already assigned: nothing to wait for.
    if (isRegionAssigned(hri)) {
      return;
    }

    while (!server.isStopped() && !isRegionAssigned(hri)) {
      RegionState rs = getRegionState(hri);
      LOG.info("Waiting on " + rs + " to be assigned");
      waitForUpdate(100);
    }

    if (server.isStopped()) {
      LOG.info("Giving up wait on region " +
        "assignment because stoppable.isStopped is set");
    }
  }

  /**
   * Compute the average load across all region servers.
   * Currently, this uses a very naive computation - just uses the number of
   * regions being served, ignoring stats about number of requests.
   * A server with no regions only counts if the server manager reports it
   * as online.
   * @return the average load; 0.0 when no server is counted
   */
  protected synchronized double getAverageLoad() {
    int numServers = 0, totalLoad = 0;
    for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
      Set<HRegionInfo> regions = e.getValue();
      ServerName serverName = e.getKey();
      int regionCount = regions.size();
      if (regionCount > 0 || serverManager.isServerOnline(serverName)) {
        totalLoad += regionCount;
        numServers++;
      }
    }
    return numServers == 0 ? 0.0 :
      (double)totalLoad / (double)numServers;
  }

  /**
   * This is an EXPENSIVE clone. Cloning though is the safest thing to do.
   * Can't let out original since it can change and at least the load balancer
   * wants to iterate this exported list. The server holdings are copied while
   * holding this object's monitor; servers without assignments are added
   * afterwards, outside the lock.
   *
   * When <code>hbase.master.loadbalance.bytable</code> is false (the default
   * used here), all assignments are returned under the single key
   * "ensemble"; otherwise they are grouped by table name and meta/root
   * regions are left out.
   *
   * @return A clone of current assignments by table.
   */
  protected Map<String, Map<ServerName, List<HRegionInfo>>> getAssignmentsByTable() {
    Map<String, Map<ServerName, List<HRegionInfo>>> result =
      new HashMap<String, Map<ServerName,List<HRegionInfo>>>();
    synchronized (this) {
      if (!server.getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false)) {
        Map<ServerName, List<HRegionInfo>> svrToRegions =
          new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
        for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
          svrToRegions.put(e.getKey(), new ArrayList<HRegionInfo>(e.getValue()));
        }
        result.put("ensemble", svrToRegions);
      } else {
        for (Map.Entry<ServerName, Set<HRegionInfo>> e: serverHoldings.entrySet()) {
          for (HRegionInfo hri: e.getValue()) {
            if (hri.isMetaRegion() || hri.isRootRegion()) {
              continue;
            }
            String tablename = hri.getTableNameAsString();
            Map<ServerName, List<HRegionInfo>> svrToRegions = result.get(tablename);
            if (svrToRegions == null) {
              svrToRegions = new HashMap<ServerName, List<HRegionInfo>>(serverHoldings.size());
              result.put(tablename, svrToRegions);
            }
            List<HRegionInfo> regions = svrToRegions.get(e.getKey());
            if (regions == null) {
              regions = new ArrayList<HRegionInfo>();
              svrToRegions.put(e.getKey(), regions);
            }
            regions.add(hri);
          }
        }
      }
    }

    Map<ServerName, ServerLoad>
      onlineSvrs = serverManager.getOnlineServers();
    // Take care of servers w/o assignments.
    for (Map<ServerName, List<HRegionInfo>> map: result.values()) {
      for (ServerName svr: onlineSvrs.keySet()) {
        if (!map.containsKey(svr)) {
          map.put(svr, new ArrayList<HRegionInfo>());
        }
      }
    }
    return result;
  }

  /**
   * @return the recorded state of the region; null if it is not in
   *   RegionStates
   */
  protected synchronized RegionState getRegionState(final HRegionInfo hri) {
    return regionStates.get(hri.getEncodedName());
  }

  /**
   * @param regionName the encoded region name
   * @return the recorded state of the region; null if it is not in
   *   RegionStates
   */
  protected synchronized RegionState getRegionState(final String regionName) {
    return regionStates.get(regionName);
  }

  /**
   * Get the HRegionInfo from cache, if not there, from the META table.
   * A region found in META is added to RegionStates in the OFFLINE state.
   * If reading META fails with an IOException, the server is aborted and
   * null is returned.
   *
   * This method is not synchronized itself; the cache lookup and the cache
   * insert each take the lock through synchronized methods.
   *
   * @param regionName
   * @return HRegionInfo for the region; null if it cannot be found
   */
  protected HRegionInfo getRegionInfo(final byte [] regionName) {
    String encodedName = HRegionInfo.encodeRegionName(regionName);
    // regionStates is a plain HashMap guarded by this object's monitor, so
    // read it through the synchronized accessor rather than directly.
    RegionState regionState = getRegionState(encodedName);
    if (regionState != null) {
      return regionState.getRegion();
    }

    try {
      Pair<HRegionInfo, ServerName> p =
        MetaReader.getRegion(server.getCatalogTracker(), regionName);
      HRegionInfo hri = p == null ? null : p.getFirst();
      if (hri != null) {
        createRegionState(hri);
      }
      return hri;
    } catch (IOException e) {
      server.abort("Aborting because error occoured while reading " +
        Bytes.toStringBinary(regionName) + " from .META.", e);
      return null;
    }
  }
}