/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.rsgroup;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.MetaTableAccessor.DefaultVisitorBase;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ProcedureInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.constraint.ConstraintException;
import org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.ServerListener;
import org.apache.hadoop.hbase.master.TableStateManager;
import org.apache.hadoop.hbase.net.Address;
import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos;
import org.apache.hadoop.hbase.protobuf.generated.RSGroupProtos;
import org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy;
import org.apache.hadoop.hbase.security.access.AccessControlLists;
import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.protobuf.ServiceException;
/**
* This is an implementation of {@link RSGroupInfoManager} which makes
* use of an HBase table as the persistence store for the group information.
* It also makes use of zookeeper to store group information needed
* for bootstrapping during offline mode.
*
* <h2>Concurrency</h2>
* RSGroup state is kept locally in Maps. There is a rsgroup name to cached
* RSGroupInfo Map at {@link #rsGroupMap} and a Map of tables to the name of the
* rsgroup they belong too (in {@link #tableMap}). These Maps are persisted to the
* hbase:rsgroup table (and cached in zk) on each modification.
*
* <p>Mutations on state are synchronized but reads can continue without having
* to wait on an instance monitor, mutations do wholesale replace of the Maps on
* update -- Copy-On-Write; the local Maps of state are read-only, just-in-case
* (see flushConfig).
*
* <p>Reads must not block else there is a danger we'll deadlock.
*
* <p>Clients of this class, the {@link RSGroupAdminEndpoint} for example, want to query and
* then act on the results of the query modifying cache in zookeeper without another thread
* making intermediate modifications. These clients synchronize on the 'this' instance so
* no other has access concurrently. Reads must be able to continue concurrently.
*/
@InterfaceAudience.Private
class RSGroupInfoManagerImpl implements RSGroupInfoManager {
private static final Log LOG = LogFactory.getLog(RSGroupInfoManagerImpl.class);
/** Table descriptor for <code>hbase:rsgroup</code> catalog table */
private final static HTableDescriptor RSGROUP_TABLE_DESC;
static {
RSGROUP_TABLE_DESC = new HTableDescriptor(RSGROUP_TABLE_NAME);
RSGROUP_TABLE_DESC.addFamily(new HColumnDescriptor(META_FAMILY_BYTES));
RSGROUP_TABLE_DESC.setRegionSplitPolicyClassName(DisabledRegionSplitPolicy.class.getName());
try {
RSGROUP_TABLE_DESC.addCoprocessor(
MultiRowMutationEndpoint.class.getName(),
null, Coprocessor.PRIORITY_SYSTEM, null);
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
// There two Maps are immutable and wholesale replaced on each modification
// so are safe to access concurrently. See class comment.
private volatile Map<String, RSGroupInfo> rsGroupMap = Collections.emptyMap();
private volatile Map<TableName, String> tableMap = Collections.emptyMap();
private final MasterServices masterServices;
private Table rsGroupTable;
private final ClusterConnection conn;
private final ZooKeeperWatcher watcher;
private final RSGroupStartupWorker rsGroupStartupWorker = new RSGroupStartupWorker();
// contains list of groups that were last flushed to persistent store
private Set<String> prevRSGroups = new HashSet<>();
private final ServerEventsListenerThread serverEventsListenerThread =
new ServerEventsListenerThread();
private RSGroupInfoManagerImpl(MasterServices masterServices) throws IOException {
this.masterServices = masterServices;
this.watcher = masterServices.getZooKeeper();
this.conn = masterServices.getClusterConnection();
}
private synchronized void init() throws IOException{
refresh();
rsGroupStartupWorker.start();
serverEventsListenerThread.start();
masterServices.getServerManager().registerListener(serverEventsListenerThread);
}
static RSGroupInfoManager getInstance(MasterServices master) throws IOException {
RSGroupInfoManagerImpl instance = new RSGroupInfoManagerImpl(master);
instance.init();
return instance;
}
@Override
public synchronized void addRSGroup(RSGroupInfo rsGroupInfo) throws IOException {
checkGroupName(rsGroupInfo.getName());
if (rsGroupMap.get(rsGroupInfo.getName()) != null ||
rsGroupInfo.getName().equals(RSGroupInfo.DEFAULT_GROUP)) {
throw new DoNotRetryIOException("Group already exists: "+ rsGroupInfo.getName());
}
Map<String, RSGroupInfo> newGroupMap = Maps.newHashMap(rsGroupMap);
newGroupMap.put(rsGroupInfo.getName(), rsGroupInfo);
flushConfig(newGroupMap);
}
private RSGroupInfo getRSGroupInfo(final String groupName) throws DoNotRetryIOException {
RSGroupInfo rsGroupInfo = getRSGroup(groupName);
if (rsGroupInfo == null) {
throw new DoNotRetryIOException("RSGroup " + groupName + " does not exist");
}
return rsGroupInfo;
}
@Override
public synchronized Set<Address> moveServers(Set<Address> servers, String srcGroup,
String dstGroup) throws IOException {
RSGroupInfo src = getRSGroupInfo(srcGroup);
RSGroupInfo dst = getRSGroupInfo(dstGroup);
// If destination is 'default' rsgroup, only add servers that are online. If not online, drop
// it. If not 'default' group, add server to 'dst' rsgroup EVEN IF IT IS NOT online (could be a
// rsgroup of dead servers that are to come back later).
Set<Address> onlineServers = dst.getName().equals(RSGroupInfo.DEFAULT_GROUP)?
Utility.getOnlineServers(this.masterServices): null;
for (Address el: servers) {
src.removeServer(el);
if (onlineServers != null) {
if (!onlineServers.contains(el)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Dropping " + el + " during move-to-default rsgroup because not online");
}
continue;
}
}
dst.addServer(el);
}
Map<String,RSGroupInfo> newGroupMap = Maps.newHashMap(rsGroupMap);
newGroupMap.put(src.getName(), src);
newGroupMap.put(dst.getName(), dst);
flushConfig(newGroupMap);
return dst.getServers();
}
@Override
public RSGroupInfo getRSGroupOfServer(Address serverHostPort) throws IOException {
for (RSGroupInfo info: rsGroupMap.values()) {
if (info.containsServer(serverHostPort)) {
return info;
}
}
return null;
}
@Override
public RSGroupInfo getRSGroup(String groupName) {
return rsGroupMap.get(groupName);
}
@Override
public String getRSGroupOfTable(TableName tableName) {
return tableMap.get(tableName);
}
@Override
public synchronized void moveTables(Set<TableName> tableNames, String groupName)
throws IOException {
if (groupName != null && !rsGroupMap.containsKey(groupName)) {
throw new DoNotRetryIOException("Group "+groupName+" does not exist or is a special group");
}
Map<String,RSGroupInfo> newGroupMap = Maps.newHashMap(rsGroupMap);
for(TableName tableName: tableNames) {
if (tableMap.containsKey(tableName)) {
RSGroupInfo src = new RSGroupInfo(newGroupMap.get(tableMap.get(tableName)));
src.removeTable(tableName);
newGroupMap.put(src.getName(), src);
}
if(groupName != null) {
RSGroupInfo dst = new RSGroupInfo(newGroupMap.get(groupName));
dst.addTable(tableName);
newGroupMap.put(dst.getName(), dst);
}
}
flushConfig(newGroupMap);
}
@Override
public synchronized void removeRSGroup(String groupName) throws IOException {
if (!rsGroupMap.containsKey(groupName) || groupName.equals(RSGroupInfo.DEFAULT_GROUP)) {
throw new DoNotRetryIOException("Group " + groupName + " does not exist or is a reserved "
+ "group");
}
Map<String,RSGroupInfo> newGroupMap = Maps.newHashMap(rsGroupMap);
newGroupMap.remove(groupName);
flushConfig(newGroupMap);
}
@Override
public List<RSGroupInfo> listRSGroups() {
return Lists.newLinkedList(rsGroupMap.values());
}
@Override
public boolean isOnline() {
return rsGroupStartupWorker.isOnline();
}
@Override
public void moveServersAndTables(Set<Address> servers, Set<TableName> tables,
String srcGroup, String dstGroup) throws IOException {
//get server's group
RSGroupInfo srcGroupInfo = getRSGroupInfo(srcGroup);
RSGroupInfo dstGroupInfo = getRSGroupInfo(dstGroup);
//move servers
for (Address el: servers) {
srcGroupInfo.removeServer(el);
dstGroupInfo.addServer(el);
}
//move tables
for(TableName tableName: tables) {
srcGroupInfo.removeTable(tableName);
dstGroupInfo.addTable(tableName);
}
//flush changed groupinfo
Map<String,RSGroupInfo> newGroupMap = Maps.newHashMap(rsGroupMap);
newGroupMap.put(srcGroupInfo.getName(), srcGroupInfo);
newGroupMap.put(dstGroupInfo.getName(), dstGroupInfo);
flushConfig(newGroupMap);
}
List<RSGroupInfo> retrieveGroupListFromGroupTable() throws IOException {
List<RSGroupInfo> rsGroupInfoList = Lists.newArrayList();
for (Result result : rsGroupTable.getScanner(new Scan())) {
RSGroupProtos.RSGroupInfo proto = RSGroupProtos.RSGroupInfo.parseFrom(
result.getValue(META_FAMILY_BYTES, META_QUALIFIER_BYTES));
rsGroupInfoList.add(RSGroupProtobufUtil.toGroupInfo(proto));
}
return rsGroupInfoList;
}
List<RSGroupInfo> retrieveGroupListFromZookeeper() throws IOException {
String groupBasePath = ZKUtil.joinZNode(watcher.znodePaths.baseZNode, rsGroupZNode);
List<RSGroupInfo> RSGroupInfoList = Lists.newArrayList();
//Overwrite any info stored by table, this takes precedence
try {
if(ZKUtil.checkExists(watcher, groupBasePath) != -1) {
for(String znode: ZKUtil.listChildrenAndWatchForNewChildren(watcher, groupBasePath)) {
byte[] data = ZKUtil.getData(watcher, ZKUtil.joinZNode(groupBasePath, znode));
if(data.length > 0) {
ProtobufUtil.expectPBMagicPrefix(data);
ByteArrayInputStream bis = new ByteArrayInputStream(
data, ProtobufUtil.lengthOfPBMagic(), data.length);
RSGroupInfoList.add(RSGroupProtobufUtil.toGroupInfo(
RSGroupProtos.RSGroupInfo.parseFrom(bis)));
}
}
LOG.debug("Read ZK GroupInfo count:" + RSGroupInfoList.size());
}
} catch (KeeperException|DeserializationException|InterruptedException e) {
throw new IOException("Failed to read rsGroupZNode",e);
}
return RSGroupInfoList;
}
@Override
public void refresh() throws IOException {
refresh(false);
}
/**
* Read rsgroup info from the source of truth, the hbase:rsgroup table.
* Update zk cache. Called on startup of the manager.
*/
private synchronized void refresh(boolean forceOnline) throws IOException {
List<RSGroupInfo> groupList = new LinkedList<>();
// Overwrite anything read from zk, group table is source of truth
// if online read from GROUP table
if (forceOnline || isOnline()) {
LOG.debug("Refreshing in Online mode.");
if (rsGroupTable == null) {
rsGroupTable = conn.getTable(RSGROUP_TABLE_NAME);
}
groupList.addAll(retrieveGroupListFromGroupTable());
} else {
LOG.debug("Refreshing in Offline mode.");
groupList.addAll(retrieveGroupListFromZookeeper());
}
// refresh default group, prune
NavigableSet<TableName> orphanTables = new TreeSet<>();
for(String entry: masterServices.getTableDescriptors().getAll().keySet()) {
orphanTables.add(TableName.valueOf(entry));
}
final List<TableName> specialTables;
if(!masterServices.isInitialized()) {
specialTables = Arrays.asList(AccessControlLists.ACL_TABLE_NAME, TableName.META_TABLE_NAME,
TableName.NAMESPACE_TABLE_NAME, RSGROUP_TABLE_NAME);
} else {
specialTables =
masterServices.listTableNamesByNamespace(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR);
}
for (TableName table : specialTables) {
orphanTables.add(table);
}
for (RSGroupInfo group: groupList) {
if(!group.getName().equals(RSGroupInfo.DEFAULT_GROUP)) {
orphanTables.removeAll(group.getTables());
}
}
// This is added to the last of the list so it overwrites the 'default' rsgroup loaded
// from region group table or zk
groupList.add(new RSGroupInfo(RSGroupInfo.DEFAULT_GROUP, getDefaultServers(),
orphanTables));
// populate the data
HashMap<String, RSGroupInfo> newGroupMap = Maps.newHashMap();
HashMap<TableName, String> newTableMap = Maps.newHashMap();
for (RSGroupInfo group : groupList) {
newGroupMap.put(group.getName(), group);
for(TableName table: group.getTables()) {
newTableMap.put(table, group.getName());
}
}
resetRSGroupAndTableMaps(newGroupMap, newTableMap);
updateCacheOfRSGroups(rsGroupMap.keySet());
}
private synchronized Map<TableName,String> flushConfigTable(Map<String,RSGroupInfo> groupMap)
throws IOException {
Map<TableName,String> newTableMap = Maps.newHashMap();
List<Mutation> mutations = Lists.newArrayList();
// populate deletes
for(String groupName : prevRSGroups) {
if(!groupMap.containsKey(groupName)) {
Delete d = new Delete(Bytes.toBytes(groupName));
mutations.add(d);
}
}
// populate puts
for(RSGroupInfo RSGroupInfo : groupMap.values()) {
RSGroupProtos.RSGroupInfo proto = RSGroupProtobufUtil.toProtoGroupInfo(RSGroupInfo);
Put p = new Put(Bytes.toBytes(RSGroupInfo.getName()));
p.addColumn(META_FAMILY_BYTES, META_QUALIFIER_BYTES, proto.toByteArray());
mutations.add(p);
for(TableName entry: RSGroupInfo.getTables()) {
newTableMap.put(entry, RSGroupInfo.getName());
}
}
if(mutations.size() > 0) {
multiMutate(mutations);
}
return newTableMap;
}
private synchronized void flushConfig()
throws IOException {
flushConfig(this.rsGroupMap);
}
private synchronized void flushConfig(Map<String, RSGroupInfo> newGroupMap)
throws IOException {
Map<TableName, String> newTableMap;
// For offline mode persistence is still unavailable
// We're refreshing in-memory state but only for default servers
if (!isOnline()) {
Map<String, RSGroupInfo> m = Maps.newHashMap(rsGroupMap);
RSGroupInfo oldDefaultGroup = m.remove(RSGroupInfo.DEFAULT_GROUP);
RSGroupInfo newDefaultGroup = newGroupMap.remove(RSGroupInfo.DEFAULT_GROUP);
if (!m.equals(newGroupMap) ||
!oldDefaultGroup.getTables().equals(newDefaultGroup.getTables())) {
throw new IOException("Only default servers can be updated during offline mode");
}
newGroupMap.put(RSGroupInfo.DEFAULT_GROUP, newDefaultGroup);
rsGroupMap = newGroupMap;
return;
}
newTableMap = flushConfigTable(newGroupMap);
// Make changes visible after having been persisted to the source of truth
resetRSGroupAndTableMaps(newGroupMap, newTableMap);
try {
String groupBasePath = ZKUtil.joinZNode(watcher.znodePaths.baseZNode, rsGroupZNode);
ZKUtil.createAndFailSilent(watcher, groupBasePath, ProtobufMagic.PB_MAGIC);
List<ZKUtil.ZKUtilOp> zkOps = new ArrayList<>(newGroupMap.size());
for(String groupName : prevRSGroups) {
if(!newGroupMap.containsKey(groupName)) {
String znode = ZKUtil.joinZNode(groupBasePath, groupName);
zkOps.add(ZKUtil.ZKUtilOp.deleteNodeFailSilent(znode));
}
}
for (RSGroupInfo RSGroupInfo : newGroupMap.values()) {
String znode = ZKUtil.joinZNode(groupBasePath, RSGroupInfo.getName());
RSGroupProtos.RSGroupInfo proto = RSGroupProtobufUtil.toProtoGroupInfo(RSGroupInfo);
LOG.debug("Updating znode: "+znode);
ZKUtil.createAndFailSilent(watcher, znode);
zkOps.add(ZKUtil.ZKUtilOp.deleteNodeFailSilent(znode));
zkOps.add(ZKUtil.ZKUtilOp.createAndFailSilent(znode,
ProtobufUtil.prependPBMagic(proto.toByteArray())));
}
LOG.debug("Writing ZK GroupInfo count: " + zkOps.size());
ZKUtil.multiOrSequential(watcher, zkOps, false);
} catch (KeeperException e) {
LOG.error("Failed to write to rsGroupZNode", e);
masterServices.abort("Failed to write to rsGroupZNode", e);
throw new IOException("Failed to write to rsGroupZNode",e);
}
updateCacheOfRSGroups(newGroupMap.keySet());
}
/**
* Make changes visible.
* Caller must be synchronized on 'this'.
*/
private void resetRSGroupAndTableMaps(Map<String, RSGroupInfo> newRSGroupMap,
Map<TableName, String> newTableMap) {
// Make maps Immutable.
this.rsGroupMap = Collections.unmodifiableMap(newRSGroupMap);
this.tableMap = Collections.unmodifiableMap(newTableMap);
}
/**
* Update cache of rsgroups.
* Caller must be synchronized on 'this'.
* @param currentGroups Current list of Groups.
*/
private void updateCacheOfRSGroups(final Set<String> currentGroups) {
this.prevRSGroups.clear();
this.prevRSGroups.addAll(currentGroups);
}
// Called by getDefaultServers. Presume it has lock in place.
private List<ServerName> getOnlineRS() throws IOException {
if (masterServices != null) {
return masterServices.getServerManager().getOnlineServersList();
}
LOG.debug("Reading online RS from zookeeper");
List<ServerName> servers = new LinkedList<>();
try {
for (String el: ZKUtil.listChildrenNoWatch(watcher, watcher.znodePaths.rsZNode)) {
servers.add(ServerName.parseServerName(el));
}
} catch (KeeperException e) {
throw new IOException("Failed to retrieve server list from zookeeper", e);
}
return servers;
}
// Called by ServerEventsListenerThread. Presume it has lock on this manager when it runs.
private SortedSet<Address> getDefaultServers() throws IOException {
SortedSet<Address> defaultServers = Sets.newTreeSet();
for (ServerName serverName : getOnlineRS()) {
Address server =
Address.fromParts(serverName.getHostname(), serverName.getPort());
boolean found = false;
for(RSGroupInfo rsgi: listRSGroups()) {
if(!RSGroupInfo.DEFAULT_GROUP.equals(rsgi.getName()) &&
rsgi.containsServer(server)) {
found = true;
break;
}
}
if (!found) {
defaultServers.add(server);
}
}
return defaultServers;
}
// Called by ServerEventsListenerThread. Synchronize on this because redoing
// the rsGroupMap then writing it out.
private synchronized void updateDefaultServers(SortedSet<Address> servers) throws IOException {
RSGroupInfo info = rsGroupMap.get(RSGroupInfo.DEFAULT_GROUP);
RSGroupInfo newInfo = new RSGroupInfo(info.getName(), servers, info.getTables());
HashMap<String, RSGroupInfo> newGroupMap = Maps.newHashMap(rsGroupMap);
newGroupMap.put(newInfo.getName(), newInfo);
flushConfig(newGroupMap);
}
/**
* Calls {@link RSGroupInfoManagerImpl#updateDefaultServers(SortedSet)} to update list of known
* servers. Notifications about server changes are received by registering {@link ServerListener}.
* As a listener, we need to return immediately, so the real work of updating the servers is
* done asynchronously in this thread.
*/
private class ServerEventsListenerThread extends Thread implements ServerListener {
private final Log LOG = LogFactory.getLog(ServerEventsListenerThread.class);
private boolean changed = false;
ServerEventsListenerThread() {
setDaemon(true);
}
@Override
public void serverAdded(ServerName serverName) {
serverChanged();
}
@Override
public void serverRemoved(ServerName serverName) {
serverChanged();
}
private synchronized void serverChanged() {
changed = true;
this.notify();
}
@Override
public void run() {
setName(ServerEventsListenerThread.class.getName() + "-" + masterServices.getServerName());
SortedSet<Address> prevDefaultServers = new TreeSet<>();
while(isMasterRunning(masterServices)) {
try {
LOG.info("Updating default servers.");
SortedSet<Address> servers = RSGroupInfoManagerImpl.this.getDefaultServers();
if (!servers.equals(prevDefaultServers)) {
RSGroupInfoManagerImpl.this.updateDefaultServers(servers);
prevDefaultServers = servers;
LOG.info("Updated with servers: "+servers.size());
}
try {
synchronized (this) {
if(!changed) {
wait();
}
changed = false;
}
} catch (InterruptedException e) {
LOG.warn("Interrupted", e);
}
} catch (IOException e) {
LOG.warn("Failed to update default servers", e);
}
}
}
}
private class RSGroupStartupWorker extends Thread {
private final Log LOG = LogFactory.getLog(RSGroupStartupWorker.class);
private volatile boolean online = false;
RSGroupStartupWorker() {
setDaemon(true);
}
@Override
public void run() {
setName(RSGroupStartupWorker.class.getName() + "-" + masterServices.getServerName());
if (waitForGroupTableOnline()) {
LOG.info("GroupBasedLoadBalancer is now online");
}
}
private boolean waitForGroupTableOnline() {
final List<HRegionInfo> foundRegions = new LinkedList<>();
final List<HRegionInfo> assignedRegions = new LinkedList<>();
final AtomicBoolean found = new AtomicBoolean(false);
final TableStateManager tsm = masterServices.getTableStateManager();
boolean createSent = false;
while (!found.get() && isMasterRunning(masterServices)) {
foundRegions.clear();
assignedRegions.clear();
found.set(true);
try {
conn.getTable(TableName.NAMESPACE_TABLE_NAME);
conn.getTable(RSGROUP_TABLE_NAME);
boolean rootMetaFound =
masterServices.getMetaTableLocator().verifyMetaRegionLocation(
conn, masterServices.getZooKeeper(), 1);
final AtomicBoolean nsFound = new AtomicBoolean(false);
if (rootMetaFound) {
MetaTableAccessor.Visitor visitor = new DefaultVisitorBase() {
@Override
public boolean visitInternal(Result row) throws IOException {
HRegionInfo info = MetaTableAccessor.getHRegionInfo(row);
if (info != null) {
Cell serverCell =
row.getColumnLatestCell(HConstants.CATALOG_FAMILY,
HConstants.SERVER_QUALIFIER);
if (RSGROUP_TABLE_NAME.equals(info.getTable()) && serverCell != null) {
ServerName sn =
ServerName.parseVersionedServerName(CellUtil.cloneValue(serverCell));
if (sn == null) {
found.set(false);
} else if (tsm.isTableState(RSGROUP_TABLE_NAME, TableState.State.ENABLED)) {
try {
ClientProtos.ClientService.BlockingInterface rs = conn.getClient(sn);
ClientProtos.GetRequest request =
RequestConverter.buildGetRequest(info.getRegionName(),
new Get(ROW_KEY));
rs.get(null, request);
assignedRegions.add(info);
} catch(Exception ex) {
LOG.debug("Caught exception while verifying group region", ex);
}
}
foundRegions.add(info);
}
if (TableName.NAMESPACE_TABLE_NAME.equals(info.getTable())) {
Cell cell = row.getColumnLatestCell(HConstants.CATALOG_FAMILY,
HConstants.SERVER_QUALIFIER);
ServerName sn = null;
if(cell != null) {
sn = ServerName.parseVersionedServerName(CellUtil.cloneValue(cell));
}
if (tsm.isTableState(TableName.NAMESPACE_TABLE_NAME,
TableState.State.ENABLED)) {
try {
ClientProtos.ClientService.BlockingInterface rs = conn.getClient(sn);
ClientProtos.GetRequest request =
RequestConverter.buildGetRequest(info.getRegionName(),
new Get(ROW_KEY));
rs.get(null, request);
nsFound.set(true);
} catch(Exception ex) {
LOG.debug("Caught exception while verifying group region", ex);
}
}
}
}
return true;
}
};
MetaTableAccessor.fullScanRegions(conn, visitor);
// if no regions in meta then we have to create the table
if (foundRegions.size() < 1 && rootMetaFound && !createSent && nsFound.get()) {
createRSGroupTable();
createSent = true;
}
LOG.info("RSGroup table=" + RSGROUP_TABLE_NAME + " isOnline=" + found.get()
+ ", regionCount=" + foundRegions.size() + ", assignCount="
+ assignedRegions.size() + ", rootMetaFound=" + rootMetaFound);
found.set(found.get() && assignedRegions.size() == foundRegions.size()
&& foundRegions.size() > 0);
} else {
LOG.info("Waiting for catalog tables to come online");
found.set(false);
}
if (found.get()) {
LOG.debug("With group table online, refreshing cached information.");
RSGroupInfoManagerImpl.this.refresh(true);
online = true;
//flush any inconsistencies between ZK and HTable
RSGroupInfoManagerImpl.this.flushConfig();
}
} catch (RuntimeException e) {
throw e;
} catch(Exception e) {
found.set(false);
LOG.warn("Failed to perform check", e);
}
try {
Thread.sleep(100);
} catch (InterruptedException e) {
LOG.info("Sleep interrupted", e);
}
}
return found.get();
}
private void createRSGroupTable() throws IOException {
Long procId = masterServices.createSystemTable(RSGROUP_TABLE_DESC);
// wait for region to be online
int tries = 600;
while (!(masterServices.getMasterProcedureExecutor().isFinished(procId))
&& masterServices.getMasterProcedureExecutor().isRunning()
&& tries > 0) {
try {
Thread.sleep(100);
} catch (InterruptedException e) {
throw new IOException("Wait interrupted ", e);
}
tries--;
}
if(tries <= 0) {
throw new IOException("Failed to create group table in a given time.");
} else {
ProcedureInfo result = masterServices.getMasterProcedureExecutor().getResult(procId);
if (result != null && result.isFailed()) {
throw new IOException("Failed to create group table. " + result.getExceptionFullMessage());
}
}
}
public boolean isOnline() {
return online;
}
}
private static boolean isMasterRunning(MasterServices masterServices) {
return !masterServices.isAborted() && !masterServices.isStopped();
}
private void multiMutate(List<Mutation> mutations) throws IOException {
CoprocessorRpcChannel channel = rsGroupTable.coprocessorService(ROW_KEY);
MultiRowMutationProtos.MutateRowsRequest.Builder mmrBuilder
= MultiRowMutationProtos.MutateRowsRequest.newBuilder();
for (Mutation mutation : mutations) {
if (mutation instanceof Put) {
mmrBuilder.addMutationRequest(org.apache.hadoop.hbase.protobuf.ProtobufUtil.toMutation(
org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.MutationType.PUT,
mutation));
} else if (mutation instanceof Delete) {
mmrBuilder.addMutationRequest(
org.apache.hadoop.hbase.protobuf.ProtobufUtil.toMutation(
org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.
MutationType.DELETE, mutation));
} else {
throw new DoNotRetryIOException("multiMutate doesn't support "
+ mutation.getClass().getName());
}
}
MultiRowMutationProtos.MultiRowMutationService.BlockingInterface service =
MultiRowMutationProtos.MultiRowMutationService.newBlockingStub(channel);
try {
service.mutateRows(null, mmrBuilder.build());
} catch (ServiceException ex) {
ProtobufUtil.toIOException(ex);
}
}
private void checkGroupName(String groupName) throws ConstraintException {
if (!groupName.matches("[a-zA-Z0-9_]+")) {
throw new ConstraintException("RSGroup name should only contain alphanumeric characters");
}
}
}