/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase; import java.io.IOException; import java.util.HashMap; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ClusterManager.ServiceType; import org.apache.hadoop.hbase.client.AdminProtocol; import org.apache.hadoop.hbase.client.ClientProtocol; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ServerInfo; import org.apache.hadoop.hbase.util.Threads; import com.google.common.collect.Sets; /** * Manages the interactions with an already deployed distributed cluster (as opposed to * a pseudo-distributed, or mini/local cluster). This is used by integration and system tests. */ @InterfaceAudience.Private public class DistributedHBaseCluster extends HBaseCluster { private HBaseAdmin admin; private ClusterManager clusterManager; public DistributedHBaseCluster(Configuration conf, ClusterManager clusterManager) throws IOException { super(conf); this.clusterManager = clusterManager; this.admin = new HBaseAdmin(conf); this.initialClusterStatus = getClusterStatus(); } public void setClusterManager(ClusterManager clusterManager) { this.clusterManager = clusterManager; } public ClusterManager getClusterManager() { return clusterManager; } /** * Returns a ClusterStatus for this HBase cluster * @throws IOException */ @Override public ClusterStatus getClusterStatus() throws IOException { return admin.getClusterStatus(); } @Override public ClusterStatus getInitialClusterStatus() throws IOException { return initialClusterStatus; } @Override public void close() throws IOException { if (this.admin != null) { admin.close(); } } @Override public AdminProtocol getAdminProtocol(ServerName serverName) throws IOException { return admin.getConnection().getAdmin(serverName.getHostname(), serverName.getPort()); } @Override public ClientProtocol getClientProtocol(ServerName serverName) throws IOException { return admin.getConnection().getClient(serverName.getHostname(), serverName.getPort()); } @Override public void startRegionServer(String hostname) throws IOException { LOG.info("Starting RS on: " + hostname); clusterManager.start(ServiceType.HBASE_REGIONSERVER, hostname); } @Override public void killRegionServer(ServerName serverName) throws IOException { LOG.info("Aborting RS: " + serverName.getServerName()); clusterManager.kill(ServiceType.HBASE_REGIONSERVER, serverName.getHostname()); } @Override public void stopRegionServer(ServerName serverName) throws IOException { LOG.info("Stopping RS: " + serverName.getServerName()); clusterManager.stop(ServiceType.HBASE_REGIONSERVER, serverName.getHostname()); } @Override public void waitForRegionServerToStop(ServerName serverName, long timeout) throws IOException { waitForServiceToStop(ServiceType.HBASE_REGIONSERVER, serverName, timeout); } private void waitForServiceToStop(ServiceType service, ServerName serverName, long timeout) throws IOException { LOG.info("Waiting service:" + service + " to stop: " + serverName.getServerName()); long start = System.currentTimeMillis(); while ((System.currentTimeMillis() - start) < timeout) { if (!clusterManager.isRunning(service, serverName.getHostname())) { return; } Threads.sleep(1000); } throw new IOException("did timeout waiting for service to stop:" + serverName); } @Override public MasterAdminProtocol getMasterAdmin() throws IOException { HConnection conn = HConnectionManager.getConnection(conf); return conn.getMasterAdmin(); } @Override public MasterMonitorProtocol getMasterMonitor() throws IOException { HConnection conn = HConnectionManager.getConnection(conf); return conn.getMasterMonitor(); } @Override public void startMaster(String hostname) throws IOException { LOG.info("Starting Master on: " + hostname); clusterManager.start(ServiceType.HBASE_MASTER, hostname); } @Override public void killMaster(ServerName serverName) throws IOException { LOG.info("Aborting Master: " + serverName.getServerName()); clusterManager.kill(ServiceType.HBASE_MASTER, serverName.getHostname()); } @Override public void stopMaster(ServerName serverName) throws IOException { LOG.info("Stopping Master: " + serverName.getServerName()); clusterManager.stop(ServiceType.HBASE_MASTER, serverName.getHostname()); } @Override public void waitForMasterToStop(ServerName serverName, long timeout) throws IOException { waitForServiceToStop(ServiceType.HBASE_MASTER, serverName, timeout); } @Override public boolean waitForActiveAndReadyMaster(long timeout) throws IOException { long start = System.currentTimeMillis(); while (System.currentTimeMillis() - start < timeout) { try { getMasterAdmin(); return true; } catch (MasterNotRunningException m) { LOG.warn("Master not started yet " + m); } catch (ZooKeeperConnectionException e) { LOG.warn("Failed to connect to ZK " + e); } Threads.sleep(1000); } return false; } @Override public ServerName getServerHoldingRegion(byte[] regionName) throws IOException { HConnection connection = admin.getConnection(); HRegionLocation regionLoc = connection.locateRegion(regionName); if (regionLoc == null) { return null; } AdminProtocol client = connection.getAdmin(regionLoc.getHostname(), regionLoc.getPort()); ServerInfo info = ProtobufUtil.getServerInfo(client); return ProtobufUtil.toServerName(info.getServerName()); } @Override public void waitUntilShutDown() { //Simply wait for a few seconds for now (after issuing serverManager.kill throw new RuntimeException("Not implemented yet"); } @Override public void shutdown() throws IOException { //not sure we want this throw new RuntimeException("Not implemented yet"); } @Override public boolean isDistributedCluster() { return true; } @Override public void restoreClusterStatus(ClusterStatus initial) throws IOException { //TODO: caution: not tested throughly ClusterStatus current = getClusterStatus(); //restore masters //check whether current master has changed if (!ServerName.isSameHostnameAndPort(initial.getMaster(), current.getMaster())) { //master has changed, we would like to undo this. //1. Kill the current backups //2. Stop current master //3. Start a master at the initial hostname (if not already running as backup) //4. Start backup masters boolean foundOldMaster = false; for (ServerName currentBackup : current.getBackupMasters()) { if (!ServerName.isSameHostnameAndPort(currentBackup, initial.getMaster())) { stopMaster(currentBackup); } else { foundOldMaster = true; } } stopMaster(current.getMaster()); if (foundOldMaster) { //if initial master is not running as a backup startMaster(initial.getMaster().getHostname()); } waitForActiveAndReadyMaster(); //wait so that active master takes over //start backup masters for (ServerName backup : initial.getBackupMasters()) { //these are not started in backup mode, but we should already have an active master startMaster(backup.getHostname()); } } else { //current master has not changed, match up backup masters HashMap<String, ServerName> initialBackups = new HashMap<String, ServerName>(); HashMap<String, ServerName> currentBackups = new HashMap<String, ServerName>(); for (ServerName server : initial.getBackupMasters()) { initialBackups.put(server.getHostname(), server); } for (ServerName server : current.getBackupMasters()) { currentBackups.put(server.getHostname(), server); } for (String hostname : Sets.difference(initialBackups.keySet(), currentBackups.keySet())) { startMaster(hostname); } for (String hostname : Sets.difference(currentBackups.keySet(), initialBackups.keySet())) { stopMaster(currentBackups.get(hostname)); } } //restore region servers HashMap<String, ServerName> initialServers = new HashMap<String, ServerName>(); HashMap<String, ServerName> currentServers = new HashMap<String, ServerName>(); for (ServerName server : initial.getServers()) { initialServers.put(server.getHostname(), server); } for (ServerName server : current.getServers()) { currentServers.put(server.getHostname(), server); } for (String hostname : Sets.difference(initialServers.keySet(), currentServers.keySet())) { startRegionServer(hostname); } for (String hostname : Sets.difference(currentServers.keySet(), initialServers.keySet())) { stopRegionServer(currentServers.get(hostname)); } } }