/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.rsgroup;
import com.google.common.collect.Sets;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseCluster;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
// This tests that GroupBasedBalancer will use data in zk to do balancing during master startup.
// This does not test retain assignment.
// The test brings up 3 RS, creates a new RS group 'my_group', moves 1 RS to 'my_group', assigns
// 'hbase:rsgroup' to 'my_group', and kills the only server in that group so that the
// 'hbase:rsgroup' table isn't available. It then kills the active master and waits for the backup
// master to come online. In the new master, RSGroupInfoManagerImpl gets the data from zk and
// waits for the expected assignment with a timeout.
@Category(MediumTests.class)
public class TestRSGroupsOfflineMode {
  private static final Log LOG = LogFactory.getLog(TestRSGroupsOfflineMode.class);

  /** Upper bound, in milliseconds, applied to every wait in this class (five minutes). */
  private static final long WAIT_TIMEOUT = 60000 * 5;

  private static HBaseTestingUtility TEST_UTIL;
  private static HBaseCluster cluster;
  // Points at the master obtained in setUp(); re-assigned near the end of testOffline().
  private static HMaster master;
  private static Admin hbaseAdmin;

  @Rule
  public TestName name = new TestName();

  /**
   * Configures the RS-group balancer and coprocessor, starts the mini cluster via
   * {@code startMiniCluster(2, 3)}, switches the balancer off and blocks until the
   * group-based balancer reports itself online.
   */
  @BeforeClass
  public static void setUp() throws Exception {
    TEST_UTIL = new HBaseTestingUtility();
    TEST_UTIL.getConfiguration().set(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
        RSGroupBasedLoadBalancer.class.getName());
    TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
        RSGroupAdminEndpoint.class.getName());
    TEST_UTIL.getConfiguration().set(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, "1");
    TEST_UTIL.startMiniCluster(2, 3);

    cluster = TEST_UTIL.getHBaseCluster();
    master = miniCluster().getMaster();
    master.balanceSwitch(false);
    hbaseAdmin = TEST_UTIL.getAdmin();

    awaitOnlineBalancer();
  }

  /** Shuts the mini cluster down once all tests in the class have run. */
  @AfterClass
  public static void tearDown() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Moves one region server and the RS-group table into a new group, stops that server and the
   * active master, then checks on the newly active master that the group manager is offline yet
   * still answers group lookups, and that the user table (but not the RS-group table) ends up on
   * the surviving region server.
   */
  @Test
  public void testOffline() throws Exception, InterruptedException {
    // Table should be after group table name so it gets assigned later.
    final TableName failoverTable = TableName.valueOf(name.getMethodName());
    TEST_UTIL.createTable(failoverTable, Bytes.toBytes("f"));

    final HRegionServer killRS = miniCluster().getRegionServer(0);
    final HRegionServer groupRS = miniCluster().getRegionServer(1);
    final HRegionServer failoverRS = miniCluster().getRegionServer(2);

    String newGroup = "my_group";
    RSGroupAdmin groupAdmin = new RSGroupAdminClient(TEST_UTIL.getConnection());
    groupAdmin.addRSGroup(newGroup);

    moveRegionsListedOn(failoverRS);

    // Put the server into the new group, then wait until it hosts nothing and the cluster is
    // free of regions in transition.
    groupAdmin.moveServers(Sets.newHashSet(groupRS.getServerName().getAddress()), newGroup);
    awaitEmptyServerAndNoTransitions(groupRS);

    // Put the RS-group table into the new group and wait for it to land on the group's server.
    groupAdmin.moveTables(Sets.newHashSet(RSGroupInfoManager.RSGROUP_TABLE_NAME), newGroup);
    LOG.info("Waiting for move table...");
    awaitExactlyOneRegionOn(groupRS);

    groupRS.stop("die");
    // The original author flags a race condition at this point (presumably between the region
    // server stop above and the master stop below); nothing here guards against it.
    TEST_UTIL.getHBaseCluster().getMaster().stopMaster();
    LOG.info("Waiting for offline mode...");
    awaitNewActiveMaster();

    // Get groupInfoManager from the new active master.
    RSGroupAdminEndpoint endpoint = miniCluster().getMaster().getMasterCoprocessorHost()
        .findCoprocessors(RSGroupAdminEndpoint.class).get(0);
    RSGroupInfoManager groupMgr = endpoint.getGroupInfoManager();

    // The manager must be offline: that is the mode under test.
    assertFalse(groupMgr.isOnline());
    // Group affiliation is expected to come from ZK rather than from the table.
    assertEquals(newGroup,
        groupMgr.getRSGroupOfTable(RSGroupInfoManager.RSGROUP_TABLE_NAME));
    assertEquals(RSGroupInfo.DEFAULT_GROUP, groupMgr.getRSGroupOfTable(failoverTable));

    // Stop another region server so that failover happens for every table except the GROUP
    // table, whose group has no online RS left.
    killRS.stop("die");
    master = TEST_UTIL.getHBaseCluster().getMaster();
    LOG.info("Waiting for new table assignment...");
    awaitTableRegionOn(failoverRS, failoverTable);

    int groupTableRegions =
        failoverRS.getOnlineRegions(RSGroupInfoManager.RSGROUP_TABLE_NAME).size();
    Assert.assertEquals(0, groupTableRegions);

    // Need this for minicluster to shutdown cleanly.
    master.stopMaster();
  }

  /** Returns the shared cluster handle cast to the mini-cluster type. */
  private static MiniHBaseCluster miniCluster() {
    return (MiniHBaseCluster) cluster;
  }

  /**
   * Blocks until the master is initialized, the group-based balancer is online and at least
   * three region servers are listed as online.
   */
  private static void awaitOnlineBalancer() throws Exception {
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        if (!master.isInitialized()) {
          return false;
        }
        RSGroupBasedLoadBalancer balancer = (RSGroupBasedLoadBalancer) master.getLoadBalancer();
        if (!balancer.isOnline()) {
          return false;
        }
        return master.getServerManager().getOnlineServersList().size() >= 3;
      }
    });
  }

  /**
   * If the master's assignment map references {@code failoverRS}, issues a move for each region
   * online on it and then waits until the master holds a non-empty region load for that server.
   *
   * NOTE(review): the destination handed to move() is {@code failoverRS} itself, and the wait
   * checks for a non-empty load although the log line talks about unassignments. Confirm the
   * intended behaviour before changing either.
   */
  private static void moveRegionsListedOn(final HRegionServer failoverRS) throws Exception {
    boolean referenced = master.getAssignmentManager().getRegionStates().getRegionAssignments()
        .containsValue(failoverRS.getServerName());
    if (!referenced) {
      return;
    }
    for (HRegionInfo hosted : hbaseAdmin.getOnlineRegions(failoverRS.getServerName())) {
      byte[] encodedRegionName = hosted.getEncodedNameAsBytes();
      byte[] destination = Bytes.toBytes(failoverRS.getServerName().getServerName());
      hbaseAdmin.move(encodedRegionName, destination);
    }
    LOG.info("Waiting for region unassignments on failover RS...");
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        int reported = master.getServerManager().getLoad(failoverRS.getServerName())
            .getRegionsLoad().size();
        return reported > 0;
      }
    });
  }

  /** Blocks until {@code rs} hosts no regions and the master sees no regions in transition. */
  private static void awaitEmptyServerAndNoTransitions(final HRegionServer rs)
      throws Exception {
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        if (rs.getNumberOfOnlineRegions() >= 1) {
          return false;
        }
        int inTransition =
            master.getAssignmentManager().getRegionStates().getRegionsInTransition().size();
        return inTransition < 1;
      }
    });
  }

  /** Blocks until {@code rs} reports exactly one online region. */
  private static void awaitExactlyOneRegionOn(final HRegionServer rs) throws Exception {
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        int online = rs.getNumberOfOnlineRegions();
        return online == 1;
      }
    });
  }

  /**
   * Blocks until the cluster hands out a master that is active and initialized and whose server
   * manager lists at most three online servers. The master is looked up afresh for every check.
   */
  private static void awaitNewActiveMaster() throws Exception {
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        if (TEST_UTIL.getHBaseCluster().getMaster() == null) {
          return false;
        }
        if (!TEST_UTIL.getHBaseCluster().getMaster().isActiveMaster()) {
          return false;
        }
        if (!TEST_UTIL.getHBaseCluster().getMaster().isInitialized()) {
          return false;
        }
        int onlineServers =
            TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServers().size();
        return onlineServers <= 3;
      }
    });
  }

  /** Blocks until {@code rs} has at least one online region belonging to {@code table}. */
  private static void awaitTableRegionOn(final HRegionServer rs, final TableName table)
      throws Exception {
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        int hosted = rs.getOnlineRegions(table).size();
        return hosted >= 1;
      }
    });
  }
}