/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUTKey WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.locking;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ProcedureInfo;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.locking.LockServiceClient;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.MasterRpcServices;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException;
import org.apache.hadoop.hbase.shaded.protobuf.generated.LockServiceProtos.*;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.hamcrest.core.IsInstanceOf;
import org.hamcrest.core.StringStartsWith;
import org.junit.rules.TestRule;
import org.junit.experimental.categories.Category;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.rules.TestName;
import org.apache.hadoop.hbase.CategoryBasedTimeout;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@Category({MasterTests.class, SmallTests.class})
public class TestLockProcedure {
@Rule
public final TestRule timeout = CategoryBasedTimeout.builder().
withTimeout(this.getClass()).withLookingForStuckThread(true).build();
@Rule
public final ExpectedException exception = ExpectedException.none();
@Rule
public TestName testName = new TestName();
// crank this up if this test turns out to be flaky.
private static final int HEARTBEAT_TIMEOUT = 1000;
private static final int LOCAL_LOCKS_TIMEOUT = 2000;
private static final Log LOG = LogFactory.getLog(TestLockProcedure.class);
protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
private static MasterRpcServices masterRpcService;
private static ProcedureExecutor<MasterProcedureEnv> procExec;
private static String namespace = "namespace";
private static TableName tableName1 = TableName.valueOf(namespace, "table1");
private static List<HRegionInfo> tableRegions1;
private static TableName tableName2 = TableName.valueOf(namespace, "table2");
private static List<HRegionInfo> tableRegions2;
private String testMethodName;
private static void setupConf(Configuration conf) {
conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
conf.setBoolean("hbase.procedure.check.owner.set", false); // since rpc user will be null
conf.setInt(LockProcedure.REMOTE_LOCKS_TIMEOUT_MS_CONF, HEARTBEAT_TIMEOUT);
conf.setInt(LockProcedure.LOCAL_MASTER_LOCKS_TIMEOUT_MS_CONF, LOCAL_LOCKS_TIMEOUT);
}
@BeforeClass
public static void setupCluster() throws Exception {
setupConf(UTIL.getConfiguration());
UTIL.startMiniCluster(1);
UTIL.getAdmin().createNamespace(NamespaceDescriptor.create(namespace).build());
UTIL.createTable(tableName1, new byte[][]{"fam".getBytes()}, new byte[][] {"1".getBytes()});
UTIL.createTable(tableName2, new byte[][]{"fam".getBytes()}, new byte[][] {"1".getBytes()});
masterRpcService = UTIL.getHBaseCluster().getMaster().getMasterRpcServices();
procExec = UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
tableRegions1 = UTIL.getAdmin().getTableRegions(tableName1);
tableRegions2 = UTIL.getAdmin().getTableRegions(tableName2);
assert tableRegions1.size() > 0;
assert tableRegions2.size() > 0;
}
@AfterClass
public static void cleanupTest() throws Exception {
try {
UTIL.shutdownMiniCluster();
} catch (Exception e) {
LOG.warn("failure shutting down cluster", e);
}
}
@Before
public void setup() throws Exception {
ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
testMethodName = testName.getMethodName();
}
@After
public void tearDown() throws Exception {
ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
// Kill all running procedures.
for (ProcedureInfo procInfo : procExec.listProcedures()) {
Procedure proc = procExec.getProcedure(procInfo.getProcId());
if (proc == null) continue;
procExec.abort(procInfo.getProcId());
ProcedureTestingUtility.waitProcedure(procExec, proc);
}
assertEquals(0, procExec.getEnvironment().getProcedureScheduler().size());
}
private LockRequest getNamespaceLock(String namespace, String description) {
return LockServiceClient.buildLockRequest(LockType.EXCLUSIVE,
namespace, null, null, description, HConstants.NO_NONCE, HConstants.NO_NONCE);
}
private LockRequest getTableExclusiveLock(TableName tableName, String description) {
return LockServiceClient.buildLockRequest(LockType.EXCLUSIVE,
null, tableName, null, description, HConstants.NO_NONCE, HConstants.NO_NONCE);
}
private LockRequest getRegionLock(List<HRegionInfo> regionInfos, String description) {
return LockServiceClient.buildLockRequest(LockType.EXCLUSIVE,
null, null, regionInfos, description, HConstants.NO_NONCE, HConstants.NO_NONCE);
}
private void validateLockRequestException(LockRequest lockRequest, String message)
throws Exception {
exception.expect(ServiceException.class);
exception.expectCause(IsInstanceOf.instanceOf(DoNotRetryIOException.class));
exception.expectMessage(
StringStartsWith.startsWith("org.apache.hadoop.hbase.DoNotRetryIOException: "
+ "java.lang.IllegalArgumentException: " + message));
masterRpcService.requestLock(null, lockRequest);
}
@Test
public void testLockRequestValidationEmptyDescription() throws Exception {
validateLockRequestException(getNamespaceLock("", ""), "Empty description");
}
@Test
public void testLockRequestValidationEmptyNamespaceName() throws Exception {
validateLockRequestException(getNamespaceLock("", "desc"), "Empty namespace");
}
@Test
public void testLockRequestValidationRegionsFromDifferentTable() throws Exception {
List<HRegionInfo> regions = new ArrayList<>();
regions.addAll(tableRegions1);
regions.addAll(tableRegions2);
validateLockRequestException(getRegionLock(regions, "desc"),
"All regions should be from same table");
}
/**
* Returns immediately if the lock is acquired.
* @throws TimeoutException if lock couldn't be acquired.
*/
private boolean awaitForLocked(long procId, long timeoutInMs) throws Exception {
long deadline = System.currentTimeMillis() + timeoutInMs;
while (System.currentTimeMillis() < deadline) {
LockHeartbeatResponse response = masterRpcService.lockHeartbeat(null,
LockHeartbeatRequest.newBuilder().setProcId(procId).build());
if (response.getLockStatus() == LockHeartbeatResponse.LockStatus.LOCKED) {
assertEquals(response.getTimeoutMs(), HEARTBEAT_TIMEOUT);
LOG.debug(String.format("Proc id %s acquired lock.", procId));
return true;
}
Thread.sleep(100);
}
return false;
}
private long queueLock(LockRequest lockRequest) throws ServiceException {
LockResponse response = masterRpcService.requestLock(null, lockRequest);
return response.getProcId();
}
private void sendHeartbeatAndCheckLocked(long procId, boolean isLocked) throws ServiceException {
LockHeartbeatResponse response = masterRpcService.lockHeartbeat(null,
LockHeartbeatRequest.newBuilder().setProcId(procId).build());
if (isLocked) {
assertEquals(LockHeartbeatResponse.LockStatus.LOCKED, response.getLockStatus());
} else {
assertEquals(LockHeartbeatResponse.LockStatus.UNLOCKED, response.getLockStatus());
}
LOG.debug(String.format("Proc id %s : %s.", procId, response.getLockStatus()));
}
private void releaseLock(long procId) throws ServiceException {
masterRpcService.lockHeartbeat(null,
LockHeartbeatRequest.newBuilder().setProcId(procId).setKeepAlive(false).build());
}
@Test
public void testUpdateHeartbeatAndUnlockForTable() throws Exception {
LockRequest lock = getTableExclusiveLock(tableName1, testMethodName);
final long procId = queueLock(lock);
assertTrue(awaitForLocked(procId, 2000));
Thread.sleep(HEARTBEAT_TIMEOUT /2);
sendHeartbeatAndCheckLocked(procId, true);
Thread.sleep(HEARTBEAT_TIMEOUT /2);
sendHeartbeatAndCheckLocked(procId, true);
Thread.sleep(HEARTBEAT_TIMEOUT /2);
sendHeartbeatAndCheckLocked(procId, true);
releaseLock(procId);
sendHeartbeatAndCheckLocked(procId, false);
ProcedureTestingUtility.waitProcedure(procExec, procId);
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
}
@Test
public void testAbort() throws Exception {
LockRequest lock = getTableExclusiveLock(tableName1, testMethodName);
final long procId = queueLock(lock);
assertTrue(awaitForLocked(procId, 2000));
assertTrue(procExec.abort(procId));
sendHeartbeatAndCheckLocked(procId, false);
ProcedureTestingUtility.waitProcedure(procExec, procId);
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
}
@Test
public void testUpdateHeartbeatAndUnlockForNamespace() throws Exception {
LockRequest lock = getNamespaceLock(namespace, testMethodName);
final long procId = queueLock(lock);
assertTrue(awaitForLocked(procId, 2000));
Thread.sleep(HEARTBEAT_TIMEOUT /2);
sendHeartbeatAndCheckLocked(procId, true);
Thread.sleep(HEARTBEAT_TIMEOUT /2);
sendHeartbeatAndCheckLocked(procId, true);
Thread.sleep(HEARTBEAT_TIMEOUT /2);
sendHeartbeatAndCheckLocked(procId, true);
releaseLock(procId);
sendHeartbeatAndCheckLocked(procId, false);
ProcedureTestingUtility.waitProcedure(procExec, procId);
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
}
@Test
public void testTimeout() throws Exception {
LockRequest lock = getNamespaceLock(namespace, testMethodName);
final long procId = queueLock(lock);
assertTrue(awaitForLocked(procId, 2000));
Thread.sleep(HEARTBEAT_TIMEOUT / 2);
sendHeartbeatAndCheckLocked(procId, true);
Thread.sleep(HEARTBEAT_TIMEOUT / 2);
sendHeartbeatAndCheckLocked(procId, true);
Thread.sleep(2 * HEARTBEAT_TIMEOUT);
sendHeartbeatAndCheckLocked(procId, false);
ProcedureTestingUtility.waitProcedure(procExec, procId);
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
}
@Test
public void testMultipleLocks() throws Exception {
LockRequest nsLock = getNamespaceLock(namespace, testMethodName);
LockRequest tableLock1 = getTableExclusiveLock(tableName1, testMethodName);
LockRequest tableLock2 = getTableExclusiveLock(tableName2, testMethodName);
LockRequest regionsLock1 = getRegionLock(tableRegions1, testMethodName);
LockRequest regionsLock2 = getRegionLock(tableRegions2, testMethodName);
// Acquire namespace lock, then queue other locks.
long nsProcId = queueLock(nsLock);
assertTrue(awaitForLocked(nsProcId, 2000));
sendHeartbeatAndCheckLocked(nsProcId, true);
long table1ProcId = queueLock(tableLock1);
long table2ProcId = queueLock(tableLock2);
long regions1ProcId = queueLock(regionsLock1);
long regions2ProcId = queueLock(regionsLock2);
// Assert tables & region locks are waiting because of namespace lock.
Thread.sleep(HEARTBEAT_TIMEOUT / 2);
sendHeartbeatAndCheckLocked(nsProcId, true);
sendHeartbeatAndCheckLocked(table1ProcId, false);
sendHeartbeatAndCheckLocked(table2ProcId, false);
sendHeartbeatAndCheckLocked(regions1ProcId, false);
sendHeartbeatAndCheckLocked(regions2ProcId, false);
// Release namespace lock and assert tables locks are acquired but not region lock
releaseLock(nsProcId);
assertTrue(awaitForLocked(table1ProcId, 2000));
assertTrue(awaitForLocked(table2ProcId, 2000));
sendHeartbeatAndCheckLocked(regions1ProcId, false);
sendHeartbeatAndCheckLocked(regions2ProcId, false);
// Release table1 lock and assert region lock is acquired.
releaseLock(table1ProcId);
sendHeartbeatAndCheckLocked(table1ProcId, false);
assertTrue(awaitForLocked(regions1ProcId, 2000));
sendHeartbeatAndCheckLocked(table2ProcId, true);
sendHeartbeatAndCheckLocked(regions2ProcId, false);
// Release table2 lock and assert region lock is acquired.
releaseLock(table2ProcId);
sendHeartbeatAndCheckLocked(table2ProcId, false);
assertTrue(awaitForLocked(regions2ProcId, 2000));
sendHeartbeatAndCheckLocked(regions1ProcId, true);
sendHeartbeatAndCheckLocked(regions2ProcId, true);
// Release region locks.
releaseLock(regions1ProcId);
releaseLock(regions2ProcId);
sendHeartbeatAndCheckLocked(regions1ProcId, false);
sendHeartbeatAndCheckLocked(regions2ProcId, false);
ProcedureTestingUtility.waitAllProcedures(procExec);
ProcedureTestingUtility.assertProcNotFailed(procExec, nsProcId);
ProcedureTestingUtility.assertProcNotFailed(procExec, table1ProcId);
ProcedureTestingUtility.assertProcNotFailed(procExec, table2ProcId);
ProcedureTestingUtility.assertProcNotFailed(procExec, regions1ProcId);
ProcedureTestingUtility.assertProcNotFailed(procExec, regions2ProcId);
}
// Test latch is decreased in count when lock is acquired.
@Test
public void testLatch() throws Exception {
CountDownLatch latch = new CountDownLatch(1);
// MasterRpcServices don't set latch with LockProcedure, so create one and submit it directly.
LockProcedure lockProc = new LockProcedure(UTIL.getConfiguration(),
TableName.valueOf("table"), LockProcedure.LockType.EXCLUSIVE, "desc", latch);
procExec.submitProcedure(lockProc);
assertTrue(latch.await(2000, TimeUnit.MILLISECONDS));
releaseLock(lockProc.getProcId());
ProcedureTestingUtility.waitProcedure(procExec, lockProc.getProcId());
ProcedureTestingUtility.assertProcNotFailed(procExec, lockProc.getProcId());
}
// LockProcedures with latch are considered local locks.
@Test
public void testLocalLockTimeout() throws Exception {
CountDownLatch latch = new CountDownLatch(1);
// MasterRpcServices don't set latch with LockProcedure, so create one and submit it directly.
LockProcedure lockProc = new LockProcedure(UTIL.getConfiguration(),
TableName.valueOf("table"), LockProcedure.LockType.EXCLUSIVE, "desc", latch);
procExec.submitProcedure(lockProc);
assertTrue(awaitForLocked(lockProc.getProcId(), 2000));
Thread.sleep(LOCAL_LOCKS_TIMEOUT / 2);
assertTrue(lockProc.isLocked());
Thread.sleep(2 * LOCAL_LOCKS_TIMEOUT);
assertFalse(lockProc.isLocked());
releaseLock(lockProc.getProcId());
ProcedureTestingUtility.waitProcedure(procExec, lockProc.getProcId());
ProcedureTestingUtility.assertProcNotFailed(procExec, lockProc.getProcId());
}
private void testRemoteLockRecovery(LockRequest lock) throws Exception {
ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
final long procId = queueLock(lock);
assertTrue(awaitForLocked(procId, 2000));
// wait for proc Executor to die, then restart it and wait for Lock Procedure to get started.
ProcedureTestingUtility.waitProcedure(procExec, procId);
assertEquals(false, procExec.isRunning());
ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
ProcedureTestingUtility.restart(procExec);
while (!procExec.isStarted(procId)) {
Thread.sleep(250);
}
assertEquals(true, procExec.isRunning());
// After recovery, remote locks should reacquire locks and function normally.
assertTrue(awaitForLocked(procId, 2000));
Thread.sleep(HEARTBEAT_TIMEOUT/2);
sendHeartbeatAndCheckLocked(procId, true);
Thread.sleep(HEARTBEAT_TIMEOUT/2);
sendHeartbeatAndCheckLocked(procId, true);
Thread.sleep(2 * HEARTBEAT_TIMEOUT);
sendHeartbeatAndCheckLocked(procId, false);
ProcedureTestingUtility.waitProcedure(procExec, procId);
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
}
@Test(timeout = 20000)
public void testRemoteTableLockRecovery() throws Exception {
LockRequest lock = getTableExclusiveLock(tableName1, testMethodName);
testRemoteLockRecovery(lock);
}
@Test(timeout = 20000)
public void testRemoteNamespaceLockRecovery() throws Exception {
LockRequest lock = getNamespaceLock(namespace, testMethodName);
testRemoteLockRecovery(lock);
}
@Test(timeout = 20000)
public void testRemoteRegionLockRecovery() throws Exception {
LockRequest lock = getRegionLock(tableRegions1, testMethodName);
testRemoteLockRecovery(lock);
}
@Test (timeout = 20000)
public void testLocalMasterLockRecovery() throws Exception {
ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
CountDownLatch latch = new CountDownLatch(1);
LockProcedure lockProc = new LockProcedure(UTIL.getConfiguration(),
TableName.valueOf("table"), LockProcedure.LockType.EXCLUSIVE, "desc", latch);
procExec.submitProcedure(lockProc);
assertTrue(latch.await(2000, TimeUnit.MILLISECONDS));
// wait for proc Executor to die, then restart it and wait for Lock Procedure to get started.
ProcedureTestingUtility.waitProcedure(procExec, lockProc.getProcId());
assertEquals(false, procExec.isRunning());
ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
// remove zk lock node otherwise recovered lock will keep waiting on it.
ProcedureTestingUtility.restart(procExec);
while (!procExec.isStarted(lockProc.getProcId())) {
Thread.sleep(250);
}
assertEquals(true, procExec.isRunning());
LockProcedure proc = (LockProcedure) procExec.getProcedure(lockProc.getProcId());
assertTrue(proc == null || !proc.isLocked());
ProcedureTestingUtility.waitProcedure(procExec, lockProc.getProcId());
ProcedureTestingUtility.assertProcNotFailed(procExec, lockProc.getProcId());
}
}