/* * Licensed to the Apache Software Foundation (ASF) under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional information regarding * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. You may obtain a * copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package org.apache.geode.distributed.internal.deadlock; import org.apache.geode.test.dunit.ThreadUtils; import org.junit.experimental.categories.Category; import org.junit.Test; import static org.junit.Assert.*; import com.jayway.awaitility.Awaitility; import org.apache.geode.test.dunit.cache.internal.JUnit4CacheTestCase; import org.apache.geode.test.dunit.internal.JUnit4DistributedTestCase; import org.apache.geode.test.junit.categories.DistributedTest; import java.util.Collections; import java.util.HashSet; import java.util.LinkedList; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import org.junit.experimental.categories.Category; import org.apache.geode.cache.CacheFactory; import org.apache.geode.cache.execute.Function; import org.apache.geode.cache.execute.FunctionContext; import org.apache.geode.cache.execute.FunctionService; import org.apache.geode.cache.execute.ResultCollector; import org.apache.geode.cache30.CacheTestCase; import org.apache.geode.distributed.DistributedLockService; import org.apache.geode.distributed.DistributedSystemDisconnectedException; import org.apache.geode.distributed.LockServiceDestroyedException; import org.apache.geode.distributed.internal.membership.InternalDistributedMember; import org.apache.geode.test.dunit.Assert; import org.apache.geode.test.dunit.AsyncInvocation; import org.apache.geode.test.dunit.Host; import org.apache.geode.test.dunit.Invoke; import org.apache.geode.test.dunit.LogWriterUtils; import org.apache.geode.test.dunit.SerializableCallable; import org.apache.geode.test.dunit.SerializableRunnable; import org.apache.geode.test.dunit.VM; import org.apache.geode.test.junit.categories.FlakyTest; @Category(DistributedTest.class) public class GemFireDeadlockDetectorDUnitTest extends JUnit4CacheTestCase { private static final Set<Thread> stuckThreads = Collections.synchronizedSet(new HashSet<Thread>()); @Override public final void preTearDownCacheTestCase() throws Exception { disconnectAllFromDS(); } private void stopStuckThreads() { Invoke.invokeInEveryVM(new SerializableRunnable() { public void run() { for (Thread thread : stuckThreads) { thread.interrupt(); disconnectFromDS(); try { thread.join(30000); assertTrue(!thread.isAlive()); } catch (InterruptedException e) { Assert.fail("interrupted", e); } } } }); } public GemFireDeadlockDetectorDUnitTest() { super(); } @Test public void testNoDeadlock() { Host host = Host.getHost(0); VM vm0 = host.getVM(0); VM vm1 = host.getVM(1); // Make sure a deadlock from a previous test is cleared. disconnectAllFromDS(); createCache(vm0); createCache(vm1); getSystem(); GemFireDeadlockDetector detect = new GemFireDeadlockDetector(); assertEquals(null, detect.find().findCycle()); } private static final Lock lock = new ReentrantLock(); @Category(FlakyTest.class) // GEODE-516 & GEODE-576: async actions, thread sleeps, time sensitive @Test public void testDistributedDeadlockWithFunction() throws Throwable { Host host = Host.getHost(0); VM vm0 = host.getVM(0); VM vm1 = host.getVM(1); getSystem(); InternalDistributedMember member1 = createCache(vm0); final InternalDistributedMember member2 = createCache(vm1); // Have two threads lock locks on different members in different orders. // This thread locks the lock member1 first, then member2. AsyncInvocation async1 = lockTheLocks(vm0, member2); // This thread locks the lock member2 first, then member1. AsyncInvocation async2 = lockTheLocks(vm1, member1); Thread.sleep(5000); GemFireDeadlockDetector detect = new GemFireDeadlockDetector(); LinkedList<Dependency> deadlock = detect.find().findCycle(); LogWriterUtils.getLogWriter().info("Deadlock=" + DeadlockDetector.prettyFormat(deadlock)); assertEquals(8, deadlock.size()); stopStuckThreads(); async1.getResult(30000); async2.getResult(30000); } private AsyncInvocation lockTheLocks(VM vm0, final InternalDistributedMember member) { return vm0.invokeAsync(new SerializableRunnable() { public void run() { lock.lock(); try { Thread.sleep(1000); } catch (InterruptedException e) { Assert.fail("interrupted", e); } ResultCollector collector = FunctionService.onMember(member).execute(new TestFunction()); // wait the function to lock the lock on member. collector.getResult(); lock.unlock(); } }); } @Test public void testDistributedDeadlockWithDLock() throws Throwable { Host host = Host.getHost(0); VM vm0 = host.getVM(0); VM vm1 = host.getVM(1); getBlackboard().initBlackboard(); getSystem(); AsyncInvocation async1 = lockTheDLocks(vm0, "one", "two"); AsyncInvocation async2 = lockTheDLocks(vm1, "two", "one"); Awaitility.await("waiting for locks to be acquired").atMost(60, TimeUnit.SECONDS) .until(Awaitility.matches(() -> assertTrue(getBlackboard().isGateSignaled("one")))); Awaitility.await("waiting for locks to be acquired").atMost(60, TimeUnit.SECONDS) .until(Awaitility.matches(() -> assertTrue(getBlackboard().isGateSignaled("two")))); GemFireDeadlockDetector detect = new GemFireDeadlockDetector(); LinkedList<Dependency> deadlock = detect.find().findCycle(); assertTrue(deadlock != null); System.out.println("Deadlock=" + DeadlockDetector.prettyFormat(deadlock)); assertEquals(4, deadlock.size()); disconnectAllFromDS(); try { waitForAsyncInvocation(async1, 45, TimeUnit.SECONDS); } finally { waitForAsyncInvocation(async2, 45, TimeUnit.SECONDS); } } private void waitForAsyncInvocation(AsyncInvocation async1, int howLong, TimeUnit units) throws java.util.concurrent.ExecutionException, InterruptedException { try { async1.get(howLong, units); } catch (TimeoutException e) { fail("test is leaving behind an async invocation thread"); } } private AsyncInvocation lockTheDLocks(VM vm, final String first, final String second) { return vm.invokeAsync(new SerializableRunnable() { public void run() { try { getCache(); DistributedLockService dls = DistributedLockService.create("deadlock_test", getSystem()); dls.lock(first, 10 * 1000, -1); getBlackboard().signalGate(first); getBlackboard().waitForGate(second, 30, TimeUnit.SECONDS); // this will block since the other DUnit VM will have locked the second key try { dls.lock(second, 10 * 1000, -1); } catch (LockServiceDestroyedException expected) { // this is ok, the test is terminating } catch (DistributedSystemDisconnectedException expected) { // this is ok, the test is terminating } } catch (Exception e) { throw new RuntimeException("test failed", e); } } }); } private InternalDistributedMember createCache(VM vm) { return (InternalDistributedMember) vm.invoke(new SerializableCallable() { public Object call() { getCache(); return getSystem().getDistributedMember(); } }); } private static class TestFunction implements Function { private static final int LOCK_WAIT_TIME = 1000; public boolean hasResult() { return true; } public void execute(FunctionContext context) { try { stuckThreads.add(Thread.currentThread()); lock.tryLock(LOCK_WAIT_TIME, TimeUnit.SECONDS); } catch (InterruptedException e) { // ingore } stuckThreads.remove(Thread.currentThread()); context.getResultSender().lastResult(null); } public String getId() { return getClass().getCanonicalName(); } public boolean optimizeForWrite() { return false; } public boolean isHA() { return false; } } }