package org.infinispan.statetransfer;
import static org.infinispan.distribution.DistributionTestHelper.isFirstOwner;
import static org.infinispan.util.BlockingLocalTopologyManager.LatchType;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertTrue;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Consumer;
import java.util.function.Function;
import org.infinispan.Cache;
import org.infinispan.commands.read.GetCacheEntryCommand;
import org.infinispan.commands.remote.ClusteredGetCommand;
import org.infinispan.configuration.cache.CacheMode;
import org.infinispan.configuration.cache.ConfigurationBuilder;
import org.infinispan.configuration.cache.InterceptorConfiguration;
import org.infinispan.context.Flag;
import org.infinispan.context.InvocationContext;
import org.infinispan.distribution.BlockingInterceptor;
import org.infinispan.factories.annotations.Inject;
import org.infinispan.interceptors.BaseCustomAsyncInterceptor;
import org.infinispan.interceptors.DDAsyncInterceptor;
import org.infinispan.manager.CacheContainer;
import org.infinispan.manager.EmbeddedCacheManager;
import org.infinispan.remoting.responses.Response;
import org.infinispan.remoting.responses.UnsureResponse;
import org.infinispan.remoting.rpc.RpcManager;
import org.infinispan.remoting.transport.Address;
import org.infinispan.test.MultipleCacheManagersTest;
import org.infinispan.test.TestingUtil;
import org.infinispan.test.fwk.CleanupAfterMethod;
import org.infinispan.tx.dld.ControlledRpcManager;
import org.infinispan.util.BaseControlledConsistentHashFactory;
import org.infinispan.util.BlockingLocalTopologyManager;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.Test;
/**
* Test multiple possible situations of interleaving between a remote get and state transfer.
*
* @author Pedro Ruivo
* @since 6.0
*/
@Test(groups = "functional", testName = "statetransfer.RemoteGetDuringStateTransferTest")
@CleanupAfterMethod
public class RemoteGetDuringStateTransferTest extends MultipleCacheManagersTest {
// Every BlockingLocalTopologyManager installed by a test; unblockAll() releases and clears them after each method.
private final List<BlockingLocalTopologyManager> topologyManagerList =
      Collections.synchronizedList(new ArrayList<>(4));
// Every ControlledRpcManager installed by a test; unblockAll() releases and clears them after each method.
private final List<ControlledRpcManager> rpcManagerList =
      Collections.synchronizedList(new ArrayList<>(4));
/*
Summary
T0 initial topology (NO_REBALANCE)
T1 state transfer started (READ_OLD_WRITE_ALL)
T2 state transfer finished but rebalance not complete (READ_ALL_WRITE_ALL)
T3 read new, write all topology (READ_NEW_WRITE_ALL)
T4 rebalance completed (NO_REBALANCE)
| sc | first request | process request 1 | receive response 1 | retry | process request 2 | receive response 2 |
| 010 | T0 | 1:T1 | T0 | N1 | | |
| 011 | T0 | 1:T1 | T1/T2/T3/T4 | N1 | | |
| [2] | T0 | 1:T2 | T0 | | | |
| [1] | T0 | 1:T2 | T1/T2/T3/T4 | N1 | | |
| [2] | T0 | 1:T3 | T0/T1 | | | |
| [2] | T0 | 1:T3 | T2 | Y* | 2:T0/T1 | |
| [4] | T0 | 1:T3 | T2 | Y* | 2:T2 | T0/T1 |
| 032_22 | T0 | 1:T3 | T2 | Y* | 2:T2 | T2/T3/T4 |
| 032_32 | T0 | 1:T3 | T2 | Y* | 2:T3/T4 | T2/T3/T4 |
| [2] | T0 | 1:T3 | T3 | Y | 2:T0/T1 | |
| [4] | T0 | 1:T3 | T3 | Y | 2:T2 | T0/T1/T2 |
| 033_23 | T0 | 1:T3 | T3 | Y | 2:T2 | T3/T4 |
| [4] | T0 | 1:T3 | T3 | Y | 2:T3/T4 | T0/T1/T2 |
| 033_33 | T0 | 1:T3 | T3 | Y | 2:T3/T4 | T3/T4 |
| [2] | T0 | 1:T3 | T4 | Y | 2:T0/T1/T2 | |
| [4] | T0 | 1:T3 | T4 | Y | 2:T3/T4 | T0/T1/T2/T3 |
| [1] | T0 | 1:T3 | T4 | Y | 2:T3/T4 | T4 |
| [2] | T0 | 1:T4 | T0/T1/T2 | | | |
| [4] | T0 | 1:T4 | T3/T4 | Y | 2:T0/T1/T2 | |
| [2] | T0 | 1:T4 | T3/T4 | Y | 2:T3/T4 | T0/T1/T2 |
| [1] | T0 | 1:T4 | T3/T4 | Y | 2:T3/T4 | T3/T4 |
| [4] | T1 | 1:T0 | T0 | | | |
| 101 | T1 | 1:T0 | T1/T2/T3/T4 | N1 | | |
| [4] | T1 | 1:T1 | T0 | | | |
| 111 | T1 | 1:T1 | T1/T2/T3/T4 | | | |
| [4] | T1 | 1:T2 | T0 | | | |
| [1] | T1 | 1:T2 | T1/T2/T3/T4 | N1 | | |
| [2] | T1 | 1:T3 | T2 | Y* | 2:T0/T1 | |
| [4] | T1 | 1:T3 | T2 | Y* | 2:T2 | T0/T1 |
| 132_22 | T1 | 1:T3 | T2 | Y* | 2:T2 | T2/T3/T4 |
| [4] | T1 | 1:T3 | T2 | Y* | 2:T3 | T0/T1 |
| 132_32 | T1 | 1:T3 | T2 | Y* | 2:T3 | T2/T3/T4 |
| [2] | T1 | 1:T3 | T2 | Y* | 2:T4 | T0/T1/T2 |
| [1] | T1 | 1:T3 | T2 | Y* | 2:T4 | T3/T4 |
| [2] | T1 | 1:T3 | T3 | Y | 2:T0/T1 | |
| [4] | T1 | 1:T3 | T3 | Y | 2:T2 | T0/T1/T2 |
| 133_23 | T1 | 1:T3 | T3 | Y | 2:T2 | T3/T4 |
| [4] | T1 | 1:T3 | T3 | Y | 2:T3 | T0/T1/T2 |
| 133_33 | T1 | 1:T3 | T3 | Y | 2:T3 | T3/T4 |
| [4] | T1 | 1:T3 | T3 | Y | 2:T4 | T0/T1/T2 |
| [1] | T1 | 1:T3 | T3 | Y | 2:T4 | T3/T4 |
| [2] | T1 | 1:T3 | T4 | Y | 2:T0/T1/T2 | |
| [1] | T1 | 1:T3 | T4 | Y | 2:T3/T4 | T4 |
| [4] | T1 | 1:T4 | T0/T1/T2 | | | |
| [2] | T1 | 1:T4 | T3/T4 | Y | 2:T0/T1/T2 | |
| [4] | T1 | 1:T4 | T3/T4 | Y | 2:T3/T4 | T0/T1/T2 |
| [1] | T1 | 1:T4 | T3/T4 | Y | 2:T3/T4 | T3/T4 |
| [2] | T2 | 1:T0 | | | | |
| [4] | T2 | 1: *, 2: * | T0/T1 | | | |
| 2112 | T2 | 1:T1, 2: T1 | T2/T3/T4 | N1 | | |
| 2122 | T2 | 1:T1, 2: T2 | T2/T3/T4 | N1+2 | | |
| 2132 | T2 | 1:T1, 2: T3/T4 | T2/T3/T4 | N1+2 | | |
| 2212 | T2 | 1:T2, 2: T1 | T2/T3/T4 | N1 | | |
| 2222 | T2 | 1:T2, 2: T2 | T2/T3/T4 | N1+2 | | |
| 2232 | T2 | 1:T2, 2: T3/T4 | T2/T3/T4 | N1+2 | | |
| 2312_22| T2 | 1:T3/T4, 2: T1 | T2/T3/T4 | Y | 2: T2 | T2/T3/T4 |
| 2312_32| T2 | 1:T3/T4, 2: T1 | T2/T3/T4 | Y | 2: T3/T4 | T2/T3/T4 |
| 2322 | T2 | 1:T3/T4, 2: T2 | T2/T3/T4 | N2 | | |
| 2332 | T2 | 1:T3/T4, 2: T3/T4 | T2/T3/T4 | N2 | | |
| [2] | T3 | 2: T0/T1 | | | | |
| [4] | T3 | 2: T2 | T0/T1/T2 | | | |
| 323 | T3 | 2: T2 | T3/T4 | N2 | | |
| [4] | T3 | 2: T3/T4 | T0/T1/T2 | | | |
| 333 | T3 | 2: T3/T4 | T3/T4 | N2 | | |
| [2] | T4 | 2: T0/T1/T2 | | | | |
| [4] | T4 | 2:T3 | T0/T1/T2/T3 | N2 | | |
| 434 | T4 | 2:T3 | T4 | N2 | | |
*) The retry will go to both node 1 and 2 but 1 in T3 will respond with UnsureResponse
[1] too similar to the previous scenario
[2] impossible because the topologies can't differ by more than 1 at the same time
[4] impossible, first response was received in later topology than second response
N1/N2/N1+2 We won't do a retry because we got successful response from node 1/2/both 1 and 2
A note for 2312_x2: while the two nodes cannot have topologies 3 and 1 at the same time, the two reads can arrive
at different times there.
*/
@AfterMethod(alwaysRun = true)
public final void unblockAll() {
   // Every controlled component is tracked in the two lists below. When a test fails midway, whatever is
   // still blocked has to be released here; otherwise stopping the cache managers takes a long time.
   for (BlockingLocalTopologyManager trackedTopologyManager : topologyManagerList) {
      trackedTopologyManager.stopBlockingAll();
   }
   topologyManagerList.clear();

   for (ControlledRpcManager trackedRpcManager : rpcManagerList) {
      trackedRpcManager.stopBlocking();
   }
   rpcManagerList.clear();
}
/**
* ISPN-3315: In this scenario, a remote get is triggered and the reply is received in a stable state. The old owner
* receives the request after the rebalance_start command.
* <p>
* Summary table row 010: request sent in T0, processed by node 1 in T1, response received in T0, no retry (N1).
*/
public void testScenario_010() throws Exception {
assertClusterSize("Wrong cluster size.", 2);
final Object key = "key_010";
// cache(1) must be the first owner of the key; store the value the remote get is expected to return
ownerCheckAndInit(cache(1), key, "v");
final ControlledRpcManager rpcManager0 = replaceRpcManager(cache(0));
final BlockingLocalTopologyManager topologyManager0 = replaceTopologyManager(manager(0));
final int currentTopologyId = currentTopologyId(cache(0));
// hold the ClusteredGetCommand on the requestor, and block the rebalance on the requestor so that it is still
// in currentTopologyId when the response arrives (asserted below)
rpcManager0.blockBefore(ClusteredGetCommand.class);
topologyManager0.startBlocking(LatchType.REBALANCE);
// the read on the requestor must neither carry the COMMAND_RETRY flag nor fail with OutdatedTopologyException
cache(0).getAdvancedCache().getAsyncInterceptorChain()
.addInterceptorAfter(new AssertNoRetryInterceptor(), StateTransferInterceptor.class);
//remote get is processed in current topology id.
Future<Object> remoteGetFuture = remoteGet(cache(0), key);
rpcManager0.waitForCommandToBlock();
// the joiner must never execute the read: FailReadsInterceptor records the hit and throws
FailReadsInterceptor fri = new FailReadsInterceptor();
NewNode joiner = addNode(null, cb -> cb.customInterceptors().addInterceptor()
.position(InterceptorConfiguration.Position.FIRST).interceptor(fri));
topologyManager0.waitToBlock(LatchType.REBALANCE);
//wait until the rebalance_start arrives in old owner and let the remote get go
awaitForTopology(currentTopologyId + 1, cache(1));
rpcManager0.stopBlocking();
//check the value returned and make sure that the requestor is still in currentTopologyId (consistency check)
assertEquals("Wrong value from remote get.", "v", remoteGetFuture.get());
fri.assertNotHit();
assertTopologyId(currentTopologyId, cache(0));
// release the rebalance on the requestor and wait for the forked waitForClusterToForm() to finish
topologyManager0.stopBlocking(LatchType.REBALANCE);
joiner.joinerFuture.get();
}
/**
* ISPN-3315: similar to scenario 010, the remote get is triggered in a stable state but the reply is received after
* the rebalance_start command. As in scenario 010, the owner receives the request after the rebalance_start command.
* <p>
* Summary table row 011: request sent in T0, processed by node 1 in T1, response received in T1 or later, no retry
* (N1).
*/
public void testScenario_011() throws Exception {
assertClusterSize("Wrong cluster size.", 2);
final Object key = "key_011";
// cache(1) must be the first owner of the key; store the value the remote get is expected to return
ownerCheckAndInit(cache(1), key, "v");
final ControlledRpcManager rpcManager0 = replaceRpcManager(cache(0));
final BlockingLocalTopologyManager topologyManager0 = replaceTopologyManager(manager(0));
final int currentTopologyId = currentTopologyId(cache(0));
// hold the ClusteredGetCommand on the requestor; unlike scenario 010 the requestor may install the rebalance
// topology, only the CONFIRM_REBALANCE_PHASE latch is blocked
rpcManager0.blockBefore(ClusteredGetCommand.class);
topologyManager0.startBlocking(LatchType.CONFIRM_REBALANCE_PHASE);
// the read on the requestor must neither carry the COMMAND_RETRY flag nor fail with OutdatedTopologyException
cache(0).getAdvancedCache().getAsyncInterceptorChain()
.addInterceptorAfter(new AssertNoRetryInterceptor(), StateTransferInterceptor.class);
//the remote get is triggered in the current topology id.
Future<Object> remoteGetFuture = remoteGet(cache(0), key);
rpcManager0.waitForCommandToBlock();
// the joiner must never execute the read: FailReadsInterceptor records the hit and throws
FailReadsInterceptor fri = new FailReadsInterceptor();
NewNode joiner = addNode(null, cb -> cb.customInterceptors().addInterceptor()
.position(InterceptorConfiguration.Position.FIRST).interceptor(fri));
topologyManager0.waitToBlock(LatchType.CONFIRM_REBALANCE_PHASE);
//wait until the rebalance start arrives in old owner and in the requestor. then let the remote get go.
awaitForTopology(currentTopologyId + 1, cache(1));
awaitForTopology(currentTopologyId + 1, cache(0));
rpcManager0.stopBlocking();
//check the value returned and make sure that the requestor is in the correct topology id (consistency check)
assertEquals("Wrong value from remote get.", "v", remoteGetFuture.get());
fri.assertNotHit();
assertTopologyId(currentTopologyId + 1, cache(1));
assertTopologyId(currentTopologyId + 1, cache(0));
// release the blocked latch and wait for the forked waitForClusterToForm() to finish
topologyManager0.stopBlocking(LatchType.CONFIRM_REBALANCE_PHASE);
joiner.joinerFuture.get();
}
/**
 * Summary table row 101: request sent in T1, processed by node 1 while it is still in T0, response received in T1
 * or later, no retry (N1).
 */
public void testScenario_101() throws Exception {
   final int topologyOnNode1 = 0;
   testScenario_1x1(topologyOnNode1);
}
/**
 * Summary table row 111: request sent in T1, processed by node 1 in T1, response received in T1 or later.
 */
public void testScenario_111() throws Exception {
   final int topologyOnNode1 = 1;
   testScenario_1x1(topologyOnNode1);
}
/**
* Shared body of scenarios 101 and 111: the remote get is sent after the requestor installed the rebalance-start
* topology (T1) and before the rebalance is confirmed; the read must not be retried and must never reach the joiner.
*
* @param topologyOnNode1 topology of node 1, relative to the initial topology, when it processes the request;
* with 0 the rebalance is blocked on node 1 so that it stays in the initial topology
*/
protected void testScenario_1x1(int topologyOnNode1) throws Exception {
assertClusterSize("Wrong cluster size.", 2);
final Object key = String.format("key_1%d1", topologyOnNode1);
// cache(1) must be the first owner of the key; store the value the remote get is expected to return
ownerCheckAndInit(cache(1), key, "v");
final ControlledRpcManager rpcManager0 = replaceRpcManager(cache(0));
final BlockingLocalTopologyManager topologyManager0 = replaceTopologyManager(manager(0));
final BlockingLocalTopologyManager topologyManager1 = replaceTopologyManager(manager(1));
final int currentTopologyId = currentTopologyId(cache(0));
rpcManager0.blockBefore(ClusteredGetCommand.class);
topologyManager0.startBlocking(LatchType.CONFIRM_REBALANCE_PHASE);
if (topologyOnNode1 == 0) {
// scenario 101 only: node 1 must still be in the initial topology when the request arrives
topologyManager1.startBlocking(LatchType.REBALANCE);
}
// the read on the requestor must neither carry the COMMAND_RETRY flag nor fail with OutdatedTopologyException
cache(0).getAdvancedCache().getAsyncInterceptorChain()
.addInterceptorAfter(new AssertNoRetryInterceptor(), StateTransferInterceptor.class);
// the joiner must never execute the read: FailReadsInterceptor records the hit and throws
FailReadsInterceptor fri = new FailReadsInterceptor();
NewNode joiner = addNode(null, cb -> cb.customInterceptors().addInterceptor()
.position(InterceptorConfiguration.Position.FIRST).interceptor(fri));
//consistency check
awaitForTopology(currentTopologyId + 1, cache(0));
//the remote get is triggered after the rebalance_start and before the confirm_rebalance.
Future<Object> remoteGetFuture = remoteGet(cache(0), key);
rpcManager0.waitForCommandToBlock();
//wait until the rebalance_start arrives in old owner
awaitForTopology(currentTopologyId + topologyOnNode1, cache(1));
rpcManager0.stopBlocking();
//check the value returned and make sure that the requestor is in the correct topology id (consistency check)
assertEquals("Wrong value from remote get.", "v", remoteGetFuture.get());
fri.assertNotHit();
assertTopologyId(currentTopologyId + 1, cache(0));
// NOTE(review): REBALANCE blocking on node 1 was only started when topologyOnNode1 == 0; presumably
// stopBlocking() is harmless otherwise - confirm against BlockingLocalTopologyManager
topologyManager1.stopBlocking(LatchType.REBALANCE);
topologyManager0.stopBlocking(LatchType.CONFIRM_REBALANCE_PHASE);
joiner.joinerFuture.get();
}
/**
 * Summary table row 032_22: request sent in T0, node 1 handles it in T3, response received in T2, the retry is
 * processed by node 2 in T2.
 */
public void testScenario_032_22() throws Exception {
   final int topologyOnNode0 = 2;
   final int topologyOnNode2 = 2;
   testScenario_03x_yx(topologyOnNode0, topologyOnNode2);
}
/**
 * Summary table row 032_32: request sent in T0, node 1 handles it in T3, response received in T2, the retry is
 * processed by node 2 in T3/T4.
 */
public void testScenario_032_32() throws Exception {
   final int topologyOnNode0 = 2;
   final int topologyOnNode2 = 3;
   testScenario_03x_yx(topologyOnNode0, topologyOnNode2);
}
/**
 * Summary table row 033_23: request sent in T0, node 1 handles it in T3, response received in T3, the retry is
 * processed by node 2 in T2.
 */
public void testScenario_033_23() throws Exception {
   final int topologyOnNode0 = 3;
   final int topologyOnNode2 = 2;
   testScenario_03x_yx(topologyOnNode0, topologyOnNode2);
}
/**
 * Summary table row 033_33: request sent in T0, node 1 handles it in T3, response received in T3, the retry is
 * processed by node 2 in T3/T4.
 */
public void testScenario_033_33() throws Exception {
   final int topologyOnNode0 = 3;
   final int topologyOnNode2 = 3;
   testScenario_03x_yx(topologyOnNode0, topologyOnNode2);
}
/**
* Shared body of scenarios 032_22, 032_32, 033_23 and 033_33: the remote get is sent in the initial topology (T0),
* the old owner has already reached T3 when the get is released, and the requestor receives the response in T2 or
* T3. According to the summary table the read is then retried and served by the joiner.
*
* @param topologyOnNode0 topology of the requestor, relative to the initial one, when the get is released and the
* response is received; for values above 2 the requestor is allowed to advance to T3 first
* @param topologyOnNode2 topology, relative to the initial one, that the joiner's WaitForTopologyInterceptor
* requires before it lets the read proceed
*/
protected void testScenario_03x_yx(int topologyOnNode0, int topologyOnNode2) throws Exception {
assertClusterSize("Wrong cluster size.", 2);
final Object key = String.format("key_03%d_%d%d", topologyOnNode0, topologyOnNode2, topologyOnNode0);
// cache(1) must be the first owner of the key; store the value the remote get is expected to return
ownerCheckAndInit(cache(1), key, "v");
final ControlledRpcManager rpcManager0 = replaceRpcManager(cache(0));
final BlockingLocalTopologyManager topologyManager0 = replaceTopologyManager(manager(0));
final int currentTopologyId = currentTopologyId(cache(0));
rpcManager0.blockBefore(ClusteredGetCommand.class);
// allow read_old -> read_all but not read_all -> read_new
topologyManager0.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
//consistency check. the remote get is triggered
assertTopologyId(currentTopologyId, cache(0));
Future<Object> remoteGetFuture = remoteGet(cache(0), key);
rpcManager0.waitForCommandToBlock();
// the joiner starts with CONSISTENT_HASH_UPDATE blocked; its reads wait for topology T0 + topologyOnNode2
NewNode joiner = addNode(LatchType.CONSISTENT_HASH_UPDATE, cb -> cb.customInterceptors().addInterceptor()
.position(InterceptorConfiguration.Position.FIRST)
.interceptor(new WaitForTopologyInterceptor(currentTopologyId + topologyOnNode2)));
joiner.localTopologyManager.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
if (topologyOnNode2 > 2) {
// one more consistent hash update is let through on the joiner so that it can reach T3
joiner.localTopologyManager.waitToBlockAndUnblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
}
//wait until the consistent_hash_update arrives in old owner
awaitForTopology(currentTopologyId + 3, cache(1));
awaitForTopology(currentTopologyId + 2, cache(0));
if (topologyOnNode0 > 2) {
// let the requestor advance to T3 before the blocked get is released
topologyManager0.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
awaitForTopology(currentTopologyId + 3, cache(0));
}
rpcManager0.stopBlocking();
//check the value returned and make sure that the requestor is in the correct topology id (consistency check)
assertEquals("Wrong value from remote get.", "v", remoteGetFuture.get());
assertTopologyId(currentTopologyId + topologyOnNode0, cache(0));
// release everything that is still blocked and wait for the forked waitForClusterToForm() to finish
topologyManager0.stopBlocking(LatchType.CONSISTENT_HASH_UPDATE);
joiner.localTopologyManager.stopBlocking(LatchType.CONSISTENT_HASH_UPDATE);
joiner.joinerFuture.get();
}
/**
 * Summary table row 132_22: request sent in T1, node 1 handles it in T3, response received in T2, the retry is
 * processed by node 2 in T2.
 */
public void testScenario_132_22() throws Exception {
   final int topologyOnNode0 = 2;
   final int topologyOnNode2 = 2;
   testScenario_13x_yx(topologyOnNode0, topologyOnNode2);
}
/**
 * Summary table row 132_32: request sent in T1, node 1 handles it in T3, response received in T2, the retry is
 * processed by node 2 in T3.
 */
public void testScenario_132_32() throws Exception {
   final int topologyOnNode0 = 2;
   final int topologyOnNode2 = 3;
   testScenario_13x_yx(topologyOnNode0, topologyOnNode2);
}
/**
 * Summary table row 133_23: request sent in T1, node 1 handles it in T3, response received in T3, the retry is
 * processed by node 2 in T2.
 */
public void testScenario_133_23() throws Exception {
   final int topologyOnNode0 = 3;
   final int topologyOnNode2 = 2;
   testScenario_13x_yx(topologyOnNode0, topologyOnNode2);
}
/**
 * Summary table row 133_33: request sent in T1, node 1 handles it in T3, response received in T3, the retry is
 * processed by node 2 in T3.
 */
public void testScenario_133_33() throws Exception {
   final int topologyOnNode0 = 3;
   final int topologyOnNode2 = 3;
   testScenario_13x_yx(topologyOnNode0, topologyOnNode2);
}
/**
* Shared body of scenarios 132_22, 132_32, 133_23 and 133_33: like the 03x scenarios, except that the remote get is
* sent only after the requestor installed the first rebalance topology (T1). The old owner has reached T3 when the
* get is released and the requestor receives the response in T2 or T3.
*
* @param topologyOnNode0 topology of the requestor, relative to the initial one, when the get is released and the
* response is received; for values above 2 the requestor is allowed to advance to T3 first
* @param topologyOnNode2 topology, relative to the initial one, that the joiner's WaitForTopologyInterceptor
* requires before it lets the read proceed
*/
protected void testScenario_13x_yx(int topologyOnNode0, int topologyOnNode2) throws Exception {
assertClusterSize("Wrong cluster size.", 2);
final Object key = String.format("key_13%d_%d%d", topologyOnNode0, topologyOnNode2, topologyOnNode0);
// cache(1) must be the first owner of the key; store the value the remote get is expected to return
ownerCheckAndInit(cache(1), key, "v");
final ControlledRpcManager rpcManager0 = replaceRpcManager(cache(0));
final BlockingLocalTopologyManager topologyManager0 = replaceTopologyManager(manager(0));
final int currentTopologyId = currentTopologyId(cache(0));
topologyManager0.startBlocking(LatchType.CONSISTENT_HASH_UPDATE);
rpcManager0.blockBefore(ClusteredGetCommand.class);
// the joiner starts with CONSISTENT_HASH_UPDATE blocked; its reads wait for topology T0 + topologyOnNode2
NewNode joiner = addNode(LatchType.CONSISTENT_HASH_UPDATE, cb -> cb.customInterceptors().addInterceptor()
.position(InterceptorConfiguration.Position.FIRST)
.interceptor(new WaitForTopologyInterceptor(currentTopologyId + topologyOnNode2)));
//consistency check. the remote get is triggered
awaitForTopology(currentTopologyId + 1, cache(0));
Future<Object> remoteGetFuture = remoteGet(cache(0), key);
rpcManager0.waitForCommandToBlock();
// allow read_old -> read_all but not read_all -> read_new
topologyManager0.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
joiner.localTopologyManager.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
if (topologyOnNode2 > 2) {
// one more consistent hash update is let through on the joiner so that it can reach T3
joiner.localTopologyManager.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
}
//wait until the consistent_hash_update arrives in old owner
awaitForTopology(currentTopologyId + 3, cache(1));
awaitForTopology(currentTopologyId + 2, cache(0));
if (topologyOnNode0 > 2) {
// let the requestor advance to T3 before the blocked get is released
topologyManager0.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
awaitForTopology(currentTopologyId + 3, cache(0));
}
rpcManager0.stopBlocking();
//check the value returned and make sure that the requestor is in the correct topology id (consistency check)
assertEquals("Wrong value from remote get.", "v", remoteGetFuture.get());
assertTopologyId(currentTopologyId + topologyOnNode0, cache(0));
// release everything that is still blocked and wait for the forked waitForClusterToForm() to finish
topologyManager0.stopBlocking(LatchType.CONSISTENT_HASH_UPDATE);
joiner.localTopologyManager.stopBlocking(LatchType.CONSISTENT_HASH_UPDATE);
joiner.joinerFuture.get();
}
/**
 * Summary table row 2112: request sent in T2, node 1 processes it in T1 and node 2 in T1; a successful response is
 * expected from node 1 only (N1).
 */
public void testScenario_2112() throws Exception {
   final int topologyOnNode1 = 1;
   final int topologyOnNode2 = 1;
   final int expectedSuccessResponses = 1;
   final int expectSuccessFrom = 1;
   testScenario_2xy2(topologyOnNode1, topologyOnNode2, expectedSuccessResponses, expectSuccessFrom);
}
/**
 * Summary table row 2212: request sent in T2, node 1 processes it in T2 and node 2 in T1; a successful response is
 * expected from node 1 only (N1).
 */
public void testScenario_2212() throws Exception {
   final int topologyOnNode1 = 2;
   final int topologyOnNode2 = 1;
   final int expectedSuccessResponses = 1;
   final int expectSuccessFrom = 1;
   testScenario_2xy2(topologyOnNode1, topologyOnNode2, expectedSuccessResponses, expectSuccessFrom);
}
/**
 * Summary table row 2122: request sent in T2, node 1 processes it in T1 and node 2 in T2; both nodes may respond
 * successfully (N1+2), so no particular responder is required.
 */
public void testScenario_2122() throws Exception {
   final int topologyOnNode1 = 1;
   final int topologyOnNode2 = 2;
   final int expectedSuccessResponses = 2;
   final int expectSuccessFrom = -1;
   testScenario_2xy2(topologyOnNode1, topologyOnNode2, expectedSuccessResponses, expectSuccessFrom);
}
/**
 * Summary table row 2132: request sent in T2, node 1 processes it in T1 and node 2 in T3/T4; both nodes may respond
 * successfully (N1+2), so no particular responder is required.
 */
public void testScenario_2132() throws Exception {
   final int topologyOnNode1 = 1;
   final int topologyOnNode2 = 3;
   final int expectedSuccessResponses = 2;
   final int expectSuccessFrom = -1;
   testScenario_2xy2(topologyOnNode1, topologyOnNode2, expectedSuccessResponses, expectSuccessFrom);
}
/**
 * Summary table row 2222: request sent in T2, node 1 processes it in T2 and node 2 in T2; both nodes may respond
 * successfully (N1+2), so no particular responder is required.
 */
public void testScenario_2222() throws Exception {
   final int topologyOnNode1 = 2;
   final int topologyOnNode2 = 2;
   final int expectedSuccessResponses = 2;
   final int expectSuccessFrom = -1;
   testScenario_2xy2(topologyOnNode1, topologyOnNode2, expectedSuccessResponses, expectSuccessFrom);
}
/**
 * Summary table row 2232: request sent in T2, node 1 processes it in T2 and node 2 in T3/T4; both nodes may respond
 * successfully (N1+2), so no particular responder is required.
 */
public void testScenario_2232() throws Exception {
   final int topologyOnNode1 = 2;
   final int topologyOnNode2 = 3;
   final int expectedSuccessResponses = 2;
   final int expectSuccessFrom = -1;
   testScenario_2xy2(topologyOnNode1, topologyOnNode2, expectedSuccessResponses, expectSuccessFrom);
}
/**
 * Summary table row 2322: request sent in T2, node 1 processes it in T3/T4 and node 2 in T2; a successful response
 * is expected from node 2 only (N2).
 */
public void testScenario_2322() throws Exception {
   final int topologyOnNode1 = 3;
   final int topologyOnNode2 = 2;
   final int expectedSuccessResponses = 1;
   final int expectSuccessFrom = 2;
   testScenario_2xy2(topologyOnNode1, topologyOnNode2, expectedSuccessResponses, expectSuccessFrom);
}
/**
 * Summary table row 2332: request sent in T2, node 1 processes it in T3/T4 and node 2 in T3/T4; a successful
 * response is expected from node 2 only (N2).
 */
public void testScenario_2332() throws Exception {
   final int topologyOnNode1 = 3;
   final int topologyOnNode2 = 3;
   final int expectedSuccessResponses = 1;
   final int expectSuccessFrom = 2;
   testScenario_2xy2(topologyOnNode1, topologyOnNode2, expectedSuccessResponses, expectSuccessFrom);
}
/**
* Shared body of the 2xy2 scenarios: the remote get is sent while the requestor is in T2, node 1 and the joiner
* process it in the given topologies, and the read on the requestor must complete without a retry.
*
* @param topologyOnNode1 topology of node 1, relative to the initial one, when it processes the read
* @param topologyOnNode2 topology, relative to the initial one, that the joiner's WaitForTopologyInterceptor
* requires before it lets the read proceed
* @param expectedSuccessResponses upper bound on the number of successful responses in one response map
* @param expectSuccessFrom index into cacheManagers of the only node allowed to answer successfully, or a negative
* value when any node may do so
*/
protected void testScenario_2xy2(int topologyOnNode1, int topologyOnNode2, int expectedSuccessResponses, int expectSuccessFrom) throws Exception {
assertClusterSize("Wrong cluster size.", 2);
final Object key = String.format("key_2%d%d2", topologyOnNode1, topologyOnNode2);
// cache(1) must be the first owner of the key; store the value the remote get is expected to return
ownerCheckAndInit(cache(1), key, "v");
final ControlledRpcManager rpcManager0 = replaceRpcManager(cache(0));
final BlockingLocalTopologyManager topologyManager0 = replaceTopologyManager(manager(0));
final BlockingLocalTopologyManager topologyManager1 = replaceTopologyManager(manager(1));
final int currentTopologyId = currentTopologyId(cache(0));
topologyManager0.startBlocking(LatchType.CONSISTENT_HASH_UPDATE);
topologyManager1.startBlocking(LatchType.CONSISTENT_HASH_UPDATE);
// the read on the requestor must neither carry the COMMAND_RETRY flag nor fail with OutdatedTopologyException
cache(0).getAdvancedCache().getAsyncInterceptorChain()
.addInterceptorAfter(new AssertNoRetryInterceptor(), StateTransferInterceptor.class);
// kept in a local because the joiner's topology is read through wfti.stateTransferManager further down
WaitForTopologyInterceptor wfti = new WaitForTopologyInterceptor(currentTopologyId + topologyOnNode2);
NewNode joiner = addNode(LatchType.CONSISTENT_HASH_UPDATE, cb -> cb.customInterceptors().addInterceptor()
.position(InterceptorConfiguration.Position.FIRST).interceptor(wfti));
// allow read_old -> read_all but not read_all -> read_new
joiner.localTopologyManager.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
if (topologyOnNode2 > 2) {
joiner.localTopologyManager.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
}
topologyManager0.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
// node 1 gets topologyOnNode1 - 1 consistent hash updates unblocked, taking it from T1 to the requested topology
for (int i = 1; i < topologyOnNode1; ++i) {
topologyManager1.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
}
awaitForTopology(currentTopologyId + 2, cache(0));
awaitForTopology(currentTopologyId + topologyOnNode1, cache(1));
// NOTE(review): the BlockingInterceptor presumably parks GetCacheEntryCommand on node 1 at barrier1 until the
// test thread awaits the barrier twice - confirm the meaning of the two boolean arguments
CyclicBarrier barrier1 = new CyclicBarrier(2);
cache(1).getAdvancedCache().getAsyncInterceptorChain()
.addInterceptor(new BlockingInterceptor(barrier1, GetCacheEntryCommand.class, true, false), 0);
// TODO: add more determinism by waiting for all responses
rpcManager0.blockAfter(ClusteredGetCommand.class);
// every response is either successful or an UnsureResponse; when expectSuccessFrom is set, only that node may
// be the successful one
rpcManager0.checkResponses(responseMap -> {
int succesful = 0;
for (Map.Entry<Address, Response> rsp : responseMap.entrySet()) {
if (rsp.getValue().isSuccessful()) {
if (expectSuccessFrom >= 0) {
assertEquals(cacheManagers.get(expectSuccessFrom).getAddress(), rsp.getKey());
}
succesful++;
} else {
assertEquals(UnsureResponse.INSTANCE, rsp.getValue());
if (expectSuccessFrom >= 0) {
assertFalse(rsp.getKey().equals(cacheManagers.get(expectSuccessFrom).getAddress()));
}
}
}
assertTrue(succesful <= expectedSuccessResponses);
});
Future<Object> remoteGetFuture = remoteGet(cache(0), key);
assertTopologyId(currentTopologyId + 2, cache(0));
assertTopologyId(currentTopologyId + topologyOnNode1, cache(1));
barrier1.await(10, TimeUnit.SECONDS);
// node 1 may now install the remaining topologies; wait until the joiner reached the topology its read needs
topologyManager1.stopBlocking(LatchType.CONSISTENT_HASH_UPDATE);
eventually(() -> wfti.stateTransferManager.getCacheTopology().getTopologyId() >= currentTopologyId + topologyOnNode2);
barrier1.await(10, TimeUnit.SECONDS);
rpcManager0.waitForCommandToBlock();
rpcManager0.stopBlocking();
//check the value returned and make sure that the requestor is in the correct topology id (consistency check)
assertEquals("Wrong value from remote get.", "v", remoteGetFuture.get());
// release everything that is still blocked and wait for the forked waitForClusterToForm() to finish
topologyManager0.stopBlocking(LatchType.CONSISTENT_HASH_UPDATE);
joiner.localTopologyManager.stopBlocking(LatchType.CONSISTENT_HASH_UPDATE);
joiner.joinerFuture.get();
}
/**
 * Summary table row 2312_22: request sent in T2, node 1 processes it in T3/T4 and node 2 in T1; the retry is
 * processed by node 2 in T2.
 */
public void testScenario_2312_22() throws Exception {
   final int retryTopologyOnNode2 = 2;
   testScenario_2312_x2(retryTopologyOnNode2);
}
/**
 * Summary table row 2312_32: request sent in T2, node 1 processes it in T3/T4 and node 2 in T1; the retry is
 * processed by node 2 in T3/T4.
 */
public void testScenario_2312_32() throws Exception {
   final int retryTopologyOnNode2 = 3;
   testScenario_2312_x2(retryTopologyOnNode2);
}
/**
* Shared body of scenarios 2312_22 and 2312_32: the remote get is sent while the requestor is in T2, both node 1 and
* the joiner answer the first attempt with an UnsureResponse, and the retry is then served by the joiner. As noted
* in the summary, the two nodes are never in T3 and T1 at the same time; the two reads merely arrive at different
* times.
*
* @param retryTopologyOnNode2 topology of the joiner, relative to the initial one, when it processes the retry;
* retryTopologyOnNode2 - 1 consistent hash updates are unblocked on the joiner after the first read
*/
protected void testScenario_2312_x2(int retryTopologyOnNode2) throws Exception {
assertClusterSize("Wrong cluster size.", 2);
final Object key = String.format("key_2312_%d2", retryTopologyOnNode2);
// cache(1) must be the first owner of the key; store the value the remote get is expected to return
ownerCheckAndInit(cache(1), key, "v");
final ControlledRpcManager rpcManager0 = replaceRpcManager(cache(0));
final BlockingLocalTopologyManager topologyManager0 = replaceTopologyManager(manager(0));
final BlockingLocalTopologyManager topologyManager1 = replaceTopologyManager(manager(1));
final int currentTopologyId = currentTopologyId(cache(0));
topologyManager0.startBlocking(LatchType.CONSISTENT_HASH_UPDATE);
topologyManager1.startBlocking(LatchType.CONSISTENT_HASH_UPDATE);
// barrier1 gates GetCacheEntryCommand on node 1, barrier2 gates it on the joiner
// NOTE(review): each read appears to need two await() calls from the test thread (reached / released) - confirm
// against BlockingInterceptor
CyclicBarrier barrier1 = new CyclicBarrier(2);
CyclicBarrier barrier2 = new CyclicBarrier(2);
NewNode joiner = addNode(LatchType.CONSISTENT_HASH_UPDATE, cb -> cb.customInterceptors().addInterceptor()
.position(InterceptorConfiguration.Position.FIRST)
.interceptor(new BlockingInterceptor(barrier2, GetCacheEntryCommand.class, true, false)));
// allow node0 up to T2 and node1 up to T3
topologyManager0.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
for (int i = 1; i < 3; ++i) {
topologyManager1.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
}
awaitForTopology(currentTopologyId + 2, cache(0));
cache(1).getAdvancedCache().getAsyncInterceptorChain()
.addInterceptor(new BlockingInterceptor(barrier1, GetCacheEntryCommand.class, false, false), 0);
rpcManager0.blockAfter(ClusteredGetCommand.class);
// the checked response map must contain exactly two entries, both UnsureResponse
rpcManager0.checkResponses(responseMap -> {
assertEquals(responseMap.toString(), 2, responseMap.size());
for (Map.Entry<Address, Response> rsp : responseMap.entrySet()) {
assertEquals(UnsureResponse.INSTANCE, rsp.getValue());
}
});
Future<Object> remoteGetFuture = remoteGet(cache(0), key);
// wait for read on node2
barrier2.await(10, TimeUnit.SECONDS);
barrier2.await(10, TimeUnit.SECONDS);
// unblock state transfer on node2, that should allow node1 to progress
for (int i = 1; i < retryTopologyOnNode2; ++i) {
joiner.localTopologyManager.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
}
awaitForTopology(currentTopologyId + 3, cache(1));
// unblock read on node1
barrier1.await(10, TimeUnit.SECONDS);
barrier1.await(10, TimeUnit.SECONDS);
rpcManager0.waitForCommandToBlock();
rpcManager0.stopBlocking();
// release retry on joiner
barrier2.await(10, TimeUnit.SECONDS);
barrier2.await(10, TimeUnit.SECONDS);
assertTopologyId(currentTopologyId + 2, cache(0));
//check the value returned and make sure that the requestor is in the correct topology id (consistency check)
assertEquals("Wrong value from remote get.", "v", remoteGetFuture.get());
// release everything that is still blocked and wait for the forked waitForClusterToForm() to finish
topologyManager0.stopBlocking(LatchType.CONSISTENT_HASH_UPDATE);
topologyManager1.stopBlocking(LatchType.CONSISTENT_HASH_UPDATE);
joiner.localTopologyManager.stopBlocking(LatchType.CONSISTENT_HASH_UPDATE);
joiner.joinerFuture.get();
}
/**
 * Summary table row 323: request sent in T3, node 2 processes it in T2, response received in T3/T4, no retry (N2).
 */
public void testScenario_323() throws Exception {
   final int topologyOnNode0 = 3;
   final int topologyOnNode2 = 2;
   testScenario_xyx(topologyOnNode0, topologyOnNode2);
}
/**
 * Summary table row 333: request sent in T3, node 2 processes it in T3/T4, response received in T3/T4, no retry
 * (N2).
 */
public void testScenario_333() throws Exception {
   final int topologyOnNode0 = 3;
   final int topologyOnNode2 = 3;
   testScenario_xyx(topologyOnNode0, topologyOnNode2);
}
/**
 * Summary table row 434: request sent in T4, node 2 processes it in T3, response received in T4, no retry (N2).
 */
public void testScenario_434() throws Exception {
   final int topologyOnNode0 = 4;
   final int topologyOnNode2 = 3;
   testScenario_xyx(topologyOnNode0, topologyOnNode2);
}
/**
 * Shared body of scenarios 323, 333 and 434: the remote get is sent once the requestor has reached
 * {@code topologyOnNode0}. The read must be served without a retry and must never be executed on the old owner
 * (node 1), which has a {@link FailReadsInterceptor} installed.
 *
 * @param topologyOnNode0 topology of the requestor, relative to the initial one, when the get is sent and when the
 *                        response is received
 * @param topologyOnNode2 topology, relative to the initial one, that the joiner's WaitForTopologyInterceptor
 *                        requires before it lets the read proceed
 */
protected void testScenario_xyx(int topologyOnNode0, int topologyOnNode2) throws Exception {
   assertClusterSize("Wrong cluster size.", 2);
   // The key mirrors the scenario name (requestor, joiner, requestor), e.g. key_323 - the same convention the
   // other scenario helpers use. The previous format repeated the joiner topology and produced key_322/key_433.
   final Object key = String.format("key_%d%d%d", topologyOnNode0, topologyOnNode2, topologyOnNode0);
   // cache(1) must be the first owner of the key; store the value the remote get is expected to return
   ownerCheckAndInit(cache(1), key, "v");
   final BlockingLocalTopologyManager topologyManager0 = replaceTopologyManager(manager(0));
   final int currentTopologyId = currentTopologyId(cache(0));
   topologyManager0.startBlocking(LatchType.CONSISTENT_HASH_UPDATE);
   // the read on the requestor must neither carry the COMMAND_RETRY flag nor fail with OutdatedTopologyException
   cache(0).getAdvancedCache().getAsyncInterceptorChain()
         .addInterceptorAfter(new AssertNoRetryInterceptor(), StateTransferInterceptor.class);
   // the old owner must never execute the read: FailReadsInterceptor records the hit and throws
   FailReadsInterceptor fri = new FailReadsInterceptor();
   cache(1).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(fri, 0);
   NewNode joiner = addNode(LatchType.CONSISTENT_HASH_UPDATE, cb -> cb.customInterceptors().addInterceptor()
         .position(InterceptorConfiguration.Position.FIRST)
         .interceptor(new WaitForTopologyInterceptor(currentTopologyId + topologyOnNode2)));
   // unblock (topology - 1) consistent hash updates on each node so that it advances from T1 to its target
   for (int i = 1; i < topologyOnNode0; ++i) {
      topologyManager0.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
   }
   for (int i = 1; i < topologyOnNode2; ++i) {
      joiner.localTopologyManager.unblockOnce(LatchType.CONSISTENT_HASH_UPDATE);
   }
   awaitForTopology(currentTopologyId + topologyOnNode0, cache(0));
   Future<Object> remoteGetFuture = remoteGet(cache(0), key);
   //check the value returned and make sure that the requestor is in the correct topology id (consistency check)
   assertEquals("Wrong value from remote get.", "v", remoteGetFuture.get());
   fri.assertNotHit();
   assertTopologyId(currentTopologyId + topologyOnNode0, cache(0));
   // release everything that is still blocked and wait for the forked waitForClusterToForm() to finish
   topologyManager0.stopBlocking(LatchType.CONSISTENT_HASH_UPDATE);
   joiner.localTopologyManager.stopBlocking(LatchType.CONSISTENT_HASH_UPDATE);
   joiner.joinerFuture.get();
}
/**
 * Creates the initial two-node cluster, both nodes using {@link #configuration()}.
 */
@Override
protected void createCacheManagers() throws Throwable {
   final int initialClusterSize = 2;
   createClusteredCaches(initialClusterSize, configuration());
}
/**
 * Reads {@code key} from {@code cache} on a forked thread.
 *
 * @return the future of the forked {@code cache.get(key)} call
 */
private Future<Object> remoteGet(Cache cache, Object key) {
   return fork(() -> {
      return cache.get(key);
   });
}
/**
 * Returns the id of the cache topology currently reported by the cache's {@link StateTransferManager}.
 */
private int currentTopologyId(Cache cache) {
   StateTransferManager stateTransferManager = TestingUtil.extractComponent(cache, StateTransferManager.class);
   return stateTransferManager.getCacheTopology().getTopologyId();
}
/**
 * Asserts that {@code cache} is currently in exactly the topology {@code expectedTopologyId}.
 */
private void assertTopologyId(final int expectedTopologyId, final Cache cache) {
   final int actualTopologyId = currentTopologyId(cache);
   assertEquals(expectedTopologyId, actualTopologyId);
}
/**
 * Polls until {@code cache} is in topology {@code expectedTopologyId}. Each poll also asserts that the cache has
 * not already moved past the expected topology.
 */
private void awaitForTopology(final int expectedTopologyId, final Cache cache) {
   eventually(() -> {
      final int observedTopologyId = currentTopologyId(cache);
      // the cache must never be ahead of the topology the test is waiting for
      assertTrue("Current topology is " + observedTopologyId, observedTopologyId <= expectedTopologyId);
      return observedTopologyId == expectedTopologyId;
   });
}
/**
 * Polls until the data container of {@code cache} no longer contains {@code key}.
 */
private void awaitUntilNotInDataContainer(final Cache cache, final Object key) {
   eventually(() -> {
      boolean stillPresent = cache.getAdvancedCache().getDataContainer().containsKey(key);
      return !stillPresent;
   });
}
/**
 * Adds a node without blocking any latch on it and without changing the default configuration.
 */
private NewNode addNode() {
   final LatchType nothingToBlock = null;
   final Consumer<ConfigurationBuilder> noConfigurationChange = null;
   return addNode(nothingToBlock, noConfigurationChange);
}
/**
 * Starts an additional cache manager and begins waiting for the cluster to form on a forked thread.
 *
 * @param block               latch type to start blocking on the new node's topology manager, or {@code null} to
 *                            block nothing
 * @param modifyConfiguration optional hook applied to the configuration from {@link #configuration()} before the
 *                            cache manager is created, or {@code null}
 * @return holder with the new node's topology manager and the future of the forked
 *         {@code waitForClusterToForm()} call
 */
private NewNode addNode(LatchType block, Consumer<ConfigurationBuilder> modifyConfiguration) {
   ConfigurationBuilder configurationBuilder = configuration();
   if (modifyConfiguration != null) {
      modifyConfiguration.accept(configurationBuilder);
   }
   EmbeddedCacheManager embeddedCacheManager = addClusterEnabledCacheManager(configurationBuilder);

   NewNode newNode = new NewNode();
   // replaceTopologyManager() already registers the manager in topologyManagerList for unblockAll(); adding it
   // here again would only make unblockAll() release the same manager twice.
   newNode.localTopologyManager = replaceTopologyManager(embeddedCacheManager);
   if (block != null) {
      newNode.localTopologyManager.startBlocking(block);
   }
   newNode.joinerFuture = fork(() -> {
      waitForClusterToForm();
      return null;
   });
   return newNode;
}
/**
 * Asserts that {@code owner} is the first owner of {@code key}, stores {@code value} through it and checks that
 * every cache reads that value back.
 */
private void ownerCheckAndInit(Cache<Object, Object> owner, Object key, Object value) {
   String notOwnerMessage = address(owner) + " should be the owner of " + key + ".";
   assertTrue(notOwnerMessage, isFirstOwner(owner, key));
   owner.put(key, value);
   assertCacheValue(key, value);
}
/**
 * Asserts that every cache returned by {@code caches()} reads {@code value} for {@code key}.
 */
private void assertCacheValue(Object key, Object value) {
   for (Cache node : caches()) {
      String wrongValueMessage = "Wrong value for key " + key + " on " + address(node) + ".";
      assertEquals(wrongValueMessage, value, node.get(key));
   }
}
/**
 * Builds the configuration shared by all nodes: synchronous distributed mode, one segment with one owner chosen by
 * {@link SingleKeyConsistentHashFactory}, and in-memory state fetching enabled.
 */
private ConfigurationBuilder configuration() {
   ConfigurationBuilder cfg = getDefaultClusteredCacheConfig(CacheMode.DIST_SYNC, false);
   cfg.clustering().hash()
         .numSegments(1)
         .numOwners(1)
         .consistentHashFactory(new SingleKeyConsistentHashFactory());
   cfg.clustering().stateTransfer().fetchInMemoryState(true);
   return cfg;
}
/**
 * Installs a {@link BlockingLocalTopologyManager} on the given cache container and registers it in
 * {@code topologyManagerList} so that {@link #unblockAll()} can release it.
 */
private BlockingLocalTopologyManager replaceTopologyManager(CacheContainer cacheContainer) {
   BlockingLocalTopologyManager blockingManager =
         BlockingLocalTopologyManager.replaceTopologyManager(cacheContainer);
   topologyManagerList.add(blockingManager);
   return blockingManager;
}
/**
 * Wraps the cache's current {@link RpcManager} in a {@link ControlledRpcManager}, installs the wrapper in the cache
 * and registers it in {@code rpcManagerList} so that {@link #unblockAll()} can release it.
 */
private ControlledRpcManager replaceRpcManager(Cache cache) {
   RpcManager originalRpcManager = TestingUtil.extractComponent(cache, RpcManager.class);
   ControlledRpcManager wrapper = new ControlledRpcManager(originalRpcManager);
   TestingUtil.replaceComponent(cache, RpcManager.class, wrapper, true);
   rpcManagerList.add(wrapper);
   return wrapper;
}
/**
 * Replaces the cache's {@link StateTransferLock} with the lock that {@code lockBuilder} derives from the current
 * one.
 *
 * @return the newly installed lock
 */
private <T extends StateTransferLock> T replaceStateTransferLock(Cache cache, Function<StateTransferLock, T> lockBuilder) {
   StateTransferLock currentLock = TestingUtil.extractComponent(cache, StateTransferLock.class);
   T replacementLock = lockBuilder.apply(currentLock);
   TestingUtil.replaceComponent(cache, StateTransferLock.class, replacementLock, true);
   return replacementLock;
}
/**
 * Consistent hash factory for a single segment whose only owner is always the last member of the members list.
 */
@SuppressWarnings("unchecked")
public static class SingleKeyConsistentHashFactory extends BaseControlledConsistentHashFactory {

   public SingleKeyConsistentHashFactory() {
      super(1);
   }

   @Override
   protected List<Address> createOwnersCollection(List<Address> members, int numberOfOwners, int segmentIndex) {
      assertEquals("Wrong number of owners.", 1, numberOfOwners);
      int lastIndex = members.size() - 1;
      Address soleOwner = members.get(lastIndex);
      return Collections.singletonList(soleOwner);
   }
}
/**
 * Interceptor that, for every {@link GetCacheEntryCommand}, waits up to 10 seconds on the future returned by
 * {@code StateTransferLock.topologyFuture(expectedTopologyId)} (when there is one) and then asserts that the
 * installed topology id is exactly the expected one before passing the command on.
 */
private static class WaitForTopologyInterceptor extends DDAsyncInterceptor {
   protected final int expectedTopologyId;
   // ugly hooks to be able to access topology from test
   private volatile StateTransferManager stateTransferManager;
   private volatile StateTransferLock stateTransferLock;

   private WaitForTopologyInterceptor(int expectedTopologyId) {
      this.expectedTopologyId = expectedTopologyId;
   }

   @Inject
   public void init(StateTransferManager stateTransferManager, StateTransferLock stateTransferLock) {
      this.stateTransferManager = stateTransferManager;
      this.stateTransferLock = stateTransferLock;
   }

   @Override
   public Object visitGetCacheEntryCommand(InvocationContext ctx, GetCacheEntryCommand command) throws Throwable {
      assertNotNull(stateTransferLock);
      CompletableFuture<Void> pendingTopology = stateTransferLock.topologyFuture(expectedTopologyId);
      if (pendingTopology != null) {
         pendingTopology.get(10, TimeUnit.SECONDS);
      }
      int installedTopologyId = stateTransferManager.getCacheTopology().getTopologyId();
      assertEquals(expectedTopologyId, installedTopologyId);
      return invokeNext(ctx, command);
   }
}
/**
 * Interceptor for nodes that must never execute a read: any {@link GetCacheEntryCommand} is recorded and rejected
 * with an {@link IllegalStateException}.
 */
private static class FailReadsInterceptor extends BaseCustomAsyncInterceptor {
   // set once a read reaches this interceptor
   private final AtomicBoolean hit = new AtomicBoolean();

   @Override
   public Object visitGetCacheEntryCommand(InvocationContext ctx, GetCacheEntryCommand command) throws Throwable {
      hit.set(true);
      Address localAddress = cache.getCacheManager().getAddress();
      throw new IllegalStateException("Did not expect the command to be executed on node " + localAddress);
   }

   /** Asserts that no read has reached this interceptor so far. */
   public void assertNotHit() {
      boolean wasHit = hit.get();
      assertFalse(wasHit);
   }
}
/**
 * Interceptor asserting that a {@link GetCacheEntryCommand} is not a retry: the command must not carry
 * {@link Flag#COMMAND_RETRY}, and a failure further down the chain must not be an OutdatedTopologyException. Any
 * failure is rethrown unchanged.
 */
private static class AssertNoRetryInterceptor extends DDAsyncInterceptor {
   @Override
   public Object visitGetCacheEntryCommand(InvocationContext ctx, GetCacheEntryCommand command) throws Throwable {
      boolean flaggedAsRetry = command.hasFlag(Flag.COMMAND_RETRY);
      assertFalse(flaggedAsRetry);
      return invokeNextAndExceptionally(ctx, command, (rCtx, rCommand, failure) -> {
         boolean outdatedTopology = failure instanceof OutdatedTopologyException;
         assertFalse(outdatedTopology);
         throw failure;
      });
   }
}
private class NewNode {
Future<Void> joinerFuture;
BlockingLocalTopologyManager localTopologyManager;
}
}