package org.infinispan.statetransfer;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.infinispan.test.TestingUtil.extractComponent;
import static org.infinispan.test.TestingUtil.findInterceptor;
import static org.infinispan.test.TestingUtil.waitForNoRebalance;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNull;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import org.infinispan.Cache;
import org.infinispan.commands.ReplicableCommand;
import org.infinispan.commands.control.LockControlCommand;
import org.infinispan.commands.tx.CommitCommand;
import org.infinispan.commands.tx.PrepareCommand;
import org.infinispan.commands.write.PutKeyValueCommand;
import org.infinispan.configuration.cache.CacheMode;
import org.infinispan.configuration.cache.ConfigurationBuilder;
import org.infinispan.context.InvocationContext;
import org.infinispan.context.impl.FlagBitSets;
import org.infinispan.context.impl.TxInvocationContext;
import org.infinispan.interceptors.base.BaseCustomInterceptor;
import org.infinispan.interceptors.impl.EntryWrappingInterceptor;
import org.infinispan.interceptors.locking.PessimisticLockingInterceptor;
import org.infinispan.manager.EmbeddedCacheManager;
import org.infinispan.test.MultipleCacheManagersTest;
import org.infinispan.test.fwk.CheckPoint;
import org.infinispan.test.fwk.CleanupAfterMethod;
import org.infinispan.topology.CacheTopology;
import org.infinispan.transaction.LockingMode;
import org.infinispan.util.ReplicatedControlledConsistentHashFactory;
import org.infinispan.util.concurrent.IsolationLevel;
import org.testng.annotations.Test;
/**
* Test that commands are properly retried during/after state transfer.
*
* @author Dan Berindei
* @since 7.2
*/
@Test(groups = "functional", testName = "statetransfer.ReplCommandRetryTest")
@CleanupAfterMethod
public class ReplCommandRetryTest extends MultipleCacheManagersTest {
/**
 * Intentionally creates no cache managers: every test method in this class builds its own cluster
 * node by node.
 */
@Override
protected void createCacheManagers() {
// do nothing, each test will create its own cache managers
}
/**
 * Creates the {@code REPL_SYNC} configuration shared by all nodes of a test cluster, with a fresh
 * {@code DelayInterceptor} registered for the given command class.
 *
 * @param lockingMode    locking mode to configure; {@code lockingMode != null} is also the boolean handed to
 *                       {@code getDefaultClusteredCacheConfig} (the non-transactional test passes {@code null})
 * @param commandToBlock command class the node's {@code DelayInterceptor} should delay
 * @param isOriginator   only affects where the interceptor is placed when {@code commandToBlock} is
 *                       {@link LockControlCommand}
 * @return the populated configuration builder
 */
private ConfigurationBuilder buildConfig(LockingMode lockingMode, Class<?> commandToBlock, boolean isOriginator) {
   final boolean hasLockingMode = lockingMode != null;
   ConfigurationBuilder builder = getDefaultClusteredCacheConfig(CacheMode.REPL_SYNC, hasLockingMode);
   builder.transaction().lockingMode(lockingMode);
   builder.locking().isolationLevel(IsolationLevel.READ_COMMITTED);

   // The coordinator will always be the primary owner
   builder.clustering().hash().numSegments(1)
         .consistentHashFactory(new ReplicatedControlledConsistentHashFactory(0));
   builder.clustering().remoteTimeout(15000);
   builder.clustering().stateTransfer().fetchInMemoryState(true);

   final DelayInterceptor delayInterceptor = new DelayInterceptor(commandToBlock);
   if (isOriginator || commandToBlock != LockControlCommand.class) {
      // Default placement: right after the entry wrapping interceptor.
      builder.customInterceptors().addInterceptor()
            .after(EntryWrappingInterceptor.class).interceptor(delayInterceptor);
   } else {
      // A LockControlCommand on a non-originator is intercepted ahead of the pessimistic locking interceptor.
      builder.customInterceptors().addInterceptor()
            .before(PessimisticLockingInterceptor.class).interceptor(delayInterceptor);
   }
   return builder;
}
/**
 * Non-transactional scenario: a put started on the first node is held in the {@code DelayInterceptor} of
 * remote nodes while a third and then a fourth node join, and the test checks how many times each node
 * ended up seeing the command.
 */
public void testRetryAfterJoinNonTransactional() throws Exception {
   // Node 1 is the originator of the put.
   EmbeddedCacheManager manager1 = addClusterEnabledCacheManager(buildConfig(null, PutKeyValueCommand.class, true));
   final Cache<Object, Object> cache1 = manager1.getCache();
   DelayInterceptor delay1 = findInterceptor(cache1, DelayInterceptor.class);
   int baseTopologyId = extractComponent(cache1, StateTransferManager.class).getCacheTopology().getTopologyId();

   // Node 2 joins before the put starts.
   EmbeddedCacheManager manager2 = addClusterEnabledCacheManager(buildConfig(null, PutKeyValueCommand.class, false));
   final Cache<Object, Object> cache2 = manager2.getCache();
   DelayInterceptor delay2 = findInterceptor(cache2, DelayInterceptor.class);
   waitForStateTransfer(baseTopologyId + 4, cache1, cache2);

   Future<Object> putFuture = fork(() -> {
      log.tracef("Initiating a put command on %s", cache1);
      cache1.put("k", "v");
      return null;
   });

   // The replicated put reaches node 2 and parks in its DelayInterceptor
   delay2.waitUntilBlocked(1);

   // Node 3 joins, which changes the topology id
   EmbeddedCacheManager manager3 = addClusterEnabledCacheManager(buildConfig(null, PutKeyValueCommand.class, false));
   Cache<Object, Object> cache3 = manager3.getCache();
   DelayInterceptor delay3 = findInterceptor(cache3, DelayInterceptor.class);
   waitForStateTransfer(baseTopologyId + 8, cache1, cache2, cache3);

   // Release the replicated command on node 2.
   log.tracef("Triggering retry 1");
   delay2.unblock(1);

   // Node 2 answers with an UnsureResponse, so node 1 retries the command.
   // The retry goes to nodes 2 and 3 and parks in the DelayInterceptor on both
   delay2.waitUntilBlocked(2);
   delay3.waitUntilBlocked(1);

   // Release the command carrying the new topology id on node 2
   delay2.unblock(2);

   // Node 4 joins, which changes the topology id again
   EmbeddedCacheManager manager4 = addClusterEnabledCacheManager(buildConfig(null, PutKeyValueCommand.class, false));
   Cache<Object, Object> cache4 = manager4.getCache();
   DelayInterceptor delay4 = findInterceptor(cache4, DelayInterceptor.class);
   waitForStateTransfer(baseTopologyId + 12, cache1, cache2, cache3, cache4);

   // Release the command carrying the (now outdated) topology id on node 3.
   log.tracef("Triggering retry 2");
   delay3.unblock(1);

   // Node 3 answers with an UnsureResponse, so node 1 retries once more.
   // The retry goes to nodes 2, 3 and 4 and parks in the DelayInterceptor on each of them.
   // Release every node except node 1
   delay2.unblock(3);
   delay3.unblock(2);
   delay4.unblock(1);

   // Now the command blocks on node 1 itself
   delay1.unblock(1);

   log.tracef("Waiting for the put command to finish on %s", cache1);
   Object previousValue = putFuture.get(10, TimeUnit.SECONDS);
   log.tracef("Put command finished on %s", cache1);
   assertNull(previousValue);

   // node 1: only the last retry
   assertEquals(1, delay1.getCounter());
   // node 2: the initial invocation plus one per retry
   assertEquals(3, delay2.getCounter());
   // node 3: one per retry
   assertEquals(2, delay3.getCounter());
   // node 4: only the last retry
   assertEquals(1, delay4.getCounter());
}
/**
 * Runs the transactional retry scenario with {@link LockingMode#PESSIMISTIC} locking while delaying
 * {@link LockControlCommand}.
 */
public void testRetryAfterJoinLockControlCommand() throws Exception {
testRetryAfterJoinTransactional(LockingMode.PESSIMISTIC, LockControlCommand.class);
}
/**
 * Runs the transactional retry scenario with {@link LockingMode#PESSIMISTIC} locking while delaying
 * {@link PrepareCommand} (the "one phase" prepare case, going by the method name).
 */
public void testRetryAfterJoinOnePhasePrepareCommand() throws Exception {
testRetryAfterJoinTransactional(LockingMode.PESSIMISTIC, PrepareCommand.class);
}
/**
 * Runs the transactional retry scenario with {@link LockingMode#OPTIMISTIC} locking while delaying
 * {@link PrepareCommand} (the "two phase" prepare case, going by the method name).
 */
public void testRetryAfterJoinTwoPhasePrepareCommand() throws Exception {
testRetryAfterJoinTransactional(LockingMode.OPTIMISTIC, PrepareCommand.class);
}
/**
 * Runs the transactional retry scenario with {@link LockingMode#OPTIMISTIC} locking while delaying
 * {@link CommitCommand}.
 */
public void testRetryAfterJoinCommitCommand() throws Exception {
testRetryAfterJoinTransactional(LockingMode.OPTIMISTIC, CommitCommand.class);
}
/**
 * Shared scenario for the transactional retry tests.
 * <p>
 * A put is started on the second node (c2) of a two-node cluster, the selected command is held in the
 * {@code DelayInterceptor} of the remote nodes while a third and then a fourth node join, and finally the
 * number of times each node saw the command is verified.
 *
 * @param lockingMode  locking mode used to configure every cache in the cluster
 * @param commandClass command class that each node's {@code DelayInterceptor} delays
 * @throws Exception if a checkpoint wait times out or the forked put fails
 */
private void testRetryAfterJoinTransactional(LockingMode lockingMode, Class<?> commandClass) throws Exception {
   EmbeddedCacheManager cm1 = addClusterEnabledCacheManager(buildConfig(lockingMode, commandClass, false));
   final Cache<Object, Object> c1 = cm1.getCache();
   DelayInterceptor di1 = findInterceptor(c1, DelayInterceptor.class);
   int initialTopologyId = extractComponent(c1, StateTransferManager.class).getCacheTopology().getTopologyId();

   // c2 is configured as the originator of the transaction
   EmbeddedCacheManager cm2 = addClusterEnabledCacheManager(buildConfig(lockingMode, commandClass, true));
   final Cache<String, String> c2 = cm2.getCache();
   DelayInterceptor di2 = findInterceptor(c2, DelayInterceptor.class);
   waitForStateTransfer(initialTopologyId + 4, c1, c2);

   Future<Object> f = fork(() -> {
      // The LockControlCommand wouldn't be replicated if we initiated the transaction on the primary owner (c1)
      log.tracef("Initiating a transaction on backup owner %s", c2);
      c2.put("k", "v");
      return null;
   });

   // The prepare command is replicated to cache c1, and it blocks in the DelayInterceptor
   di1.waitUntilBlocked(1);

   // c3 joins, topology id changes
   EmbeddedCacheManager cm3 = addClusterEnabledCacheManager(buildConfig(lockingMode, commandClass, false));
   Cache<Object, Object> c3 = cm3.getCache();
   DelayInterceptor di3 = findInterceptor(c3, DelayInterceptor.class);
   waitForStateTransfer(initialTopologyId + 8, c1, c2, c3);

   // Unblock the replicated command on c1.
   // c1 will return an UnsureResponse, and c2 will retry (1)
   log.tracef("Triggering retry 1 from node %s", c1);
   di1.unblock(1);

   // The prepare command will again block on c1 and c3
   di1.waitUntilBlocked(2);
   di3.waitUntilBlocked(1);

   // c4 joins, topology id changes
   EmbeddedCacheManager cm4 = addClusterEnabledCacheManager(buildConfig(lockingMode, commandClass, false));
   Cache<Object, Object> c4 = cm4.getCache();
   DelayInterceptor di4 = findInterceptor(c4, DelayInterceptor.class);
   waitForStateTransfer(initialTopologyId + 12, c1, c2, c3, c4);

   // Unblock the replicated command on c1
   di1.unblock(2);

   // Unblock the replicated command on c3, c2 will retry (2)
   log.tracef("Triggering retry 2 from %s", c3);
   di3.unblock(1);

   // Check that the c1, c3, and c4 all received the retried command
   di1.unblock(3);
   di3.unblock(2);
   di4.unblock(1);

   // Allow the command to finish on the originator (c2).
   log.tracef("Finishing tx on %s", c2);
   di2.unblock(1);

   log.tracef("Waiting for the transaction to finish on %s", c2);
   f.get(10, TimeUnit.SECONDS);
   log.tracef("Transaction finished on %s", c2);

   // AssertJUnit.assertEquals takes (expected, actual); keeping that order makes failure messages truthful.
   // 1 for the initial call + 1 for each retry (2)
   assertEquals(3, di1.getCounter());
   // 1 for the last retry
   assertEquals(1, di2.getCounter());
   // 1 for each retry
   assertEquals(2, di3.getCounter());
   // 1 for the last retry
   assertEquals(1, di4.getCounter());
}
/**
 * Waits until no rebalance is in progress on the given caches, then asserts that each of them reports
 * exactly the expected topology id.
 *
 * @param expectedTopologyId topology id every cache must report
 * @param caches             caches to wait for and check
 */
private void waitForStateTransfer(int expectedTopologyId, Cache... caches) {
   waitForNoRebalance(caches);
   for (int i = 0; i < caches.length; i++) {
      final Cache member = caches[i];
      final CacheTopology topology = extractComponent(member, StateTransferManager.class).getCacheTopology();
      final String failureMessage = String.format("Wrong topology on cache %s, expected %d and got %s",
            member, expectedTopologyId, topology);
      assertEquals(failureMessage, expectedTopologyId, topology.getTopologyId());
   }
}
/**
 * Custom interceptor that holds invocations of one command class on a {@link CheckPoint} until the test
 * releases them.
 * <p>
 * Each delayed invocation takes the next value of {@link #counter} as its sequence number {@code n},
 * triggers the event {@code blocked_<n>_on_<cache>} and then waits for {@code resume_<n>_on_<cache>}.
 * The delay happens after the superclass implementation of the visit method has returned.
 */
private class DelayInterceptor extends BaseCustomInterceptor {
   private final AtomicInteger counter = new AtomicInteger(0);
   private final CheckPoint checkPoint = new CheckPoint();
   private final Class<?> commandToBlock;

   public DelayInterceptor(Class<?> commandToBlock) {
      this.commandToBlock = commandToBlock;
   }

   /**
    * @return how many invocations have been delayed so far
    */
   public int getCounter() {
      return counter.get();
   }

   /**
    * Asserts, with a 5 second limit on the checkpoint lookup, that the {@code count}-th delayed
    * invocation has announced itself.
    */
   public void waitUntilBlocked(int count) throws TimeoutException, InterruptedException {
      final String expectedEvent = blockedEvent(count);
      final String seenEvent = checkPoint.peek(5, SECONDS, expectedEvent);
      assertEquals(expectedEvent, seenEvent);
   }

   /**
    * Waits up to 5 seconds for the {@code count}-th delayed invocation to announce itself, then triggers
    * its resume event.
    */
   public void unblock(int count) throws InterruptedException, TimeoutException, BrokenBarrierException {
      log.tracef("Unblocking command on cache %s", cache);
      checkPoint.awaitStrict(blockedEvent(count), 5, SECONDS);
      checkPoint.trigger(resumeEvent(count));
   }

   @Override
   public Object visitPutKeyValueCommand(InvocationContext ctx, PutKeyValueCommand command) throws Throwable {
      final Object result = super.visitPutKeyValueCommand(ctx, command);
      // Puts inside a transaction and puts flagged PUT_FOR_STATE_TRANSFER are never delayed.
      if (ctx.isInTxScope() || command.hasAnyFlag(FlagBitSets.PUT_FOR_STATE_TRANSFER)) {
         return result;
      }
      doBlock(ctx, command);
      return result;
   }

   @Override
   public Object visitLockControlCommand(TxInvocationContext ctx, LockControlCommand command) throws Throwable {
      final Object result = super.visitLockControlCommand(ctx, command);
      delayUnlessFromStateTransfer(ctx, command);
      return result;
   }

   @Override
   public Object visitPrepareCommand(TxInvocationContext ctx, PrepareCommand command) throws Throwable {
      final Object result = super.visitPrepareCommand(ctx, command);
      delayUnlessFromStateTransfer(ctx, command);
      return result;
   }

   @Override
   public Object visitCommitCommand(TxInvocationContext ctx, CommitCommand command) throws Throwable {
      final Object result = super.visitCommitCommand(ctx, command);
      delayUnlessFromStateTransfer(ctx, command);
      return result;
   }

   /** Delays a transactional command unless its cache transaction is marked as coming from state transfer. */
   private void delayUnlessFromStateTransfer(TxInvocationContext ctx, ReplicableCommand command)
         throws InterruptedException, TimeoutException {
      if (ctx.getCacheTransaction().isFromStateTransfer()) {
         return;
      }
      doBlock(ctx, command);
   }

   /**
    * Delays the command if its class is exactly {@link #commandToBlock}: announces the block, then waits up
    * to 15 seconds for the matching resume event.
    */
   private void doBlock(InvocationContext ctx, ReplicableCommand command) throws InterruptedException,
         TimeoutException {
      if (commandToBlock == command.getClass()) {
         log.tracef("Delaying command %s originating from %s", command, ctx.getOrigin());
         final int sequence = counter.incrementAndGet();
         checkPoint.trigger(blockedEvent(sequence));
         checkPoint.awaitStrict(resumeEvent(sequence), 15, SECONDS);
         log.tracef("Command unblocked: %s", command);
      }
   }

   /** Name of the event triggered when the {@code sequence}-th invocation starts waiting. */
   private String blockedEvent(int sequence) {
      return "blocked_" + sequence + "_on_" + cache;
   }

   /** Name of the event the {@code sequence}-th invocation waits for before continuing. */
   private String resumeEvent(int sequence) {
      return "resume_" + sequence + "_on_" + cache;
   }

   @Override
   public String toString() {
      return "DelayInterceptor{counter=" + counter + "}";
   }
}
}