package org.infinispan.distribution.rehash;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.spy;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertNotNull;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import org.infinispan.AdvancedCache;
import org.infinispan.commons.api.BasicCacheContainer;
import org.infinispan.configuration.cache.CacheMode;
import org.infinispan.configuration.cache.ConfigurationBuilder;
import org.infinispan.distribution.BlockingInterceptor;
import org.infinispan.interceptors.impl.EntryWrappingInterceptor;
import org.infinispan.manager.CacheContainer;
import org.infinispan.manager.EmbeddedCacheManager;
import org.infinispan.partitionhandling.AvailabilityMode;
import org.infinispan.remoting.transport.Address;
import org.infinispan.test.MultipleCacheManagersTest;
import org.infinispan.test.TestingUtil;
import org.infinispan.test.fwk.CheckPoint;
import org.infinispan.test.fwk.CleanupAfterMethod;
import org.infinispan.topology.CacheTopology;
import org.infinispan.topology.LocalTopologyManager;
import org.infinispan.transaction.TransactionMode;
import org.infinispan.util.BaseControlledConsistentHashFactory;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import org.testng.annotations.Test;
/**
* Tests that a conditional write is retried properly if the write is unsuccessful on the primary owner
* because it became a non-owner and doesn't have the entry any more.
*
* See https://issues.jboss.org/browse/ISPN-3830
*
* @author Dan Berindei
*/
@Test(groups = "functional", testName = "distribution.rehash.NonTxPrimaryOwnerBecomingNonOwnerTest")
@CleanupAfterMethod
public class NonTxPrimaryOwnerBecomingNonOwnerTest extends MultipleCacheManagersTest {

   private static final String CACHE_NAME = BasicCacheContainer.DEFAULT_CACHE_NAME;

   @Override
   protected void createCacheManagers() throws Throwable {
      // Start with a 2-node cluster; a third node joins later inside doTest()
      // to force the primary-owner change that the test exercises.
      ConfigurationBuilder c = getConfigurationBuilder();
      addClusterEnabledCacheManager(c);
      addClusterEnabledCacheManager(c);
      waitForClusterToForm();
   }

   /**
    * Non-transactional DIST_SYNC configuration with a single segment and a
    * controlled CH factory, so the owners of the one segment are fully
    * deterministic across topology changes.
    */
   private ConfigurationBuilder getConfigurationBuilder() {
      ConfigurationBuilder c = new ConfigurationBuilder();
      c.clustering().cacheMode(CacheMode.DIST_SYNC);
      c.clustering().hash().numSegments(1).consistentHashFactory(new CustomConsistentHashFactory());
      c.transaction().transactionMode(TransactionMode.NON_TRANSACTIONAL);
      return c;
   }

   public void testPrimaryOwnerChangingDuringPut() throws Exception {
      doTest(TestWriteOperation.PUT_CREATE);
   }

   public void testPrimaryOwnerChangingDuringPutIfAbsent() throws Exception {
      doTest(TestWriteOperation.PUT_IF_ABSENT);
   }

   public void testPrimaryOwnerChangingDuringReplace() throws Exception {
      doTest(TestWriteOperation.REPLACE);
   }

   public void testPrimaryOwnerChangingDuringReplaceExact() throws Exception {
      doTest(TestWriteOperation.REPLACE_EXACT);
   }

   public void testPrimaryOwnerChangingDuringRemove() throws Exception {
      doTest(TestWriteOperation.REMOVE);
   }

   public void testPrimaryOwnerChangingDuringRemoveExact() throws Exception {
      doTest(TestWriteOperation.REMOVE_EXACT);
   }

   /**
    * Core scenario: start a write on cache0 while it is the primary owner,
    * block it before the distribution interceptor, install a new topology in
    * which cache2 becomes the primary owner, then let the write proceed and
    * verify that the retry (after OutdatedTopologyException) leaves a
    * consistent value and no leaked locks on any node.
    */
   private void doTest(final TestWriteOperation op) throws Exception {
      final String key = "testkey";
      if (op.getPreviousValue() != null) {
         cache(0, CACHE_NAME).put(key, op.getPreviousValue());
      }

      CheckPoint checkPoint = new CheckPoint();
      LocalTopologyManager ltm0 = TestingUtil.extractGlobalComponent(manager(0), LocalTopologyManager.class);
      int preJoinTopologyId = ltm0.getCacheTopology(CACHE_NAME).getTopologyId();

      final AdvancedCache<Object, Object> cache0 = advancedCache(0);
      addBlockingLocalTopologyManager(manager(0), checkPoint, preJoinTopologyId);

      final AdvancedCache<Object, Object> cache1 = advancedCache(1);
      addBlockingLocalTopologyManager(manager(1), checkPoint, preJoinTopologyId);

      // Add a new member and block the rebalance before the final topology is installed
      ConfigurationBuilder c = getConfigurationBuilder();
      c.clustering().stateTransfer().awaitInitialTransfer(false);
      addClusterEnabledCacheManager(c);
      addBlockingLocalTopologyManager(manager(2), checkPoint, preJoinTopologyId);

      log.tracef("Starting the cache on the joiner");
      final AdvancedCache<Object, Object> cache2 = advancedCache(2);

      // Let the rebalance-start topology (preJoin + 1) through on all nodes,
      // but hold back the rebalance-end topology (preJoin + 2) for now.
      int duringJoinTopologyId = preJoinTopologyId + 1;
      checkPoint.trigger("allow_topology_" + duringJoinTopologyId + "_on_" + address(0));
      checkPoint.trigger("allow_topology_" + duringJoinTopologyId + "_on_" + address(1));
      checkPoint.trigger("allow_topology_" + duringJoinTopologyId + "_on_" + address(2));

      // Wait for the write CH to contain the joiner everywhere
      Stream.of(cache0, cache1, cache2).forEach(cache ->
            eventuallyEquals(3, () -> cache.getRpcManager().getMembers().size()));

      CacheTopology duringJoinTopology = ltm0.getCacheTopology(CACHE_NAME);
      assertEquals(duringJoinTopologyId, duringJoinTopology.getTopologyId());
      // A non-null pending CH confirms the rebalance is still in progress
      assertNotNull(duringJoinTopology.getPendingCH());
      log.tracef("Rebalance started. Found key %s with current owners %s and pending owners %s", key,
            duringJoinTopology.getCurrentCH().locateOwners(key), duringJoinTopology.getPendingCH().locateOwners(key));

      // Every operation command will be blocked before reaching the distribution interceptor on cache0 (the originator)
      CyclicBarrier beforeCache0Barrier = new CyclicBarrier(2);
      BlockingInterceptor blockingInterceptor0 = new BlockingInterceptor<>(beforeCache0Barrier,
            op.getCommandClass(), false, true);
      cache0.getAsyncInterceptorChain().addInterceptorBefore(blockingInterceptor0, EntryWrappingInterceptor.class);

      // Write from cache0 with cache0 as primary owner, cache2 will become the primary owner for the retry
      Future<Object> future = fork(() -> op.perform(cache0, key));

      // Block the write command on cache0
      beforeCache0Barrier.await(10, TimeUnit.SECONDS);

      // Allow the topology update to proceed on cache0
      final int postJoinTopologyId = duringJoinTopologyId + 1;
      checkPoint.trigger("allow_topology_" + postJoinTopologyId + "_on_" + address(0));
      eventuallyEquals(postJoinTopologyId,
            () -> cache0.getComponentRegistry().getStateTransferManager().getCacheTopology().getTopologyId());

      // Allow the command to proceed on the originator.
      // NOTE: fixed log message - the blocked command is on cache0, not cache1,
      // and tracef takes format arguments instead of concatenated strings.
      log.tracef("Unblocking the write command on node %s", address(0));
      beforeCache0Barrier.await(10, TimeUnit.SECONDS);

      // Wait for the retry after the OutdatedTopologyException
      beforeCache0Barrier.await(10, TimeUnit.SECONDS);
      // Do not block during (possible) further retries, and allow it to proceed
      blockingInterceptor0.suspend(true);
      beforeCache0Barrier.await(10, TimeUnit.SECONDS);

      // Allow the topology update to proceed on the other caches
      checkPoint.triggerAll();

      // Wait for the topology to change everywhere
      TestingUtil.waitForNoRebalance(cache0, cache1, cache2);

      // Check that the put command didn't fail
      Object result = future.get(10, TimeUnit.SECONDS);
      // TODO ISPN-7590: Return values are not reliable, if the command is retried after being applied to both backup
      // owners the retry will provide incorrect return value
      // assertEquals(op.getReturnValue(), result);
      log.tracef("Write operation is done");

      // Check the value on all the nodes
      assertEquals(op.getValue(), cache0.get(key));
      assertEquals(op.getValue(), cache1.get(key));
      assertEquals(op.getValue(), cache2.get(key));

      // Check that there are no leaked locks
      assertFalse(cache0.getAdvancedCache().getLockManager().isLocked(key));
      assertFalse(cache1.getAdvancedCache().getLockManager().isLocked(key));
      assertFalse(cache2.getAdvancedCache().getLockManager().isLocked(key));
   }

   /**
    * Deterministic CH for the single segment: with 3 members the last joiner
    * becomes the primary owner, which is exactly the ownership flip doTest()
    * relies on.
    */
   private static class CustomConsistentHashFactory extends BaseControlledConsistentHashFactory {
      private CustomConsistentHashFactory() {
         super(1);
      }

      @Override
      protected List<Address> createOwnersCollection(List<Address> members, int numberOfOwners, int segmentIndex) {
         assertEquals(2, numberOfOwners);
         if (members.size() == 1)
            return Arrays.asList(members.get(0));
         else if (members.size() == 2)
            return Arrays.asList(members.get(0), members.get(1));
         else
            // 3+ members: newest member is primary, first member is backup
            return Arrays.asList(members.get(members.size() - 1), members.get(0));
      }
   }

   /**
    * Replaces the node's LocalTopologyManager with a Mockito spy that pauses
    * every topology update (except the initial one at {@code currentTopologyId})
    * until the test explicitly triggers "allow_topology_&lt;id&gt;_on_&lt;address&gt;"
    * on the shared CheckPoint.
    */
   private void addBlockingLocalTopologyManager(final EmbeddedCacheManager manager, final CheckPoint checkPoint,
                                                final int currentTopologyId)
         throws InterruptedException {
      LocalTopologyManager component = TestingUtil.extractGlobalComponent(manager, LocalTopologyManager.class);
      LocalTopologyManager spyLtm = spy(component);
      doAnswer(invocation -> {
         CacheTopology topology = (CacheTopology) invocation.getArguments()[1];
         // Ignore the first topology update on the joiner, which is with the topology before the join
         if (topology.getTopologyId() != currentTopologyId) {
            checkPoint.trigger("pre_topology_" + topology.getTopologyId() + "_on_" + manager.getAddress());
            checkPoint.await("allow_topology_" + topology.getTopologyId() + "_on_" + manager.getAddress(),
                  10, TimeUnit.SECONDS);
         }
         return invocation.callRealMethod();
      }).when(spyLtm).handleTopologyUpdate(eq(CacheContainer.DEFAULT_CACHE_NAME), any(CacheTopology.class),
            any(AvailabilityMode.class), anyInt(), any(Address.class));
      TestingUtil.extractGlobalComponentRegistry(manager).registerComponent(spyLtm, LocalTopologyManager.class);
   }
}