package org.infinispan.distribution.rehash;
import static org.infinispan.test.TestingUtil.waitForNoRebalance;
import static org.infinispan.test.concurrent.StateSequencerUtil.advanceOnInboundRpc;
import static org.infinispan.test.concurrent.StateSequencerUtil.advanceOnOutboundRpc;
import static org.infinispan.test.concurrent.StateSequencerUtil.matchCommand;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNull;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import org.infinispan.commands.ReplicableCommand;
import org.infinispan.configuration.cache.CacheMode;
import org.infinispan.configuration.cache.ConfigurationBuilder;
import org.infinispan.container.entries.ImmortalCacheEntry;
import org.infinispan.container.entries.InternalCacheEntry;
import org.infinispan.distribution.MagicKey;
import org.infinispan.manager.CacheContainer;
import org.infinispan.remoting.inboundhandler.DeliverOrder;
import org.infinispan.remoting.inboundhandler.PerCacheInboundInvocationHandler;
import org.infinispan.remoting.inboundhandler.Reply;
import org.infinispan.remoting.transport.Address;
import org.infinispan.statetransfer.StateChunk;
import org.infinispan.statetransfer.StateConsumer;
import org.infinispan.statetransfer.StateRequestCommand;
import org.infinispan.statetransfer.StateResponseCommand;
import org.infinispan.statetransfer.StateTransferManager;
import org.infinispan.test.MultipleCacheManagersTest;
import org.infinispan.test.TestingUtil;
import org.infinispan.test.concurrent.CommandMatcher;
import org.infinispan.test.concurrent.StateSequencer;
import org.infinispan.test.fwk.CleanupAfterMethod;
import org.infinispan.test.fwk.TestCacheManagerFactory;
import org.infinispan.util.ByteString;
import org.infinispan.util.ControlledConsistentHashFactory;
import org.testng.annotations.Test;
/**
* Start two rebalance operations by stopping two members of a cluster in sequence.
* Test that a delayed StateResponseCommand doesn't break state transfer.
* See https://issues.jboss.org/browse/ISPN-3120
*
* @author Dan Berindei
*/
@CleanupAfterMethod
@Test(groups = "functional", testName = "distribution.rehash.StateResponseOrderingTest")
public class StateResponseOrderingTest extends MultipleCacheManagersTest {
private ControlledConsistentHashFactory consistentHashFactory;
@Override
protected void createCacheManagers() throws Throwable {
consistentHashFactory = new ControlledConsistentHashFactory(new int[]{1, 2, 3}, new int[]{1, 2, 3});
ConfigurationBuilder builder = TestCacheManagerFactory.getDefaultCacheConfiguration(true);
builder.clustering().cacheMode(CacheMode.DIST_SYNC).hash().numOwners(3);
builder.clustering().hash().numSegments(2).consistentHashFactory(consistentHashFactory);
createCluster(builder, 4);
waitForClusterToForm();
}
public void testSimulatedOldStateResponse() throws Throwable {
// Initial owners for both segments are cache 1 and cache 2
// Start a rebalance, with cache 0 becoming an owner of both CH segments
// Block the first StateRequestCommand on cache 0
// While state transfer is blocked, simulate an old state response command on cache 0
// Check that the old command is ignored and state transfer completes successfully
StateSequencer sequencer = new StateSequencer();
sequencer.logicalThread("st", "st:block_state_request", "st:simulate_old_response", "st:resume_state_request");
cache(1).put("k1", "v1");
cache(2).put("k2", "v2");
cache(3).put("k3", "v3");
final StateTransferManager stm0 = advancedCache(0).getComponentRegistry().getStateTransferManager();
final int initialTopologyId = stm0.getCacheTopology().getTopologyId();
assertEquals(Arrays.asList(address(1), address(2), address(3)), stm0.getCacheTopology().getCurrentCH().locateOwners("k1"));
assertNull(stm0.getCacheTopology().getPendingCH());
// Block when cache 0 sends the first state request to cache 1
CommandMatcher segmentRequestMatcher = new CommandMatcher() {
@Override
public boolean accept(ReplicableCommand command) {
if (!(command instanceof StateRequestCommand))
return false;
StateRequestCommand stateRequestCommand = (StateRequestCommand) command;
if (stateRequestCommand.getType() != StateRequestCommand.Type.START_STATE_TRANSFER)
return false;
return stateRequestCommand.getTopologyId() == initialTopologyId + 1;
}
};
advanceOnOutboundRpc(sequencer, cache(0), segmentRequestMatcher)
.before("st:block_state_request", "st:resume_state_request");
// Cache 0 will become an owner and will request state from cache 1
consistentHashFactory.setOwnerIndexes(new int[]{0, 1, 2}, new int[]{0, 1, 2});
consistentHashFactory.triggerRebalance(cache(0));
sequencer.enter("st:simulate_old_response");
assertNotNull(stm0.getCacheTopology().getPendingCH());
assertEquals(Arrays.asList(address(0), address(1), address(2)), stm0.getCacheTopology().getPendingCH().locateOwners("k1"));
// Cache 0 didn't manage to request any segments yet, but it has registered all the inbound transfer tasks.
// We'll pretend it got a StateResponseCommand with an older topology id.
PerCacheInboundInvocationHandler handler = TestingUtil.extractComponent(cache(0), PerCacheInboundInvocationHandler.class);
StateChunk stateChunk0 = new StateChunk(0, Arrays.<InternalCacheEntry>asList(new ImmortalCacheEntry("k0", "v0")), true);
StateChunk stateChunk1 = new StateChunk(1, Arrays.<InternalCacheEntry>asList(new ImmortalCacheEntry("k0", "v0")), true);
StateResponseCommand stateResponseCommand = new StateResponseCommand(ByteString.fromString(CacheContainer.DEFAULT_CACHE_NAME),
address(1), initialTopologyId, Arrays.asList(stateChunk0, stateChunk1));
// Call with preserveOrder = true to force the execution in the same thread
stateResponseCommand.setOrigin(address(3));
stateResponseCommand.init(TestingUtil.extractComponent(cache(0), StateConsumer.class));
handler.handle(stateResponseCommand, new Reply() {
@Override
public void reply(Object returnValue) {
//no-op
}
}, DeliverOrder.PER_SENDER);
sequencer.exit("st:simulate_old_response");
waitForNoRebalance(cache(0), cache(1), cache(2), cache(3));
// Check that state wasn't lost
assertTrue(stm0.getCacheTopology().getReadConsistentHash().isKeyLocalToNode(address(0), "k1"));
assertTrue(stm0.getCacheTopology().getReadConsistentHash().isKeyLocalToNode(address(0), "k2"));
assertTrue(stm0.getCacheTopology().getReadConsistentHash().isKeyLocalToNode(address(0), "k3"));
assertEquals("v1", cache(0).get("k1"));
assertEquals("v2", cache(0).get("k2"));
assertEquals("v3", cache(0).get("k3"));
// Check that the old state response was ignored
assertNull(cache(0).get("k0"));
}
public void testStateResponseWhileRestartingBrokenTransfers() throws Throwable {
// The initial topology is different from the other method's
consistentHashFactory.setOwnerIndexes(new int[]{1, 2, 3}, new int[]{2, 1, 3});
consistentHashFactory.triggerRebalance(cache(0));
// waitForStableTopology doesn't work here, since the cache looks already "balanced"
// So we wait for the primary owner of segment 1 to change
eventually(new Condition() {
@Override
public boolean isSatisfied() throws Exception {
return advancedCache(0).getDistributionManager().getReadConsistentHash().locatePrimaryOwnerForSegment(1).equals(address(2));
}
});
// See https://issues.jboss.org/browse/ISPN-3120?focusedCommentId=12777231
// Start with segment 0 owned by [cache1, cache2, cache3], and segment 1 owned by [cache2, cache1, cache3]
// Trigger a rebalance with cache0 becoming an owner for both segments
// Wait for either cache1 or cache2 to send a StateResponseCommand
// Block the state response on cache0
// Kill the node that didn't receive the request
// Block new state requests from cache0 so that the killed node's segment doesn't have a transfer task
// Unblock the first state response
// Check that the StateResponseCommand hasn't marked state transfer as completed
// Unblock the new state request
// Wait for the state transfer to end and check that state hasn't been lost
StateSequencer sequencer = new StateSequencer();
sequencer.logicalThread("st", "st:block_first_state_response", "st:kill_node", "st:block_second_state_request",
"st:resume_first_state_response", "st:after_first_state_response", "st:check_incomplete",
"st:resume_second_state_request");
final AtomicReference<Address> firstResponseSender = new AtomicReference<>();
CommandMatcher firstStateResponseMatcher = new CommandMatcher() {
CommandMatcher realMatcher = matchCommand(StateResponseCommand.class).matchCount(0).build();
public boolean accept(ReplicableCommand command) {
if (!realMatcher.accept(command))
return false;
firstResponseSender.set(((StateResponseCommand) command).getOrigin());
return true;
}
};
advanceOnInboundRpc(sequencer, cache(0), firstStateResponseMatcher)
.before("st:block_first_state_response", "st:resume_first_state_response")
.after("st:after_first_state_response");
CommandMatcher secondStateRequestMatcher = new CommandMatcher() {
private final AtomicInteger counter = new AtomicInteger();
@Override
public boolean accept(ReplicableCommand command) {
if (command instanceof StateRequestCommand) {
StateRequestCommand stateRequestCommand = (StateRequestCommand) command;
if (stateRequestCommand.getType() == StateRequestCommand.Type.GET_TRANSACTIONS) {
// Commands 0 and 1 are sent during the first rebalance
// Command 2 is the first sent after the node is killed
if (counter.getAndIncrement() == 2)
return true;
log.debugf("Not blocking command %s", command);
}
}
return false;
}
};
advanceOnOutboundRpc(sequencer, cache(0), secondStateRequestMatcher)
.before("st:block_second_state_request", "st:resume_second_state_request");
final StateTransferManager stm0 = advancedCache(0).getComponentRegistry().getStateTransferManager();
MagicKey k1 = new MagicKey("k1", cache(1));
assertEquals(Arrays.asList(address(1), address(2), address(3)), stm0.getCacheTopology().getCurrentCH().locateOwners(k1));
cache(0).put(k1, "v1");
MagicKey k2 = new MagicKey("k2", cache(2));
assertEquals(Arrays.asList(address(2), address(1), address(3)), stm0.getCacheTopology().getCurrentCH().locateOwners(k2));
cache(0).put(k2, "v2");
// Start the rebalance
consistentHashFactory.setOwnerIndexes(new int[]{0, 1, 2}, new int[]{0, 2, 1});
consistentHashFactory.triggerRebalance(cache(0));
// Wait for cache0 to receive the state response
sequencer.enter("st:kill_node");
assertNotNull(stm0.getCacheTopology().getPendingCH());
// No need to update the owner indexes, the CH factory only knows about the cache members
int nodeToKeep = managerIndex(firstResponseSender.get());
int nodeToKill = nodeToKeep == 1 ? 2 : 1;
log.debugf("Blocked state response from %s, killing %s", firstResponseSender.get(), manager(nodeToKill));
cache(nodeToKill).stop();
eventually(new Condition() {
@Override
public boolean isSatisfied() throws Exception {
return stm0.getCacheTopology().getMembers().size() == 3;
}
});
sequencer.exit("st:kill_node");
sequencer.enter("st:check_incomplete");
assertTrue(stm0.isStateTransferInProgress());
sequencer.exit("st:check_incomplete");
// Only the 3 live caches are in the collection, wait for the rehash to end
waitForNoRebalance(cache(0), cache(nodeToKeep), cache(3));
assertTrue(stm0.getCacheTopology().getReadConsistentHash().isKeyLocalToNode(address(0), k1));
assertTrue(stm0.getCacheTopology().getReadConsistentHash().isKeyLocalToNode(address(0), k2));
assertEquals("v1", cache(0).get(k1));
assertEquals("v2", cache(0).get(k2));
}
}