package org.infinispan.partitionhandling;
import static org.infinispan.test.concurrent.StateSequencerUtil.advanceOnInboundRpc;
import static org.infinispan.test.concurrent.StateSequencerUtil.matchCommand;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.fail;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.infinispan.configuration.cache.CacheMode;
import org.infinispan.configuration.cache.ConfigurationBuilder;
import org.infinispan.distribution.MagicKey;
import org.infinispan.distribution.ch.ConsistentHash;
import org.infinispan.manager.CacheContainer;
import org.infinispan.manager.EmbeddedCacheManager;
import org.infinispan.partitionhandling.impl.PartitionHandlingManager;
import org.infinispan.remoting.transport.Address;
import org.infinispan.remoting.transport.Transport;
import org.infinispan.remoting.transport.jgroups.JGroupsAddress;
import org.infinispan.remoting.transport.jgroups.JGroupsTransport;
import org.infinispan.statetransfer.StateResponseCommand;
import org.infinispan.test.MultipleCacheManagersTest;
import org.infinispan.test.TestingUtil;
import org.infinispan.test.concurrent.StateSequencer;
import org.infinispan.topology.LocalTopologyManager;
import org.infinispan.util.ControlledConsistentHashFactory;
import org.infinispan.util.logging.Log;
import org.infinispan.util.logging.LogFactory;
import org.jgroups.JChannel;
import org.jgroups.View;
import org.jgroups.protocols.DISCARD;
import org.jgroups.protocols.TP;
import org.jgroups.protocols.pbcast.GMS;
import org.jgroups.stack.ProtocolStack;
import org.testng.annotations.Test;
/**
* With a cluster made out of nodes {A,B,C,D}, tests that D crashes and before the state transfer finishes, another node
* C crashes. {A,B} should enter in degraded mode. The only way in which it could recover is explicitly, through JMX
* operations.
*/
@Test(groups = "functional", testName = "partitionhandling.NumOwnersNodeCrashInSequenceTest")
public class NumOwnersNodeCrashInSequenceTest extends MultipleCacheManagersTest {
   // Logger is a per-class constant; declare it final.
   private static final Log log = LogFactory.getLog(NumOwnersNodeCrashInSequenceTest.class);

   // Lets the test pin each hash segment's owners to specific node indexes.
   ControlledConsistentHashFactory cchf;
   private ConfigurationBuilder configBuilder;
   // Availability mode the surviving majority is expected to enter; subclasses may override.
   protected AvailabilityMode expectedAvailabilityMode;

   public NumOwnersNodeCrashInSequenceTest() {
      // Each test method kills nodes, so rebuild the cluster per method.
      cleanup = CleanupPhase.AFTER_METHOD;
      expectedAvailabilityMode = AvailabilityMode.DEGRADED_MODE;
   }

   @Override
   protected void createCacheManagers() throws Throwable {
      // 4 segments, each owned by a distinct pair of the 4 nodes (indexes 0..3).
      cchf = new ControlledConsistentHashFactory(new int[]{0, 1}, new int[]{1, 2},
            new int[]{2, 3}, new int[]{3, 0});
      configBuilder = getDefaultClusteredCacheConfig(CacheMode.DIST_SYNC);
      configBuilder.clustering().partitionHandling().enabled(true);
      configBuilder.clustering().hash().numSegments(4).stateTransfer().timeout(30000);
   }

   // The 8 variants below cover every assignment of the two surviving nodes (a0, a1)
   // and the two crashing nodes (c0 = crashes second, c1 = crashes first).
   public void testNodeCrashedBeforeStFinished0() throws Exception {
      testNodeCrashedBeforeStFinished(0, 1, 2, 3);
   }

   public void testNodeCrashedBeforeStFinished1() throws Exception {
      testNodeCrashedBeforeStFinished(0, 2, 1, 3);
   }

   public void testNodeCrashedBeforeStFinished2() throws Exception {
      testNodeCrashedBeforeStFinished(0, 3, 1, 2);
   }

   public void testNodeCrashedBeforeStFinished3() throws Exception {
      testNodeCrashedBeforeStFinished(1, 2, 0, 3);
   }

   public void testNodeCrashedBeforeStFinished4() throws Exception {
      testNodeCrashedBeforeStFinished(1, 3, 0, 2);
   }

   public void testNodeCrashedBeforeStFinished5() throws Exception {
      testNodeCrashedBeforeStFinished(2, 3, 0, 1);
   }

   public void testNodeCrashedBeforeStFinished6() throws Exception {
      testNodeCrashedBeforeStFinished(1, 2, 3, 0);
   }

   public void testNodeCrashedBeforeStFinished7() throws Exception {
      testNodeCrashedBeforeStFinished(2, 3, 1, 0);
   }

   /**
    * Crashes node {@code c1}, then — while the resulting state transfer to {@code a1} is
    * still in progress — crashes {@code c0} as well. The surviving pair {@code a0,a1}
    * must enter {@link #expectedAvailabilityMode}, reject operations on keys owned by
    * the lost nodes, and finally recover when availability is forced back to AVAILABLE.
    *
    * @param a0 index of the first surviving node
    * @param a1 index of the second surviving node (receives state during rebalance)
    * @param c0 index of the node crashed second
    * @param c1 index of the node crashed first
    */
   private void testNodeCrashedBeforeStFinished(final int a0, final int a1, final int c0, final int c1) throws Exception {
      cchf.setOwnerIndexes(new int[]{a0, a1}, new int[]{a1, c0},
            new int[]{c0, c1}, new int[]{c1, a0});
      configBuilder.clustering().hash().consistentHashFactory(cchf);
      createCluster(configBuilder, 4);
      waitForClusterToForm();

      // Two keys per segment, so every ownership pair is exercised.
      Object k0 = new MagicKey("k1", cache(a0), cache(a1));
      Object k1 = new MagicKey("k2", cache(a0), cache(a1));
      Object k2 = new MagicKey("k3", cache(a1), cache(c0));
      Object k3 = new MagicKey("k4", cache(a1), cache(c0));
      Object k4 = new MagicKey("k5", cache(c0), cache(c1));
      Object k5 = new MagicKey("k6", cache(c0), cache(c1));
      Object k6 = new MagicKey("k7", cache(c1), cache(a0));
      Object k7 = new MagicKey("k8", cache(c1), cache(a0));
      final Object[] allKeys = new Object[] {k0, k1, k2, k3, k4, k5, k6, k7};
      for (Object k : allKeys) cache(a0).put(k, k);

      StateSequencer ss = new StateSequencer();
      ss.logicalThread("main", "main:st_in_progress", "main:2nd_node_left", "main:cluster_degraded", "main:after_cluster_degraded");
      // Block the first StateResponseCommand arriving at a1 until the cluster has degraded,
      // i.e. keep the rebalance triggered by c1's crash "in progress".
      advanceOnInboundRpc(ss, advancedCache(a1),
            matchCommand(StateResponseCommand.class).matchCount(0).build())
            .before("main:st_in_progress", "main:cluster_degraded");

      // When the coordinator node stops gracefully there are two rebalance operations, one with the old coord
      // and one with the new coord. The second StateResponseCommand must only be processed
      // after the cluster has entered degraded mode.
      advanceOnInboundRpc(ss, advancedCache(a1),
            matchCommand(StateResponseCommand.class).matchCount(1).build())
            .before("main:after_cluster_degraded");

      // Prepare for rebalance. Manager a1 will request state from c0 for segment 2
      cchf.setMembersToUse(advancedCache(a0).getRpcManager().getTransport().getMembers());
      cchf.setOwnerIndexes(new int[]{a0, a1}, new int[]{a1, c0},
            new int[]{c0, a1}, new int[]{c0, a0});

      Address missing = address(c1);
      log.tracef("Before killing node %s", missing);
      crashCacheManagers(manager(c1));
      installNewView(advancedCache(a0).getRpcManager().getTransport().getMembers(), missing, manager(a0), manager(a1)
            , manager(c0));

      ss.enter("main:2nd_node_left");
      missing = address(c0);
      log.tracef("Killing 2nd node %s", missing);
      crashCacheManagers(manager(c0));
      installNewView(advancedCache(a0).getRpcManager().getTransport().getMembers(), missing, manager(a0), manager(a1));

      // The survivors lost both owners of segment 2; they must degrade.
      final PartitionHandlingManager phm0 = TestingUtil.extractComponent(cache(a0), PartitionHandlingManager.class);
      final PartitionHandlingManager phm1 = TestingUtil.extractComponent(cache(a1), PartitionHandlingManager.class);
      eventually(new Condition() {
         @Override
         public boolean isSatisfied() throws Exception {
            return phm0.getAvailabilityMode() == expectedAvailabilityMode && phm1.getAvailabilityMode() == expectedAvailabilityMode;
         }
      });
      ss.exit("main:2nd_node_left");

      log.trace("Testing condition");
      ConsistentHash ch = cache(a0).getAdvancedCache().getDistributionManager().getReadConsistentHash();
      assertEquals(3, ch.getMembers().size());
      for (Object k : allKeys) {
         Collection<Address> owners = ch.locateOwners(k);
         // Operations on keys owned by a crashed node must throw AvailabilityException;
         // keys fully owned by the survivors must still be accessible.
         try {
            cache(a0).get(k);
            if (owners.contains(address(c0)) || owners.contains(address(c1))) {
               fail("get(" + k + ") should have failed on cache " + address(a0));
            }
         } catch (AvailabilityException ignored) {
            // expected when an owner is unreachable
         }
         try {
            cache(a1).put(k, k);
            if (owners.contains(address(c0)) || owners.contains(address(c1))) {
               // The put is issued on cache a1, so report a1's address on failure.
               fail("put(" + k + ", v) should have failed on cache " + address(a1));
            }
         } catch (AvailabilityException ignored) {
            // expected when an owner is unreachable
         }
      }

      log.debug("Changing partition availability mode back to AVAILABLE");
      // Reassign all segments to the two survivors before re-enabling availability.
      cchf.setOwnerIndexes(new int[]{a0, a1}, new int[]{a1, a0},
            new int[]{a0, a1}, new int[]{a1, a0});
      LocalTopologyManager ltm = TestingUtil.extractGlobalComponent(manager(a0), LocalTopologyManager.class);
      ltm.setCacheAvailability(CacheContainer.DEFAULT_CACHE_NAME, AvailabilityMode.AVAILABLE);
      TestingUtil.waitForNoRebalance(cache(a0), cache(a1));
      eventuallyEquals(AvailabilityMode.AVAILABLE, phm0::getAvailabilityMode);
   }

   /**
    * Installs a new JGroups view on the given managers that excludes the {@code missing}
    * member, simulating the cluster detecting a crashed node.
    *
    * @param members current cluster members (the missing one is filtered out)
    * @param missing address of the crashed member to exclude from the new view
    * @param where   cache managers that should receive the new view
    */
   private void installNewView(List<Address> members, Address missing, EmbeddedCacheManager... where) {
      log.tracef("installNewView:members=%s, missing=%s", members, missing);
      final List<org.jgroups.Address> viewMembers = new ArrayList<>();
      for (Address a : members)
         if (!a.equals(missing))
            viewMembers.add(((JGroupsAddress) a).getJGroupsAddress());
      int viewId = where[0].getTransport().getViewId() + 1;
      View view = View.create(viewMembers.get(0), viewId, viewMembers.toArray(new org.jgroups.Address[viewMembers.size()]));

      log.trace("Before installing new view:" + viewMembers);
      for (EmbeddedCacheManager ecm : where) {
         JChannel c = ((JGroupsTransport) ecm.getTransport()).getChannel();
         // Bypass GMS flow control and force the view directly.
         ((GMS) c.getProtocolStack().findProtocol(GMS.class)).installView(view);
      }
   }

   /**
    * Simulates a node crash, discarding all the messages from/to this node and then stopping the caches.
    */
   protected void crashCacheManagers(EmbeddedCacheManager... cacheManagers) {
      for (EmbeddedCacheManager cm : cacheManagers) {
         JGroupsTransport t = (JGroupsTransport) cm.getGlobalComponentRegistry().getComponent(Transport.class);
         JChannel channel = t.getChannel();
         try {
            // Insert DISCARD above the transport so the node goes silent instead of leaving gracefully.
            DISCARD discard = new DISCARD();
            discard.setDiscardAll(true);
            channel.getProtocolStack().insertProtocol(discard, ProtocolStack.Position.ABOVE, TP.class);
         } catch (Exception e) {
            log.warn("Problems inserting discard", e);
            throw new RuntimeException(e);
         }
         // Give the crashed node a singleton view so it stops talking to the survivors.
         View view = View.create(channel.getAddress(), 100, channel.getAddress());
         ((GMS) channel.getProtocolStack().findProtocol(GMS.class)).installView(view);
      }
      TestingUtil.killCacheManagers(cacheManagers);
   }
}