/*
* Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.cluster;
import com.hazelcast.config.Config;
import com.hazelcast.config.JoinConfig;
import com.hazelcast.config.ListenerConfig;
import com.hazelcast.config.NetworkConfig;
import com.hazelcast.core.Hazelcast;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.core.LifecycleEvent;
import com.hazelcast.core.LifecycleEvent.LifecycleState;
import com.hazelcast.core.LifecycleListener;
import com.hazelcast.core.MemberAttributeEvent;
import com.hazelcast.core.MembershipAdapter;
import com.hazelcast.core.MembershipEvent;
import com.hazelcast.core.MembershipListener;
import com.hazelcast.instance.FirewallingNodeContext;
import com.hazelcast.instance.HazelcastInstanceFactory;
import com.hazelcast.map.merge.PassThroughMergePolicy;
import com.hazelcast.nio.tcp.FirewallingConnectionManager;
import com.hazelcast.spi.properties.GroupProperty;
import com.hazelcast.test.HazelcastSerialClassRunner;
import com.hazelcast.test.HazelcastTestSupport;
import com.hazelcast.test.annotation.NightlyTest;
import com.hazelcast.util.Clock;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import static com.hazelcast.cluster.ClusterState.ACTIVE;
import static com.hazelcast.cluster.ClusterState.FROZEN;
import static com.hazelcast.instance.HazelcastInstanceFactory.newHazelcastInstance;
import static com.hazelcast.internal.cluster.impl.AdvancedClusterStateTest.changeClusterStateEventually;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@RunWith(HazelcastSerialClassRunner.class)
@Category(NightlyTest.class)
public class SplitBrainHandlerTest extends HazelcastTestSupport {
@Before
@After
public void killAllHazelcastInstances() throws IOException {
HazelcastInstanceFactory.terminateAll();
}
@Test
public void testMulticast_ClusterMerge() throws Exception {
testClusterMerge(true);
}
@Test
public void testTcpIp_ClusterMerge() throws Exception {
testClusterMerge(false);
}
private void testClusterMerge(boolean multicast) throws Exception {
Config config1 = new Config();
config1.setProperty(GroupProperty.MERGE_FIRST_RUN_DELAY_SECONDS.getName(), "5");
config1.setProperty(GroupProperty.MERGE_NEXT_RUN_DELAY_SECONDS.getName(), "3");
String firstGroupName = generateRandomString(10);
config1.getGroupConfig().setName(firstGroupName);
NetworkConfig networkConfig1 = config1.getNetworkConfig();
JoinConfig join1 = networkConfig1.getJoin();
join1.getMulticastConfig().setEnabled(multicast);
join1.getTcpIpConfig().setEnabled(!multicast);
join1.getTcpIpConfig().addMember("127.0.0.1");
Config config2 = new Config();
config2.setProperty(GroupProperty.MERGE_FIRST_RUN_DELAY_SECONDS.getName(), "5");
config2.setProperty(GroupProperty.MERGE_NEXT_RUN_DELAY_SECONDS.getName(), "3");
String secondGroupName = generateRandomString(10);
config2.getGroupConfig().setName(secondGroupName);
NetworkConfig networkConfig2 = config2.getNetworkConfig();
JoinConfig join2 = networkConfig2.getJoin();
join2.getMulticastConfig().setEnabled(multicast);
join2.getTcpIpConfig().setEnabled(!multicast);
join2.getTcpIpConfig().addMember("127.0.0.1");
HazelcastInstance h1 = Hazelcast.newHazelcastInstance(config1);
HazelcastInstance h2 = Hazelcast.newHazelcastInstance(config2);
LifecycleCountingListener l = new LifecycleCountingListener();
h2.getLifecycleService().addLifecycleListener(l);
assertClusterSize(1, h1);
assertClusterSize(1, h2);
// warning: assuming group name will be visible to the split brain handler!
config1.getGroupConfig().setName(secondGroupName);
assertTrue(l.waitFor(LifecycleState.MERGED, 30));
assertEquals(1, l.getCount(LifecycleState.MERGING));
assertEquals(1, l.getCount(LifecycleState.MERGED));
assertClusterSize(2, h1, h2);
assertClusterState(ACTIVE, h1, h2);
}
@Test
public void testClusterShouldNotMergeDifferentGroupName() throws Exception {
Config config1 = new Config();
config1.setProperty(GroupProperty.MERGE_FIRST_RUN_DELAY_SECONDS.getName(), "5");
config1.setProperty(GroupProperty.MERGE_NEXT_RUN_DELAY_SECONDS.getName(), "3");
String firstGroupName = generateRandomString(10);
config1.getGroupConfig().setName(firstGroupName);
NetworkConfig networkConfig1 = config1.getNetworkConfig();
JoinConfig join1 = networkConfig1.getJoin();
join1.getMulticastConfig().setEnabled(true);
join1.getTcpIpConfig().addMember("127.0.0.1");
Config config2 = new Config();
config2.setProperty(GroupProperty.MERGE_FIRST_RUN_DELAY_SECONDS.getName(), "5");
config2.setProperty(GroupProperty.MERGE_NEXT_RUN_DELAY_SECONDS.getName(), "3");
String secondGroupName = generateRandomString(10);
config2.getGroupConfig().setName(secondGroupName);
NetworkConfig networkConfig2 = config2.getNetworkConfig();
JoinConfig join2 = networkConfig2.getJoin();
join2.getMulticastConfig().setEnabled(true);
join2.getTcpIpConfig().addMember("127.0.0.1");
HazelcastInstance h1 = Hazelcast.newHazelcastInstance(config1);
HazelcastInstance h2 = Hazelcast.newHazelcastInstance(config2);
LifecycleCountingListener l = new LifecycleCountingListener();
h2.getLifecycleService().addLifecycleListener(l);
assertClusterSize(1, h1);
assertClusterSize(1, h2);
HazelcastTestSupport.sleepSeconds(10);
assertEquals(0, l.getCount(LifecycleState.MERGING));
assertEquals(0, l.getCount(LifecycleState.MERGED));
assertClusterSize(1, h1);
assertClusterSize(1, h2);
}
private static class LifecycleCountingListener implements LifecycleListener {
Map<LifecycleState, AtomicInteger> counter = new ConcurrentHashMap<LifecycleState, AtomicInteger>();
BlockingQueue<LifecycleState> eventQueue = new LinkedBlockingQueue<LifecycleState>();
LifecycleCountingListener() {
for (LifecycleEvent.LifecycleState state : LifecycleEvent.LifecycleState.values()) {
counter.put(state, new AtomicInteger(0));
}
}
public void stateChanged(LifecycleEvent event) {
counter.get(event.getState()).incrementAndGet();
eventQueue.offer(event.getState());
}
int getCount(LifecycleEvent.LifecycleState state) {
return counter.get(state).get();
}
boolean waitFor(LifecycleEvent.LifecycleState state, int seconds) {
long remainingMillis = TimeUnit.SECONDS.toMillis(seconds);
while (remainingMillis >= 0) {
LifecycleEvent.LifecycleState received = null;
try {
long now = Clock.currentTimeMillis();
received = eventQueue.poll(remainingMillis, TimeUnit.MILLISECONDS);
remainingMillis -= (Clock.currentTimeMillis() - now);
} catch (InterruptedException e) {
return false;
}
if (received != null && received == state) {
return true;
}
}
return false;
}
}
@Test
public void testMulticast_MergeAfterSplitBrain() throws InterruptedException {
testMergeAfterSplitBrain(true);
}
@Test
public void testTcpIp_MergeAfterSplitBrain() throws InterruptedException {
testMergeAfterSplitBrain(false);
}
private void testMergeAfterSplitBrain(boolean multicast) throws InterruptedException {
String groupName = generateRandomString(10);
Config config = new Config();
config.setProperty(GroupProperty.MERGE_FIRST_RUN_DELAY_SECONDS.getName(), "5");
config.setProperty(GroupProperty.MERGE_NEXT_RUN_DELAY_SECONDS.getName(), "3");
config.getGroupConfig().setName(groupName);
NetworkConfig networkConfig = config.getNetworkConfig();
JoinConfig join = networkConfig.getJoin();
join.getMulticastConfig().setEnabled(multicast);
join.getTcpIpConfig().setEnabled(!multicast);
join.getTcpIpConfig().addMember("127.0.0.1");
HazelcastInstance h1 = Hazelcast.newHazelcastInstance(config);
HazelcastInstance h2 = Hazelcast.newHazelcastInstance(config);
HazelcastInstance h3 = Hazelcast.newHazelcastInstance(config);
assertClusterSize(3, h1, h3);
assertClusterSizeEventually(3, h2);
final CountDownLatch splitLatch = new CountDownLatch(2);
h3.getCluster().addMembershipListener(new MembershipListener() {
@Override
public void memberAdded(MembershipEvent membershipEvent) {
}
@Override
public void memberRemoved(MembershipEvent membershipEvent) {
splitLatch.countDown();
}
@Override
public void memberAttributeChanged(MemberAttributeEvent memberAttributeEvent) {
}
});
final CountDownLatch mergeLatch = new CountDownLatch(1);
h3.getLifecycleService().addLifecycleListener(new MergedEventLifeCycleListener(mergeLatch));
closeConnectionBetween(h1, h3);
closeConnectionBetween(h2, h3);
assertTrue(splitLatch.await(10, TimeUnit.SECONDS));
assertClusterSizeEventually(2, h1, h2);
assertClusterSize(1, h3);
assertTrue(mergeLatch.await(30, TimeUnit.SECONDS));
assertClusterSizeEventually(3, h1, h2, h3);
assertClusterState(ACTIVE, h1, h2, h3);
}
@Test
public void testTcpIpSplitBrainJoinsCorrectCluster() throws Exception {
// This port selection ensures that when h3 restarts it will try to join h4 instead of joining the nodes in cluster one
Config c1 = buildConfig(false, 15702);
Config c2 = buildConfig(false, 15704);
Config c3 = buildConfig(false, 15703);
Config c4 = buildConfig(false, 15701);
List<String> clusterOneMembers = Arrays.asList("127.0.0.1:15702", "127.0.0.1:15704");
List<String> clusterTwoMembers = Arrays.asList("127.0.0.1:15703", "127.0.0.1:15701");
c1.getNetworkConfig().getJoin().getTcpIpConfig().setMembers(clusterOneMembers);
c2.getNetworkConfig().getJoin().getTcpIpConfig().setMembers(clusterOneMembers);
c3.getNetworkConfig().getJoin().getTcpIpConfig().setMembers(clusterTwoMembers);
c4.getNetworkConfig().getJoin().getTcpIpConfig().setMembers(clusterTwoMembers);
final CountDownLatch latch = new CountDownLatch(2);
c3.addListenerConfig(new ListenerConfig(new MergedEventLifeCycleListener(latch)));
c4.addListenerConfig(new ListenerConfig(new MergedEventLifeCycleListener(latch)));
HazelcastInstance h1 = Hazelcast.newHazelcastInstance(c1);
HazelcastInstance h2 = Hazelcast.newHazelcastInstance(c2);
HazelcastInstance h3 = Hazelcast.newHazelcastInstance(c3);
HazelcastInstance h4 = Hazelcast.newHazelcastInstance(c4);
// We should have two clusters of two
assertClusterSize(2, h1, h2);
assertClusterSize(2, h3, h4);
List<String> allMembers = Arrays.asList("127.0.0.1:15701", "127.0.0.1:15704", "127.0.0.1:15703",
"127.0.0.1:15702");
/*
* This simulates restoring a network connection between h3 and the
* other cluster. But it only make h3 aware of the other cluster so for
* h4 to restart it will have to be notified by h3.
*/
h3.getConfig().getNetworkConfig().getJoin().getTcpIpConfig().setMembers(allMembers);
h4.getConfig().getNetworkConfig().getJoin().getTcpIpConfig().clear().setMembers(Collections.<String>emptyList());
assertTrue(latch.await(60, TimeUnit.SECONDS));
// Both nodes from cluster two should have joined cluster one
assertClusterSizeEventually(4, h1, h2, h3, h4);
}
@Test
public void testTcpIpSplitBrainStillWorks_WhenTargetDisappears() throws Exception {
// The ports are ordered like this so h3 will always attempt to merge with h1
Config c1 = buildConfig(false, 25701);
Config c2 = buildConfig(false, 25704);
Config c3 = buildConfig(false, 25703);
List<String> clusterOneMembers = Arrays.asList("127.0.0.1:25701");
List<String> clusterTwoMembers = Arrays.asList("127.0.0.1:25704");
List<String> clusterThreeMembers = Arrays.asList("127.0.0.1:25703");
c1.getNetworkConfig().getJoin().getTcpIpConfig().setMembers(clusterOneMembers);
c2.getNetworkConfig().getJoin().getTcpIpConfig().setMembers(clusterTwoMembers);
c3.getNetworkConfig().getJoin().getTcpIpConfig().setMembers(clusterThreeMembers);
final HazelcastInstance h1 = Hazelcast.newHazelcastInstance(c1);
final HazelcastInstance h2 = Hazelcast.newHazelcastInstance(c2);
final CountDownLatch latch = new CountDownLatch(1);
c3.addListenerConfig(new ListenerConfig(new LifecycleListener() {
public void stateChanged(final LifecycleEvent event) {
if (event.getState() == LifecycleState.MERGING) {
h1.shutdown();
} else if (event.getState() == LifecycleState.MERGED) {
latch.countDown();
}
}
}));
final HazelcastInstance h3 = Hazelcast.newHazelcastInstance(c3);
// We should have three clusters of one
assertClusterSize(1, h1);
assertClusterSize(1, h2);
assertClusterSize(1, h3);
List<String> allMembers = Arrays.asList("127.0.0.1:25701", "127.0.0.1:25704", "127.0.0.1:25703");
h3.getConfig().getNetworkConfig().getJoin().getTcpIpConfig().setMembers(allMembers);
assertTrue(latch.await(60, TimeUnit.SECONDS));
// Both nodes from cluster two should have joined cluster one
assertFalse(h1.getLifecycleService().isRunning());
assertClusterSize(2, h2, h3);
}
private static Config buildConfig(boolean multicastEnabled, int port) {
Config c = new Config();
c.setProperty(GroupProperty.MERGE_FIRST_RUN_DELAY_SECONDS.getName(), "5");
c.setProperty(GroupProperty.MERGE_NEXT_RUN_DELAY_SECONDS.getName(), "3");
NetworkConfig networkConfig = c.getNetworkConfig();
networkConfig.setPort(port).setPortAutoIncrement(false);
networkConfig.getJoin().getMulticastConfig().setEnabled(multicastEnabled);
networkConfig.getJoin().getTcpIpConfig().setEnabled(!multicastEnabled);
return c;
}
@Test
public void testMulticastJoin_DuringSplitBrainHandlerRunning() throws InterruptedException {
String groupName = generateRandomString(10);
final CountDownLatch latch = new CountDownLatch(1);
Config config1 = new Config();
// bigger port to make sure address.hashCode() check pass during merge!
config1.getNetworkConfig().setPort(5901);
config1.getGroupConfig().setName(groupName);
config1.setProperty(GroupProperty.WAIT_SECONDS_BEFORE_JOIN.getName(), "5");
config1.setProperty(GroupProperty.MERGE_FIRST_RUN_DELAY_SECONDS.getName(), "0");
config1.setProperty(GroupProperty.MERGE_NEXT_RUN_DELAY_SECONDS.getName(), "0");
config1.addListenerConfig(new ListenerConfig(new LifecycleListener() {
public void stateChanged(final LifecycleEvent event) {
switch (event.getState()) {
case MERGING:
case MERGED:
latch.countDown();
default:
break;
}
}
}));
Hazelcast.newHazelcastInstance(config1);
Thread.sleep(5000);
Config config2 = new Config();
config2.getGroupConfig().setName(groupName);
config2.getNetworkConfig().setPort(5701);
config2.setProperty(GroupProperty.WAIT_SECONDS_BEFORE_JOIN.getName(), "5");
config2.setProperty(GroupProperty.MERGE_FIRST_RUN_DELAY_SECONDS.getName(), "0");
config2.setProperty(GroupProperty.MERGE_NEXT_RUN_DELAY_SECONDS.getName(), "0");
Hazelcast.newHazelcastInstance(config2);
assertFalse("Latch should not be countdown!", latch.await(3, TimeUnit.SECONDS));
}
@Test
public void testMulticast_ClusterMerge_when_split_not_detected_by_master() throws InterruptedException {
testClusterMerge_when_split_not_detected_by_master(true);
}
@Test
// https://github.com/hazelcast/hazelcast/issues/8137
public void testTcpIp_ClusterMerge_when_split_not_detected_by_master() throws InterruptedException {
testClusterMerge_when_split_not_detected_by_master(false);
}
private void testClusterMerge_when_split_not_detected_by_master(boolean multicastEnabled) throws InterruptedException {
Config config = new Config();
String groupName = generateRandomString(10);
config.getGroupConfig().setName(groupName);
config.setProperty(GroupProperty.MERGE_FIRST_RUN_DELAY_SECONDS.getName(), "10");
config.setProperty(GroupProperty.MERGE_NEXT_RUN_DELAY_SECONDS.getName(), "10");
config.setProperty(GroupProperty.MAX_NO_HEARTBEAT_SECONDS.getName(), "15");
config.setProperty(GroupProperty.MAX_JOIN_SECONDS.getName(), "10");
config.setProperty(GroupProperty.MAX_JOIN_MERGE_TARGET_SECONDS.getName(), "10");
NetworkConfig networkConfig = config.getNetworkConfig();
networkConfig.getJoin().getMulticastConfig().setEnabled(multicastEnabled);
networkConfig.getJoin().getTcpIpConfig().setEnabled(!multicastEnabled).addMember("127.0.0.1");
final HazelcastInstance hz1 = newHazelcastInstance(config, "test-node1", new FirewallingNodeContext());
HazelcastInstance hz2 = newHazelcastInstance(config, "test-node2", new FirewallingNodeContext());
HazelcastInstance hz3 = newHazelcastInstance(config, "test-node3", new FirewallingNodeContext());
assertClusterSize(3, hz1, hz3);
assertClusterSizeEventually(3, hz2);
final CountDownLatch splitLatch = new CountDownLatch(2);
MembershipAdapter membershipAdapter = new MembershipAdapter() {
@Override
public void memberRemoved(MembershipEvent event) {
if (getNode(hz1).getLocalMember().equals(event.getMember())) {
splitLatch.countDown();
}
}
};
hz2.getCluster().addMembershipListener(membershipAdapter);
hz3.getCluster().addMembershipListener(membershipAdapter);
final CountDownLatch mergeLatch = new CountDownLatch(1);
hz1.getLifecycleService().addLifecycleListener(new MergedEventLifeCycleListener(mergeLatch));
// block n2 & n3 on n1
block(hz1, hz2);
block(hz1, hz3);
// remove and block n1 on n2 & n3
disconnect(hz2, hz1);
disconnect(hz3, hz1);
block(hz2, hz1);
block(hz3, hz1);
assertTrue(splitLatch.await(120, TimeUnit.SECONDS));
assertClusterSize(3, hz1);
assertClusterSize(2, hz2, hz3);
// unblock n2 on n1 and n1 on n2 & n3
// n1 still blocks access to n3
unblock(hz1, hz2);
unblock(hz2, hz1);
unblock(hz3, hz1);
assertTrue(mergeLatch.await(120, TimeUnit.SECONDS));
assertClusterSizeEventually(3, hz1, hz2, hz3);
assertMasterAddress(getAddress(hz2), hz1, hz2, hz3);
}
@Test
public void testClusterMerge_ignoresLiteMembers() {
String groupName = generateRandomString(10);
HazelcastInstance lite1 = newHazelcastInstance(buildConfig(groupName, true), "lite1", new FirewallingNodeContext());
HazelcastInstance lite2 = newHazelcastInstance(buildConfig(groupName, true), "lite2", new FirewallingNodeContext());
HazelcastInstance data1 = newHazelcastInstance(buildConfig(groupName, false), "data1", new FirewallingNodeContext());
HazelcastInstance data2 = newHazelcastInstance(buildConfig(groupName, false), "data2", new FirewallingNodeContext());
HazelcastInstance data3 = newHazelcastInstance(buildConfig(groupName, false), "data3", new FirewallingNodeContext());
assertClusterSize(5, lite1, data3);
assertClusterSizeEventually(5, lite2, data1, data2);
final CountDownLatch splitLatch = new CountDownLatch(6);
data2.getCluster().addMembershipListener(new MemberRemovedMembershipListener(splitLatch));
data3.getCluster().addMembershipListener(new MemberRemovedMembershipListener(splitLatch));
final CountDownLatch mergeLatch = new CountDownLatch(3);
lite1.getLifecycleService().addLifecycleListener(new MergedEventLifeCycleListener(mergeLatch));
lite2.getLifecycleService().addLifecycleListener(new MergedEventLifeCycleListener(mergeLatch));
data1.getLifecycleService().addLifecycleListener(new MergedEventLifeCycleListener(mergeLatch));
block(lite1, data2);
block(lite2, data2);
block(data1, data2);
block(lite1, data3);
block(lite2, data3);
block(data1, data3);
disconnect(data2, lite1);
disconnect(data2, lite2);
disconnect(data2, data1);
disconnect(data3, lite1);
disconnect(data3, lite2);
disconnect(data3, data1);
disconnect(lite1, data2);
disconnect(lite2, data2);
disconnect(data1, data2);
disconnect(lite1, data3);
disconnect(lite2, data3);
disconnect(data1, data3);
assertOpenEventually(splitLatch, 10);
assertClusterSizeEventually(3, lite1, lite2, data1);
assertClusterSize(2, data2, data3);
data1.getMap("default").put(1, "cluster1");
data3.getMap("default").put(1, "cluster2");
unblock(lite1, data2);
unblock(lite2, data2);
unblock(data1, data2);
unblock(lite1, data3);
unblock(lite2, data3);
unblock(data1, data3);
assertOpenEventually(mergeLatch, 120);
assertClusterSizeEventually(5, lite1, lite2, data1, data2, data3);
assertEquals("cluster1", lite1.getMap("default").get(1));
}
@Test
public void testClustersShouldNotMergeWhenBiggerClusterIsNotActive() {
String groupName = generateRandomString(10);
HazelcastInstance hz1 = newHazelcastInstance(buildConfig(groupName, false), "hz1", new FirewallingNodeContext());
HazelcastInstance hz2 = newHazelcastInstance(buildConfig(groupName, false), "hz2", new FirewallingNodeContext());
HazelcastInstance hz3 = newHazelcastInstance(buildConfig(groupName, false), "hz3", new FirewallingNodeContext());
assertClusterSize(3, hz1, hz3);
assertClusterSizeEventually(3, hz2);
final CountDownLatch splitLatch = new CountDownLatch(2);
hz3.getCluster().addMembershipListener(new MemberRemovedMembershipListener(splitLatch));
block(hz1, hz3);
block(hz2, hz3);
disconnect(hz3, hz1);
disconnect(hz3, hz2);
disconnect(hz1, hz3);
disconnect(hz2, hz3);
assertOpenEventually(splitLatch, 10);
assertClusterSizeEventually(2, hz1, hz2);
assertClusterSize(1, hz3);
changeClusterStateEventually(hz1, FROZEN);
unblock(hz1, hz3);
unblock(hz2, hz3);
sleepAtLeastSeconds(10);
assertClusterSize(2, hz1, hz2);
assertClusterSize(1, hz3);
}
@Test
public void testClustersShouldNotMergeWhenSmallerClusterIsNotActive() {
String groupName = generateRandomString(10);
HazelcastInstance hz1 = newHazelcastInstance(buildConfig(groupName, false), "hz1", new FirewallingNodeContext());
HazelcastInstance hz2 = newHazelcastInstance(buildConfig(groupName, false), "hz2", new FirewallingNodeContext());
HazelcastInstance hz3 = newHazelcastInstance(buildConfig(groupName, false), "hz3", new FirewallingNodeContext());
assertClusterSize(3, hz1, hz3);
assertClusterSizeEventually(3, hz2);
final CountDownLatch splitLatch = new CountDownLatch(2);
hz3.getCluster().addMembershipListener(new MemberRemovedMembershipListener(splitLatch));
block(hz1, hz3);
block(hz2, hz3);
disconnect(hz3, hz1);
disconnect(hz3, hz2);
disconnect(hz1, hz3);
disconnect(hz2, hz3);
assertOpenEventually(splitLatch, 10);
assertClusterSizeEventually(2, hz1, hz2);
assertClusterSize(1, hz3);
changeClusterStateEventually(hz3, FROZEN);
unblock(hz1, hz3);
unblock(hz2, hz3);
sleepAtLeastSeconds(10);
assertClusterSize(2, hz1, hz2);
assertClusterSize(1, hz3);
}
private void block(final HazelcastInstance source, final HazelcastInstance target) {
getFireWalledConnectionManager(source).block(getNode(target).address);
getFireWalledConnectionManager(target).block(getNode(source).address);
}
private void unblock(final HazelcastInstance source, final HazelcastInstance target) {
getFireWalledConnectionManager(source).unblock(getNode(target).address);
getFireWalledConnectionManager(target).unblock(getNode(source).address);
}
private void disconnect(final HazelcastInstance source, final HazelcastInstance target) {
suspectMember(getNode(source), getNode(target));
}
private Config buildConfig(final String groupName, final boolean liteMember) {
Config config = new Config();
config.setProperty(GroupProperty.MERGE_FIRST_RUN_DELAY_SECONDS.getName(), "5");
config.setProperty(GroupProperty.MERGE_NEXT_RUN_DELAY_SECONDS.getName(), "3");
config.getGroupConfig().setName(groupName);
config.setLiteMember(liteMember);
NetworkConfig networkConfig = config.getNetworkConfig();
JoinConfig join = networkConfig.getJoin();
join.getMulticastConfig().setEnabled(true);
config.getMapConfig("default").setMergePolicy(PassThroughMergePolicy.class.getName());
return config;
}
@Test
// https://github.com/hazelcast/hazelcast/issues/8137
public void testClusterMerge_when_split_not_detected_by_slave() throws InterruptedException {
Config config = new Config();
String groupName = generateRandomString(10);
config.getGroupConfig().setName(groupName);
config.setProperty(GroupProperty.MERGE_FIRST_RUN_DELAY_SECONDS.getName(), "10");
config.setProperty(GroupProperty.MERGE_NEXT_RUN_DELAY_SECONDS.getName(), "10");
config.setProperty(GroupProperty.MAX_NO_HEARTBEAT_SECONDS.getName(), "15");
config.setProperty(GroupProperty.MAX_JOIN_SECONDS.getName(), "10");
config.setProperty(GroupProperty.MAX_JOIN_MERGE_TARGET_SECONDS.getName(), "10");
NetworkConfig networkConfig = config.getNetworkConfig();
networkConfig.getJoin().getMulticastConfig().setEnabled(false);
networkConfig.getJoin().getTcpIpConfig()
.setEnabled(true).addMember("127.0.0.1");
HazelcastInstance hz1 = newHazelcastInstance(config, "test-node1", new FirewallingNodeContext());
HazelcastInstance hz2 = newHazelcastInstance(config, "test-node2", new FirewallingNodeContext());
final HazelcastInstance hz3 = newHazelcastInstance(config, "test-node3", new FirewallingNodeContext());
assertClusterSize(3, hz1, hz3);
assertClusterSizeEventually(3, hz2);
final CountDownLatch splitLatch = new CountDownLatch(2);
MembershipAdapter membershipAdapter = new MembershipAdapter() {
@Override
public void memberRemoved(MembershipEvent event) {
if (getNode(hz3).getLocalMember().equals(event.getMember())) {
splitLatch.countDown();
}
}
};
hz1.getCluster().addMembershipListener(membershipAdapter);
hz2.getCluster().addMembershipListener(membershipAdapter);
final CountDownLatch mergeLatch = new CountDownLatch(1);
hz3.getLifecycleService().addLifecycleListener(new MergedEventLifeCycleListener(mergeLatch));
block(hz3, hz1);
block(hz3, hz2);
disconnect(hz1, hz3);
disconnect(hz2, hz3);
block(hz1, hz3);
block(hz2, hz3);
assertTrue(splitLatch.await(30, TimeUnit.SECONDS));
assertClusterSize(2, hz1, hz2);
assertClusterSize(3, hz3);
unblock(hz3, hz1);
unblock(hz1, hz3);
unblock(hz2, hz3);
assertTrue(mergeLatch.await(120, TimeUnit.SECONDS));
assertClusterSizeEventually(3, hz1, hz2, hz3);
assertMasterAddress(getAddress(hz1), hz1, hz2, hz3);
}
@Test
// https://github.com/hazelcast/hazelcast/issues/8137
public void testClusterMerge_when_split_not_detected_by_slave_and_restart_during_merge() throws InterruptedException {
Config config = new Config();
String groupName = generateRandomString(10);
config.getGroupConfig().setName(groupName);
config.setProperty(GroupProperty.MERGE_FIRST_RUN_DELAY_SECONDS.getName(), "10");
config.setProperty(GroupProperty.MERGE_NEXT_RUN_DELAY_SECONDS.getName(), "10");
config.setProperty(GroupProperty.MAX_NO_HEARTBEAT_SECONDS.getName(), "15");
config.setProperty(GroupProperty.MAX_JOIN_SECONDS.getName(), "40");
config.setProperty(GroupProperty.MAX_JOIN_MERGE_TARGET_SECONDS.getName(), "10");
NetworkConfig networkConfig = config.getNetworkConfig();
networkConfig.getJoin().getMulticastConfig().setEnabled(false);
networkConfig.getJoin().getTcpIpConfig()
.setEnabled(true).addMember("127.0.0.1:5701").addMember("127.0.0.1:5702").addMember("127.0.0.1:5703");
networkConfig.setPort(5702);
HazelcastInstance hz2 = newHazelcastInstance(config, "test-node2", new FirewallingNodeContext());
networkConfig.setPort(5703);
HazelcastInstance hz3 = newHazelcastInstance(config, "test-node3", new FirewallingNodeContext());
networkConfig.setPort(5701);
final HazelcastInstance hz1 = newHazelcastInstance(config, "test-node1", new FirewallingNodeContext());
assertClusterSize(3, hz2, hz1);
assertClusterSizeEventually(3, hz3);
final CountDownLatch splitLatch = new CountDownLatch(2);
MembershipAdapter membershipAdapter = new MembershipAdapter() {
@Override
public void memberRemoved(MembershipEvent event) {
if (getNode(hz1).getLocalMember().equals(event.getMember())) {
splitLatch.countDown();
}
}
};
hz2.getCluster().addMembershipListener(membershipAdapter);
hz3.getCluster().addMembershipListener(membershipAdapter);
final CountDownLatch mergingLatch = new CountDownLatch(1);
final CountDownLatch mergeLatch = new CountDownLatch(1);
LifecycleListener lifecycleListener = new LifecycleListener() {
@Override
public void stateChanged(LifecycleEvent event) {
if (event.getState() == LifecycleState.MERGING) {
mergingLatch.countDown();
}
if (event.getState() == LifecycleState.MERGED) {
mergeLatch.countDown();
}
}
};
hz1.getLifecycleService().addLifecycleListener(lifecycleListener);
block(hz1, hz2);
block(hz1, hz3);
disconnect(hz2, hz1);
disconnect(hz3, hz1);
block(hz2, hz1);
block(hz3, hz1);
assertTrue(splitLatch.await(20, TimeUnit.SECONDS));
assertClusterSize(2, hz2, hz3);
assertClusterSize(3, hz1);
unblock(hz1, hz2);
unblock(hz1, hz3);
unblock(hz2, hz1);
unblock(hz3, hz1);
assertTrue(mergingLatch.await(60, TimeUnit.SECONDS));
hz2.getLifecycleService().terminate();
hz2 = newHazelcastInstance(config, "test-node2", new FirewallingNodeContext());
assertTrue(mergeLatch.await(120, TimeUnit.SECONDS));
assertClusterSizeEventually(3, hz1, hz2, hz3);
assertMasterAddress(getAddress(hz3), hz1, hz2, hz3);
}
private static FirewallingConnectionManager getFireWalledConnectionManager(HazelcastInstance hz) {
return (FirewallingConnectionManager) getConnectionManager(hz);
}
public static class MergedEventLifeCycleListener
implements LifecycleListener {
private final CountDownLatch mergeLatch;
public MergedEventLifeCycleListener(CountDownLatch mergeLatch) {
this.mergeLatch = mergeLatch;
}
public void stateChanged(LifecycleEvent event) {
if (event.getState() == LifecycleState.MERGED) {
mergeLatch.countDown();
}
}
}
private static class MemberRemovedMembershipListener
implements MembershipListener {
private final CountDownLatch splitLatch;
public MemberRemovedMembershipListener(CountDownLatch splitLatch) {
this.splitLatch = splitLatch;
}
@Override
public void memberAdded(MembershipEvent membershipEvent) {
}
@Override
public void memberRemoved(MembershipEvent membershipEvent) {
splitLatch.countDown();
}
@Override
public void memberAttributeChanged(MemberAttributeEvent memberAttributeEvent) {
}
}
}