/* * Licensed to the Apache Software Foundation (ASF) under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional information regarding * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. You may obtain a * copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package org.apache.geode.distributed; import static org.apache.geode.distributed.ConfigurationProperties.DISABLE_AUTO_RECONNECT; import static org.apache.geode.distributed.ConfigurationProperties.ENABLE_CLUSTER_CONFIGURATION; import static org.apache.geode.distributed.ConfigurationProperties.ENABLE_NETWORK_PARTITION_DETECTION; import static org.apache.geode.distributed.ConfigurationProperties.LOCATORS; import static org.apache.geode.distributed.ConfigurationProperties.LOG_LEVEL; import static org.apache.geode.distributed.ConfigurationProperties.MCAST_PORT; import static org.apache.geode.distributed.ConfigurationProperties.MCAST_TTL; import static org.apache.geode.distributed.ConfigurationProperties.MEMBER_TIMEOUT; import static org.apache.geode.distributed.ConfigurationProperties.SECURITY_PEER_AUTHENTICATOR; import static org.apache.geode.distributed.ConfigurationProperties.SECURITY_PEER_AUTH_INIT; import static org.apache.geode.distributed.ConfigurationProperties.SSL_CIPHERS; import static org.apache.geode.distributed.ConfigurationProperties.SSL_ENABLED_COMPONENTS; import static org.apache.geode.distributed.ConfigurationProperties.SSL_KEYSTORE; import static org.apache.geode.distributed.ConfigurationProperties.SSL_KEYSTORE_PASSWORD; import static org.apache.geode.distributed.ConfigurationProperties.SSL_KEYSTORE_TYPE; import static org.apache.geode.distributed.ConfigurationProperties.SSL_LOCATOR_ALIAS; import static org.apache.geode.distributed.ConfigurationProperties.SSL_PROTOCOLS; import static org.apache.geode.distributed.ConfigurationProperties.SSL_REQUIRE_AUTHENTICATION; import static org.apache.geode.distributed.ConfigurationProperties.SSL_TRUSTSTORE; import static org.apache.geode.distributed.ConfigurationProperties.SSL_TRUSTSTORE_PASSWORD; import static org.apache.geode.distributed.ConfigurationProperties.START_LOCATOR; import static org.apache.geode.distributed.ConfigurationProperties.USE_CLUSTER_CONFIGURATION; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import com.jayway.awaitility.Awaitility; import org.apache.geode.ForcedDisconnectException; import org.apache.geode.GemFireConfigException; import org.apache.geode.LogWriter; import org.apache.geode.SystemConnectException; import org.apache.geode.cache.Cache; import org.apache.geode.cache.CacheFactory; import org.apache.geode.cache.Region; import org.apache.geode.cache.RegionShortcut; import org.apache.geode.distributed.internal.DistributionException; import org.apache.geode.distributed.internal.DistributionManager; import org.apache.geode.distributed.internal.InternalDistributedSystem; import org.apache.geode.distributed.internal.InternalLocator; import org.apache.geode.distributed.internal.MembershipListener; import org.apache.geode.distributed.internal.membership.InternalDistributedMember; import org.apache.geode.distributed.internal.membership.MembershipManager; import org.apache.geode.distributed.internal.membership.MembershipTestHook; import org.apache.geode.distributed.internal.membership.NetView; import org.apache.geode.distributed.internal.membership.gms.MembershipManagerHelper; import org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeaveTestHelper; import org.apache.geode.internal.Assert; import org.apache.geode.internal.AvailablePort; import org.apache.geode.internal.AvailablePortHelper; import org.apache.geode.internal.cache.GemFireCacheImpl; import org.apache.geode.internal.logging.InternalLogWriter; import org.apache.geode.internal.logging.LocalLogWriter; import org.apache.geode.internal.security.SecurableCommunicationChannel; import org.apache.geode.internal.tcp.Connection; import org.apache.geode.test.dunit.DistributedTestUtils; import org.apache.geode.test.dunit.Host; import org.apache.geode.test.dunit.IgnoredException; import org.apache.geode.test.dunit.LogWriterUtils; import org.apache.geode.test.dunit.NetworkUtils; import org.apache.geode.test.dunit.SerializableRunnable; import org.apache.geode.test.dunit.VM; import org.apache.geode.test.dunit.Wait; import org.apache.geode.test.dunit.internal.JUnit4DistributedTestCase; import org.apache.geode.test.junit.categories.DistributedTest; import org.apache.geode.test.junit.categories.FlakyTest; import org.apache.geode.test.junit.categories.MembershipTest; import org.apache.geode.util.test.TestUtil; import org.junit.Test; import org.junit.experimental.categories.Category; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Properties; import java.util.Set; import java.util.concurrent.TimeUnit; /** * Tests the ability of the {@link Locator} API to start and stop locators running in remote VMs. * * @since GemFire 4.0 */ @Category({DistributedTest.class, MembershipTest.class}) public class LocatorDUnitTest extends JUnit4DistributedTestCase { static volatile InternalDistributedSystem system = null; static TestHook hook; /** * Creates a new <code>LocatorDUnitTest</code> */ public LocatorDUnitTest() { super(); } private static final String WAIT2_MS_NAME = "LocatorDUnitTest.WAIT2_MS"; private static final int WAIT2_MS_DEFAULT = 5000; // 2000 -- see bug 36470 private static final int WAIT2_MS = Integer.getInteger(WAIT2_MS_NAME, WAIT2_MS_DEFAULT).intValue(); protected int port1; private int port2; @Override public final void postSetUp() throws Exception { port1 = -1; port2 = -1; IgnoredException.addIgnoredException("Removing shunned member"); } @Override public final void preTearDown() throws Exception { if (Locator.hasLocator()) { Locator.getLocator().stop(); } GemFireCacheImpl cache = GemFireCacheImpl.getInstance(); if (cache != null && !cache.isClosed()) { cache.close(); } // delete locator state files so they don't accidentally // get used by other tests if (port1 > 0) { DistributedTestUtils.deleteLocatorStateFile(port1); } if (port2 > 0) { DistributedTestUtils.deleteLocatorStateFile(port2); } } @Override public final void postTearDown() throws Exception { if (system != null) { system.disconnect(); system = null; } } // for child classes protected void addDSProps(Properties p) { } //////// Test Methods /** * This tests that the locator can resume control as coordinator after all locators have been shut * down and one is restarted. It's necessary to have a lock service start so elder failover is * forced to happen. Prior to fixing how this worked it hung with the restarted locator trying to * become elder again because it put its address at the beginning of the new view it sent out. */ @Test public void testCollocatedLocatorWithSecurity() throws Exception { disconnectAllFromDS(); Host host = Host.getHost(0); VM vm1 = host.getVM(1); VM vm2 = host.getVM(2); VM vm3 = host.getVM(3); port1 = AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET); DistributedTestUtils.deleteLocatorStateFile(port1); final String locators = NetworkUtils.getServerHostName(host) + "[" + port1 + "]"; final Properties properties = new Properties(); properties.put(MCAST_PORT, "0"); properties.put(START_LOCATOR, locators); properties.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); properties.put(SECURITY_PEER_AUTH_INIT, "org.apache.geode.distributed.AuthInitializer.create"); properties.put(SECURITY_PEER_AUTHENTICATOR, "org.apache.geode.distributed.MyAuthenticator.create"); properties.put(ENABLE_CLUSTER_CONFIGURATION, "false"); properties.put(USE_CLUSTER_CONFIGURATION, "false"); addDSProps(properties); system = (InternalDistributedSystem) DistributedSystem.connect(properties); InternalDistributedMember mbr = system.getDistributedMember(); assertEquals("expected the VM to have NORMAL vmKind", DistributionManager.NORMAL_DM_TYPE, system.getDistributedMember().getVmKind()); properties.remove(START_LOCATOR); properties.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); properties.put(LOCATORS, locators); SerializableRunnable startSystem = new SerializableRunnable("start system") { public void run() { system = (InternalDistributedSystem) DistributedSystem.connect(properties); } }; vm1.invoke(startSystem); vm2.invoke(startSystem); // ensure that I, as a collocated locator owner, can create a cache region Cache cache = CacheFactory.create(system); Region r = cache.createRegionFactory(RegionShortcut.REPLICATE).create("test-region"); assertNotNull("expected to create a region", r); // create a lock service and have every vm get a lock DistributedLockService service = DistributedLockService.create("test service", system); service.becomeLockGrantor(); service.lock("foo0", 0, 0); vm1.invoke("get the lock service and lock something", () -> DistributedLockService.create("test service", system).lock("foo1", 0, 0)); vm2.invoke("get the lock service and lock something", () -> DistributedLockService.create("test service", system).lock("foo2", 0, 0)); // cause elder failover. vm1 will become the lock grantor system.disconnect(); try { vm1.invoke("ensure grantor failover", () -> { DistributedLockService serviceNamed = DistributedLockService.getServiceNamed("test service"); serviceNamed.lock("foo3", 0, 0); Awaitility.waitAtMost(10000, TimeUnit.MILLISECONDS).pollInterval(200, TimeUnit.MILLISECONDS) .until(() -> serviceNamed.isLockGrantor()); assertTrue(serviceNamed.isLockGrantor()); }); properties.put(START_LOCATOR, locators); properties.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); system = (InternalDistributedSystem) DistributedSystem.connect(properties); System.out.println("done connecting distributed system. Membership view is " + MembershipManagerHelper.getMembershipManager(system).getView()); assertEquals("should be the coordinator", system.getDistributedMember(), MembershipManagerHelper.getCoordinator(system)); NetView view = MembershipManagerHelper.getMembershipManager(system).getView(); org.apache.geode.test.dunit.LogWriterUtils.getLogWriter() .info("view after becoming coordinator is " + view); assertNotSame("should not be the first member in the view (" + view + ")", system.getDistributedMember(), view.get(0)); service = DistributedLockService.create("test service", system); // now force a non-elder VM to get a lock. This will hang if the bug is not fixed vm2.invoke("get the lock service and lock something", () -> { DistributedLockService.getServiceNamed("test service").lock("foo4", 0, 0); }); assertFalse("should not have become lock grantor", service.isLockGrantor()); // Now demonstrate that a new member can join and use the lock service properties.remove(START_LOCATOR); vm3.invoke(startSystem); vm3.invoke("get the lock service and lock something(2)", () -> DistributedLockService.create("test service", system).lock("foo5", 0, 0)); } finally { disconnectAllFromDS(); } } /** * Bug 30341 concerns race conditions in JGroups that allow two locators to start up in a * split-brain configuration. To work around this we have always told customers that they need to * stagger the starting of locators. This test configures two locators to start up simultaneously * and shows that they find each other and form a single system. */ @Category(FlakyTest.class) // GEODE-1931 @Test public void testStartTwoLocators() throws Exception { disconnectAllFromDS(); Host host = Host.getHost(0); VM loc1 = host.getVM(1); VM loc2 = host.getVM(2); int ports[] = AvailablePortHelper.getRandomAvailableTCPPorts(2); final int port1 = ports[0]; this.port1 = port1; final int port2 = ports[1]; this.port2 = port2; // for cleanup in tearDown2 DistributedTestUtils.deleteLocatorStateFile(port1); DistributedTestUtils.deleteLocatorStateFile(port2); final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]"; final Properties properties = new Properties(); properties.put(MCAST_PORT, "0"); properties.put(LOCATORS, locators); properties.put(ENABLE_NETWORK_PARTITION_DETECTION, "false"); properties.put(DISABLE_AUTO_RECONNECT, "true"); properties.put(MEMBER_TIMEOUT, "2000"); properties.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); properties.put(ENABLE_CLUSTER_CONFIGURATION, "false"); addDSProps(properties); startVerifyAndStopLocator(loc1, loc2, port1, port2, properties); } private Boolean startLocatorWithPortAndProperties(final int port, final Properties properties) throws IOException { try { System.setProperty("p2p.joinTimeout", "5000"); // set a short join timeout. default is 17000ms Locator.startLocatorAndDS(port, new File(""), properties); } catch (SystemConnectException e) { return Boolean.FALSE; } catch (GemFireConfigException e) { return Boolean.FALSE; } finally { System.getProperties().remove("p2p.joinTimeout"); } return Boolean.TRUE; } private String getSingleKeyKeystore() { return TestUtil.getResourcePath(getClass(), "/ssl/trusted.keystore"); } private String getMultiKeyKeystore() { return TestUtil.getResourcePath(getClass(), "/org/apache/geode/internal/net/multiKey.jks"); } private String getMultiKeyTruststore() { return TestUtil.getResourcePath(getClass(), "/org/apache/geode/internal/net/multiKeyTrust.jks"); } @Test public void testStartTwoLocatorsWithSingleKeystoreSSL() throws Exception { disconnectAllFromDS(); Host host = Host.getHost(0); VM loc1 = host.getVM(1); VM loc2 = host.getVM(2); int ports[] = AvailablePortHelper.getRandomAvailableTCPPorts(2); final int port1 = ports[0]; this.port1 = port1; final int port2 = ports[1]; this.port2 = port2; // for cleanup in tearDown2 DistributedTestUtils.deleteLocatorStateFile(port1); DistributedTestUtils.deleteLocatorStateFile(port2); final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]"; final Properties properties = new Properties(); properties.put(MCAST_PORT, "0"); properties.put(LOCATORS, locators); properties.put(ENABLE_NETWORK_PARTITION_DETECTION, "false"); properties.put(DISABLE_AUTO_RECONNECT, "true"); properties.put(MEMBER_TIMEOUT, "2000"); properties.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); properties.put(ENABLE_CLUSTER_CONFIGURATION, "false"); properties.put(SSL_CIPHERS, "any"); properties.put(SSL_PROTOCOLS, "TLSv1,TLSv1.1,TLSv1.2"); properties.put(SSL_KEYSTORE, getSingleKeyKeystore()); properties.put(SSL_KEYSTORE_PASSWORD, "password"); properties.put(SSL_KEYSTORE_TYPE, "JKS"); properties.put(SSL_TRUSTSTORE, getSingleKeyKeystore()); properties.put(SSL_TRUSTSTORE_PASSWORD, "password"); properties.put(SSL_ENABLED_COMPONENTS, SecurableCommunicationChannel.LOCATOR.getConstant()); startVerifyAndStopLocator(loc1, loc2, port1, port2, properties); } @Test public void testStartTwoLocatorsWithMultiKeystoreSSL() throws Exception { disconnectAllFromDS(); Host host = Host.getHost(0); VM loc1 = host.getVM(1); VM loc2 = host.getVM(2); int ports[] = AvailablePortHelper.getRandomAvailableTCPPorts(2); final int port1 = ports[0]; this.port1 = port1; final int port2 = ports[1]; this.port2 = port2; // for cleanup in tearDown2 DistributedTestUtils.deleteLocatorStateFile(port1); DistributedTestUtils.deleteLocatorStateFile(port2); final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]"; final Properties properties = new Properties(); properties.put(MCAST_PORT, "0"); properties.put(LOCATORS, locators); properties.put(ENABLE_NETWORK_PARTITION_DETECTION, "false"); properties.put(DISABLE_AUTO_RECONNECT, "true"); properties.put(MEMBER_TIMEOUT, "2000"); properties.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); properties.put(ENABLE_CLUSTER_CONFIGURATION, "false"); properties.put(SSL_CIPHERS, "any"); properties.put(SSL_PROTOCOLS, "any"); properties.put(SSL_KEYSTORE, getMultiKeyKeystore()); properties.put(SSL_KEYSTORE_PASSWORD, "password"); properties.put(SSL_KEYSTORE_TYPE, "JKS"); properties.put(SSL_TRUSTSTORE, getMultiKeyTruststore()); properties.put(SSL_TRUSTSTORE_PASSWORD, "password"); properties.put(SSL_LOCATOR_ALIAS, "locatorkey"); properties.put(SSL_ENABLED_COMPONENTS, SecurableCommunicationChannel.LOCATOR.getConstant()); startVerifyAndStopLocator(loc1, loc2, port1, port2, properties); } private void startVerifyAndStopLocator(VM loc1, VM loc2, int port1, int port2, Properties properties) { try { loc2.invoke("startLocator2", () -> startLocatorWithPortAndProperties(port2, properties)); loc1.invoke("startLocator1", () -> startLocatorWithPortAndProperties(port1, properties)); } finally { try { // verify that they found each other loc2.invoke("verifyLocatorNotInSplitBrain", () -> verifyLocatorNotInSplitBrain(2)); loc1.invoke("verifyLocatorNotInSplitBrain", () -> verifyLocatorNotInSplitBrain(2)); } finally { loc2.invoke("stop locator", () -> stopLocator()); loc1.invoke("stop locator", () -> stopLocator()); } } } @Test public void testStartTwoLocatorsOneWithSSLAndTheOtherNonSSL() throws Exception { IgnoredException.addIgnoredException("Unrecognized SSL message, plaintext connection"); IgnoredException.addIgnoredException("LocatorCancelException"); disconnectAllFromDS(); Host host = Host.getHost(0); VM loc1 = host.getVM(1); VM loc2 = host.getVM(2); int ports[] = AvailablePortHelper.getRandomAvailableTCPPorts(2); final int port1 = ports[0]; this.port1 = port1; final int port2 = ports[1]; this.port2 = port2; // for cleanup in tearDown2 DistributedTestUtils.deleteLocatorStateFile(port1); DistributedTestUtils.deleteLocatorStateFile(port2); final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]"; final Properties properties = new Properties(); properties.put(MCAST_PORT, "0"); properties.put(LOCATORS, locators); properties.put(ENABLE_NETWORK_PARTITION_DETECTION, "false"); properties.put(DISABLE_AUTO_RECONNECT, "true"); properties.put(MEMBER_TIMEOUT, "2000"); properties.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); properties.put(ENABLE_CLUSTER_CONFIGURATION, "false"); properties.put(SSL_CIPHERS, "any"); properties.put(SSL_PROTOCOLS, "any"); properties.put(SSL_KEYSTORE, getSingleKeyKeystore()); properties.put(SSL_KEYSTORE_PASSWORD, "password"); properties.put(SSL_KEYSTORE_TYPE, "JKS"); properties.put(SSL_TRUSTSTORE, getSingleKeyKeystore()); properties.put(SSL_TRUSTSTORE_PASSWORD, "password"); properties.put(SSL_REQUIRE_AUTHENTICATION, "true"); properties.put(SSL_ENABLED_COMPONENTS, SecurableCommunicationChannel.LOCATOR.getConstant()); try { loc1.invoke("start Locator1", () -> startLocator(port1, properties)); loc1.invoke("verifyLocatorNotInSplitBrain", () -> verifyLocatorNotInSplitBrain(1)); properties.remove(SSL_ENABLED_COMPONENTS); loc2.invoke("start Locator2", () -> startLocator(port2, properties)); } finally { try { loc1.invoke("verifyLocatorNotInSplitBrain", () -> verifyLocatorNotInSplitBrain(1)); } finally { loc1.invoke("stop locator", () -> stopLocator()); } } } @Test public void testStartTwoLocatorsOneWithNonSSLAndTheOtherSSL() throws Exception { IgnoredException.addIgnoredException("Remote host closed connection during handshake"); IgnoredException.addIgnoredException("Unrecognized SSL message, plaintext connection"); IgnoredException.addIgnoredException("LocatorCancelException"); disconnectAllFromDS(); Host host = Host.getHost(0); VM loc1 = host.getVM(1); VM loc2 = host.getVM(2); int ports[] = AvailablePortHelper.getRandomAvailableTCPPorts(2); final int port1 = ports[0]; this.port1 = port1; final int port2 = ports[1]; this.port2 = port2; // for cleanup in tearDown2 DistributedTestUtils.deleteLocatorStateFile(port1); DistributedTestUtils.deleteLocatorStateFile(port2); final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]"; final Properties properties = new Properties(); properties.put(MCAST_PORT, "0"); properties.put(LOCATORS, locators); properties.put(ENABLE_NETWORK_PARTITION_DETECTION, "false"); properties.put(DISABLE_AUTO_RECONNECT, "true"); properties.put(MEMBER_TIMEOUT, "2000"); properties.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); properties.put(ENABLE_CLUSTER_CONFIGURATION, "false"); properties.put(SSL_CIPHERS, "any"); properties.put(SSL_PROTOCOLS, "any"); try { loc1.invoke("start Locator1", () -> startLocator(port1, properties)); loc1.invoke("verifyLocatorNotInSplitBrain", () -> verifyLocatorNotInSplitBrain(1)); properties.put(SSL_KEYSTORE, getSingleKeyKeystore()); properties.put(SSL_KEYSTORE_PASSWORD, "password"); properties.put(SSL_KEYSTORE_TYPE, "JKS"); properties.put(SSL_TRUSTSTORE, getSingleKeyKeystore()); properties.put(SSL_TRUSTSTORE_PASSWORD, "password"); properties.put(SSL_REQUIRE_AUTHENTICATION, "true"); properties.put(SSL_ENABLED_COMPONENTS, SecurableCommunicationChannel.LOCATOR.getConstant()); loc2.invoke("start Locator2", () -> startLocator(port2, properties)); } finally { try { loc1.invoke("verifyLocatorNotInSplitBrain", () -> verifyLocatorNotInSplitBrain(1)); } finally { loc1.invoke("stop locator", () -> stopLocator()); } } } @Test public void testStartTwoLocatorsWithDifferentSSLCertificates() throws Exception { IgnoredException.addIgnoredException("Remote host closed connection during handshake"); IgnoredException .addIgnoredException("unable to find valid certification path to requested target"); IgnoredException.addIgnoredException("Received fatal alert: certificate_unknown"); IgnoredException.addIgnoredException("LocatorCancelException"); disconnectAllFromDS(); IgnoredException.addIgnoredException("Unrecognized SSL message, plaintext connection"); disconnectAllFromDS(); Host host = Host.getHost(0); VM loc1 = host.getVM(1); VM loc2 = host.getVM(2); int ports[] = AvailablePortHelper.getRandomAvailableTCPPorts(2); final int port1 = ports[0]; this.port1 = port1; final int port2 = ports[1]; this.port2 = port2; // for cleanup in tearDown2 DistributedTestUtils.deleteLocatorStateFile(port1); DistributedTestUtils.deleteLocatorStateFile(port2); final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]"; final Properties properties = new Properties(); properties.put(MCAST_PORT, "0"); properties.put(LOCATORS, locators); properties.put(ENABLE_NETWORK_PARTITION_DETECTION, "false"); properties.put(DISABLE_AUTO_RECONNECT, "true"); properties.put(MEMBER_TIMEOUT, "2000"); properties.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); properties.put(ENABLE_CLUSTER_CONFIGURATION, "false"); properties.put(SSL_CIPHERS, "any"); properties.put(SSL_PROTOCOLS, "any"); properties.put(SSL_KEYSTORE, getSingleKeyKeystore()); properties.put(SSL_KEYSTORE_PASSWORD, "password"); properties.put(SSL_KEYSTORE_TYPE, "JKS"); properties.put(SSL_TRUSTSTORE, getSingleKeyKeystore()); properties.put(SSL_TRUSTSTORE_PASSWORD, "password"); properties.put(SSL_REQUIRE_AUTHENTICATION, "true"); properties.put(SSL_ENABLED_COMPONENTS, SecurableCommunicationChannel.LOCATOR.getConstant()); try { loc1.invoke("start Locator1", () -> startLocator(port1, properties)); loc1.invoke("verifyLocatorNotInSplitBrain", () -> verifyLocatorNotInSplitBrain(1)); properties.put(SSL_KEYSTORE, getMultiKeyKeystore()); properties.put(SSL_TRUSTSTORE, getMultiKeyTruststore()); properties.put(SSL_LOCATOR_ALIAS, "locatorkey"); loc2.invoke("start Locator2", () -> startLocator(port2, properties)); } finally { try { loc1.invoke("verifyLocatorNotInSplitBrain", () -> verifyLocatorNotInSplitBrain(1)); } finally { loc1.invoke("stop locator", () -> stopLocator()); } } } private Object verifyLocatorNotInSplitBrain(final int expectedMembers) { InternalDistributedSystem sys = InternalDistributedSystem.getAnyInstance(); if (sys == null) { Assert.fail("no distributed system found"); } assertEquals(expectedMembers, sys.getDM().getViewMembers().size()); return true; } private void startLocator(final int port, final Properties properties) throws IOException { try { System.setProperty("p2p.joinTimeout", "5000"); // set a short join timeout. default is 17000ms Locator.startLocatorAndDS(port, new File(""), properties); } finally { System.getProperties().remove("p2p.joinTimeout"); } } /** * test lead member selection */ @Test public void testLeadMemberSelection() throws Exception { disconnectAllFromDS(); Host host = Host.getHost(0); VM vm1 = host.getVM(1); VM vm2 = host.getVM(2); VM vm3 = host.getVM(3); port1 = AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET); DistributedTestUtils.deleteLocatorStateFile(port1); final String locators = NetworkUtils.getServerHostName(host) + "[" + port1 + "]"; final Properties properties = new Properties(); properties.put(MCAST_PORT, "0"); properties.put(LOCATORS, locators); properties.put(ENABLE_NETWORK_PARTITION_DETECTION, "true"); properties.put(DISABLE_AUTO_RECONNECT, "true"); addDSProps(properties); File logFile = new File(""); if (logFile.exists()) { logFile.delete(); } Locator locator = Locator.startLocatorAndDS(port1, logFile, properties); try { DistributedSystem sys = locator.getDistributedSystem(); Object[] connectArgs = new Object[] {properties}; assertTrue(MembershipManagerHelper.getLeadMember(sys) == null); // connect three vms and then watch the lead member selection as they // are disconnected/reconnected properties.put("name", "vm1"); DistributedMember mem1 = (DistributedMember) vm1.invoke(this.getClass(), "getDistributedMember", connectArgs); // assertTrue(MembershipManagerHelper.getLeadMember(sys) != null); assertLeadMember(mem1, sys, 5000); properties.put("name", "vm2"); DistributedMember mem2 = (DistributedMember) vm2.invoke(this.getClass(), "getDistributedMember", connectArgs); assertLeadMember(mem1, sys, 5000); properties.put("name", "vm3"); DistributedMember mem3 = (DistributedMember) vm3.invoke(this.getClass(), "getDistributedMember", connectArgs); assertLeadMember(mem1, sys, 5000); // after disconnecting the first vm, the second one should become the leader vm1.invoke(() -> disconnectDistributedSystem()); MembershipManagerHelper.getMembershipManager(sys).waitForDeparture(mem1); assertLeadMember(mem2, sys, 5000); properties.put("name", "vm1"); mem1 = (DistributedMember) vm1.invoke(this.getClass(), "getDistributedMember", connectArgs); assertLeadMember(mem2, sys, 5000); vm2.invoke(() -> disconnectDistributedSystem()); MembershipManagerHelper.getMembershipManager(sys).waitForDeparture(mem2); assertLeadMember(mem3, sys, 5000); vm1.invoke(() -> disconnectDistributedSystem()); MembershipManagerHelper.getMembershipManager(sys).waitForDeparture(mem1); assertLeadMember(mem3, sys, 5000); vm3.invoke(() -> disconnectDistributedSystem()); MembershipManagerHelper.getMembershipManager(sys).waitForDeparture(mem3); assertLeadMember(null, sys, 5000); } finally { locator.stop(); } } private void assertLeadMember(final DistributedMember member, final DistributedSystem sys, long timeout) { Awaitility.waitAtMost(timeout, TimeUnit.MILLISECONDS).pollInterval(200, TimeUnit.MILLISECONDS) .until(() -> { DistributedMember lead = MembershipManagerHelper.getLeadMember(sys); if (member != null) { return member.equals(lead); } return (lead == null); }); } /** * test lead member and coordinator failure with network partition detection enabled. It would be * nice for this test to have more than two "server" vms, to demonstrate that they all exit when * the leader and potential- coordinator both disappear in the loss-correlation-window, but there * are only four vms available for dunit testing. * <p> * So, we start two locators with admin distributed systems, then start two regular distributed * members. * <p> * We kill the second locator (which is not the view coordinator) and then kill the non-lead * member. That should be okay - the lead and remaining locator continue to run. * <p> * We then kill the lead member and demonstrate that the original locator (which is now the sole * remaining member) shuts itself down. */ @Test public void testLeadAndCoordFailure() throws Exception { IgnoredException.addIgnoredException("Possible loss of quorum due"); disconnectAllFromDS(); Host host = Host.getHost(0); VM vm1 = host.getVM(1); VM vm2 = host.getVM(2); VM locvm = host.getVM(3); Locator locator = null; int ports[] = AvailablePortHelper.getRandomAvailableTCPPorts(2); final int port1 = ports[0]; this.port1 = port1; final int port2 = ports[1]; DistributedTestUtils.deleteLocatorStateFile(port1, port2); final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]"; final Properties properties = new Properties(); properties.put(MCAST_PORT, "0"); properties.put(LOCATORS, locators); properties.put(ENABLE_NETWORK_PARTITION_DETECTION, "true"); properties.put(DISABLE_AUTO_RECONNECT, "true"); properties.put(MEMBER_TIMEOUT, "2000"); properties.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); // properties.put("log-level", "fine"); properties.put(ENABLE_CLUSTER_CONFIGURATION, "false"); addDSProps(properties); try { final String uname = getUniqueName(); File logFile = new File(""); locator = Locator.startLocatorAndDS(port1, logFile, properties); final DistributedSystem sys = locator.getDistributedSystem(); sys.getLogWriter() .info("<ExpectedException action=add>java.net.ConnectException</ExpectedException>"); MembershipManagerHelper.inhibitForcedDisconnectLogging(true); locvm.invoke(new SerializableRunnable() { public void run() { File lf = new File(""); try { Locator.startLocatorAndDS(port2, lf, properties); } catch (IOException ios) { org.apache.geode.test.dunit.Assert.fail("Unable to start locator2", ios); } } }); Object[] connectArgs = new Object[] {properties}; SerializableRunnable crashLocator = new SerializableRunnable("Crash locator") { public void run() { Locator loc = Locator.getLocators().iterator().next(); DistributedSystem msys = loc.getDistributedSystem(); MembershipManagerHelper.crashDistributedSystem(msys); loc.stop(); } }; assertTrue(MembershipManagerHelper.getLeadMember(sys) == null); // properties.put("log-level", getDUnitLogLevel()); DistributedMember mem1 = (DistributedMember) vm1.invoke(this.getClass(), "getDistributedMember", connectArgs); vm2.invoke(this.getClass(), "getDistributedMember", connectArgs); assertLeadMember(mem1, sys, 5000); assertEquals(sys.getDistributedMember(), MembershipManagerHelper.getCoordinator(sys)); // crash the second vm and the locator. Should be okay DistributedTestUtils.crashDistributedSystem(vm2); locvm.invoke(crashLocator); assertTrue("Distributed system should not have disconnected", vm1.invoke(() -> LocatorDUnitTest.isSystemConnected())); // ensure quorumLost is properly invoked DistributionManager dm = (DistributionManager) ((InternalDistributedSystem) sys).getDistributionManager(); MyMembershipListener listener = new MyMembershipListener(); dm.addMembershipListener(listener); // disconnect the first vm and demonstrate that the third vm and the // locator notice the failure and exit DistributedTestUtils.crashDistributedSystem(vm1); /* * This vm is watching vm1, which is watching vm2 which is watching locvm. It will take 3 * (3 * * member-timeout) milliseconds to detect the full failure and eject the lost members from * the view. */ org.apache.geode.test.dunit.LogWriterUtils.getLogWriter() .info("waiting for my distributed system to disconnect due to partition detection"); Awaitility.waitAtMost(24000, TimeUnit.MILLISECONDS).pollInterval(200, TimeUnit.MILLISECONDS) .until(() -> { return !sys.isConnected(); }); if (sys.isConnected()) { fail( "Distributed system did not disconnect as expected - network partition detection is broken"); } // quorumLost should be invoked if we get a ForcedDisconnect in this situation assertTrue("expected quorumLost to be invoked", listener.quorumLostInvoked); assertTrue("expected suspect processing initiated by TCPConduit", listener.suspectReasons.contains(Connection.INITIATING_SUSPECT_PROCESSING)); } finally { if (locator != null) { locator.stop(); } LogWriter bLogger = new LocalLogWriter(InternalLogWriter.ALL_LEVEL, System.out); bLogger.info("<ExpectedException action=remove>service failure</ExpectedException>"); bLogger .info("<ExpectedException action=remove>java.net.ConnectException</ExpectedException>"); bLogger.info( "<ExpectedException action=remove>org.apache.geode.ForcedDisconnectException</ExpectedException>"); disconnectAllFromDS(); } } /** * test lead member failure and normal coordinator shutdown with network partition detection * enabled. * <p> * Start two locators with admin distributed systems, then start two regular distributed members. * <p> * We kill the lead member and demonstrate that the other members continue to operate normally. * <p> * We then shut down the group coordinator and observe the second locator pick up the job and the * remaining member continues to operate normally. */ @Test public void testLeadFailureAndCoordShutdown() throws Exception { disconnectAllFromDS(); Host host = Host.getHost(0); VM vm1 = host.getVM(1); VM vm2 = host.getVM(2); VM locvm = host.getVM(3); Locator locator = null; final int[] ports = AvailablePortHelper.getRandomAvailableTCPPorts(2); final int port1 = ports[0]; this.port1 = port1; final int port2 = ports[1]; this.port2 = port2; DistributedTestUtils.deleteLocatorStateFile(port1, port2); final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]"; final Properties properties = new Properties(); properties.put(MCAST_PORT, "0"); properties.put(LOCATORS, locators); properties.put(ENABLE_NETWORK_PARTITION_DETECTION, "true"); properties.put(DISABLE_AUTO_RECONNECT, "true"); properties.put(MEMBER_TIMEOUT, "2000"); properties.put(ENABLE_CLUSTER_CONFIGURATION, "false"); addDSProps(properties); try { final String uname = getUniqueName(); File logFile = new File(""); locator = Locator.startLocatorAndDS(port1, logFile, properties); DistributedSystem sys = locator.getDistributedSystem(); locvm.invoke(new SerializableRunnable() { public void run() { File lf = new File(""); try { Locator.startLocatorAndDS(port2, lf, properties); MembershipManagerHelper.inhibitForcedDisconnectLogging(true); } catch (IOException ios) { org.apache.geode.test.dunit.Assert.fail("Unable to start locator2", ios); } } }); Object[] connectArgs = new Object[] {properties}; SerializableRunnable crashSystem = new SerializableRunnable("Crash system") { public void run() { DistributedSystem msys = InternalDistributedSystem.getAnyInstance(); msys.getLogWriter() .info("<ExpectedException action=add>service failure</ExpectedException>"); msys.getLogWriter().info( "<ExpectedException action=add>org.apache.geode.ConnectException</ExpectedException>"); msys.getLogWriter().info( "<ExpectedException action=add>org.apache.geode.ForcedDisconnectException</ExpectedException>"); MembershipManagerHelper.crashDistributedSystem(msys); } }; assertTrue(MembershipManagerHelper.getLeadMember(sys) == null); DistributedMember mem1 = (DistributedMember) vm1.invoke(this.getClass(), "getDistributedMember", connectArgs); DistributedMember mem2 = (DistributedMember) vm2.invoke(this.getClass(), "getDistributedMember", connectArgs); assertEquals(mem1, MembershipManagerHelper.getLeadMember(sys)); assertEquals(sys.getDistributedMember(), MembershipManagerHelper.getCoordinator(sys)); MembershipManagerHelper.inhibitForcedDisconnectLogging(true); // crash the lead vm. Should be okay vm1.invoke(crashSystem); Awaitility.waitAtMost(4 * 2000, TimeUnit.MILLISECONDS) .pollInterval(200, TimeUnit.MILLISECONDS).until(() -> isSystemConnected()); assertTrue("Distributed system should not have disconnected", isSystemConnected()); assertTrue("Distributed system should not have disconnected", vm2.invoke(() -> LocatorDUnitTest.isSystemConnected())); assertTrue("Distributed system should not have disconnected", locvm.invoke(() -> LocatorDUnitTest.isSystemConnected())); // stop the locator normally. This should also be okay locator.stop(); if (!Locator.getLocators().isEmpty()) { // log this for debugging purposes before throwing assertion error org.apache.geode.test.dunit.LogWriterUtils.getLogWriter() .warning("found locator " + Locator.getLocators().iterator().next()); } assertTrue("locator is not stopped", Locator.getLocators().isEmpty()); assertTrue("Distributed system should not have disconnected", vm2.invoke(() -> LocatorDUnitTest.isSystemConnected())); assertTrue("Distributed system should not have disconnected", locvm.invoke(() -> LocatorDUnitTest.isSystemConnected())); // the remaining non-locator member should now be the lead member assertEquals( "This test sometimes fails. If the log contains " + "'failed to collect all ACKs' it is a false failure.", mem2, vm2.invoke(() -> LocatorDUnitTest.getLeadMember())); SerializableRunnable disconnect = new SerializableRunnable("Disconnect from " + locators) { public void run() { DistributedSystem sys = InternalDistributedSystem.getAnyInstance(); if (sys != null && sys.isConnected()) { sys.disconnect(); } } }; // disconnect the first vm and demonstrate that the third vm and the // locator notice the failure and exit vm2.invoke(() -> disconnectDistributedSystem()); locvm.invoke(() -> stopLocator()); } finally { MembershipManagerHelper.inhibitForcedDisconnectLogging(false); if (locator != null) { locator.stop(); } try { locvm.invoke(() -> stopLocator()); } catch (Exception e) { org.apache.geode.test.dunit.LogWriterUtils.getLogWriter() .severe("failed to stop locator in vm 3", e); } } } /** * test lead member failure and normal coordinator shutdown with network partition detection * enabled. * <p> * Start one locators with admin distributed systems, then start two regular distributed members. * <p> * We kill the lead member and demonstrate that the other members continue to operate normally. * <p> * We then shut down the group coordinator and observe the second locator pick up the job and the * remaining member continues to operate normally. */ // disabled on trunk - should be reenabled on cedar_dev_Oct12 // this test leaves a CloserThread around forever that logs "pausing" messages every 500 ms @Test public void testForceDisconnectAndPeerShutdownCause() throws Exception { disconnectAllFromDS(); Host host = Host.getHost(0); VM vm1 = host.getVM(1); VM vm2 = host.getVM(2); VM locvm = host.getVM(3); Locator locator = null; int ports[] = AvailablePortHelper.getRandomAvailableTCPPorts(2); final int port1 = ports[0]; this.port1 = port1; final int port2 = ports[1]; DistributedTestUtils.deleteLocatorStateFile(port1, port2); final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]"; final Properties properties = new Properties(); properties.put(MCAST_PORT, "0"); properties.put(LOCATORS, locators); properties.put(ENABLE_NETWORK_PARTITION_DETECTION, "true"); properties.put(DISABLE_AUTO_RECONNECT, "true"); properties.put(MEMBER_TIMEOUT, "2000"); properties.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); properties.put(ENABLE_CLUSTER_CONFIGURATION, "false"); addDSProps(properties); try { final String uname = getUniqueName(); File logFile = new File(""); locator = Locator.startLocatorAndDS(port1, logFile, properties); DistributedSystem sys = locator.getDistributedSystem(); locvm.invoke(() -> { File lf = new File(""); try { Locator.startLocatorAndDS(port2, lf, properties); } catch (IOException ios) { org.apache.geode.test.dunit.Assert.fail("Unable to start locator2", ios); } }); Object[] connectArgs = new Object[] {properties}; SerializableRunnable crashSystem = new SerializableRunnable("Crash system") { public void run() { DistributedSystem msys = InternalDistributedSystem.getAnyInstance(); msys.getLogWriter() .info("<ExpectedException action=add>service failure</ExpectedException>"); msys.getLogWriter().info( "<ExpectedException action=add>org.apache.geode.ConnectException</ExpectedException>"); msys.getLogWriter().info( "<ExpectedException action=add>org.apache.geode.ForcedDisconnectException</ExpectedException>"); msys.getLogWriter() .info("<ExpectedException action=add>Possible loss of quorum</ExpectedException>"); hook = new TestHook(); MembershipManagerHelper.getMembershipManager(msys).registerTestHook(hook); try { MembershipManagerHelper.crashDistributedSystem(msys); } finally { hook.reset(); } } }; assertTrue(MembershipManagerHelper.getLeadMember(sys) == null); final DistributedMember mem1 = (DistributedMember) vm1.invoke(this.getClass(), "getDistributedMember", connectArgs); final DistributedMember mem2 = (DistributedMember) vm2.invoke(this.getClass(), "getDistributedMember", connectArgs); assertEquals(mem1, MembershipManagerHelper.getLeadMember(sys)); assertEquals(sys.getDistributedMember(), MembershipManagerHelper.getCoordinator(sys)); // crash the lead vm. Should be okay. it should hang in test hook thats // why call is asynchronous. // vm1.invokeAsync(crashSystem); assertTrue("Distributed system should not have disconnected", isSystemConnected()); assertTrue("Distributed system should not have disconnected", vm2.invoke(() -> LocatorDUnitTest.isSystemConnected())); assertTrue("Distributed system should not have disconnected", locvm.invoke(() -> LocatorDUnitTest.isSystemConnected())); vm2.invokeAsync(crashSystem); Wait.pause(1000); // 4 x the member-timeout // request member removal for first peer from second peer. vm2.invoke(new SerializableRunnable("Request Member Removal") { @Override public void run() { DistributedSystem msys = InternalDistributedSystem.getAnyInstance(); MembershipManager mmgr = MembershipManagerHelper.getMembershipManager(msys); // check for shutdown cause in MembershipManager. Following call should // throw DistributedSystemDisconnectedException which should have cause as // ForceDisconnectException. try { msys.getLogWriter().info( "<ExpectedException action=add>Membership: requesting removal of </ExpectedException>"); mmgr.requestMemberRemoval(mem1, "test reasons"); msys.getLogWriter().info( "<ExpectedException action=remove>Membership: requesting removal of </ExpectedException>"); fail("It should have thrown exception in requestMemberRemoval"); } catch (DistributedSystemDisconnectedException e) { Throwable cause = e.getCause(); assertTrue("This should have been ForceDisconnectException but found " + cause, cause instanceof ForcedDisconnectException); } finally { hook.reset(); } } }); } finally { if (locator != null) { locator.stop(); } locvm.invoke(() -> stopLocator()); assertTrue("locator is not stopped", Locator.getLocators().isEmpty()); } } /** * test lead member shutdown and coordinator crashing with network partition detection enabled. * <p> * Start two locators with admin distributed systems, then start two regular distributed members. * <p> * We kill the coordinator and shut down the lead member and observe the second locator pick up * the job and the remaining member continue to operate normally. */ @Test public void testLeadShutdownAndCoordFailure() throws Exception { disconnectAllFromDS(); Host host = Host.getHost(0); VM vm1 = host.getVM(1); VM vm2 = host.getVM(2); VM locvm = host.getVM(3); Locator locator = null; int ports[] = AvailablePortHelper.getRandomAvailableTCPPorts(2); final int port1 = ports[0]; this.port1 = port1; final int port2 = ports[1]; DistributedTestUtils.deleteLocatorStateFile(port1, port2); final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]"; final Properties properties = new Properties(); properties.put(MCAST_PORT, "0"); properties.put(LOCATORS, locators); properties.put(ENABLE_NETWORK_PARTITION_DETECTION, "true"); properties.put(DISABLE_AUTO_RECONNECT, "true"); properties.put(MEMBER_TIMEOUT, "2000"); properties.put(ENABLE_CLUSTER_CONFIGURATION, "false"); addDSProps(properties); try { final String uname = getUniqueName(); locvm.invoke(() -> { File lf = new File(""); try { Locator.startLocatorAndDS(port2, lf, properties); } catch (IOException ios) { org.apache.geode.test.dunit.Assert.fail("Unable to start locator1", ios); } }); File logFile = new File(""); locator = Locator.startLocatorAndDS(port1, logFile, properties); DistributedSystem sys = locator.getDistributedSystem(); sys.getLogWriter().info( "<ExpectedException action=add>org.apache.geode.ForcedDisconnectException</ExpectedException>"); Object[] connectArgs = new Object[] {properties}; assertTrue(MembershipManagerHelper.getLeadMember(sys) == null); DistributedMember mem1 = (DistributedMember) vm1.invoke(this.getClass(), "getDistributedMember", connectArgs); vm1.invoke(() -> MembershipManagerHelper.inhibitForcedDisconnectLogging(true)); DistributedMember mem2 = (DistributedMember) vm2.invoke(this.getClass(), "getDistributedMember", connectArgs); DistributedMember loc1Mbr = locvm.invoke(() -> this.getLocatorDistributedMember()); assertLeadMember(mem1, sys, 5000); assertEquals(loc1Mbr, MembershipManagerHelper.getCoordinator(sys)); // crash the lead locator. Should be okay locvm.invoke("crash locator", () -> { Locator loc = Locator.getLocators().iterator().next(); DistributedSystem msys = loc.getDistributedSystem(); msys.getLogWriter() .info("<ExpectedException action=add>service failure</ExpectedException>"); msys.getLogWriter().info( "<ExpectedException action=add>org.apache.geode.ForcedDisconnectException</ExpectedException>"); msys.getLogWriter().info( "<ExpectedException action=add>org.apache.geode.ConnectException</ExpectedException>"); MembershipManagerHelper.crashDistributedSystem(msys); loc.stop(); }); Awaitility.waitAtMost(10000, TimeUnit.MILLISECONDS).pollInterval(200, TimeUnit.MILLISECONDS) .until(() -> sys.isConnected()); assertTrue("Distributed system should not have disconnected", sys.isConnected()); assertTrue("Distributed system should not have disconnected", vm1.invoke(() -> LocatorDUnitTest.isSystemConnected())); assertTrue("Distributed system should not have disconnected", vm2.invoke(() -> LocatorDUnitTest.isSystemConnected())); // disconnect the first vm and demonstrate that the non-lead vm and the // locator notice the failure and continue to run vm1.invoke(() -> disconnectDistributedSystem()); Awaitility.waitAtMost(10, TimeUnit.SECONDS).pollInterval(1000, TimeUnit.MILLISECONDS) .until(() -> vm2.invoke(() -> LocatorDUnitTest.isSystemConnected())); assertTrue("Distributed system should not have disconnected", vm2.invoke(() -> LocatorDUnitTest.isSystemConnected())); assertEquals(sys.getDistributedMember(), MembershipManagerHelper.getCoordinator(sys)); assertEquals(mem2, MembershipManagerHelper.getLeadMember(sys)); } finally { vm2.invoke(() -> disconnectDistributedSystem()); if (locator != null) { locator.stop(); } locvm.invoke(() -> stopLocator()); } } /** * Tests that attempting to connect to a distributed system in which no locator is defined throws * an exception. */ @Test public void testNoLocator() { disconnectAllFromDS(); Host host = Host.getHost(0); int port = AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET); DistributedTestUtils.deleteLocatorStateFile(port1); String locators = NetworkUtils.getServerHostName(host) + "[" + port + "]"; Properties props = new Properties(); props.setProperty(MCAST_PORT, "0"); props.setProperty(LOCATORS, locators); addDSProps(props); final String expected = "java.net.ConnectException"; final String addExpected = "<ExpectedException action=add>" + expected + "</ExpectedException>"; final String removeExpected = "<ExpectedException action=remove>" + expected + "</ExpectedException>"; LogWriter bgexecLogger = new LocalLogWriter(InternalLogWriter.ALL_LEVEL, System.out); bgexecLogger.info(addExpected); boolean exceptionOccurred = true; String oldValue = (String) System.getProperties().put("p2p.joinTimeout", "15000"); try { DistributedSystem.connect(props); exceptionOccurred = false; } catch (DistributionException ex) { // I guess it can throw this too... } catch (GemFireConfigException ex) { String s = ex.getMessage(); assertTrue(s.indexOf("Locator does not exist") >= 0); } catch (Exception ex) { // if you see this fail, determine if unexpected exception is expected // if expected then add in a catch block for it above this catch org.apache.geode.test.dunit.Assert.fail("Failed with unexpected exception", ex); } finally { if (oldValue == null) { System.getProperties().remove("p2p.joinTimeout"); } else { System.getProperties().put("p2p.joinTimeout", oldValue); } bgexecLogger.info(removeExpected); } if (!exceptionOccurred) { fail("Should have thrown a GemFireConfigException"); } } /** * Tests starting one locator in a remote VM and having multiple members of the distributed system * join it. This ensures that members start up okay, and that handling of a stopped locator is * correct. * <p> * The locator is then restarted and is shown to take over the role of membership coordinator. */ @Test public void testOneLocator() throws Exception { disconnectAllFromDS(); Host host = Host.getHost(0); VM vm0 = host.getVM(0); VM vm1 = host.getVM(1); VM vm2 = host.getVM(2); final int port = AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET); DistributedTestUtils.deleteLocatorStateFile(port1); final String locators = NetworkUtils.getServerHostName(host) + "[" + port + "]"; final String uniqueName = getUniqueName(); vm0.invoke("Start locator " + locators, () -> startLocator(port)); try { SerializableRunnable connect = new SerializableRunnable("Connect to " + locators) { public void run() { // System.setProperty("p2p.joinTimeout", "5000"); Properties props = new Properties(); props.setProperty(MCAST_PORT, "0"); props.setProperty(LOCATORS, locators); props.setProperty(MEMBER_TIMEOUT, "1000"); addDSProps(props); DistributedSystem.connect(props); } }; vm1.invoke(connect); vm2.invoke(connect); Properties props = new Properties(); props.setProperty(MCAST_PORT, "0"); props.setProperty(LOCATORS, locators); props.setProperty(MEMBER_TIMEOUT, "1000"); addDSProps(props); system = (InternalDistributedSystem) DistributedSystem.connect(props); final DistributedMember coord = MembershipManagerHelper.getCoordinator(system); org.apache.geode.test.dunit.LogWriterUtils.getLogWriter() .info("coordinator before termination of locator is " + coord); vm0.invoke(() -> stopLocator()); // now ensure that one of the remaining members became the coordinator Awaitility.waitAtMost(15000, TimeUnit.MILLISECONDS).pollInterval(200, TimeUnit.MILLISECONDS) .until(() -> !coord.equals(MembershipManagerHelper.getCoordinator(system))); DistributedMember newCoord = MembershipManagerHelper.getCoordinator(system); LogWriterUtils.getLogWriter().info("coordinator after shutdown of locator was " + newCoord); if (coord.equals(newCoord)) { fail("another member should have become coordinator after the locator was stopped"); } system.disconnect(); vm1.invoke(() -> disconnectDistributedSystem()); vm2.invoke(() -> disconnectDistributedSystem()); } finally { vm0.invoke(() -> stopLocator()); } } protected void startLocator(int port) { File logFile = new File(""); try { Properties locProps = new Properties(); locProps.setProperty(MCAST_PORT, "0"); locProps.setProperty(MEMBER_TIMEOUT, "1000"); locProps.put(ENABLE_CLUSTER_CONFIGURATION, "false"); addDSProps(locProps); Locator.startLocatorAndDS(port, logFile, locProps); } catch (IOException ex) { org.apache.geode.test.dunit.Assert.fail("While starting locator on port " + port, ex); } } /** * Tests starting one locator in a remote VM and having multiple members of the distributed system * join it. This ensures that members start up okay, and that handling of a stopped locator is * correct. It then restarts the locator to demonstrate that it can connect to and function as the * group coordinator */ @Test public void testLocatorBecomesCoordinator() throws Exception { disconnectAllFromDS(); final String expected = "java.net.ConnectException"; final String addExpected = "<ExpectedException action=add>" + expected + "</ExpectedException>"; final String removeExpected = "<ExpectedException action=remove>" + expected + "</ExpectedException>"; Host host = Host.getHost(0); VM vm0 = host.getVM(0); VM vm1 = host.getVM(1); VM vm2 = host.getVM(2); final int port = AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET); DistributedTestUtils.deleteLocatorStateFile(port1); final String locators = NetworkUtils.getServerHostName(host) + "[" + port + "]"; vm0.invoke(getUniqueName() + 1, () -> getStartSBLocatorRunnable(port)); try { final Properties props = new Properties(); props.setProperty(LOCATORS, locators); props.setProperty(ENABLE_NETWORK_PARTITION_DETECTION, "true"); props.put(ENABLE_CLUSTER_CONFIGURATION, "false"); addDSProps(props); vm1.invoke(() -> { DistributedSystem sys = getSystem(props); sys.getLogWriter().info(addExpected); }); vm2.invoke(() -> { DistributedSystem sys = getSystem(props); sys.getLogWriter().info(addExpected); }); system = (InternalDistributedSystem) getSystem(props); final DistributedMember coord = MembershipManagerHelper.getCoordinator(system); org.apache.geode.test.dunit.LogWriterUtils.getLogWriter() .info("coordinator before termination of locator is " + coord); vm0.invoke(() -> stopLocator()); // now ensure that one of the remaining members became the coordinator Awaitility.waitAtMost(15000, TimeUnit.MILLISECONDS).pollInterval(200, TimeUnit.MILLISECONDS) .until(() -> !coord.equals(MembershipManagerHelper.getCoordinator(system))); DistributedMember newCoord = MembershipManagerHelper.getCoordinator(system); LogWriterUtils.getLogWriter().info("coordinator after shutdown of locator was " + newCoord); if (newCoord == null || coord.equals(newCoord)) { fail("another member should have become coordinator after the locator was stopped: " + newCoord); } // restart the locator to demonstrate reconnection & make disconnects faster // it should also regain the role of coordinator, so we check to make sure // that the coordinator has changed vm0.invoke(getUniqueName() + "2", () -> getStartSBLocatorRunnable(port)); final DistributedMember tempCoord = newCoord; Awaitility.waitAtMost(5000, TimeUnit.MILLISECONDS).pollInterval(200, TimeUnit.MILLISECONDS) .until(() -> !tempCoord.equals(MembershipManagerHelper.getCoordinator(system))); system.disconnect(); LogWriter bgexecLogger = new LocalLogWriter(InternalLogWriter.ALL_LEVEL, System.out); bgexecLogger.info(removeExpected); SerializableRunnable disconnect = new SerializableRunnable("Disconnect from " + locators) { public void run() { } }; vm1.invoke(() -> { DistributedSystem sys = InternalDistributedSystem.getAnyInstance(); if (sys != null && sys.isConnected()) { sys.disconnect(); } // connectExceptions occur during disconnect, so we need the // expectedexception hint to be in effect until this point LogWriter bLogger = new LocalLogWriter(InternalLogWriter.ALL_LEVEL, System.out); bLogger.info(removeExpected); }); vm2.invoke(() -> { DistributedSystem sys = InternalDistributedSystem.getAnyInstance(); if (sys != null && sys.isConnected()) { sys.disconnect(); } // connectExceptions occur during disconnect, so we need the // expectedexception hint to be in effect until this point LogWriter bLogger = new LocalLogWriter(InternalLogWriter.ALL_LEVEL, System.out); bLogger.info(removeExpected); }); vm0.invoke(() -> stopLocator()); } finally { vm0.invoke(() -> stopLocator()); } } /** * set a short locator refresh rate */ public static void setShortRefreshWait() { System.setProperty("p2p.gossipRefreshRate", "2000"); } /** * remove shortened locator refresh rate */ public static void resetRefreshWait() { System.getProperties().remove("p2p.gossipRefreshRate"); } public static boolean isSystemConnected() { DistributedSystem sys = InternalDistributedSystem.getAnyInstance(); if (sys != null && sys.isConnected()) { return true; } return false; } static boolean beforeFailureNotificationReceived; static boolean afterFailureNotificationReceived; /** * Tests starting multiple locators in multiple VMs. */ @Test public void testMultipleLocators() throws Exception { disconnectAllFromDS(); Host host = Host.getHost(0); VM vm0 = host.getVM(0); VM vm1 = host.getVM(1); VM vm2 = host.getVM(2); VM vm3 = host.getVM(3); int[] freeTCPPorts = AvailablePortHelper.getRandomAvailableTCPPorts(2); final int port1 = freeTCPPorts[0]; this.port1 = port1; final int port2 = freeTCPPorts[1]; this.port2 = port2; DistributedTestUtils.deleteLocatorStateFile(port1, port2); final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]"; final Properties dsProps = new Properties(); dsProps.setProperty(LOCATORS, locators); dsProps.setProperty(MCAST_PORT, "0"); dsProps.setProperty(ENABLE_CLUSTER_CONFIGURATION, "false"); addDSProps(dsProps); vm0.invoke("start Locator1", () -> startLocator(port1, dsProps)); try { vm3.invoke("Start locator on " + port2, () -> startLocator(port2, dsProps)); try { SerializableRunnable connect = new SerializableRunnable("Connect to " + locators) { public void run() { Properties props = new Properties(); props.setProperty(MCAST_PORT, "0"); props.setProperty(LOCATORS, locators); addDSProps(props); DistributedSystem.connect(props); } }; vm1.invoke(connect); vm2.invoke(connect); Properties props = new Properties(); props.setProperty(MCAST_PORT, "0"); props.setProperty(LOCATORS, locators); addDSProps(props); system = (InternalDistributedSystem) DistributedSystem.connect(props); Awaitility.waitAtMost(10000, TimeUnit.MILLISECONDS).pollInterval(200, TimeUnit.MILLISECONDS) .until(() -> system.getDM().getViewMembers().size() >= 3); // three applications plus assertEquals(5, system.getDM().getViewMembers().size()); system.disconnect(); vm1.invoke(() -> disconnectDistributedSystem()); vm2.invoke(() -> disconnectDistributedSystem()); } finally { vm3.invoke(() -> stopLocator()); } } finally { vm0.invoke(() -> stopLocator()); } } private void disconnectDistributedSystem() { DistributedSystem sys = InternalDistributedSystem.getAnyInstance(); if (sys != null && sys.isConnected()) { sys.disconnect(); } MembershipManagerHelper.inhibitForcedDisconnectLogging(false); } /** * Tests starting multiple locators at the same time and ensuring that the locators end up only * have 1 master. GEODE-870 */ @Test public void testMultipleLocatorsRestartingAtSameTime() throws Exception { disconnectAllFromDS(); Host host = Host.getHost(0); VM vm0 = host.getVM(0); VM vm1 = host.getVM(1); VM vm2 = host.getVM(2); VM vm3 = host.getVM(3); VM vm4 = host.getVM(4); int[] freeTCPPorts = AvailablePortHelper.getRandomAvailableTCPPorts(3); this.port1 = freeTCPPorts[0]; this.port2 = freeTCPPorts[1]; int port3 = freeTCPPorts[2]; DistributedTestUtils.deleteLocatorStateFile(port1, port2, port3); final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]," + host0 + "[" + port3 + "]"; final Properties dsProps = new Properties(); dsProps.setProperty(LOCATORS, locators); dsProps.setProperty(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); dsProps.setProperty(ENABLE_NETWORK_PARTITION_DETECTION, "true"); dsProps.setProperty(ENABLE_CLUSTER_CONFIGURATION, "false"); dsProps.setProperty(MCAST_PORT, "0"); addDSProps(dsProps); vm0.invoke(() -> startLocatorAsync(new Object[] {port1, dsProps})); vm1.invoke(() -> startLocatorAsync(new Object[] {port2, dsProps})); vm2.invoke(() -> startLocatorAsync(new Object[] {port3, dsProps})); try { try { vm3.invoke(() -> { DistributedSystem.connect(dsProps); return true; }); vm4.invoke(() -> { DistributedSystem.connect(dsProps); return true; }); system = (InternalDistributedSystem) DistributedSystem.connect(dsProps); Awaitility.waitAtMost(10000, TimeUnit.MILLISECONDS).pollInterval(200, TimeUnit.MILLISECONDS) .until(() -> { try { return system.getDM().getViewMembers().size() == 6; } catch (Exception e) { e.printStackTrace(); org.apache.geode.test.dunit.Assert.fail("unexpected exception", e); } return false; // NOTREACHED }); // three applications plus assertEquals(6, system.getDM().getViewMembers().size()); vm0.invoke(() -> stopLocator()); vm1.invoke(() -> stopLocator()); vm2.invoke(() -> stopLocator()); Awaitility.waitAtMost(10000, TimeUnit.MILLISECONDS).pollInterval(200, TimeUnit.MILLISECONDS) .until(() -> { try { return system.getDM().getMembershipManager().getView().size() <= 3; } catch (Exception e) { e.printStackTrace(); org.apache.geode.test.dunit.Assert.fail("unexpected exception", e); } return false; // NOTREACHED }); final String newLocators = host0 + "[" + port2 + "]," + host0 + "[" + port3 + "]"; dsProps.setProperty(LOCATORS, newLocators); final InternalDistributedMember currentCoordinator = GMSJoinLeaveTestHelper.getCurrentCoordinator(); DistributedMember vm3ID = vm3.invoke(() -> GMSJoinLeaveTestHelper .getInternalDistributedSystem().getDM().getDistributionManagerId()); assertTrue("View is " + system.getDM().getMembershipManager().getView() + " and vm3's ID is " + vm3ID, vm3.invoke(() -> GMSJoinLeaveTestHelper.isViewCreator())); vm1.invoke(() -> startLocatorAsync(new Object[] {port2, dsProps})); vm2.invoke(() -> startLocatorAsync(new Object[] {port3, dsProps})); Awaitility.waitAtMost(30000, TimeUnit.MILLISECONDS).pollInterval(200, TimeUnit.MILLISECONDS) .until(() -> { try { InternalDistributedMember c = GMSJoinLeaveTestHelper.getCurrentCoordinator(); if (c.equals(currentCoordinator)) { // now locator should be new coordinator return false; } return system.getDM().getAllHostedLocators().size() == 2; } catch (Exception e) { e.printStackTrace(); org.apache.geode.test.dunit.Assert.fail("unexpected exception", e); } return false; // NOTREACHED }); vm1.invoke("waitUntilLocatorBecomesCoordinator", () -> waitUntilLocatorBecomesCoordinator()); vm2.invoke("waitUntilLocatorBecomesCoordinator", () -> waitUntilLocatorBecomesCoordinator()); vm3.invoke("waitUntilLocatorBecomesCoordinator", () -> waitUntilLocatorBecomesCoordinator()); vm4.invoke("waitUntilLocatorBecomesCoordinator", () -> waitUntilLocatorBecomesCoordinator()); int netviewId = vm1.invoke("Checking ViewCreator", () -> GMSJoinLeaveTestHelper.getViewId()); assertEquals(netviewId, (int) vm2.invoke("checking ViewID", () -> GMSJoinLeaveTestHelper.getViewId())); assertEquals(netviewId, (int) vm3.invoke("checking ViewID", () -> GMSJoinLeaveTestHelper.getViewId())); assertEquals(netviewId, (int) vm4.invoke("checking ViewID", () -> GMSJoinLeaveTestHelper.getViewId())); assertFalse( vm4.invoke("Checking ViewCreator", () -> GMSJoinLeaveTestHelper.isViewCreator())); // Given the start up order of servers, this server is the elder server assertFalse( vm3.invoke("Checking ViewCreator", () -> GMSJoinLeaveTestHelper.isViewCreator())); if (vm1.invoke(() -> GMSJoinLeaveTestHelper.isViewCreator())) { assertFalse( vm2.invoke("Checking ViewCreator", () -> GMSJoinLeaveTestHelper.isViewCreator())); } else { assertTrue( vm2.invoke("Checking ViewCreator", () -> GMSJoinLeaveTestHelper.isViewCreator())); } } finally { system.disconnect(); vm3.invoke(() -> disconnectDistributedSystem()); vm4.invoke(() -> disconnectDistributedSystem()); vm2.invoke(() -> stopLocator()); vm1.invoke(() -> stopLocator()); } } finally { } } private void waitUntilLocatorBecomesCoordinator() { Awaitility.waitAtMost(15000, TimeUnit.MILLISECONDS).pollInterval(200, TimeUnit.MILLISECONDS) .until(() -> { try { InternalDistributedMember c = GMSJoinLeaveTestHelper.getCurrentCoordinator(); return c.getVmKind() == DistributionManager.LOCATOR_DM_TYPE; } catch (Exception e) { e.printStackTrace(); org.apache.geode.test.dunit.Assert.fail("unexpected exception", e); } return false; // NOTREACHED }); } private void startLocatorSync(Object[] args) { File logFile = new File(""); try { Locator.startLocatorAndDS((int) args[0], logFile, (Properties) args[1]); } catch (IOException ex) { org.apache.geode.test.dunit.Assert.fail("While starting process on port " + args[0], ex); } } private void startLocatorAsync(Object[] args) { File logFile = new File(""); try { Locator.startLocatorAndDS((int) args[0], logFile, (Properties) args[1]); } catch (IOException ex) { org.apache.geode.test.dunit.Assert.fail("While starting process on port " + args[0], ex); } } /** * Tests starting multiple locators in multiple VMs. */ @Test public void testMultipleMcastLocators() throws Exception { disconnectAllFromDS(); IgnoredException.addIgnoredException("Could not stop Distribution Locator"); // shutdown timing // issue in // InternalLocator Host host = Host.getHost(0); VM vm0 = host.getVM(0); VM vm1 = host.getVM(1); VM vm2 = host.getVM(2); VM vm3 = host.getVM(3); final int[] freeTCPPorts = AvailablePortHelper.getRandomAvailableTCPPorts(2); final int port1 = freeTCPPorts[0]; this.port1 = port1; final int port2 = freeTCPPorts[1]; this.port2 = port2; DistributedTestUtils.deleteLocatorStateFile(port1, port2); final int mcastport = AvailablePort.getRandomAvailablePort(AvailablePort.MULTICAST); final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]"; final String uniqueName = getUniqueName(); vm0.invoke(new SerializableRunnable("Start locator on " + port1) { public void run() { File logFile = new File(""); try { Properties props = new Properties(); props.setProperty(MCAST_PORT, String.valueOf(mcastport)); props.setProperty(LOCATORS, locators); props.setProperty(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); props.setProperty(MCAST_TTL, "0"); props.setProperty(ENABLE_NETWORK_PARTITION_DETECTION, "true"); props.setProperty(ENABLE_CLUSTER_CONFIGURATION, "false"); addDSProps(props); Locator.startLocatorAndDS(port1, logFile, null, props); } catch (IOException ex) { org.apache.geode.test.dunit.Assert.fail("While starting locator on port " + port1, ex); } } }); vm3.invoke("Start locator on " + port2, () -> { File logFile = new File(""); try { Properties props = new Properties(); props.setProperty(MCAST_PORT, String.valueOf(mcastport)); props.setProperty(LOCATORS, locators); props.setProperty(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); props.setProperty(MCAST_TTL, "0"); props.setProperty(ENABLE_NETWORK_PARTITION_DETECTION, "true"); props.setProperty(ENABLE_CLUSTER_CONFIGURATION, "false"); addDSProps(props); Locator.startLocatorAndDS(port2, logFile, null, props); } catch (IOException ex) { org.apache.geode.test.dunit.Assert.fail("While starting locator on port " + port2, ex); } }); SerializableRunnable connect = new SerializableRunnable("Connect to " + locators) { public void run() { Properties props = new Properties(); props.setProperty(MCAST_PORT, String.valueOf(mcastport)); props.setProperty(LOCATORS, locators); props.setProperty(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); props.setProperty(MCAST_TTL, "0"); props.setProperty(ENABLE_NETWORK_PARTITION_DETECTION, "true"); addDSProps(props); DistributedSystem.connect(props); } }; try { vm1.invoke(connect); vm2.invoke(connect); Properties props = new Properties(); props.setProperty(MCAST_PORT, String.valueOf(mcastport)); props.setProperty(LOCATORS, locators); props.setProperty(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); props.setProperty(MCAST_TTL, "0"); props.setProperty(ENABLE_NETWORK_PARTITION_DETECTION, "true"); addDSProps(props); system = (InternalDistributedSystem) DistributedSystem.connect(props); Awaitility.waitAtMost(WAIT2_MS, TimeUnit.MILLISECONDS) .pollInterval(200, TimeUnit.MILLISECONDS).until(() -> { try { return system.getDM().getViewMembers().size() == 5; } catch (Exception e) { org.apache.geode.test.dunit.Assert.fail("unexpected exception", e); } return false; // NOTREACHED }); system.disconnect(); vm1.invoke(() -> disconnectDistributedSystem()); vm2.invoke(() -> disconnectDistributedSystem()); } finally { vm0.invoke(() -> stopLocator()); vm3.invoke(() -> stopLocator()); if (system != null) { system.disconnect(); } } } /** * Tests that a VM can connect to a locator that is hosted in its own VM. */ @Test public void testConnectToOwnLocator() throws Exception { disconnectAllFromDS(); Host host = Host.getHost(0); port1 = AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET); DistributedTestUtils.deleteLocatorStateFile(port1); File logFile = new File(""); Locator locator = Locator.startLocator(port1, logFile); try { final String locators = NetworkUtils.getServerHostName(host) + "[" + port1 + "]"; Properties props = new Properties(); props.setProperty(MCAST_PORT, "0"); props.setProperty(LOCATORS, locators); props.setProperty(ENABLE_CLUSTER_CONFIGURATION, "false"); system = (InternalDistributedSystem) DistributedSystem.connect(props); system.disconnect(); } finally { locator.stop(); } } /** * Tests that a single VM can NOT host multiple locators */ @Test public void testHostingMultipleLocators() throws Exception { disconnectAllFromDS(); Host host = Host.getHost(0); int[] randomAvailableTCPPorts = AvailablePortHelper.getRandomAvailableTCPPorts(2); port1 = randomAvailableTCPPorts[0]; File logFile1 = new File(""); DistributedTestUtils.deleteLocatorStateFile(port1); Locator locator1 = Locator.startLocator(port1, logFile1); try { int port2 = randomAvailableTCPPorts[1]; File logFile2 = new File(""); DistributedTestUtils.deleteLocatorStateFile(port2); try { Locator locator2 = Locator.startLocator(port2, logFile2); fail("expected second locator start to fail."); } catch (IllegalStateException expected) { } final String host0 = NetworkUtils.getServerHostName(host); final String locators = host0 + "[" + port1 + "]," + host0 + "[" + port2 + "]"; SerializableRunnable connect = new SerializableRunnable("Connect to " + locators) { public void run() { Properties props = new Properties(); props.setProperty(MCAST_PORT, "0"); props.setProperty(LOCATORS, locators); props.setProperty(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); DistributedSystem.connect(props); } }; connect.run(); disconnectDistributedSystem(); } finally { locator1.stop(); } } /** * Tests starting, stopping, and restarting a locator. See bug 32856. * * @since GemFire 4.1 */ @Test public void testRestartLocator() throws Exception { disconnectAllFromDS(); port1 = AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET); DistributedTestUtils.deleteLocatorStateFile(port1); File logFile = new File(""); File stateFile = new File("locator" + port1 + "state.dat"); VM vm0 = Host.getHost(0).getVM(0); final Properties p = new Properties(); p.setProperty(LOCATORS, Host.getHost(0).getHostName() + "[" + port1 + "]"); p.setProperty(MCAST_PORT, "0"); p.setProperty(ENABLE_CLUSTER_CONFIGURATION, "false"); addDSProps(p); if (stateFile.exists()) { stateFile.delete(); } org.apache.geode.test.dunit.LogWriterUtils.getLogWriter().info("Starting locator"); Locator locator = Locator.startLocatorAndDS(port1, logFile, p); try { vm0.invoke(() -> { DistributedSystem.connect(p); return null; }); LogWriterUtils.getLogWriter().info("Stopping locator"); locator.stop(); LogWriterUtils.getLogWriter().info("Starting locator"); locator = Locator.startLocatorAndDS(port1, logFile, p); vm0.invoke("disconnect", () -> { DistributedSystem.connect(p).disconnect(); return null; }); } finally { locator.stop(); } } /** * return the distributed member id for the ds on this vm */ public static DistributedMember getDistributedMember(Properties props) { props.put("name", "vm_" + VM.getCurrentVMNum()); DistributedSystem sys = DistributedSystem.connect(props); sys.getLogWriter().info("<ExpectedException action=add>service failure</ExpectedException>"); sys.getLogWriter().info( "<ExpectedException action=add>org.apache.geode.ConnectException</ExpectedException>"); sys.getLogWriter().info( "<ExpectedException action=add>org.apache.geode.ForcedDisconnectException</ExpectedException>"); return DistributedSystem.connect(props).getDistributedMember(); } /** * find a running locator and return its distributed member id */ public static DistributedMember getLocatorDistributedMember() { return (Locator.getLocators().iterator().next()).getDistributedSystem().getDistributedMember(); } /** * find the lead member and return its id */ public static DistributedMember getLeadMember() { DistributedSystem sys = InternalDistributedSystem.getAnyInstance(); return MembershipManagerHelper.getLeadMember(sys); } protected void stopLocator() { MembershipManagerHelper.inhibitForcedDisconnectLogging(false); Locator loc = Locator.getLocator(); if (loc != null) { loc.stop(); assertFalse(Locator.hasLocator()); } } private void getStartSBLocatorRunnable(final int port) { File logFile = new File(""); try { System.setProperty(InternalLocator.LOCATORS_PREFERRED_AS_COORDINATORS, "true"); System.setProperty("p2p.joinTimeout", "1000"); Properties locProps = new Properties(); locProps.put(MCAST_PORT, "0"); locProps.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel()); addDSProps(locProps); Locator.startLocatorAndDS(port, logFile, locProps); } catch (IOException ex) { org.apache.geode.test.dunit.Assert.fail("While starting locator on port " + port, ex); } finally { System.getProperties().remove(InternalLocator.LOCATORS_PREFERRED_AS_COORDINATORS); System.getProperties().remove("p2p.joinTimeout"); } } protected void nukeJChannel(DistributedSystem sys) { sys.getLogWriter().info("<ExpectedException action=add>service failure</ExpectedException>"); sys.getLogWriter().info( "<ExpectedException action=add>org.apache.geode.ConnectException</ExpectedException>"); sys.getLogWriter().info( "<ExpectedException action=add>org.apache.geode.ForcedDisconnectException</ExpectedException>"); try { MembershipManagerHelper.crashDistributedSystem(sys); } catch (DistributedSystemDisconnectedException se) { // it's okay for the system to already be shut down } sys.getLogWriter().info("<ExpectedException action=remove>service failure</ExpectedException>"); sys.getLogWriter().info( "<ExpectedException action=remove>org.apache.geode.ForcedDisconnectException</ExpectedException>"); } // New test hook which blocks before closing channel. class TestHook implements MembershipTestHook { volatile boolean unboundedWait = true; @Override public void beforeMembershipFailure(String reason, Throwable cause) { System.out.println("Inside TestHook.beforeMembershipFailure with " + cause); long giveUp = System.currentTimeMillis() + 30000; if (cause instanceof ForcedDisconnectException) { while (unboundedWait && System.currentTimeMillis() < giveUp) { Wait.pause(1000); } } else { cause.printStackTrace(); } } @Override public void afterMembershipFailure(String reason, Throwable cause) {} public void reset() { unboundedWait = false; } } class MyMembershipListener implements MembershipListener { boolean quorumLostInvoked; List<String> suspectReasons = new ArrayList<>(50); public void memberJoined(InternalDistributedMember id) {} public void memberDeparted(InternalDistributedMember id, boolean crashed) {} public void memberSuspect(InternalDistributedMember id, InternalDistributedMember whoSuspected, String reason) { suspectReasons.add(reason); } public void quorumLost(Set<InternalDistributedMember> failures, List<InternalDistributedMember> remaining) { quorumLostInvoked = true; org.apache.geode.test.dunit.LogWriterUtils.getLogWriter() .info("quorumLost invoked in test code"); } } }