/*
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license
* agreements. See the NOTICE file distributed with this work for additional information regarding
* copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License. You may obtain a
* copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.geode.distributed.internal;
import static org.apache.geode.distributed.ConfigurationProperties.*;
import static org.apache.geode.test.dunit.Assert.*;
import com.jayway.awaitility.Awaitility;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
import org.apache.geode.test.junit.categories.MembershipTest;
import org.apache.logging.log4j.Logger;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.apache.geode.LogWriter;
import org.apache.geode.admin.AdminDistributedSystem;
import org.apache.geode.admin.AdminDistributedSystemFactory;
import org.apache.geode.admin.Alert;
import org.apache.geode.admin.AlertLevel;
import org.apache.geode.admin.AlertListener;
import org.apache.geode.admin.DistributedSystemConfig;
import org.apache.geode.cache.Cache;
import org.apache.geode.cache.CacheListener;
import org.apache.geode.cache.DataPolicy;
import org.apache.geode.cache.EntryEvent;
import org.apache.geode.cache.Region;
import org.apache.geode.cache.RegionEvent;
import org.apache.geode.cache.RegionFactory;
import org.apache.geode.cache.Scope;
import org.apache.geode.cache.util.CacheListenerAdapter;
import org.apache.geode.distributed.DistributedSystem;
import org.apache.geode.distributed.internal.membership.InternalDistributedMember;
import org.apache.geode.distributed.internal.membership.MembershipManager;
import org.apache.geode.distributed.internal.membership.NetView;
import org.apache.geode.distributed.internal.membership.gms.MembershipManagerHelper;
import org.apache.geode.distributed.internal.membership.gms.interfaces.Manager;
import org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager;
import org.apache.geode.internal.logging.LogService;
import org.apache.geode.test.dunit.Host;
import org.apache.geode.test.dunit.IgnoredException;
import org.apache.geode.test.dunit.NetworkUtils;
import org.apache.geode.test.dunit.SerializableRunnable;
import org.apache.geode.test.dunit.VM;
import org.apache.geode.test.dunit.Wait;
import org.apache.geode.test.dunit.WaitCriterion;
import org.apache.geode.test.dunit.internal.JUnit4DistributedTestCase;
import org.apache.geode.test.junit.categories.DistributedTest;
/**
* This class tests the functionality of the {@link DistributionManager} class.
*/
@Category({DistributedTest.class, MembershipTest.class})
public class DistributionManagerDUnitTest extends JUnit4DistributedTestCase {
/** Log4j logger used by the test methods and by the alert listener created in this class. */
private static final Logger logger = LogService.getLogger();
// NOTE(review): not referenced anywhere in the visible part of this file - confirm whether
// anything outside this class still relies on it before removing.
public static DistributedSystem ds;
/**
 * Resets the exception-in-threads indicator of the supplied distribution manager by delegating to
 * {@link DistributionManager#clearExceptionInThreads()}.
 *
 * @param dm the distribution manager whose indicator is cleared
 */
public static void clearExceptionInThreads(final DistributionManager dm) {
  dm.clearExceptionInThreads();
}
/**
 * Pre-set-up hook: invokes {@code disconnectAllFromDS()} before each test.
 *
 * @throws Exception if the disconnect fails
 */
@Override
public void preSetUp() throws Exception {
  disconnectAllFromDS();
}
/**
 * Message type sent by {@code testExceptionInThreads}; it reports {@code NO_FIXED_ID} as its
 * DSFID and is never expected to be processed.
 */
protected static class ItsOkayForMyClassNotToBeFound extends SerialDistributionMessage {

  @Override
  public int getDSFID() {
    return NO_FIXED_ID;
  }

  @Override
  protected void process(DistributionManager dm) {
    // We should never get here
  }
}
/**
 * Verifies that the id of the distribution manager obtained from {@code getSystem()} reports
 * {@link DistributionManager#NORMAL_DM_TYPE} as its VM kind.
 */
@Test
public void testGetDistributionVMType() {
  InternalDistributedMember localMemberId = getSystem().getDistributionManager().getId();
  assertEquals(DistributionManager.NORMAL_DM_TYPE, localMemberId.getVmKind());
}
/**
 * Sends the distribution manager a message it can't deserialize, waits for the
 * exception-in-threads flag to be raised and then verifies that the flag can be cleared again.
 *
 * @throws InterruptedException if the test thread is interrupted while sleeping
 */
@Ignore
@Test
public void testExceptionInThreads() throws InterruptedException {
  DistributionManager dm = (DistributionManager) getSystem().getDistributionManager();
  String expectedText = "ItsOkayForMyClassNotToBeFound";
  logger.info("<ExpectedException action=add>" + expectedText + "</ExpectedException>");
  try {
    DistributionMessage message = new ItsOkayForMyClassNotToBeFound();
    dm.putOutgoing(message);
    // presumably gives the failure time to be logged while the marker is still registered
    Thread.sleep(1000);
  } finally {
    // always unregister the marker, even when sending or sleeping fails, so it cannot leak into
    // the log output of later tests
    logger.info("<ExpectedException action=remove>" + expectedText + "</ExpectedException>");
  }
  Awaitility.await("waiting for exceptionInThreads to be true").atMost(15, TimeUnit.SECONDS)
      .until(() -> {
        return dm.exceptionInThreads();
      });
  dm.clearExceptionInThreads();
  Assert.assertFalse(dm.exceptionInThreads());
}
/**
 * Reconnects with the {@code jg-bind-port} system property set to the port of the previous
 * membership id and asserts that the id obtained after reconnecting reports the same port. The
 * system property is removed again when the test finishes.
 *
 * <p>
 * NOTE(review): the test name refers to shunning, but nothing visible here shuns the member -
 * confirm the intended scenario.
 */
@Test
public void testConnectAfterBeingShunned() {
  final String bindPortProperty = DistributionConfig.GEMFIRE_PREFIX + "jg-bind-port";
  InternalDistributedSystem system = getSystem();
  MembershipManager membership = MembershipManagerHelper.getMembershipManager(system);
  InternalDistributedMember originalId = membership.getLocalMember();
  // TODO GMS needs to have a system property allowing the bind-port to be set
  System.setProperty(bindPortProperty, "" + originalId.getPort());
  try {
    system.disconnect();
    system = getSystem();
    membership = MembershipManagerHelper.getMembershipManager(system);
    system.disconnect();
    InternalDistributedMember reconnectedId = membership.getLocalMember();
    org.apache.geode.test.dunit.LogWriterUtils.getLogWriter()
        .info("original ID=" + originalId + " and after connecting=" + reconnectedId);
    boolean samePort = originalId.getPort() == reconnectedId.getPort();
    assertTrue("should not have used a different udp port", samePort);
  } finally {
    System.getProperties().remove(bindPortProperty);
  }
}
/**
 * Exercises the membership manager's handling of "surprise members". A fake member id is first
 * offered with a view id older than the current view and must be rejected (bug #44566). It is
 * then force-added with a birth time that leaves only a short grace period before its timeout.
 * The test checks that it survives one view change caused by vm0, and that it has been removed
 * after the grace period has elapsed and vm0 has caused another view change.
 */
@Test
public void testSurpriseMemberHandling() {
  VM vm0 = Host.getHost(0).getVM(0);
  InternalDistributedSystem system = getSystem();
  MembershipManager membership = MembershipManagerHelper.getMembershipManager(system);
  try {
    InternalDistributedMember fakeMember =
        new InternalDistributedMember(NetworkUtils.getIPLiteral(), 12345);

    // First make sure the fake id cannot be added as a surprise member (bug #44566). If the
    // view number isn't being recorded correctly the test would pass although the
    // functionality is broken, hence the explicit check of the view id.
    Assert.assertTrue("expected view ID to be greater than zero",
        membership.getView().getViewId() > 0);
    int originalViewId = fakeMember.getVmViewId();
    fakeMember.setVmViewId((int) membership.getView().getViewId() - 1);
    org.apache.geode.test.dunit.LogWriterUtils.getLogWriter()
        .info("current membership view is " + membership.getView());
    org.apache.geode.test.dunit.LogWriterUtils.getLogWriter()
        .info("created ID " + fakeMember + " with view ID " + fakeMember.getVmViewId());
    system.getLogWriter()
        .info("<ExpectedException action=add>attempt to add old member</ExpectedException>");
    system.getLogWriter()
        .info("<ExpectedException action=add>Removing shunned GemFire node</ExpectedException>");
    try {
      boolean accepted = membership.addSurpriseMember(fakeMember);
      Assert.assertFalse("member with old ID was not rejected (bug #44566)", accepted);
    } finally {
      system.getLogWriter()
          .info("<ExpectedException action=remove>attempt to add old member</ExpectedException>");
      system.getLogWriter().info(
          "<ExpectedException action=remove>Removing shunned GemFire node</ExpectedException>");
    }
    fakeMember.setVmViewId(originalViewId);

    // Now forcibly add it as a surprise member and show that it is reaped. The birth time is
    // back-dated so that only the grace period remains before the member times out.
    long gracePeriod = 5000;
    long startTime = System.currentTimeMillis();
    long timeout = ((GMSMembershipManager) membership).getSurpriseMemberTimeout();
    long birthTime = startTime - timeout + gracePeriod;
    MembershipManagerHelper.addSurpriseMember(system, fakeMember, birthTime);
    assertTrue("Member was not a surprise member", membership.isSurpriseMember(fakeMember));

    // connecting and disconnecting vm0 forces a real view change
    SerializableRunnable connectDisconnect = new SerializableRunnable() {
      @Override
      public void run() {
        getSystem().disconnect();
      }
    };
    vm0.invoke(connectDisconnect);

    boolean alreadyExpired = System.currentTimeMillis() - timeout > birthTime;
    if (alreadyExpired) {
      return; // machine is too busy and we didn't get enough CPU to perform more assertions
    }
    assertTrue("Member was incorrectly removed from surprise member set",
        membership.isSurpriseMember(fakeMember));

    try {
      Thread.sleep(gracePeriod);
    } catch (InterruptedException interrupted) {
      fail("test was interrupted", interrupted);
    }

    vm0.invoke(connectDisconnect);
    Assert.assertFalse("Member was not removed from surprise member set",
        membership.isSurpriseMember(fakeMember));
  } finally {
    if (system != null && system.isConnected()) {
      system.disconnect();
    }
  }
}
/**
* vm1 stores its cache in this static variable in testAckSevereAlertThreshold and
* testKickOutSickMember so that a later invocation in that VM can inspect it
*/
static Cache myCache;
/**
 * Tests that a severe-level alert is generated if a member does not respond with an ack quickly
 * enough. This VM ("putter") and vm1 ("sleeper") both create the replicated region
 * {@code testRegion} with ack-wait-threshold and ack-severe-alert-threshold set to 3. vm1
 * installs a cache listener that sleeps when an entry is created, plus an alert listener. After
 * this VM has performed a put and disconnected, vm1 disconnects and asserts that its alert
 * listener was invoked.
 *
 * @throws Exception if setting up or running the scenario fails
 */
@Test
public void testAckSevereAlertThreshold() throws Exception {
  disconnectAllFromDS();
  VM vm1 = Host.getHost(0).getVM(1);

  // in order to set a small ack-wait-threshold, we have to remove the
  // system property established by the dunit harness
  final String ackWaitKey = DistributionConfig.GEMFIRE_PREFIX + ACK_WAIT_THRESHOLD;
  String previousAckWait = (String) System.getProperties().remove(ackWaitKey);
  try {
    final Properties props = getDistributedSystemProperties();
    props.setProperty(MCAST_PORT, "0");
    props.setProperty(ACK_WAIT_THRESHOLD, "3");
    props.setProperty(ACK_SEVERE_ALERT_THRESHOLD, "3");
    props.setProperty(NAME, "putter");
    getSystem(props);

    RegionFactory putterFactory = new RegionFactory();
    Region putterRegion = putterFactory.setScope(Scope.DISTRIBUTED_ACK).setEarlyAck(false)
        .setDataPolicy(DataPolicy.REPLICATE).create("testRegion");

    vm1.invoke(new SerializableRunnable("Connect to distributed system") {
      @Override
      public void run() {
        props.setProperty(NAME, "sleeper");
        getSystem(props);
        IgnoredException.addIgnoredException("elapsed while waiting for replies");
        RegionFactory sleeperFactory = new RegionFactory();
        Region sleeperRegion = sleeperFactory.setScope(Scope.DISTRIBUTED_ACK)
            .setDataPolicy(DataPolicy.REPLICATE).setEarlyAck(false)
            .addCacheListener(getSleepingListener(false)).create("testRegion");
        myCache = sleeperRegion.getCache();
        try {
          createAlertListener();
        } catch (Exception e) {
          throw new RuntimeException("failed to create alert listener", e);
        }
      }
    });

    // Both caches are set up now. vm1's listener will sleep and thereby cause the
    // severe-alert threshold to be crossed; the put hangs until vm1 responds.
    putterRegion.put("bomb", "pow!");
    putterRegion.getCache().close();
    basicGetSystem().disconnect();

    vm1.invoke(new SerializableRunnable("disconnect from ds") {
      @Override
      public void run() {
        boolean cacheStillOpen = !myCache.isClosed();
        if (cacheStillOpen) {
          if (basicGetSystem().isConnected()) {
            basicGetSystem().disconnect();
          }
          myCache = null;
        }
        if (basicGetSystem().isConnected()) {
          basicGetSystem().disconnect();
        }
        // the alert listener installed above must have been notified by now
        synchronized (alertGuard) {
          assertTrue(alertReceived);
        }
      }
    });
  } finally {
    // put back the harness-provided setting that was removed at the start
    if (previousAckWait != null) {
      System.setProperty(ackWaitKey, previousAckWait);
    }
  }
}
/** Set to true once the sleeping listener has been told that its region was destroyed. */
static volatile boolean regionDestroyedInvoked;

/**
 * Creates a cache listener that sleeps for 15 seconds whenever an entry is created and records
 * region destruction in {@link #regionDestroyedInvoked}, which is reset to false by this call.
 *
 * @param playDead when true the listener first invokes
 *        {@code MembershipManagerHelper.beSickMember} and {@code playDead} on this VM's system
 *        before sleeping
 * @return the new listener
 */
static CacheListener getSleepingListener(final boolean playDead) {
  regionDestroyedInvoked = false;
  return new CacheListenerAdapter() {

    @Override
    public void afterCreate(EntryEvent event) {
      try {
        if (playDead) {
          MembershipManagerHelper.beSickMember(getSystemStatic());
          MembershipManagerHelper.playDead(getSystemStatic());
        }
        Thread.sleep(15000);
      } catch (InterruptedException interrupted) {
        fail("interrupted", interrupted);
      }
    }

    @Override
    public void afterRegionDestroy(RegionEvent event) {
      // uses the log writer of the cache stored by the test running in this VM
      LogWriter cacheLog = myCache.getLogger();
      cacheLog.info("afterRegionDestroyed invoked in sleeping listener");
      cacheLog.info("<ExpectedException action=remove>service failure</ExpectedException>");
      cacheLog.info(
          "<ExpectedException action=remove>org.apache.geode.ForcedDisconnectException</ExpectedException>");
      regionDestroyedInvoked = true;
    }
  };
}
/** Admin connection created by {@link #createAlertListener()}. */
static AdminDistributedSystem adminSystem;
/** Monitor guarding reads and writes of {@link #alertReceived}. */
static Object alertGuard = new Object();
/** Set to true by the alert listener once an alert has been delivered to it. */
static boolean alertReceived;

/**
 * Connects an admin distributed system derived from this VM's system, sets its alert level to
 * {@link AlertLevel#SEVERE} and registers a listener that logs each alert and records its
 * arrival in {@link #alertReceived}.
 *
 * <p>
 * {@link #alertReceived} is reset to false first: it is static, so a value left over from an
 * earlier run in the same JVM would otherwise satisfy a later assertion even though no new alert
 * was delivered.
 *
 * @throws Exception if the admin system cannot be defined or connected
 */
static void createAlertListener() throws Exception {
  synchronized (alertGuard) {
    alertReceived = false;
  }
  DistributedSystemConfig config =
      AdminDistributedSystemFactory.defineDistributedSystem(getSystemStatic(), null);
  adminSystem = AdminDistributedSystemFactory.getDistributedSystem(config);
  adminSystem.setAlertLevel(AlertLevel.SEVERE);
  adminSystem.addAlertListener(new AlertListener() {
    @Override
    public void alert(Alert alert) {
      try {
        logger
            .info("alert listener invoked for alert originating in " + alert.getConnectionName());
        logger.info(" alert text = " + alert.getMessage());
        logger.info(" systemMember = " + alert.getSystemMember());
      } catch (Exception e) {
        // a failure to describe the alert must not prevent recording that it arrived
        logger.fatal("exception trying to use alert object", e);
      }
      synchronized (alertGuard) {
        alertReceived = true;
      }
    }
  });
  adminSystem.connect();
  assertTrue(adminSystem.waitToBeConnected(5 * 1000));
}
/**
 * Tests that a sick member is kicked out. This VM ("putter") and vm1 ("sleeper") both create the
 * replicated region {@code testRegion} with ack-wait-threshold and ack-severe-alert-threshold
 * set to 5. vm1's cache listener invokes {@code beSickMember}/{@code playDead} and then sleeps
 * when an entry is created. After this VM has performed a put and disconnected, vm1 waits until
 * its system is disconnected, its cache is closed and its listener has been notified of the
 * region's destruction.
 *
 * @throws Exception if setting up or running the scenario fails
 */
@Test
public void testKickOutSickMember() throws Exception {
  disconnectAllFromDS();
  IgnoredException.addIgnoredException("10 seconds have elapsed while waiting");
  VM vm1 = Host.getHost(0).getVM(1);

  // in order to set a small ack-wait-threshold, we have to remove the
  // system property established by the dunit harness
  final String ackWaitKey = DistributionConfig.GEMFIRE_PREFIX + ACK_WAIT_THRESHOLD;
  String previousAckWait = (String) System.getProperties().remove(ackWaitKey);
  try {
    final Properties props = getDistributedSystemProperties();
    props.setProperty(MCAST_PORT, "0");
    props.setProperty(ACK_WAIT_THRESHOLD, "5");
    props.setProperty(ACK_SEVERE_ALERT_THRESHOLD, "5");
    props.setProperty(NAME, "putter");
    getSystem(props);

    RegionFactory putterFactory = new RegionFactory();
    Region putterRegion = putterFactory.setScope(Scope.DISTRIBUTED_ACK)
        .setDataPolicy(DataPolicy.REPLICATE).create("testRegion");
    basicGetSystem().getLogWriter().info(
        "<ExpectedException action=add>sec have elapsed while waiting for replies</ExpectedException>");

    vm1.invoke(new SerializableRunnable("Connect to distributed system") {
      @Override
      public void run() {
        props.setProperty(NAME, "sleeper");
        getSystem(props);
        LogWriter sleeperLog = basicGetSystem().getLogWriter();
        sleeperLog.info("<ExpectedException action=add>service failure</ExpectedException>");
        sleeperLog.info(
            "<ExpectedException action=add>org.apache.geode.ForcedDisconnectException</ExpectedException>");
        RegionFactory sleeperFactory = new RegionFactory();
        Region sleeperRegion = sleeperFactory.setScope(Scope.DISTRIBUTED_ACK)
            .setDataPolicy(DataPolicy.REPLICATE).addCacheListener(getSleepingListener(true))
            .create("testRegion");
        myCache = sleeperRegion.getCache();
      }
    });

    // Both caches are set up now. vm1's cache listener will turn off its ability to respond
    // to "are you dead" messages and then sleep once the entry below is created.
    putterRegion.put("bomb", "pow!");
    putterRegion.getCache().close();
    basicGetSystem().getLogWriter().info(
        "<ExpectedException action=remove>sec have elapsed while waiting for replies</ExpectedException>");
    basicGetSystem().disconnect();

    vm1.invoke(new SerializableRunnable("wait for forced disconnect") {
      @Override
      public void run() {
        // wait a while for the DS to finish disconnecting
        WaitCriterion systemDisconnected = new WaitCriterion() {
          @Override
          public boolean done() {
            return !basicGetSystem().isConnected();
          }

          @Override
          public String description() {
            return null;
          }
        };
        // if this fails it means the sick member wasn't kicked out and something is wrong
        Wait.waitForCriterion(systemDisconnected, 60 * 1000, 200, true);

        WaitCriterion cacheClosed = new WaitCriterion() {
          @Override
          public boolean done() {
            return myCache.isClosed();
          }

          @Override
          public String description() {
            return null;
          }
        };
        // this wait is called with 'false' as its last argument; the explicit check that
        // follows reports the failure instead
        Wait.waitForCriterion(cacheClosed, 20 * 1000, 200, false);

        if (!myCache.isClosed()) {
          if (basicGetSystem().isConnected()) {
            basicGetSystem().disconnect();
          }
          myCache = null;
          throw new RuntimeException("Test Failed - vm1's cache is not closed");
        }
        if (basicGetSystem().isConnected()) {
          basicGetSystem().disconnect();
          throw new RuntimeException("Test Failed - vm1's system should have been disconnected");
        }

        WaitCriterion listenerNotified = new WaitCriterion() {
          @Override
          public boolean done() {
            return regionDestroyedInvoked;
          }

          @Override
          public String description() {
            return "vm1's listener should have received afterRegionDestroyed notification";
          }
        };
        Wait.waitForCriterion(listenerNotified, 30 * 1000, 1000, true);
      }
    });
  } finally {
    // put back the harness-provided setting that was removed at the start
    if (previousAckWait != null) {
      System.setProperty(ackWaitKey, previousAckWait);
    }
  }
}
/**
 * Tests the use of a bad bind-address (bug #32565). Connecting must be rejected with an
 * {@link IllegalArgumentException} both for a resolvable address that does not belong to this
 * machine and for an address that is not valid at all; the test fails if either connection
 * attempt is accepted.
 *
 * @throws Exception if the local host name cannot be determined or connecting fails unexpectedly
 */
@Test
public void testBadBindAddress() throws Exception {
  disconnectAllFromDS();
  final Properties props = getDistributedSystemProperties();
  props.setProperty(MCAST_PORT, "0");
  // use a valid address that's not proper for this machine
  props.setProperty(BIND_ADDRESS, "www.yahoo.com");
  props.setProperty(ACK_WAIT_THRESHOLD, "5");
  props.setProperty(ACK_SEVERE_ALERT_THRESHOLD, "5");
  try {
    getSystem(props);
    // without this the test could never detect that the bad address was accepted
    Assert.fail("expected IllegalArgumentException for a bind-address of another machine");
  } catch (IllegalArgumentException e) {
    org.apache.geode.test.dunit.LogWriterUtils.getLogWriter()
        .info("caught expected exception (1)", e);
  }
  // use an invalid address
  props.setProperty(BIND_ADDRESS, "bruce.schuchardt");
  try {
    getSystem(props);
    Assert.fail("expected IllegalArgumentException for an invalid bind-address");
  } catch (IllegalArgumentException e) {
    org.apache.geode.test.dunit.LogWriterUtils.getLogWriter()
        .info("caught expected exception (2)", e);
  }
  // use a valid bind address
  props.setProperty(BIND_ADDRESS, InetAddress.getLocalHost().getCanonicalHostName());
  // NOTE(review): the connection below does not pass 'props', so the valid bind-address set
  // above is never applied - confirm whether getSystem(props) was intended.
  getSystem().disconnect();
}
/**
 * Installs a new view and shows that {@code waitForViewInstallation} works as expected: a
 * background thread waits for the view id following the current one, the test installs a view
 * with that id, and the thread is expected to have returned from its wait shortly afterwards.
 */
@Test
public void testWaitForViewInstallation() {
  getSystem(new Properties());
  MembershipManager membership = basicGetSystem().getDM().getMembershipManager();
  final NetView currentView = membership.getView();
  // single-element holder so the waiting thread can report back; guarded by its own monitor
  final boolean[] waiterReturned = new boolean[1];

  Runnable waitForNextView = () -> {
    try {
      DistributionManager dm = (DistributionManager) basicGetSystem().getDM();
      dm.waitForViewInstallation(currentView.getViewId() + 1);
      synchronized (waiterReturned) {
        waiterReturned[0] = true;
      }
    } catch (InterruptedException e) {
      // failed - the flag stays false and the assertion below reports it
    }
  };
  Thread waiter = new Thread(waitForNextView, "wait for view installation");
  waiter.setDaemon(true);
  waiter.start();

  // presumably gives the thread time to start waiting before the view is installed
  Wait.pause(2000);
  NetView nextView = new NetView(currentView, currentView.getViewId() + 1);
  ((Manager) membership).installView(nextView);

  // presumably gives the waiting thread time to notice the new view
  Wait.pause(2000);
  synchronized (waiterReturned) {
    Assert.assertTrue(waiterReturned[0]);
  }
}
}