/*
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license
* agreements. See the NOTICE file distributed with this work for additional information regarding
* copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License. You may obtain a
* copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.geode.internal.cache.partitioned;
import static org.apache.geode.distributed.ConfigurationProperties.*;
import static com.jayway.awaitility.Awaitility.*;
import static java.util.concurrent.TimeUnit.*;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.Assert.*;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.apache.geode.DataSerializable;
import org.apache.geode.cache.AttributesFactory;
import org.apache.geode.cache.Cache;
import org.apache.geode.cache.CacheClosedException;
import org.apache.geode.cache.CustomExpiry;
import org.apache.geode.cache.DataPolicy;
import org.apache.geode.cache.DiskAccessException;
import org.apache.geode.cache.DiskStore;
import org.apache.geode.cache.EvictionAction;
import org.apache.geode.cache.EvictionAttributes;
import org.apache.geode.cache.ExpirationAction;
import org.apache.geode.cache.ExpirationAttributes;
import org.apache.geode.cache.PartitionAttributesFactory;
import org.apache.geode.cache.PartitionedRegionStorageException;
import org.apache.geode.cache.Region;
import org.apache.geode.cache.Region.Entry;
import org.apache.geode.cache.RegionAttributes;
import org.apache.geode.cache.RegionFactory;
import org.apache.geode.cache.RegionShortcut;
import org.apache.geode.cache.Scope;
import org.apache.geode.cache.client.PoolFactory;
import org.apache.geode.cache.client.PoolManager;
import org.apache.geode.cache.client.ServerOperationException;
import org.apache.geode.cache.execute.Function;
import org.apache.geode.cache.execute.FunctionContext;
import org.apache.geode.cache.execute.FunctionService;
import org.apache.geode.cache.persistence.ConflictingPersistentDataException;
import org.apache.geode.cache.persistence.PartitionOfflineException;
import org.apache.geode.cache.persistence.RevokeFailedException;
import org.apache.geode.cache.persistence.RevokedPersistentDataException;
import org.apache.geode.cache.query.QueryException;
import org.apache.geode.cache.server.CacheServer;
import org.apache.geode.distributed.internal.DistributionManager;
import org.apache.geode.distributed.internal.DistributionMessage;
import org.apache.geode.distributed.internal.DistributionMessageObserver;
import org.apache.geode.distributed.internal.ReplyException;
import org.apache.geode.internal.AvailablePort;
import org.apache.geode.internal.cache.GemFireCacheImpl;
import org.apache.geode.internal.cache.InitialImageOperation.RequestImageMessage;
import org.apache.geode.internal.cache.control.InternalResourceManager;
import org.apache.geode.internal.cache.partitioned.ManageBucketMessage.ManageBucketReplyMessage;
import org.apache.geode.internal.i18n.LocalizedStrings;
import org.apache.geode.test.dunit.Assert;
import org.apache.geode.test.dunit.AsyncInvocation;
import org.apache.geode.test.dunit.Host;
import org.apache.geode.test.dunit.IgnoredException;
import org.apache.geode.test.dunit.LogWriterUtils;
import org.apache.geode.test.dunit.NetworkUtils;
import org.apache.geode.test.dunit.RMIException;
import org.apache.geode.test.dunit.SerializableCallable;
import org.apache.geode.test.dunit.SerializableRunnable;
import org.apache.geode.test.dunit.VM;
import org.apache.geode.test.dunit.Wait;
import org.apache.geode.test.dunit.WaitCriterion;
import org.apache.geode.test.junit.categories.DistributedTest;
import org.apache.geode.test.junit.categories.FlakyTest;
/**
* Tests the basic use cases for PR persistence.
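*
* <p>
* Most tests create a persistent partitioned region; a minimal sketch of the pattern used
* throughout this file (the exact attributes come from helpers such as
* {@code getPersistentPRAttributes} in the test base class) is:
*
* <pre>
* PartitionAttributesFactory paf = new PartitionAttributesFactory();
* paf.setRedundantCopies(redundancy);
* AttributesFactory af = new AttributesFactory();
* af.setPartitionAttributes(paf.create());
* af.setDataPolicy(DataPolicy.PERSISTENT_PARTITION);
* af.setDiskStoreName("disk");
* cache.createRegion(PR_REGION_NAME, af.create());
* </pre>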
*/
@Category(DistributedTest.class)
public class PersistentPartitionedRegionDUnitTest extends PersistentPartitionedRegionTestBase {
private static final int NUM_BUCKETS = 15;
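// Note: the default total-num-buckets for a PR is 113 and Integer.hashCode(i) == i, so the
// small integer keys used below map one-to-one onto bucket ids; writing keys 0..N-1 creates N
// buckets.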
// This must be bigger than the dunit ack-wait-threshold for the revoke
// tests. The command line is setting the ack-wait-threshold to be
// 60 seconds.
private static final int MAX_WAIT = 65 * 1000;
public PersistentPartitionedRegionDUnitTest() {
super();
}
/**
* A simple test case that we are actually persisting with a PR.
*/
@Test
public void testSinglePR() {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
createPR(vm0, 0);
createData(vm0, 0, 1, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
// closePR(vm0);
closeCache(vm0);
createPR(vm0, 0);
assertEquals(vm0Buckets, getBucketList(vm0));
checkData(vm0, 0, 1, "a");
localDestroyPR(vm0);
closeCache(vm0);
createPR(vm0, 0);
// Make sure the data is now missing
checkData(vm0, 0, 1, null);
}
/**
* Test that changing total-num-buckets on a persistent PR throws an IllegalStateException,
* whether the new value is smaller or larger than the previously configured value.
*/
@Test
public void testChangedTotalBucketNumberSinglePR() {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
createPR(vm0, 0, 0, 5);
createData(vm0, 0, 5, "a");
closeCache(vm0);
IgnoredException expect1 = IgnoredException.addIgnoredException("IllegalStateException", vm0);
IgnoredException expect2 = IgnoredException.addIgnoredException("DiskAccessException", vm0);
try {
createPR(vm0, 0, 0, 2);
fail("Expect to get java.lang.IllegalStateException, but it did not");
} catch (RMIException exp) {
assertTrue(exp.getCause() instanceof IllegalStateException);
IllegalStateException ise = (IllegalStateException) exp.getCause();
Object[] prms = new Object[] {"/" + PR_REGION_NAME, 2, 5};
assertTrue(ise.getMessage().contains(
LocalizedStrings.PartitionedRegion_FOR_REGION_0_TotalBucketNum_1_SHOULD_NOT_BE_CHANGED_Previous_Configured_2
.toString(prms)));
}
closeCache(vm0);
try {
createPR(vm0, 0, 0, 10);
fail("Expect to get java.lang.IllegalStateException, but it did not");
} catch (RMIException exp) {
assertTrue(exp.getCause() instanceof IllegalStateException);
IllegalStateException ise = (IllegalStateException) exp.getCause();
Object[] prms = new Object[] {"/" + PR_REGION_NAME, 10, 5};
assertTrue(ise.getMessage().contains(
LocalizedStrings.PartitionedRegion_FOR_REGION_0_TotalBucketNum_1_SHOULD_NOT_BE_CHANGED_Previous_Configured_2
.toString(prms)));
}
expect1.remove();
expect2.remove();
}
/**
* Test for bug 44184
*/
@Test
public void testSinglePRWithCustomExpiry() {
Host host = Host.getHost(0);
VM vm1 = host.getVM(1);
SerializableRunnable createPR = new SerializableRunnable() {
public void run() {
Cache cache = getCache();
DiskStore ds = cache.findDiskStore("disk");
if (ds == null) {
ds = cache.createDiskStoreFactory().setDiskDirs(getDiskDirs()).create("disk");
}
AttributesFactory af = new AttributesFactory();
PartitionAttributesFactory paf = new PartitionAttributesFactory();
af.setPartitionAttributes(paf.create());
af.setCustomEntryIdleTimeout(new TestCustomExpiration());
af.setEntryIdleTimeout(new ExpirationAttributes(60, ExpirationAction.INVALIDATE));
af.setDataPolicy(DataPolicy.PERSISTENT_PARTITION);
af.setDiskStoreName("disk");
RegionAttributes attr = af.create();
cache.createRegion(PR_REGION_NAME, attr);
}
};
vm1.invoke(createPR);
createData(vm1, 0, 1, "a");
Set<Integer> vm1Buckets = getBucketList(vm1);
// closePR(vm1);
closeCache(vm1);
vm1.invoke(createPR);
assertEquals(vm1Buckets, getBucketList(vm1));
checkData(vm1, 0, 1, "a");
}
/**
* Test to make sure that we can recover from a complete system shutdown with redundancy 0
*
* @throws Throwable
*/
@Test
public void testTotalRecoverRedundancy0() throws Throwable {
totalRecoverTest(0);
}
/**
* Test to make sure that we can recover from a complete system shutdown with redundancy 1
*
* @throws Throwable
*/
@Test
public void testTotalRecoverRedundancy1() throws Throwable {
totalRecoverTest(1);
}
private static boolean FAIL_IN_THIS_VM = false;
/**
* Test for bug #49972 - handle a serialization error in the async writer thread.
*/
@Ignore("Bug 50376")
@Test
public void testBadSerializationInAsyncThread() throws Throwable {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
final int numBuckets = 50;
vm0.invoke(new SerializableRunnable() {
@Override
public void run() {
FAIL_IN_THIS_VM = true;
}
});
IgnoredException expected1 = IgnoredException.addIgnoredException("Fatal error from asynch");
IgnoredException expected2 = IgnoredException.addIgnoredException("ToDataException");
try {
int redundancy = 1;
createPR(vm0, redundancy, -1, 113, false);
createPR(vm2, redundancy, -1, 113, false);
// Trigger bucket creation
createData(vm0, 0, numBuckets, "a");
createPR(vm1, redundancy, -1, 113, false);
// write objects which will fail serialization in async writer thread.
vm0.invoke(new SerializableRunnable() {
public void run() {
Cache cache = getCache();
Region region = cache.getRegion(PR_REGION_NAME);
try {
for (int i = 0; i < numBuckets; i++) {
region.put(i, new BadSerializer());
// this will trigger a deserialization (we could also have done this put via a function)
region.get(i);
}
} catch (DiskAccessException ex) {
if (ex.getMessage().contains("the flusher thread had been terminated")) {
// expected
} else {
throw ex;
}
}
}
});
// Wait for the async flusher thread to be terminated.
Thread.sleep(2000);
createData(vm1, 0, numBuckets, "b");
// Try to do puts from vm1, which doesn't have any buckets
createData(vm1, numBuckets, numBuckets * 2, "b");
createData(vm1, numBuckets, numBuckets * 2, "c");
// make sure everything has settled out (these VMs may have been terminated)
checkData(vm2, 0, numBuckets, "b");
checkData(vm2, numBuckets, numBuckets * 2, "c");
} finally {
expected1.remove();
expected2.remove();
}
}
public static class BadSerializer implements DataSerializable {
public BadSerializer() {
}
public void toData(DataOutput out) throws IOException {
if (Thread.currentThread().getName().contains("Asynchronous disk writer")
&& FAIL_IN_THIS_VM) {
throw new ConcurrentModificationException();
}
}
public void fromData(DataInput in) throws IOException, ClassNotFoundException {
}
}
public void totalRecoverTest(int redundancy) throws Throwable {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
int numBuckets = 50;
createPR(vm0, redundancy);
createPR(vm1, redundancy);
createPR(vm2, redundancy);
createData(vm0, 0, numBuckets, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
Set<Integer> vm2Buckets = getBucketList(vm2);
closeCache(vm0);
closeCache(vm1);
closeCache(vm2);
AsyncInvocation a1 = createPRAsync(vm0, redundancy);
AsyncInvocation a2 = createPRAsync(vm1, redundancy);
AsyncInvocation a3 = createPRAsync(vm2, redundancy);
a1.getResult(MAX_WAIT);
a2.getResult(MAX_WAIT);
a3.getResult(MAX_WAIT);
assertEquals(vm0Buckets, getBucketList(vm0));
assertEquals(vm1Buckets, getBucketList(vm1));
assertEquals(vm2Buckets, getBucketList(vm2));
checkData(vm0, 0, numBuckets, "a");
createData(vm0, numBuckets, 113, "b");
checkData(vm0, numBuckets, 113, "b");
// Test for bug 43476 - make sure a destroy
// cleans up proxy bucket regions.
destroyPR(vm0);
destroyPR(vm1);
destroyPR(vm2);
a1 = createPRAsync(vm0, redundancy);
a2 = createPRAsync(vm1, redundancy);
a3 = createPRAsync(vm2, redundancy);
a1.getResult(MAX_WAIT);
a2.getResult(MAX_WAIT);
a3.getResult(MAX_WAIT);
checkData(vm0, 0, numBuckets, null);
}
@Test
public void testRevokeAfterStartup() throws Throwable {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
int numBuckets = 50;
createPR(vm0, 1);
createPR(vm1, 1);
createData(vm0, 0, numBuckets, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
assertEquals(vm0Buckets, vm1Buckets);
closeCache(vm0);
createData(vm1, 0, numBuckets, "b");
closeCache(vm1);
AsyncInvocation a1 = createPRAsync(vm0, 1);
// Make sure that vm0 is waiting for vm1 to recover.
// If vm0 recovers early, that is a problem, because vm1
// has newer data.
Thread.sleep(500);
assertTrue(a1.isAlive());
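// Revoking the one known-missing member (vm1) lets vm0 finish recovering with its older copy.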
revokeKnownMissingMembers(vm2, 1);
a1.getResult(MAX_WAIT);
assertEquals(vm0Buckets, getBucketList(vm0));
checkData(vm0, 0, numBuckets, "a");
createData(vm0, numBuckets, 113, "b");
checkData(vm0, numBuckets, 113, "b");
IgnoredException ex =
IgnoredException.addIgnoredException(RevokedPersistentDataException.class.getName(), vm1);
try {
createPR(vm1, 1);
fail("Should have recieved a SplitDistributedSystemException");
} catch (RMIException e) {
// This should throw a RevokedPersistentDataException, because
// we revoked this member.
if (!(e.getCause() instanceof RevokedPersistentDataException)) {
throw e;
}
}
ex.remove();
}
@Category(FlakyTest.class) // GEODE-974: async actions, time sensitive, 65 second timeouts
@Test
public void testRevokeBeforeStartup() throws Throwable {
IgnoredException.addIgnoredException("RevokeFailedException");
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
int numBuckets = 50;
createPR(vm0, 1);
createPR(vm1, 1);
createData(vm0, 0, numBuckets, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
assertEquals(vm0Buckets, vm1Buckets);
// This should fail with a revocation failed message
try {
revokeAllMembers(vm2);
fail("The revoke should have failed, because members are running");
} catch (RMIException e) {
if (!(e.getCause() instanceof ReplyException
&& e.getCause().getCause() instanceof RevokeFailedException)) {
throw e;
}
}
closeCache(vm0);
createData(vm1, 0, numBuckets, "b");
File vm1Directory = getDiskDirectory(vm1);
closeCache(vm1);
vm0.invoke(new SerializableRunnable("get cache") {
public void run() {
getCache();
}
});
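// Revoke the offline vm1 by its disk directory so that vm0 can recover without it.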
revokeMember(vm2, vm1Directory);
AsyncInvocation a1 = createPRAsync(vm0, 1);
a1.getResult(MAX_WAIT);
assertEquals(vm0Buckets, getBucketList(vm0));
checkData(vm0, 0, numBuckets, "a");
createData(vm0, numBuckets, 113, "b");
checkData(vm0, numBuckets, 113, "b");
IgnoredException ex =
IgnoredException.addIgnoredException(RevokedPersistentDataException.class.getName(), vm1);
try {
createPR(vm1, 1);
fail("Should have recieved a SplitDistributedSystemException");
} catch (RMIException e) {
// This should throw a RevokedPersistentDataException, because
// we revoked this member.
if (!(e.getCause() instanceof RevokedPersistentDataException)) {
throw e;
}
}
ex.remove();
}
private File getDiskDirectory(VM vm0) {
return (File) vm0.invoke(new SerializableCallable() {
@Override
public Object call() throws Exception {
return getDiskDirs()[0];
}
});
}
/**
* Test that we wait for missing data to come back if the redundancy was 0.
*/
@Test
public void testMissingMemberRedundancy0() {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
createPR(vm0, 0);
createPR(vm1, 0);
createData(vm0, 0, NUM_BUCKETS, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
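// Pick one bucket hosted by each VM so we can exercise both an online and an offline bucket.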
final int aVM0Bucket = vm0Buckets.iterator().next();
final int aVM1Bucket = vm1Buckets.iterator().next();
closeCache(vm1);
IgnoredException ex = IgnoredException.addIgnoredException("PartitionOfflineException");
try {
checkReadWriteOperationsWithOfflineMember(vm0, aVM0Bucket, aVM1Bucket);
// Make sure that a newly created member is informed about the offline member
createPR(vm2, 0);
checkReadWriteOperationsWithOfflineMember(vm2, aVM0Bucket, aVM1Bucket);
} finally {
ex.remove();
}
// This should work, because these are new buckets
createData(vm0, NUM_BUCKETS, 113, "a");
createPR(vm1, 0);
// The data should be back online now.
checkData(vm0, 0, 113, "a");
closeCache(vm0);
closeCache(vm1);
}
private void checkReadWriteOperationsWithOfflineMember(VM vm0, final int aVM0Bucket,
final int aVM1Bucket) {
// This should work, because this bucket is still available.
checkData(vm0, aVM0Bucket, aVM0Bucket + 1, "a");
try {
checkData(vm0, aVM1Bucket, aVM1Bucket + 1, null);
fail("Should not have been able to read from missing buckets!");
} catch (RMIException e) {
// We expect a PartitionOfflineException
if (!(e.getCause() instanceof PartitionOfflineException)) {
throw e;
}
}
IgnoredException expect =
IgnoredException.addIgnoredException("PartitionOfflineException", vm0);
// Try a function execution
vm0.invoke(new SerializableRunnable("Test ways to read") {
public void run() {
Cache cache = getCache();
Region region = cache.getRegion(PR_REGION_NAME);
try {
FunctionService.onRegion(region).execute(new TestFunction());
fail("Should not have been able to read from missing buckets!");
} catch (PartitionOfflineException e) {
// expected
}
// This should work, because this bucket is still available.
FunctionService.onRegion(region).withFilter(Collections.singleton(aVM0Bucket))
.execute(new TestFunction());
// This should fail, because this bucket is offline
try {
FunctionService.onRegion(region).withFilter(Collections.singleton(aVM1Bucket))
.execute(new TestFunction());
fail("Should not have been able to read from missing buckets!");
} catch (PartitionOfflineException e) {
// expected
}
// This should fail, because a bucket is offline
try {
HashSet filter = new HashSet();
filter.add(aVM0Bucket);
filter.add(aVM1Bucket);
FunctionService.onRegion(region).withFilter(filter).execute(new TestFunction());
fail("Should not have been able to read from missing buckets!");
} catch (PartitionOfflineException e) {
// expected
}
// This should fail, because a bucket is offline
try {
FunctionService.onRegion(region).execute(new TestFunction());
fail("Should not have been able to read from missing buckets!");
} catch (PartitionOfflineException e) {
// expected
}
try {
cache.getQueryService().newQuery("select * from /" + PR_REGION_NAME).execute();
fail("Should not have been able to read from missing buckets!");
} catch (PartitionOfflineException e) {
// expected
} catch (QueryException e) {
throw new RuntimeException(e);
}
try {
Set keys = region.keySet();
// iterate over all of the keys
for (Object key : keys) {
}
fail("Should not have been able to iterate over keyset");
} catch (PartitionOfflineException e) {
// expected
}
try {
// iterate over all of the values
for (Object value : region.values()) {
}
fail("Should not have been able to iterate over the values");
} catch (PartitionOfflineException e) {
// expected
}
try {
// iterate over all of the entries
for (Object entry : region.entrySet()) {
}
fail("Should not have been able to iterate over the entries");
} catch (PartitionOfflineException e) {
// expected
}
try {
region.get(aVM1Bucket);
fail("Should not have been able to get an offline key");
} catch (PartitionOfflineException e) {
// expected
}
try {
region.containsKey(aVM1Bucket);
fail("Should not have been able to get an offline key");
} catch (PartitionOfflineException e) {
// expected
}
try {
region.getEntry(aVM1Bucket);
fail("Should not have been able to get an offline key");
} catch (PartitionOfflineException e) {
// expected
}
try {
region.invalidate(aVM1Bucket);
fail("Should not have been able to get an offline key");
} catch (PartitionOfflineException e) {
// expected
}
try {
region.destroy(aVM1Bucket);
fail("Should not have been able to get an offline key");
} catch (PartitionOfflineException e) {
// expected
}
}
});
try {
createData(vm0, aVM1Bucket, aVM1Bucket + 1, "b");
fail("Should not have been able to write to missing buckets!");
} catch (RMIException e) {
// We expect to see a partition offline exception here.
if (!(e.getCause() instanceof PartitionOfflineException)) {
throw e;
}
}
expect.remove();
}
/**
* Test to make sure that we recreate a bucket if a member is destroyed
*/
@Test
public void testDestroyedMemberRedundancy0() {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
createPR(vm0, 0);
createPR(vm1, 0);
createData(vm0, 0, NUM_BUCKETS, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
int aVM0Bucket = vm0Buckets.iterator().next();
int aVM1Bucket = vm1Buckets.iterator().next();
localDestroyPR(vm1);
// This should work, because this bucket is still available.
checkData(vm0, aVM0Bucket, aVM0Bucket + 1, "a");
// This should find that the data is missing, because we destroyed that bucket
checkData(vm0, aVM1Bucket, aVM1Bucket + 1, null);
// We should be able to recreate that bucket
createData(vm0, aVM1Bucket, aVM1Bucket + 1, "b");
createPR(vm1, 0);
// The data should still be available
checkData(vm0, aVM0Bucket, aVM0Bucket + 1, "a");
checkData(vm0, aVM1Bucket, aVM1Bucket + 1, "b");
// This bucket should now be in vm0, because we recreated it there
assertTrue(getBucketList(vm0).contains(aVM1Bucket));
}
/**
* Test to make sure that we recreate a bucket if a member is destroyed
*/
@Test
public void testDestroyedMemberRedundancy1() {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
createPR(vm0, 1);
createPR(vm1, 1);
createData(vm0, 0, NUM_BUCKETS, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
assertEquals(vm0Buckets, vm1Buckets);
int aVM0Bucket = vm0Buckets.iterator().next();
localDestroyPR(vm1);
// This should work, because this bucket is still available.
checkData(vm0, aVM0Bucket, aVM0Bucket + 1, "a");
createPR(vm2, 1);
Set<Integer> vm2Buckets = getBucketList(vm2);
// VM 2 should have created a copy of all of the buckets
assertEquals(vm0Buckets, vm2Buckets);
}
/**
* Test to make sure that we recreate a bucket if a member is revoked
*/
@Test
public void testRevokedMemberRedundancy0() {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
createPR(vm0, 0);
createPR(vm1, 0);
createData(vm0, 0, NUM_BUCKETS, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
int aVM0Bucket = vm0Buckets.iterator().next();
int aVM1Bucket = vm1Buckets.iterator().next();
closeCache(vm1);
// This should work, because this bucket is still available.
checkData(vm0, aVM0Bucket, aVM0Bucket + 1, "a");
IgnoredException expect =
IgnoredException.addIgnoredException("PartitionOfflineException", vm0);
try {
checkData(vm0, aVM1Bucket, aVM1Bucket + 1, "a");
fail("Should not have been able to read from missing buckets!");
} catch (RMIException e) {
if (!(e.getCause() instanceof PartitionOfflineException)) {
throw e;
}
}
try {
createData(vm0, aVM1Bucket, aVM1Bucket + 1, "b");
fail("Should not have been able to write to missing buckets!");
} catch (RMIException e) {
// We expect to see a partition offline exception here.
if (!(e.getCause() instanceof PartitionOfflineException)) {
throw e;
}
}
expect.remove();
// This should work, because these are new buckets
createData(vm0, NUM_BUCKETS, 113, "a");
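// Revoking the missing vm1 allows its lost buckets to be recreated from scratch.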
revokeKnownMissingMembers(vm2, 1);
createPR(vm2, 0);
// We should be able to use that missing bucket now
checkData(vm2, aVM1Bucket, aVM1Bucket + 1, null);
createData(vm2, aVM1Bucket, aVM1Bucket + 1, "a");
checkData(vm2, aVM1Bucket, aVM1Bucket + 1, "a");
IgnoredException ex =
IgnoredException.addIgnoredException(RevokedPersistentDataException.class.getName(), vm1);
try {
createPR(vm1, 0);
fail("Should have recieved a RevokedPersistentDataException");
} catch (RMIException e) {
// This should throw a RevokedPersistentDataException, because
// we revoked this member.
if (!(e.getCause() instanceof RevokedPersistentDataException)) {
throw e;
}
}
ex.remove();
}
/**
* Test to make sure that we recreate a bucket if a member is revoked
*
* @throws Throwable
*/
@Test
public void testRevokedMemberRedundancy1() throws Throwable {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
createPR(vm0, 1);
createPR(vm1, 1);
createData(vm0, 0, NUM_BUCKETS, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
assertEquals(vm0Buckets, vm1Buckets);
closeCache(vm1);
// This should work, because this bucket is still available.
checkData(vm0, 0, NUM_BUCKETS, "a");
createData(vm0, 0, NUM_BUCKETS, "b");
revokeKnownMissingMembers(vm2, 1);
// This should make a copy of all of the buckets,
// because we have revoked VM1.
createPR(vm2, 1);
Set<Integer> vm2Buckets = getBucketList(vm2);
assertEquals(vm1Buckets, vm2Buckets);
IgnoredException ex =
IgnoredException.addIgnoredException(RevokedPersistentDataException.class.getName(), vm1);
try {
createPR(vm1, 1);
fail("Should have recieved a SplitDistributedSystemException");
} catch (RMIException e) {
// This should throw a RevokedPersistentDataException, because
// we revoked this member.
if (!(e.getCause() instanceof RevokedPersistentDataException)) {
throw e;
}
}
// Test that we can bounce vm0 and vm1, and still get a RevokedPersistentDataException
// when vm1 tries to recover
closeCache(vm0);
closeCache(vm2);
AsyncInvocation async0 = createPRAsync(vm0, 1);
AsyncInvocation async2 = createPRAsync(vm2, 1);
async0.getResult();
async2.getResult();
try {
createPR(vm1, 1);
fail("Should have recieved a RevokedPersistentDataException");
} catch (RMIException e) {
// This should throw a RevokedPersistentDataException, because
// we revoked this member.
if (!(e.getCause() instanceof RevokedPersistentDataException)) {
throw e;
}
}
ex.remove();
// The data shouldn't be affected.
checkData(vm2, 0, NUM_BUCKETS, "b");
}
/**
* Test to make sure that we recreate a bucket if a member is revoked, and that we do it
* immediately if recovery delay is set to 0.
*
* @throws Throwable
*/
@Test
public void testRevokedMemberRedundancy1ImmediateRecovery() throws Throwable {
// This test has been seen to fail because it finds the ds already disconnected;
// disconnecting everything up front is an attempted fix.
disconnectAllFromDS();
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
final VM vm2 = host.getVM(2);
createPR(vm0, 1, 0);
createPR(vm1, 1, 0);
createData(vm0, 0, NUM_BUCKETS, "a");
// This should do nothing because we have satisfied redundancy.
createPR(vm2, 1, 0);
assertEquals(Collections.emptySet(), getBucketList(vm2));
Set<Integer> vm0Buckets = getBucketList(vm0);
final Set<Integer> lostBuckets = getBucketList(vm1);
closeCache(vm1);
// VM2 should pick up the slack
Wait.waitForCriterion(new WaitCriterion() {
public boolean done() {
Set<Integer> vm2Buckets = getBucketList(vm2);
return lostBuckets.equals(vm2Buckets);
}
public String description() {
return "expected to recover " + lostBuckets + " buckets, now have " + getBucketList(vm2);
}
}, 30000, 500, true);
createData(vm0, 0, NUM_BUCKETS, "b");
// VM1 should recover, but it shouldn't host the bucket anymore
createPR(vm1, 1, 0);
// The data shouldn't be affected.
checkData(vm1, 0, NUM_BUCKETS, "b");
// restart everything, and make sure it comes back correctly.
closeCache(vm1);
closeCache(vm0);
closeCache(vm2);
AsyncInvocation async1 = createPRAsync(vm1, 1);
AsyncInvocation async0 = createPRAsync(vm0, 1);
// Make sure we wait for vm2, because it's got the latest copy of the bucket
async1.join(50);
// vm1 should still be waiting for vm2 (this assertion has failed here intermittently)
assertTrue(async1.isAlive());
AsyncInvocation async2 = createPRAsync(vm2, 1);
async2.getResult(MAX_WAIT);
async0.getResult(MAX_WAIT);
async1.getResult(MAX_WAIT);
// The data shouldn't be affected.
checkData(vm1, 0, NUM_BUCKETS, "b");
assertEquals(Collections.emptySet(), getBucketList(vm1));
assertEquals(vm0Buckets, getBucketList(vm0));
assertEquals(vm0Buckets, getBucketList(vm2));
}
/**
* In this test we replace buckets that are offline on A by creating them on C. We then shut
* down C and restart A, which recovers those buckets.
*/
@Test
public void testBug41340() throws Throwable {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
final VM vm2 = host.getVM(2);
createPR(vm0, 1, 0);
createPR(vm1, 1, 0);
createData(vm0, 0, NUM_BUCKETS, "a");
// This should do nothing because we have satisfied redundancy.
createPR(vm2, 1, 0);
assertEquals(Collections.emptySet(), getBucketList(vm2));
Set<Integer> vm0Buckets = getBucketList(vm0);
final Set<Integer> lostBuckets = getBucketList(vm1);
closeCache(vm1);
// VM2 should pick up the slack
waitForBucketRecovery(vm2, lostBuckets);
createData(vm0, 0, NUM_BUCKETS, "b");
// VM1 should recover, but it shouldn't host the bucket anymore
createPR(vm1, 1, 0);
// The data shouldn't be affected.
checkData(vm1, 0, NUM_BUCKETS, "b");
closeCache(vm2);
// The buckets should move back to vm1.
waitForBucketRecovery(vm1, lostBuckets);
assertEquals(vm0Buckets, getBucketList(vm0));
assertEquals(vm0Buckets, getBucketList(vm1));
// The data shouldn't be affected.
checkData(vm1, 0, NUM_BUCKETS, "b");
// restart everything, and make sure it comes back correctly.
closeCache(vm0);
closeCache(vm1);
AsyncInvocation async1 = createPRAsync(vm1, 1);
AsyncInvocation async0 = createPRAsync(vm0, 1);
async0.getResult(MAX_WAIT);
async1.getResult(MAX_WAIT);
// The data shouldn't be affected.
checkData(vm1, 0, NUM_BUCKETS, "b");
assertEquals(vm0Buckets, getBucketList(vm0));
assertEquals(vm0Buckets, getBucketList(vm1));
}
/**
* Test that with redundancy 1, we restore the same buckets when the missing member comes back
* online.
*/
@Category(FlakyTest.class) // GEODE-1047: thread unsafe test hook, CountDownLatch, async behavior
@Test
public void testMissingMemberRedundancy1() {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
createPR(vm0, 1);
createPR(vm1, 1);
createData(vm0, 0, NUM_BUCKETS, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
closeCache(vm1);
// This should work, because this bucket is still available.
checkData(vm0, 0, NUM_BUCKETS, "a");
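// Modify the data while vm1 is offline: remove the first half of the keys and update the rest.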
removeData(vm0, 0, NUM_BUCKETS / 2);
createData(vm0, NUM_BUCKETS / 2, NUM_BUCKETS, "b");
// This shouldn't create any buckets, because we know there are offline copies
createPR(vm2, 1);
Set<Integer> vm2Buckets = getBucketList(vm2);
assertEquals(Collections.emptySet(), vm2Buckets);
createPR(vm1, 1);
// The data should be back online now.
// and vm1 should have received the latest copy
// of the data.
checkData(vm1, 0, NUM_BUCKETS / 2, null);
checkData(vm1, NUM_BUCKETS / 2, NUM_BUCKETS, "b");
// Make sure we restored the buckets in the right
// place
assertEquals(vm0Buckets, getBucketList(vm0));
assertEquals(vm1Buckets, getBucketList(vm1));
assertEquals(Collections.emptySet(), getBucketList(vm2));
}
/**
* Test that we don't record our old member ID as offline, which would prevent redundancy
* recovery in the future.
*/
@Test
public void testBug41341() {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
createPR(vm0, 1);
createPR(vm1, 1);
createData(vm0, 0, 1, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
assertEquals(Collections.singleton(0), vm0Buckets);
assertEquals(Collections.singleton(0), vm1Buckets);
closeCache(vm1);
// This shouldn't create any buckets, because we know there are offline copies
createPR(vm2, 1);
assertEquals(1, getOfflineMembers(0, vm0).size());
// Note, vm2 will consider vm1 as "online" because vm2 doesn't host the bucket
assertEquals(2, getOnlineMembers(0, vm2).size());
Set<Integer> vm2Buckets = getBucketList(vm2);
assertEquals(Collections.emptySet(), vm2Buckets);
createPR(vm1, 1);
// Make sure we restored the buckets in the right
// place
assertEquals(vm0Buckets, getBucketList(vm0));
assertEquals(vm1Buckets, getBucketList(vm1));
assertEquals(Collections.emptySet(), getBucketList(vm2));
assertEquals(Collections.emptySet(), getOfflineMembers(0, vm0));
assertEquals(Collections.emptySet(), getOfflineMembers(0, vm1));
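// Move the bucket from vm1 to vm2; no member should be recorded as offline afterwards.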
moveBucket(0, vm1, vm2);
assertEquals(Collections.emptySet(), getOfflineMembers(0, vm0));
assertEquals(Collections.emptySet(), getOfflineMembers(0, vm1));
assertEquals(Collections.emptySet(), getOfflineMembers(0, vm2));
assertEquals(Collections.singleton(0), getBucketList(vm0));
assertEquals(Collections.emptySet(), getBucketList(vm1));
assertEquals(Collections.singleton(0), getBucketList(vm2));
// Destroy VM2
destroyPR(vm2);
assertEquals(Collections.emptySet(), getOfflineMembers(0, vm0));
assertEquals(Collections.emptySet(), getOfflineMembers(0, vm1));
// Close down VM 1
closeCache(vm1);
assertEquals(0, getOfflineMembers(0, vm0).size());
// This should recover redundancy, because vm2 was destroyed
createPR(vm1, 1);
assertEquals(Collections.singleton(0), getBucketList(vm0));
assertEquals(Collections.singleton(0), getBucketList(vm1));
assertEquals(Collections.emptySet(), getOfflineMembers(0, vm0));
assertEquals(Collections.emptySet(), getOfflineMembers(0, vm1));
}
/**
* Test that we throw away a bucket if we restored redundancy while that bucket was offline.
*/
@Ignore
@Test
public void testThrowAwayUnneededBucket() {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
createPR(vm0, 1);
createPR(vm1, 1);
createData(vm0, 0, NUM_BUCKETS, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
assertEquals(vm0Buckets, vm1Buckets);
assertEquals(NUM_BUCKETS, vm0Buckets.size());
closeCache(vm1);
createPR(vm2, 1);
checkData(vm0, 0, NUM_BUCKETS, "a");
vm0Buckets = getBucketList(vm0);
Set<Integer> vm2Buckets = getBucketList(vm2);
// Each node should have a full copy of everything
assertEquals(vm0Buckets, vm2Buckets);
assertEquals(NUM_BUCKETS, vm0Buckets.size());
createPR(vm1, 1);
assertEquals(Collections.emptySet(), getBucketList(vm1));
closeCache(vm0);
closeCache(vm1);
closeCache(vm2);
createPR(vm0, 1);
createPR(vm1, 1);
createPR(vm2, 1);
assertEquals(vm0Buckets, getBucketList(vm0));
assertEquals(Collections.emptySet(), getBucketList(vm1));
assertEquals(vm2Buckets, getBucketList(vm2));
checkData(vm0, 0, NUM_BUCKETS, "a");
}
@Test
public void testMoveBucket() throws Throwable {
int redundancy = 0;
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
createPR(vm0, redundancy);
createData(vm0, 0, 2, "a");
createPR(vm1, redundancy);
createPR(vm2, redundancy);
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
Set<Integer> vm2Buckets = getBucketList(vm2);
moveBucket(0, vm0, vm1);
moveBucket(0, vm1, vm2);
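// Keys 113 and 226 also hash to bucket 0 (113 % 113 == 0 and 226 % 113 == 0), so these puts
// land in the bucket that was just moved.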
createData(vm0, 113, 114, "a");
moveBucket(0, vm2, vm0);
createData(vm0, 226, 227, "a");
assertEquals(vm0Buckets, getBucketList(vm0));
assertEquals(vm1Buckets, getBucketList(vm1));
assertEquals(vm2Buckets, getBucketList(vm2));
closeCache(vm0);
closeCache(vm1);
closeCache(vm2);
AsyncInvocation a1 = createPRAsync(vm0, redundancy);
AsyncInvocation a2 = createPRAsync(vm1, redundancy);
AsyncInvocation a3 = createPRAsync(vm2, redundancy);
a1.getResult(MAX_WAIT);
a2.getResult(MAX_WAIT);
a3.getResult(MAX_WAIT);
assertEquals(vm2Buckets, getBucketList(vm2));
assertEquals(vm1Buckets, getBucketList(vm1));
assertEquals(vm0Buckets, getBucketList(vm0));
checkData(vm0, 0, 2, "a");
checkData(vm0, 113, 114, "a");
checkData(vm0, 226, 227, "a");
}
@Category(FlakyTest.class) // GEODE-1582: race in async region creation causing GII when not
// expected
@Test
public void testCleanStop() throws Throwable {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
createPR(vm0, 1);
createPR(vm1, 1);
createData(vm0, 0, 1, "a");
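// fakeCleanShutdown (a test-base helper) simulates a coordinated clean shutdown so that, on
// restart, the members can recover bucket 0 from disk (verified below via
// checkRecoveredFromDisk).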
fakeCleanShutdown(vm1, 0);
fakeCleanShutdown(vm0, 0);
AsyncInvocation async1 = createPRAsync(vm0, 1);
// Make sure that vm0 is waiting for vm1 to recover.
// If vm0 recovers early, that is a problem, because
// we can no longer do a clean restart.
AsyncInvocation async2 = createPRAsync(vm1, 1);
async1.getResult(MAX_WAIT);
async2.getResult(MAX_WAIT);
checkData(vm0, 0, 1, "a");
checkData(vm1, 0, 1, "a");
checkRecoveredFromDisk(vm0, 0, true);
checkRecoveredFromDisk(vm1, 0, true);
closePR(vm0);
closePR(vm1);
async1 = createPRAsync(vm0, 1);
async2 = createPRAsync(vm1, 1);
async1.getResult(MAX_WAIT);
async2.getResult(MAX_WAIT);
checkData(vm0, 0, 1, "a");
checkData(vm1, 0, 1, "a");
checkRecoveredFromDisk(vm0, 0, false);
checkRecoveredFromDisk(vm1, 0, true);
}
@Test
public void testRegisterInterestNoDataStores() {
// Closing the client may log a warning on the server
IgnoredException.addIgnoredException("Connection reset");
IgnoredException.addIgnoredException("SocketTimeoutException");
IgnoredException.addIgnoredException("ServerConnectivityException");
IgnoredException.addIgnoredException("Socket Closed");
IgnoredException.addIgnoredException("Unexpected IOException");
final Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
final Integer serverPort = (Integer) vm0.invoke(new SerializableCallable("create PR") {
public Object call() {
Cache cache = getCache();
AttributesFactory af = new AttributesFactory();
PartitionAttributesFactory paf = new PartitionAttributesFactory();
paf.setRedundantCopies(0);
paf.setLocalMaxMemory(0);
af.setPartitionAttributes(paf.create());
af.setDataPolicy(DataPolicy.PARTITION);
cache.createRegion(PR_REGION_NAME, af.create());
CacheServer server = cache.addCacheServer();
server.setPort(AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET));
server.setNotifyBySubscription(true);
try {
server.start();
} catch (IOException e) {
throw new RuntimeException(e);
}
return server.getPort();
}
});
vm1.invoke(new SerializableRunnable("create client") {
public void run() {
Properties props = new Properties();
props.setProperty(MCAST_PORT, "0");
props.setProperty(LOCATORS, "");
getSystem(props);
try {
Cache cache = getCache();
PoolFactory pf = PoolManager.createFactory();
pf.addServer(NetworkUtils.getServerHostName(host), serverPort);
pf.setSubscriptionEnabled(true);
pf.create("pool");
AttributesFactory af = new AttributesFactory();
af.setDataPolicy(DataPolicy.NORMAL);
af.setScope(Scope.LOCAL);
af.setPoolName("pool");
Region region = cache.createRegion(PR_REGION_NAME, af.create());
try {
region.registerInterestRegex(".*");
} catch (ServerOperationException e) {
if (!(e.getCause() instanceof PartitionedRegionStorageException)) {
throw e;
}
}
} finally {
disconnectFromDS();
}
}
});
}
/**
* This test is here just to make sure that we don't get a suspect string from an exception
* during cache closure.
*/
@Test
public void testOverflowCacheClose() {
Cache cache = getCache();
RegionFactory rf = new RegionFactory();
PartitionAttributesFactory paf = new PartitionAttributesFactory();
rf.setPartitionAttributes(paf.create());
rf.setDataPolicy(DataPolicy.PARTITION);
rf.setEvictionAttributes(
EvictionAttributes.createLRUEntryAttributes(50, EvictionAction.OVERFLOW_TO_DISK));
rf.setDiskDirs(getDiskDirs());
Region region = rf.create(PR_REGION_NAME);
region.get(0);
cache.getDistributedSystem().disconnect();
// cache.close();
}
/**
* Test for bug 41336
*/
@Category(FlakyTest.class) // GEODE-1738
@Test
public void testCrashDuringBucketCreation() throws Throwable {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
vm0.invoke(new SerializableRunnable("Install observer") {
public void run() {
DistributionMessageObserver.setInstance(new DistributionMessageObserver() {
@Override
public void beforeSendMessage(DistributionManager dm, DistributionMessage msg) {
if (msg instanceof ManageBucketReplyMessage) {
Cache cache = getCache();
disconnectFromDS();
await().atMost(30, SECONDS).until(() -> {
return (cache == null || cache.isClosed());
});
LogWriterUtils.getLogWriter().info("Cache is confirmed closed");
}
}
});
}
});
createPR(vm0, 0);
createPR(vm1, 0);
createData(vm1, 0, 4, "a");
Set<Integer> vm1Buckets = getBucketList(vm1);
// Make sure the test hook ran
vm0.invoke(new SerializableRunnable("Check for no distributed system") {
public void run() {
assertEquals(null, GemFireCacheImpl.getInstance());
}
});
checkData(vm1, 0, 4, "a");
assertEquals(4, vm1Buckets.size());
createPR(vm0, 0);
checkData(vm0, 0, 4, "a");
assertEquals(vm1Buckets, getBucketList(vm1));
assertEquals(Collections.emptySet(), getBucketList(vm0));
closeCache(vm0);
closeCache(vm1);
AsyncInvocation async0 = createPRAsync(vm0, 0);
AsyncInvocation async1 = createPRAsync(vm1, 0);
async0.getResult();
async1.getResult();
checkData(vm0, 0, 4, "a");
assertEquals(vm1Buckets, getBucketList(vm1));
assertEquals(Collections.emptySet(), getBucketList(vm0));
}
@Test
public void testNestedPRRegions() throws Throwable {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
int numBuckets = 50;
createNestedPR(vm0);
createNestedPR(vm1);
createNestedPR(vm2);
createData(vm0, 0, numBuckets, "a", "parent1/" + PR_REGION_NAME);
createData(vm0, 0, numBuckets, "b", "parent2/" + PR_REGION_NAME);
checkData(vm2, 0, numBuckets, "a", "parent1/" + PR_REGION_NAME);
checkData(vm2, 0, numBuckets, "b", "parent2/" + PR_REGION_NAME);
Set<Integer> vm1_0Buckets = getBucketList(vm0, "parent1/" + PR_REGION_NAME);
Set<Integer> vm1_1Buckets = getBucketList(vm1, "parent1/" + PR_REGION_NAME);
Set<Integer> vm1_2Buckets = getBucketList(vm2, "parent1/" + PR_REGION_NAME);
Set<Integer> vm2_0Buckets = getBucketList(vm0, "parent2/" + PR_REGION_NAME);
Set<Integer> vm2_1Buckets = getBucketList(vm1, "parent2/" + PR_REGION_NAME);
Set<Integer> vm2_2Buckets = getBucketList(vm2, "parent2/" + PR_REGION_NAME);
closeCache(vm0);
closeCache(vm1);
closeCache(vm2);
AsyncInvocation async0 = createNestedPRAsync(vm0);
// Make sure that vm0 is waiting for vm1 and vm2 to recover.
// If vm0 recovers early, that is a problem, because vm1
// has newer data.
Thread.sleep(50);
assertTrue(async0.isAlive());
AsyncInvocation async1 = createNestedPRAsync(vm1);
AsyncInvocation async2 = createNestedPRAsync(vm2);
async0.getResult();
async1.getResult();
async2.getResult();
assertEquals(vm1_0Buckets, getBucketList(vm0, "parent1/" + PR_REGION_NAME));
assertEquals(vm1_1Buckets, getBucketList(vm1, "parent1/" + PR_REGION_NAME));
assertEquals(vm1_2Buckets, getBucketList(vm2, "parent1/" + PR_REGION_NAME));
assertEquals(vm2_0Buckets, getBucketList(vm0, "parent2/" + PR_REGION_NAME));
assertEquals(vm2_1Buckets, getBucketList(vm1, "parent2/" + PR_REGION_NAME));
assertEquals(vm2_2Buckets, getBucketList(vm2, "parent2/" + PR_REGION_NAME));
checkData(vm0, 0, numBuckets, "a", "parent1/" + PR_REGION_NAME);
checkData(vm0, 0, numBuckets, "b", "parent2/" + PR_REGION_NAME);
createData(vm1, numBuckets, 113, "c", "parent1/" + PR_REGION_NAME);
createData(vm1, numBuckets, 113, "d", "parent2/" + PR_REGION_NAME);
checkData(vm2, numBuckets, 113, "c", "parent1/" + PR_REGION_NAME);
checkData(vm2, numBuckets, 113, "d", "parent2/" + PR_REGION_NAME);
}
@Test
public void testCloseDuringRegionOperation() throws Throwable {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
createPR(vm0, 1, -1, 1);
createPR(vm1, 1, -1, 1);
// Make sure we create a bucket
createData(vm1, 0, 1, "a");
// Try to make sure there are some operations in flight while closing the cache
SerializableCallable createData = new SerializableCallable() {
public Object call() {
Cache cache = getCache();
Region region = cache.getRegion(PR_REGION_NAME);
int i = 0;
while (true) {
try {
region.put(0, i);
i++;
} catch (CacheClosedException e) {
break;
}
}
return i - 1;
}
};
AsyncInvocation asyncCreate = vm0.invokeAsync(createData);
SerializableCallable waitForIntValue = new SerializableCallable() {
public Object call() {
Cache cache = getCache();
Region region = cache.getRegion(PR_REGION_NAME);
// The value is initialized as a String so wait
// for it to be changed to an Integer.
await().atMost(60, SECONDS).until(() -> {
return region.get(0) instanceof Integer;
});
return region.get(0);
}
};
vm0.invoke(waitForIntValue);
vm1.invoke(waitForIntValue);
AsyncInvocation close0 = closeCacheAsync(vm0);
AsyncInvocation close1 = closeCacheAsync(vm1);
// wait for the close to finish
close0.getResult();
close1.getResult();
Integer lastSuccessfulInt = (Integer) asyncCreate.getResult();
System.err.println("Cache was closed on integer " + lastSuccessfulInt);
AsyncInvocation create1 = createPRAsync(vm0, 1, -1, 1);
AsyncInvocation create2 = createPRAsync(vm1, 1, -1, 1);
create1.getResult(MAX_WAIT);
create2.getResult(MAX_WAIT);
SerializableCallable getValue = new SerializableCallable() {
public Object call() {
Cache cache = getCache();
Region region = cache.getRegion(PR_REGION_NAME);
int value = (Integer) region.get(0);
return value;
}
};
int vm1Value = (Integer) vm0.invoke(getValue);
int vm2Value = (Integer) vm1.invoke(getValue);
assertEquals(vm1Value, vm2Value);
assertTrue("value = " + vm1Value + ", lastSuccessfulInt=" + lastSuccessfulInt,
vm1Value == lastSuccessfulInt || vm1Value == lastSuccessfulInt + 1);
}
/**
* Test for bug 42226. 1. Member A has the bucket. 2. Member B starts creating the bucket and
* tells member A that it hosts the bucket. 3. Member A crashes. 4. Member B destroys the bucket
* and throws a partition offline exception, because it wasn't able to complete initialization.
* 5. Member A recovers, and gets stuck waiting for member B.
*
* @throws Exception
*/
@Category(FlakyTest.class) // GEODE-1208: time sensitive, multiple non-thread-safe test hooks,
// async actions
@Test
public void testBug42226() throws Exception {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
// Add a hook which will disconnect from the distributed
// system when the initial image message shows up.
vm0.invoke(new SerializableRunnable() {
public void run() {
DistributionMessageObserver.setInstance(new DistributionMessageObserver() {
@Override
public void beforeProcessMessage(DistributionManager dm, DistributionMessage message) {
if (message instanceof RequestImageMessage) {
RequestImageMessage rim = (RequestImageMessage) message;
// Don't disconnect until we see a bucket
if (rim.regionPath.contains("_B_")) {
DistributionMessageObserver.setInstance(null);
disconnectFromDS();
}
}
}
@Override
public void afterProcessMessage(DistributionManager dm, DistributionMessage message) {
}
});
}
});
LogWriterUtils.getLogWriter().info("Creating region in VM0");
createPR(vm0, 1, 0, 1);
// Make sure we create a bucket
createData(vm0, 0, 1, "a");
// This should recover redundancy, which should cause vm0 to disconnect
IgnoredException ex = IgnoredException.addIgnoredException("PartitionOfflineException");
try {
LogWriterUtils.getLogWriter().info("Creating region in VM1");
createPR(vm1, 1, 0, 1);
// Make sure get a partition offline exception
try {
createData(vm1, 0, 1, "a");
} catch (RMIException e) {
// We expect a PartitionOfflineException
if (!(e.getCause() instanceof PartitionOfflineException)) {
throw e;
}
}
} finally {
ex.remove();
}
// Make sure vm0 is really disconnected (avoids a race with the observer).
vm0.invoke(new SerializableRunnable() {
public void run() {
disconnectFromDS();
}
});
// This should recreate the bucket
AsyncInvocation async1 = createPRAsync(vm0, 1, 0, 1);
async1.getResult(MAX_WAIT);
checkData(vm1, 0, 1, "a");
}
/**
* A test to make sure that we allow the PR to be used after at least one copy of every bucket is
* recovered, but before the secondaries are initialized.
*
* @throws Throwable
*/
@Test
public void testAllowRegionUseBeforeRedundancyRecovery() throws Throwable {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
final int redundancy = 1;
int numBuckets = 20;
createPR(vm0, redundancy);
createPR(vm1, redundancy);
createPR(vm2, redundancy);
createData(vm0, 0, numBuckets, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
Set<Integer> vm2Buckets = getBucketList(vm2);
closeCache(vm0);
closeCache(vm1);
closeCache(vm2);
SerializableRunnable slowGII = new SerializableRunnable("Slow down GII") {
@SuppressWarnings("synthetic-access")
public void run() {
InternalResourceManager.setResourceObserver(new RecoveryObserver());
DistributionMessageObserver.setInstance(new BlockGIIMessageObserver());
}
};
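// BlockGIIMessageObserver (defined at the bottom of this file) stalls bucket GII requests on a
// latch, so primaries can come online while the redundant copies stay uninitialized.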
SerializableRunnable resetSlowGII = new SerializableRunnable("Unset the slow GII") {
public void run() {
BlockGIIMessageObserver messageObserver =
(BlockGIIMessageObserver) DistributionMessageObserver.setInstance(null);
RecoveryObserver recoveryObserver =
(RecoveryObserver) InternalResourceManager.getResourceObserver();
messageObserver.cdl.countDown();
try {
recoveryObserver.recoveryDone.await();
} catch (InterruptedException e) {
Assert.fail("Interrupted", e);
}
InternalResourceManager.setResourceObserver(null);
}
};
try {
vm0.invoke(slowGII);
vm1.invoke(slowGII);
vm2.invoke(slowGII);
SerializableRunnable createPR = new SerializableRunnable("create PR") {
public void run() {
Cache cache = getCache();
RegionAttributes attr = getPersistentPRAttributes(redundancy, -1, cache, 113, true);
cache.createRegion(PR_REGION_NAME, attr);
}
};
AsyncInvocation a1 = vm0.invokeAsync(createPR);
AsyncInvocation a2 = vm1.invokeAsync(createPR);
AsyncInvocation a3 = vm2.invokeAsync(createPR);
a1.getResult(MAX_WAIT);
a2.getResult(MAX_WAIT);
a3.getResult(MAX_WAIT);
// Make sure all of the primary are available.
checkData(vm0, 0, numBuckets, "a");
createData(vm0, 113, 113 + numBuckets, "b");
// But none of the secondaries
Set<Integer> vm0InitialBuckets = getBucketList(vm0);
Set<Integer> vm1InitialBuckets = getBucketList(vm1);
Set<Integer> vm2InitialBuckets = getBucketList(vm2);
assertEquals(
"vm0=" + vm0InitialBuckets + ",vm1=" + vm1InitialBuckets + "vm2=" + vm2InitialBuckets,
numBuckets,
vm0InitialBuckets.size() + vm1InitialBuckets.size() + vm2InitialBuckets.size());
} finally {
// Reset the slow GII flag, and wait for the redundant buckets
// to be recovered.
AsyncInvocation reset0 = vm0.invokeAsync(resetSlowGII);
AsyncInvocation reset1 = vm1.invokeAsync(resetSlowGII);
AsyncInvocation reset2 = vm2.invokeAsync(resetSlowGII);
reset0.getResult(MAX_WAIT);
reset1.getResult(MAX_WAIT);
reset2.getResult(MAX_WAIT);
}
// Now we better have all of the buckets
assertEquals(vm0Buckets, getBucketList(vm0));
assertEquals(vm1Buckets, getBucketList(vm1));
assertEquals(vm2Buckets, getBucketList(vm2));
// Make sure the members see the data recovered from disk
// in those secondary buckets
checkData(vm0, 0, numBuckets, "a");
checkData(vm1, 0, numBuckets, "a");
// Make sure the members see the new updates
// in those secondary buckets
checkData(vm0, 113, 113 + numBuckets, "b");
checkData(vm1, 113, 113 + numBuckets, "b");
}
/**
* A test for bug 41436. If the GII source crashes before the GII is complete, we need to make
* sure that later we can recover redundancy.
*/
@Test
public void testCrashDuringBucketGII() {
IgnoredException.addIgnoredException("PartitionOfflineException");
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
createPR(vm0, 1);
createData(vm0, 0, 1, "value");
// Add an observer which will close the cache when the GII starts
vm0.invoke(new SerializableRunnable("Set crashing observer") {
public void run() {
DistributionMessageObserver.setInstance(new DistributionMessageObserver() {
@Override
public void beforeProcessMessage(DistributionManager dm, DistributionMessage message) {
if (message instanceof RequestImageMessage) {
RequestImageMessage rim = (RequestImageMessage) message;
if (rim.regionPath.contains("_0")) {
DistributionMessageObserver.setInstance(null);
getCache().close();
}
}
}
});
}
});
createPR(vm1, 1);
// Make sure vm1 didn't create the bucket
assertEquals(Collections.emptySet(), getBucketList(vm1));
createPR(vm0, 1, 0);
// Make sure vm0 recovers the bucket
assertEquals(Collections.singleton(0), getBucketList(vm0));
// vm1 should satisfy redundancy for the bucket as well
assertEquals(Collections.singleton(0), getBucketList(vm1));
}
/**
* Another test for bug 41436. If the GII source crashes before the GII is complete, we need to
* make sure that later we can recover redundancy.
*
* In this test case, we bring the GII down before we bring the source back up, to make sure the
* source still discovers that the GII target is no longer hosting the bucket.
*
* @throws InterruptedException
*/
@Test
public void testCrashDuringBucketGII2() throws InterruptedException {
IgnoredException.addIgnoredException("PartitionOfflineException");
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
final VM vm1 = host.getVM(1);
createPR(vm0, 1);
createData(vm0, 0, 1, "value");
// Add an observer which will close the cache when the GII starts
vm0.invoke(new SerializableRunnable("Set crashing observer") {
public void run() {
DistributionMessageObserver.setInstance(new DistributionMessageObserver() {
@Override
public void beforeProcessMessage(DistributionManager dm, DistributionMessage message) {
if (message instanceof RequestImageMessage) {
RequestImageMessage rim = (RequestImageMessage) message;
if (rim.regionPath.contains("_0")) {
DistributionMessageObserver.setInstance(null);
getCache().close();
}
}
}
});
}
});
createPR(vm1, 1);
// Make sure vm1 didn't create the bucket
assertEquals(Collections.emptySet(), getBucketList(vm1));
closeCache(vm1);
AsyncInvocation async0 = createPRAsync(vm0, 1, 0, 113);
async0.join(500);
// vm0 should get stuck waiting for vm1 to recover from disk,
// because vm0 thinks vm1 has the bucket
assertTrue(async0.isAlive());
createPR(vm1, 1, 0);
// Make sure vm0 recovers the bucket
assertEquals(Collections.singleton(0), getBucketList(vm0));
// vm1 should satisfy redundancy for the bucket as well
WaitCriterion ev = new WaitCriterion() {
public boolean done() {
return (Collections.singleton(0).equals(getBucketList(vm1)));
}
public String description() {
return null;
}
};
Wait.waitForCriterion(ev, 30 * 1000, 200, true);
assertEquals(Collections.singleton(0), getBucketList(vm1));
}
@Test
public void testCleanupAfterConflict() throws Exception {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
createPR(vm0, 0);
// create some buckets
createData(vm0, 0, 2, "a");
closePR(vm0);
createPR(vm1, 0);
// create an overlapping bucket
// TODO - this test hangs if vm1 has some buckets that vm0
// does not have. The problem is that when vm0 starts up and gets a conflict
// on some buckets, it updates its view for other buckets.
// createData(vm1, 1, 3, "a");
createData(vm1, 1, 2, "a");
// this should throw a conflicting data exception.
IgnoredException expect =
IgnoredException.addIgnoredException("ConflictingPersistentDataException", vm0);
try {
createPR(vm0, 0);
fail("should have seen a conflicting data exception");
} catch (Exception e) {
if (!(e.getCause() instanceof ConflictingPersistentDataException)) {
throw e;
}
} finally {
expect.remove();
}
// This will hang if this test fails.
// TODO - DAN - I'm not even sure what this means here?
// It seems like if anything, vm1 should not have updated its persistent
// view from vm0 because vm0 was in conflict!
// In fact, this is a bit of a problem, because now vm1 is dependent
// on vm0.
expect = IgnoredException.addIgnoredException("PartitionOfflineException", vm1);
try {
createData(vm1, 0, 1, "a");
fail("Should have seen a PartitionOfflineException for bucket 0");
} catch (Exception e) {
if (!(e.getCause() instanceof PartitionOfflineException)) {
throw e;
}
} finally {
expect.remove();
}
closePR(vm1);
// This should succeed, vm0 should not have persisted any view
// information from vm1
createPR(vm0, 0);
checkData(vm0, 0, 2, "a");
checkData(vm0, 2, 3, null);
}
/**
* Test to make sure that primaries are rebalanced after recovering from disk.
*/
@Test
public void testPrimaryBalanceAfterRecovery() throws Throwable {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
int numBuckets = 30;
createPR(vm0, 1);
createPR(vm1, 1);
createPR(vm2, 1);
createData(vm0, 0, numBuckets, "a");
Set<Integer> vm0Buckets = getBucketList(vm0);
Set<Integer> vm1Buckets = getBucketList(vm1);
Set<Integer> vm2Buckets = getBucketList(vm2);
// We expect to see 10 primaries on each node since we have 30 buckets
Set<Integer> vm0Primaries = getPrimaryBucketList(vm0);
assertEquals("Expected 10 primaries " + vm0Primaries, 10, vm0Primaries.size());
Set<Integer> vm1Primaries = getPrimaryBucketList(vm1);
assertEquals("Expected 10 primaries " + vm1Primaries, 10, vm1Primaries.size());
Set<Integer> vm2Primaries = getPrimaryBucketList(vm2);
assertEquals("Expected 10 primaries " + vm2Primaries, 10, vm2Primaries.size());
// bounce vm0
closeCache(vm0);
createPR(vm0, 1);
waitForBucketRecovery(vm0, vm0Buckets);
assertEquals(vm0Buckets, getBucketList(vm0));
assertEquals(vm1Buckets, getBucketList(vm1));
assertEquals(vm2Buckets, getBucketList(vm2));
/*
* Although we make a best effort to distribute primaries evenly after bouncing the VM, in some
* instances one member can end up with only 9 primaries (see GEODE-1056). Because asserts fail
* fast, we wouldn't get to verify whether another VM ended up with 11 primaries. So rather than
* asserting 10 primaries in each VM, assert on the total number of primaries.
*/
vm0Primaries = getPrimaryBucketList(vm0);
vm1Primaries = getPrimaryBucketList(vm1);
vm2Primaries = getPrimaryBucketList(vm2);
int totalPrimaries = vm0Primaries.size() + vm1Primaries.size() + vm2Primaries.size();
assertEquals("Expected a total of " + numBuckets + " primaries:", numBuckets, totalPrimaries);
/*
* In the worst case a member may be one off from an even distribution, so assert that each
* VM's primary count is between 9 and 11 (both inclusive).
*/
assertThat(vm0Primaries.size()).isBetween(9, 11);
assertThat(vm1Primaries.size()).isBetween(9, 11);
assertThat(vm2Primaries.size()).isBetween(9, 11);
}
@Test
public void testConcurrencyChecksEnabled() {
Host host = Host.getHost(0);
VM vm0 = host.getVM(0);
VM vm1 = host.getVM(1);
VM vm2 = host.getVM(2);
VM vm3 = host.getVM(3);
final String regionName = getName();
SerializableCallable createPR = new SerializableCallable() {
@Override
public Object call() throws Exception {
RegionFactory<Integer, String> rf =
getCache().createRegionFactory(RegionShortcut.PARTITION_PERSISTENT);
Region<Integer, String> r = rf.create(regionName);
assertTrue(r.getAttributes().getConcurrencyChecksEnabled());
return null;
}
};
SerializableCallable createPRProxy = new SerializableCallable() {
@Override
public Object call() throws Exception {
RegionFactory<Integer, String> rf =
getCache().createRegionFactory(RegionShortcut.PARTITION_PROXY);
Region<Integer, String> r = rf.create(regionName);
return null;
}
};
vm0.invoke(createPRProxy);
vm1.invoke(createPR);
vm2.invoke(createPR);
vm3.invoke(createPRProxy);
SerializableCallable verifyConcurrencyChecks = new SerializableCallable() {
@Override
public Object call() throws Exception {
Region r = getCache().getRegion(regionName);
assertTrue(r.getAttributes().getConcurrencyChecksEnabled());
return null;
}
};
vm0.invoke(verifyConcurrencyChecks);
vm3.invoke(verifyConcurrencyChecks);
}
@Test
public void testNonPersistentProxy() {
Host host = Host.getHost(0);
VM vm1 = host.getVM(0);
VM vm2 = host.getVM(1);
VM vm3 = host.getVM(2);
final String regionName = getName();
SerializableCallable createAccessor = new SerializableCallable() {
@Override
public Object call() throws Exception {
getCache().createRegionFactory(RegionShortcut.PARTITION_PROXY).create(regionName);
return null;
}
};
vm1.invoke(createAccessor);
vm2.invoke(new SerializableCallable() {
@Override
public Object call() throws Exception {
Region r =
getCache().createRegionFactory(RegionShortcut.PARTITION_PERSISTENT).create(regionName);
assertTrue(r.getAttributes().getConcurrencyChecksEnabled());
return null;
}
});
vm3.invoke(createAccessor);
SerializableCallable verifyConcurrencyChecks = new SerializableCallable() {
@Override
public Object call() throws Exception {
Region r = getCache().getRegion(regionName);
assertTrue(r.getAttributes().getConcurrencyChecksEnabled());
return null;
}
};
vm1.invoke(verifyConcurrencyChecks);
vm3.invoke(verifyConcurrencyChecks);
}
@Test
public void testReplicateAfterPersistent() {
Host host = Host.getHost(0);
VM vm1 = host.getVM(0);
VM vm2 = host.getVM(1);
VM vm3 = host.getVM(2);
final String regionName = getName();
SerializableCallable createPersistentReplicate = new SerializableCallable() {
@Override
public Object call() throws Exception {
Region r =
getCache().createRegionFactory(RegionShortcut.REPLICATE_PERSISTENT).create(regionName);
return null;
}
};
SerializableCallable createNonPersistentReplicate = new SerializableCallable() {
@Override
public Object call() throws Exception {
Region r = getCache().createRegionFactory(RegionShortcut.REPLICATE).create(regionName);
return null;
}
};
vm1.invoke(createPersistentReplicate);
vm2.invoke(createNonPersistentReplicate);
vm3.invoke(createPersistentReplicate);
}
private static final class RecoveryObserver
extends InternalResourceManager.ResourceObserverAdapter {
final CountDownLatch recoveryDone = new CountDownLatch(1);
@Override
public void rebalancingOrRecoveryFinished(Region region) {
if (region.getName().equals(PR_REGION_NAME)) {
recoveryDone.countDown();
}
}
}
private static class TestFunction implements Function, Serializable {
public void execute(FunctionContext context) {
context.getResultSender().lastResult(null);
}
public String getId() {
return TestFunction.class.getSimpleName();
}
public boolean hasResult() {
return true;
}
public boolean optimizeForWrite() {
return false;
}
public boolean isHA() {
return false;
}
}
private static class BlockGIIMessageObserver extends DistributionMessageObserver {
CountDownLatch cdl = new CountDownLatch(1);
@Override
public void beforeSendMessage(DistributionManager dm, DistributionMessage message) {
if (message instanceof RequestImageMessage) {
RequestImageMessage rim = (RequestImageMessage) message;
// make sure this is a bucket region doing a GII
if (rim.regionPath.contains("B_")) {
try {
cdl.await();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
}
}
}
private static class TestCustomExpiration implements CustomExpiry {
public void close() {
// do nothing
}
public ExpirationAttributes getExpiry(Entry entry) {
return new ExpirationAttributes(
(entry.getKey().hashCode() + entry.getValue().hashCode()) % 100,
ExpirationAction.INVALIDATE);
}
}
}