package org.infinispan.distribution;
import static org.infinispan.test.Exceptions.expectException;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertTrue;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;
import org.infinispan.Cache;
import org.infinispan.commands.read.GetCacheEntryCommand;
import org.infinispan.commands.read.GetKeyValueCommand;
import org.infinispan.commands.remote.ClusteredGetCommand;
import org.infinispan.commons.CacheException;
import org.infinispan.configuration.cache.CacheMode;
import org.infinispan.configuration.cache.ConfigurationBuilder;
import org.infinispan.container.entries.ImmortalCacheValue;
import org.infinispan.context.InvocationContext;
import org.infinispan.context.impl.FlagBitSets;
import org.infinispan.interceptors.DDAsyncInterceptor;
import org.infinispan.manager.EmbeddedCacheManager;
import org.infinispan.remoting.RemoteException;
import org.infinispan.remoting.inboundhandler.DeliverOrder;
import org.infinispan.remoting.responses.CacheNotFoundResponse;
import org.infinispan.remoting.responses.Response;
import org.infinispan.remoting.responses.SuccessfulResponse;
import org.infinispan.remoting.rpc.ResponseMode;
import org.infinispan.remoting.rpc.RpcManager;
import org.infinispan.remoting.rpc.RpcOptions;
import org.infinispan.remoting.transport.Address;
import org.infinispan.remoting.transport.jgroups.JGroupsAddress;
import org.infinispan.remoting.transport.jgroups.JGroupsTransport;
import org.infinispan.statetransfer.StateTransferInterceptor;
import org.infinispan.test.Exceptions;
import org.infinispan.test.MultipleCacheManagersTest;
import org.infinispan.test.TestingUtil;
import org.infinispan.test.fwk.CleanupAfterMethod;
import org.infinispan.util.ByteString;
import org.infinispan.util.concurrent.TimeoutException;
import org.jgroups.JChannel;
import org.jgroups.View;
import org.jgroups.protocols.pbcast.GMS;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.Test;
/**
 * Exercises reads of a single key issued from node 0 of a three-node {@code DIST_SYNC} cluster while the
 * nodes that serve the read (nodes 1 and 2) throw, are delayed, or are removed from node 0's JGroups view.
 * <p>
 * The key is obtained through {@code getKeyForCache(cache(1), cache(2))}; the tests treat nodes 1 and 2 as
 * the owners of that key and node 0 as the requestor.
 * <p>
 * Every test that installs a {@link DelayingInterceptor} releases its latch in a {@code finally} block so
 * that a failing assertion does not leave remote handler threads parked until the interceptor's own
 * 30-second wait expires.
 */
@Test(groups = "functional")
@CleanupAfterMethod
public class RemoteGetFailureTest extends MultipleCacheManagersTest {
   /** Key read by every test; chosen in {@link #createCacheManagers()}. */
   private Object key;

   /**
    * Creates the three-node cluster with a 10 s state-transfer timeout and a 5 s remote timeout, waits for
    * it to form and selects the test key.
    */
   @Override
   protected void createCacheManagers() throws Throwable {
      ConfigurationBuilder builder = getDefaultClusteredCacheConfig(CacheMode.DIST_SYNC);
      // cache stop takes quite long when the view splits
      builder.clustering().stateTransfer().timeout(10, TimeUnit.SECONDS);
      builder.clustering().remoteTimeout(5, TimeUnit.SECONDS);
      createCluster(builder, 3);
      waitForClusterToForm();
      key = getKeyForCache(cache(1), cache(2));
   }

   /**
    * Kills all cache managers instead of clearing the caches after each test method.
    */
   @AfterMethod(alwaysRun = true)
   @Override
   protected void clearContent() throws Throwable {
      // When we send a ClearCommand from a node that does not have a newer view installed to a node that has
      // already installed a view without the sender, the message is dropped and the ClearCommand has to time
      // out. Therefore, don't issue the clear command at all.
      TestingUtil.killCacheManagers(cacheManagers);
      cacheManagers.clear();
   }

   /**
    * Node 1 is delayed; the read from node 0 must still return the value in less than the remote timeout.
    */
   public void testDelayed(Method m) {
      initAndCheck(m);
      CountDownLatch release = new CountDownLatch(1);
      cache(1).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(new DelayingInterceptor(null, release), 0);
      try {
         long requestStart = System.nanoTime();
         assertEquals(m.getName(), cache(0).get(key));
         long requestEnd = System.nanoTime();
         long remoteTimeout = cache(0).getCacheConfiguration().clustering().remoteTimeout();
         long delay = TimeUnit.NANOSECONDS.toMillis(requestEnd - requestStart);
         assertTrue("Request took too long: " + delay + " ms", delay < remoteTimeout);
      } finally {
         // Unblock node 1 even when an assertion above failed.
         release.countDown();
      }
   }

   /**
    * Both nodes 1 and 2 throw; the read from node 0 must fail with the injected exception.
    */
   public void testExceptionFromBothOwners(Method m) {
      initAndCheck(m);
      cache(1).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(new FailingInterceptor(), 0);
      cache(2).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(new FailingInterceptor(), 0);
      expectException(RemoteException.class, CacheException.class, "Injected", () -> cache(0).get(key));
   }

   /**
    * Node 1 throws while node 2 is delayed; the read must fail with the injected exception before the remote
    * timeout elapses.
    */
   public void testExceptionFromOneOwnerOtherTimeout(Method m) {
      initAndCheck(m);
      CountDownLatch release = new CountDownLatch(1);
      cache(1).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(new FailingInterceptor(), 0);
      cache(2).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(new DelayingInterceptor(null, release), 0);

      // It's not enough to test if the exception is TimeoutException as we want the remote get to fail
      // immediately upon exception.
      // We cannot mock TimeService in ScheduledExecutor, so we have to measure if the response was fast.
      // remoteTimeout (5s, configured in createCacheManagers) is gracious enough to not cause false positives.
      long requestStart = System.nanoTime();
      try {
         expectException(RemoteException.class, CacheException.class, "Injected", () -> cache(0).get(key));
         long exceptionThrown = System.nanoTime();
         long remoteTimeout = cache(0).getCacheConfiguration().clustering().remoteTimeout();
         long delay = TimeUnit.NANOSECONDS.toMillis(exceptionThrown - requestStart);
         assertTrue("Exception was thrown too late: " + delay + " ms", delay < remoteTimeout);
      } finally {
         release.countDown();
      }
   }

   /**
    * Both nodes 1 and 2 are delayed and then dropped from node 0's view; the read must complete with
    * {@code null}, with exactly one exception observed and no retry.
    */
   public void testBothOwnersSuspected(Method m) throws ExecutionException, InterruptedException {
      initAndCheck(m);
      CountDownLatch arrival = new CountDownLatch(2);
      CountDownLatch release = new CountDownLatch(1);
      AtomicInteger thrown = new AtomicInteger();
      AtomicInteger retried = new AtomicInteger();
      cache(0).getAdvancedCache().getAsyncInterceptorChain()
            .addInterceptorAfter(new CheckOTEInterceptor(thrown, retried), StateTransferInterceptor.class);
      cache(1).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(new DelayingInterceptor(arrival, release), 0);
      cache(2).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(new DelayingInterceptor(arrival, release), 0);

      try {
         Future<Object> future = fork(() -> cache(0).get(key));
         assertTrue(arrival.await(10, TimeUnit.SECONDS));
         installNewView(cache(0), cache(0));

         // The entry was lost, so we'll get null
         assertNull(future.get());
         // Since we've lost all owners
         assertEquals(1, thrown.get()); // OwnersLostException
         assertEquals(0, retried.get());
      } finally {
         release.countDown();
      }
   }

   /**
    * Both nodes are delayed and node 2 is dropped from node 0's view; the read must stay pending until node 1
    * is released and then return the value.
    */
   public void testOneOwnerSuspected(Method m) throws ExecutionException, InterruptedException {
      initAndCheck(m);
      CountDownLatch arrival = new CountDownLatch(2);
      CountDownLatch release1 = new CountDownLatch(1);
      CountDownLatch release2 = new CountDownLatch(1);
      cache(1).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(new DelayingInterceptor(arrival, release1), 0);
      cache(2).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(new DelayingInterceptor(arrival, release2), 0);

      try {
         Future<?> future = fork(() -> {
            // Expected value first, actual second, so that a failure message reads correctly.
            assertEquals(m.getName(), cache(0).get(key));
         });
         assertTrue(arrival.await(10, TimeUnit.SECONDS));
         installNewView(cache(0), cache(0), cache(1));

         // suspection should not fail the operation
         assertFalse(future.isDone());
         release1.countDown();
         future.get();
      } finally {
         // countDown() on an already-open latch is a no-op, so releasing both here is safe.
         release1.countDown();
         release2.countDown();
      }
   }

   /**
    * Same scenario as {@link #testOneOwnerSuspected(Method)}, but the {@link ClusteredGetCommand} is sent
    * directly through the {@link RpcManager} with a {@code null} response filter and a 15 s timeout. Expects a
    * successful response from node 1 and {@link CacheNotFoundResponse#INSTANCE} for node 2.
    */
   public void testOneOwnerSuspectedNoFilter(Method m) throws ExecutionException, InterruptedException {
      initAndCheck(m);
      CountDownLatch arrival = new CountDownLatch(2);
      CountDownLatch release1 = new CountDownLatch(1);
      CountDownLatch release2 = new CountDownLatch(1);
      cache(1).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(new DelayingInterceptor(arrival, release1), 0);
      cache(2).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(new DelayingInterceptor(arrival, release2), 0);

      try {
         Address address1 = address(1);
         Address address2 = address(2);
         List<Address> owners = Arrays.asList(address1, address2);

         ClusteredGetCommand clusteredGet = new ClusteredGetCommand(key, ByteString.fromString(cache(0).getName()), 0);
         final int timeout = 15;
         RpcOptions rpcOptions = new RpcOptions(timeout, TimeUnit.SECONDS, null, ResponseMode.WAIT_FOR_VALID_RESPONSE, DeliverOrder.NONE);
         RpcManager rpcManager = cache(0).getAdvancedCache().getRpcManager();
         CompletableFuture<Map<Address, Response>> future = rpcManager.invokeRemotelyAsync(owners, clusteredGet, rpcOptions);

         assertTrue(arrival.await(10, TimeUnit.SECONDS));
         installNewView(cache(0), cache(0), cache(1));

         // RequestCorrelator processes the view asynchronously, so we need to wait a bit for node 2 to be suspected
         Thread.sleep(100);

         // suspection should not fail the operation
         assertFalse(future.isDone());
         long requestAllowed = System.nanoTime();
         release1.countDown();
         Map<Address, Response> responses = future.get();
         long requestCompleted = System.nanoTime();
         long requestSeconds = TimeUnit.NANOSECONDS.toSeconds(requestCompleted - requestAllowed);
         assertTrue("Request took too long: " + requestSeconds, requestSeconds < timeout / 2);

         assertEquals(SuccessfulResponse.create(new ImmortalCacheValue(m.getName())), responses.get(address1));
         assertEquals(CacheNotFoundResponse.INSTANCE, responses.get(address2));
      } finally {
         release1.countDown();
         release2.countDown();
      }
   }

   /**
    * Both nodes are delayed and node 2 is dropped from node 0's view while node 1 never answers in time; the
    * read must fail with {@link TimeoutException}, and only after at least the remote timeout has elapsed.
    */
   public void testOneOwnerSuspectedOtherTimeout(Method m) throws ExecutionException, InterruptedException {
      initAndCheck(m);
      CountDownLatch arrival = new CountDownLatch(2);
      CountDownLatch release = new CountDownLatch(1);
      cache(1).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(new DelayingInterceptor(arrival, release), 0);
      cache(2).getAdvancedCache().getAsyncInterceptorChain().addInterceptor(new DelayingInterceptor(arrival, release), 0);

      try {
         Future<?> future = fork(() -> {
            long start = System.nanoTime();
            Exceptions.expectException(TimeoutException.class, () -> cache(0).get(key));
            long end = System.nanoTime();
            long duration = TimeUnit.NANOSECONDS.toMillis(end - start);
            assertTrue("Request did not wait for long enough: " + duration,
                  duration >= cache(0).getCacheConfiguration().clustering().remoteTimeout());
         });
         assertTrue(arrival.await(10, TimeUnit.SECONDS));
         installNewView(cache(0), cache(0), cache(1));

         // suspection should not fail the operation
         assertFalse(future.isDone());
         future.get();
      } finally {
         // Must only happen after the forked read has finished, otherwise node 1 would answer in time.
         release.countDown();
      }
   }

   /**
    * Stores the test method's name under {@link #key} from node 0 and verifies nodes 1 and 2 read it back.
    */
   private void initAndCheck(Method m) {
      cache(0).put(key, m.getName());
      assertEquals(m.getName(), cache(1).get(key));
      assertEquals(m.getName(), cache(2).get(key));
   }

   /**
    * Builds a JGroups view from the given caches' addresses, with the first one passed as the first argument
    * of {@code View.create} and a view id one higher than the transport's current one, and installs it
    * directly on the GMS protocol of {@code installing}'s channel.
    *
    * @param installing   cache whose node gets the new view installed
    * @param cachesInView caches whose nodes are members of the new view; must not be empty
    */
   private void installNewView(Cache<?, ?> installing, Cache<?, ?>... cachesInView) {
      JGroupsTransport transport = (JGroupsTransport) installing.getCacheManager().getTransport();
      JChannel channel = transport.getChannel();

      org.jgroups.Address[] members = Stream.of(cachesInView)
            .map(c -> ((JGroupsAddress) address(c)).getJGroupsAddress())
            .toArray(org.jgroups.Address[]::new);
      View view = View.create(members[0], transport.getViewId() + 1, members);
      ((GMS) channel.getProtocolStack().findProtocol(GMS.class)).installView(view);
   }

   /** Fails every {@link GetCacheEntryCommand} it sees with a {@link CacheException} whose message is "Injected". */
   private static class FailingInterceptor extends DDAsyncInterceptor {
      @Override
      public Object visitGetCacheEntryCommand(InvocationContext ctx, GetCacheEntryCommand command) throws Throwable {
         throw new CacheException("Injected");
      }
   }

   /**
    * Holds every {@link GetCacheEntryCommand} until the {@code release} latch opens (or 30 seconds pass), and
    * optionally signals the command's arrival beforehand.
    */
   private static class DelayingInterceptor extends DDAsyncInterceptor {
      /** Counted down once per intercepted command; may be {@code null} when arrival is not tracked. */
      private final CountDownLatch arrival;
      /** Latch the intercepted command waits on before continuing down the chain. */
      private final CountDownLatch release;

      private DelayingInterceptor(CountDownLatch arrival, CountDownLatch release) {
         this.arrival = arrival;
         this.release = release;
      }

      @Override
      public Object visitGetCacheEntryCommand(InvocationContext ctx, GetCacheEntryCommand command) throws Throwable {
         if (arrival != null) arrival.countDown();
         // the timeout has to be longer than remoteTimeout!
         // The result of await() is ignored on purpose: the command proceeds whether or not it was released.
         release.await(30, TimeUnit.SECONDS);
         return super.visitGetCacheEntryCommand(ctx, command);
      }
   }

   /**
    * Counts {@link GetKeyValueCommand}s that carry the {@code COMMAND_RETRY} flag and exceptions raised by the
    * rest of the chain for such commands; the exceptions are rethrown unchanged.
    */
   private static class CheckOTEInterceptor extends DDAsyncInterceptor {
      private final AtomicInteger thrown;
      private final AtomicInteger retried;

      public CheckOTEInterceptor(AtomicInteger thrown, AtomicInteger retried) {
         this.thrown = thrown;
         this.retried = retried;
      }

      @Override
      public Object visitGetKeyValueCommand(InvocationContext ctx, GetKeyValueCommand command) throws Throwable {
         if (command.hasAnyFlag(FlagBitSets.COMMAND_RETRY)) {
            retried.incrementAndGet();
         }
         return invokeNextAndExceptionally(ctx, command, (rCtx, rCommand, t) -> {
            thrown.incrementAndGet();
            throw t;
         });
      }
   }
}