/*
 *      Copyright (C) 2012-2015 DataStax Inc.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */
package com.datastax.driver.core;

import com.datastax.driver.core.Host.State;
import com.datastax.driver.core.policies.ConstantReconnectionPolicy;
import com.datastax.driver.core.policies.DelegatingLoadBalancingPolicy;
import com.datastax.driver.core.policies.LoadBalancingPolicy;
import com.datastax.driver.core.policies.RoundRobinPolicy;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.Uninterruptibles;
import org.testng.annotations.Test;

import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Collections;
import java.util.Iterator;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import static com.datastax.driver.core.Assertions.assertThat;
import static com.datastax.driver.core.CreateCCM.TestMode.PER_METHOD;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.mockito.Mockito.*;

/**
 * Scenarios where a Cluster loses connection to a host and reconnects.
 */
@CreateCCM(PER_METHOD)
public class ReconnectionTest extends CCMTestsSupport {

    /**
     * Delay, in milliseconds, of the constant reconnection policy used by most tests;
     * also used as the unit for the "wait a few reconnection cycles" sleeps.
     */
    private final int reconnectionDelayMillis = 1000;

    /**
     * Stops both nodes of a 2-node cluster, restarts only node 2, and checks that the driver
     * sees node 2 come back up and moves its control connection to it.
     */
    @CCMConfig(dirtiesContext = true, numberOfNodes = 2, createCluster = false)
    @Test(groups = "long")
    public void should_reconnect_after_full_connectivity_loss() throws InterruptedException {
        Cluster cluster = register(Cluster.builder()
                .addContactPoints(getContactPoints().get(0))
                .withPort(ccm().getBinaryPort())
                .withReconnectionPolicy(new ConstantReconnectionPolicy(reconnectionDelayMillis))
                .build());
        cluster.connect();

        assertThat(cluster).usesControlHost(1);

        // Stop all nodes. We won't get notifications anymore, so the only mechanism to
        // reconnect is the background reconnection attempts.
        ccm().stop(2);
        ccm().stop(1);

        ccm().waitForDown(2);
        ccm().start(2);
        ccm().waitForUp(2);

        assertThat(cluster).host(2).comesUpWithin(Cluster.NEW_NODE_DELAY_SECONDS * 2, SECONDS);

        // Give the control connection a few moments to reconnect
        TimeUnit.MILLISECONDS.sleep(reconnectionDelayMillis * 2);
        assertThat(cluster).usesControlHost(2);
    }

    /**
     * Restarts the only node while the auth provider hands out a wrong password, checks that
     * the driver keeps asking for credentials (and records authentication errors), then
     * restores the password and checks that the host eventually comes back up.
     */
    @CCMConfig(
            dirtiesContext = true,
            config = "authenticator:PasswordAuthenticator",
            jvmArgs = "-Dcassandra.superuser_setup_delay_ms=0",
            createCluster = false)
    @Test(groups = "long")
    public void should_keep_reconnecting_on_authentication_error() throws InterruptedException {
        // For C* 1.2, sleep before attempting to connect, as the user is only created
        // after a small delay.
        if (ccm().getCassandraVersion().getMajor() < 2) {
            Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
        }
        CountingReconnectionPolicy reconnectionPolicy = new CountingReconnectionPolicy(new ConstantReconnectionPolicy(reconnectionDelayMillis));
        CountingAuthProvider authProvider = new CountingAuthProvider("cassandra", "cassandra");
        Cluster cluster = register(Cluster.builder()
                .addContactPoints(getContactPoints().get(0))
                .withPort(ccm().getBinaryPort())
                // Start with the correct auth so that we can initialize the server
                .withAuthProvider(authProvider)
                .withReconnectionPolicy(reconnectionPolicy)
                .build());

        cluster.init();
        assertThat(cluster).usesControlHost(1);

        // Stop the server, set wrong credentials and restart
        ccm().stop(1);
        ccm().waitForDown(1);
        authProvider.setPassword("wrongPassword");
        ccm().start(1);
        ccm().waitForUp(1);

        // Wait a few iterations to ensure that our authProvider has returned the wrong credentials at least twice
        // NB: authentication errors show up in the logs
        int initialCount = authProvider.count.get();
        long initialMetricCount = cluster.getMetrics().getErrorMetrics().getAuthenticationErrors().getCount();
        int iterations = 0, maxIterations = 12; // make sure we don't wait indefinitely
        do {
            iterations += 1;
            TimeUnit.SECONDS.sleep(5);
        } while (iterations < maxIterations && authProvider.count.get() <= initialCount + 1);
        assertThat(iterations).isLessThan(maxIterations);

        // Number of authentication errors should have increased.
        assertThat(cluster.getMetrics().getErrorMetrics().getAuthenticationErrors().getCount())
                .isGreaterThan(initialMetricCount);

        // Fix the credentials
        authProvider.setPassword("cassandra");

        // The driver should eventually reconnect to the node
        assertThat(cluster).host(1).comesUpWithin(Cluster.NEW_NODE_DELAY_SECONDS * 2, SECONDS);
    }

    /**
     * Stops node 2, cancels the reconnection attempts to it, and checks that the reconnection
     * policy is no longer consulted; the host must still come back up once the node is restarted.
     */
    @CCMConfig(dirtiesContext = true, numberOfNodes = 2, createCluster = false)
    @Test(groups = "long")
    public void should_cancel_reconnection_attempts() throws InterruptedException {
        // Count how many times the reconnection policy is asked for a new schedule
        CountingReconnectionPolicy reconnectionPolicy = new CountingReconnectionPolicy(new ConstantReconnectionPolicy(reconnectionDelayMillis));

        Cluster cluster = register(Cluster.builder()
                .addContactPoints(getContactPoints().get(0))
                .withPort(ccm().getBinaryPort())
                .withReconnectionPolicy(reconnectionPolicy).build());
        cluster.connect();

        // Stop a node and wait until the driver has noticed it: the reconnection attempts
        // are only scheduled once the host is marked DOWN, so asking for the future any
        // earlier is racy.
        ccm().stop(2);
        ccm().waitForDown(2);
        assertThat(cluster).host(2).goesDownWithin(20, SECONDS);

        // Cancel the reconnection attempts to it
        Host host2 = TestUtils.findHost(cluster, 2);
        cancelReconnectionAttempts(host2);

        // The reconnection count should not vary over time anymore
        int initialCount = reconnectionPolicy.count.get();
        TimeUnit.MILLISECONDS.sleep(reconnectionDelayMillis * 2);
        assertThat(reconnectionPolicy.count.get()).isEqualTo(initialCount);

        // Restart the node, which will trigger an UP notification
        ccm().start(2);
        ccm().waitForUp(2);

        // The driver should now see the node as UP again
        assertThat(cluster).host(2).comesUpWithin(Cluster.NEW_NODE_DELAY_SECONDS * 2, SECONDS);
    }

    /**
     * Checks {@code Host.tryReconnectOnce()}: a one-time attempt against a stopped node leaves
     * the host DOWN, the host stays DOWN after the node restarts (no automatic reconnection),
     * and a second one-time attempt brings it back up.
     */
    @CCMConfig(dirtiesContext = true, createCluster = false)
    @Test(groups = "long")
    public void should_trigger_one_time_reconnect() throws InterruptedException, IOException {
        TogglabePolicy loadBalancingPolicy = new TogglabePolicy(new RoundRobinPolicy());
        Cluster cluster = register(Cluster.builder()
                .addContactPointsWithPorts(ccm().addressOfNode(1))
                .withPort(ccm().getBinaryPort())
                .withLoadBalancingPolicy(loadBalancingPolicy)
                .withReconnectionPolicy(new ConstantReconnectionPolicy(reconnectionDelayMillis))
                .build());
        cluster.connect();

        // Tweak the LBP so that the control connection never reconnects, otherwise
        // it would interfere with the rest of the test (this is a bit of a hack)
        loadBalancingPolicy.returnEmptyQueryPlan = true;

        // Stop the node, ignore it and cancel reconnection attempts to it
        ccm().stop(1);
        ccm().waitForDown(1);
        assertThat(cluster).host(1).goesDownWithin(20, SECONDS);
        Host host1 = TestUtils.findHost(cluster, 1);
        loadBalancingPolicy.setDistance(host1, HostDistance.IGNORED);
        cancelReconnectionAttempts(host1);

        // Trigger a one-time reconnection attempt (this will fail)
        host1.tryReconnectOnce();

        // Wait for a few reconnection cycles before checking
        TimeUnit.MILLISECONDS.sleep(reconnectionDelayMillis * 2);
        assertThat(cluster).host(1).hasState(State.DOWN);

        // Restart the node (this will not trigger an UP notification thanks to our
        // hack to disable the control connection reconnects). The host should stay
        // down for the driver.
        ccm().start(1);
        ccm().waitForUp(1);
        assertThat(cluster).host(1).hasState(State.DOWN);

        TimeUnit.SECONDS.sleep(Cluster.NEW_NODE_DELAY_SECONDS);
        assertThat(cluster).host(1).hasState(State.DOWN);

        // Trigger another one-time reconnection attempt (this will succeed). The
        // host should be back up.
        host1.tryReconnectOnce();
        assertThat(cluster).host(1).comesUpWithin(Cluster.NEW_NODE_DELAY_SECONDS * 2, SECONDS);
    }

    /**
     * The connection established by a successful reconnection attempt should be reused in one of the
     * connection pools (JAVA-505).
     */
    @CCMConfig(dirtiesContext = true, createCluster = false)
    @Test(groups = "long")
    public void should_use_connection_from_reconnection_in_pool() {
        TogglabePolicy loadBalancingPolicy = new TogglabePolicy(new RoundRobinPolicy());

        // Spy SocketOptions.getKeepAlive to count how many connections were instantiated.
        SocketOptions socketOptions = spy(new SocketOptions());

        Cluster cluster = register(Cluster.builder()
                .addContactPoints(getContactPoints().get(0))
                .withPort(ccm().getBinaryPort())
                .withReconnectionPolicy(new ConstantReconnectionPolicy(5000))
                .withLoadBalancingPolicy(loadBalancingPolicy)
                .withSocketOptions(socketOptions)
                .withProtocolVersion(ccm().getProtocolVersion())
                .build());
        // Create two sessions to have multiple pools
        cluster.connect();
        cluster.connect();

        int corePoolSize = TestUtils.numberOfLocalCoreConnections(cluster);

        // Right after init, 1 connection has been opened by the control connection, and the core size for each pool.
        verify(socketOptions, times(1 + corePoolSize * 2)).getKeepAlive();

        // Tweak the LBP so that the control connection never reconnects. This makes it easier
        // to reason about the number of connection attempts.
        loadBalancingPolicy.returnEmptyQueryPlan = true;

        // Stop the node and cancel the reconnection attempts to it
        ccm().stop(1);
        ccm().waitForDown(1);
        assertThat(cluster).host(1).goesDownWithin(20, SECONDS);
        Host host1 = TestUtils.findHost(cluster, 1);
        cancelReconnectionAttempts(host1);

        ccm().start(1);
        ccm().waitForUp(1);

        // Reset the spy and count the number of connections attempts for 1 reconnect
        reset(socketOptions);

        host1.tryReconnectOnce();
        assertThat(cluster).host(1).comesUpWithin(Cluster.NEW_NODE_DELAY_SECONDS * 2, SECONDS);

        // Expect 1 connection from the reconnection attempt, plus the remaining ones for the
        // two pools: together they need corePoolSize * 2 connections, but the one from the
        // reconnection attempt gets reused.
        verify(socketOptions, times(corePoolSize * 2)).getKeepAlive();
    }

    /**
     * Cancels the reconnection attempts currently scheduled for the given host, if any.
     * <p/>
     * {@code Host.getReconnectionAttemptFuture()} can return {@code null} when no reconnection
     * attempt is in progress, so the future is checked before being cancelled.
     *
     * @param host the host whose reconnection attempts should be cancelled.
     */
    private static void cancelReconnectionAttempts(Host host) {
        ListenableFuture<?> reconnectionAttemptFuture = host.getReconnectionAttemptFuture();
        if (reconnectionAttemptFuture != null) {
            reconnectionAttemptFuture.cancel(false);
        }
    }

    /**
     * Extends the plain text auth provider to track how many times the credentials have been requested
     */
    static class CountingAuthProvider extends PlainTextAuthProvider {

        /** Number of times {@link #newAuthenticator(InetSocketAddress, String)} has been invoked. */
        final AtomicInteger count = new AtomicInteger();

        CountingAuthProvider(String username, String password) {
            super(username, password);
        }

        @Override
        public Authenticator newAuthenticator(InetSocketAddress host, String authenticator) {
            count.incrementAndGet();
            return super.newAuthenticator(host, authenticator);
        }
    }

    /**
     * A load balancing policy that:
     * - can be "disabled" by having its query plan return no hosts.
     * - can be instructed to return a specific distance for some hosts.
     */
    public static class TogglabePolicy extends DelegatingLoadBalancingPolicy {

        /** When {@code true}, {@link #newQueryPlan(String, Statement)} returns an empty plan. */
        volatile boolean returnEmptyQueryPlan;

        /** Per-host distance overrides; hosts without an entry use the delegate's distance. */
        final ConcurrentMap<Host, HostDistance> distances = new ConcurrentHashMap<Host, HostDistance>();

        public TogglabePolicy(LoadBalancingPolicy delegate) {
            super(delegate);
        }

        @Override
        public HostDistance distance(Host host) {
            HostDistance distance = distances.get(host);
            return (distance != null)
                    ? distance
                    : super.distance(host);
        }

        /**
         * Forces the distance reported for the given host, overriding the delegate policy.
         *
         * @param host     the host to override.
         * @param distance the distance that {@link #distance(Host)} will return for it.
         */
        public void setDistance(Host host, HostDistance distance) {
            distances.put(host, distance);
        }

        @Override
        public Iterator<Host> newQueryPlan(String loggedKeyspace, Statement statement) {
            if (returnEmptyQueryPlan) {
                return Collections.<Host>emptyList().iterator();
            } else {
                return super.newQueryPlan(loggedKeyspace, statement);
            }
        }
    }
}