/* * Copyright (C) 2012-2015 DataStax Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.datastax.driver.core.policies; import com.datastax.driver.core.*; import com.datastax.driver.core.exceptions.*; import org.scassandra.http.client.Result; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import static com.datastax.driver.core.TestUtils.nonQuietClusterCloseOptions; import static java.util.concurrent.TimeUnit.SECONDS; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.scassandra.http.client.PrimingRequest.queryBuilder; import static org.scassandra.http.client.PrimingRequest.then; import static org.scassandra.http.client.Result.*; public class ErrorAwarePolicyIntegrationTest { private QueryTracker queryTracker; private Clock clock; private ScassandraCluster sCluster; private AtomicInteger errorCounter; private LatencyTracker latencyTracker; @BeforeMethod(groups = "short") public void setUp() { queryTracker = new QueryTracker(); clock = mock(Clock.class); sCluster = ScassandraCluster.builder().withNodes(2).build(); sCluster.init(); errorCounter = new AtomicInteger(0); latencyTracker = new LatencyTracker() { @Override public void update(Host host, Statement statement, Exception exception, long newLatencyNanos) { if (exception != null) errorCounter.incrementAndGet(); } @Override public void onRegister(Cluster cluster) { } @Override public void onUnregister(Cluster cluster) { } }; // By default node 1 should always fail, 2 succeed. prime(1, unauthorized); prime(2, success); } @AfterMethod(groups = "short") public void tearDown() { sCluster.stop(); } private Cluster.Builder builder(LoadBalancingPolicy lbp) { return Cluster.builder() .withNettyOptions(nonQuietClusterCloseOptions) .addContactPoints(sCluster.address(1).getAddress()) .withPort(sCluster.getBinaryPort()) .withLoadBalancingPolicy(lbp); } private void prime(int node, Result result) { sCluster.node(node).primingClient().prime( queryBuilder() .withQuery(QueryTracker.QUERY) .withThen(then().withResult(result)) .build() ); } /** * Checks that {@link LatencyTracker#update} was called at least expectedCount times within 5 seconds. * <p/> * Note that the usefulness of this is dependent on the {@link ErrorAwarePolicy.PerHostErrorTracker} being invoked * before the latency tracker being used to track error invocations in this test. The existing implementation * seems to invoke update on latency trackers in order that they are registered. * * @param expectedCount Expected number of errors to have been invoked. */ private void awaitTrackerUpdate(final int expectedCount) { ConditionChecker.check() .every(10) .that(new Callable<Boolean>() { @Override public Boolean call() throws Exception { return errorCounter.get() >= expectedCount; } }) .before(5000) .becomesTrue(); } private void setTime(long time, TimeUnit timeUnit) { when(clock.nanoTime()).thenReturn(TimeUnit.NANOSECONDS.convert(time, timeUnit)); } /** * Validates that {@link ErrorAwarePolicy} properly excludes a host after the maximum number of errors is exceeded. * <p/> * This test configures a maximum of 1 error per minute and executes 2 failing queries against host1 during the * first 5 simulated seconds. host1 should be excluded as soon as the rolling count updates over the next 5-second * interval. * <p/> * It then makes another query and ensures it is executed against host2 and that the response was successful. * * @jira_ticket JAVA-1055 * @test_category load_balancing:error_aware * @since 3.1.0 */ @Test(groups = "short") public void should_exclude_host_after_reaching_maximum_errors() throws InterruptedException { LoadBalancingPolicy lbp = ErrorAwarePolicy.builder(new SortingLoadBalancingPolicy()) .withMaxErrorsPerMinute(1) .withClock(clock) .build(); Cluster cluster = builder(lbp).build(); try { Session session = cluster.connect(); cluster.register(latencyTracker); setTime(0, SECONDS); // Make 2 queries producing a count higher than the threshold queryTracker.query(session, 2, UnauthorizedException.class, sCluster.address(1)); awaitTrackerUpdate(2); // Advance time so that RollingCount ticks and updates its count. setTime(5, SECONDS); // The next query should succeed and hit node 2 since node 1 is now ignored. queryTracker.query(session, 1, sCluster.address(2)); } finally { cluster.close(); } } /** * Validates that {@link ErrorAwarePolicy} will include a previously excluded host after the configured retry * period has elapsed. * <p/> * The test executes queries with error to get host1 excluded. It then executes queries over 70 simulated seconds * and then executes another query after this time has elapsed and ensures that the next query execution uses host1. * * @jira_ticket JAVA-1055 * @test_category load_balancing:error_aware * @since 3.1.0 */ @Test(groups = "short") public void should_resurrect_host_after_retry_period() throws InterruptedException { LoadBalancingPolicy lbp = ErrorAwarePolicy.builder(new SortingLoadBalancingPolicy()) .withMaxErrorsPerMinute(1) .withRetryPeriod(70, SECONDS) .withClock(clock) .build(); Cluster cluster = builder(lbp).build(); try { Session session = cluster.connect(); cluster.register(latencyTracker); setTime(0, SECONDS); // Make 2 queries producing a count higher than the threshold queryTracker.query(session, 2, UnauthorizedException.class, sCluster.address(1)); awaitTrackerUpdate(2); // Advance time so that RollingCount ticks and updates its count. setTime(5, SECONDS); // Execute some queries, these should all succeed and hit host2 since host1 is excluded. queryTracker.query(session, 5, sCluster.address(2)); // Advance time after the retry period setTime(75, SECONDS); // At this the load balancing policy should resurrect node 1 which will be used and fail. queryTracker.query(session, 1, UnauthorizedException.class, sCluster.address(1)); } finally { cluster.close(); } } /** * Validates that {@link ErrorAwarePolicy} will not penalize errors that are not considered in the default * {@link ErrorAwarePolicy.ErrorFilter} implementation. * <p/> * Executes 10 queries with each error type and ensures that host1 is used each time, verifying that it was * never excluded. * * @jira_ticket JAVA-1055 * @test_category load_balancing:error_aware * @since 3.1.0 */ @Test(groups = "short") public void should_not_penalize_default_ignored_exceptions() throws InterruptedException { LoadBalancingPolicy lbp = ErrorAwarePolicy.builder(new SortingLoadBalancingPolicy()) .withMaxErrorsPerMinute(1) .withClock(clock) .build(); // Use fall through retry policy so other hosts aren't tried. Cluster cluster = builder(lbp).withRetryPolicy(FallthroughRetryPolicy.INSTANCE).build(); try { Session session = cluster.connect(); cluster.register(latencyTracker); setTime(0, SECONDS); // TODO: Add Read and Write Failure, FunctionExecution exception when Scassandra supports v4. prime(1, read_request_timeout); queryTracker.query(session, 10, ReadTimeoutException.class, sCluster.address(1)); awaitTrackerUpdate(10); setTime(5, SECONDS); prime(1, write_request_timeout); queryTracker.query(session, 10, WriteTimeoutException.class, sCluster.address(1)); awaitTrackerUpdate(20); setTime(10, SECONDS); prime(1, unavailable); queryTracker.query(session, 10, UnavailableException.class, sCluster.address(1)); awaitTrackerUpdate(30); setTime(15, SECONDS); prime(1, already_exists); queryTracker.query(session, 10, AlreadyExistsException.class, sCluster.address(1)); awaitTrackerUpdate(40); setTime(20, SECONDS); prime(1, invalid); queryTracker.query(session, 10, InvalidQueryException.class, sCluster.address(1)); awaitTrackerUpdate(50); setTime(25, SECONDS); prime(1, syntax_error); queryTracker.query(session, 10, SyntaxError.class, sCluster.address(1)); awaitTrackerUpdate(60); setTime(30, SECONDS); // ensure host1 still used after another tick. queryTracker.query(session, 10, SyntaxError.class, sCluster.address(1)); } finally { cluster.close(); } } /** * Validates that {@link ErrorAwarePolicy} will regard a custom {@link ErrorAwarePolicy.ErrorFilter} by only * penalizing a node when it produces exceptions that evaluate to true in the filter implementation. * <p/> * It first executes 10 queries with an error type that is not considered and verify that host1 is never excluded. * <p/> * It then executes queries with an error type that is considered and verifies that host1 is then excluded and host2 * is used instead. * * @jira_ticket JAVA-1055 * @test_category load_balancing:error_aware * @since 3.1.0 */ @Test(groups = "short") public void should_only_consider_exceptions_based_on_errors_filter() throws InterruptedException { ErrorAwarePolicy.ErrorFilter iqeOnlyFilter = new ErrorAwarePolicy.ErrorFilter() { @Override public boolean shouldConsiderError(Exception e, Host host, Statement statement) { return e.getClass().isAssignableFrom(InvalidQueryException.class); } }; LoadBalancingPolicy lbp = ErrorAwarePolicy.builder(new SortingLoadBalancingPolicy()) .withMaxErrorsPerMinute(1) .withClock(clock) .withErrorsFilter(iqeOnlyFilter) .build(); // Use fall through retry policy so other hosts aren't tried. Cluster cluster = builder(lbp).withRetryPolicy(FallthroughRetryPolicy.INSTANCE).build(); try { Session session = cluster.connect(); cluster.register(latencyTracker); setTime(0, SECONDS); // UnauthorizedException evaluates to false in the filter, so it should not be considered. prime(1, unauthorized); queryTracker.query(session, 10, UnauthorizedException.class, sCluster.address(1)); awaitTrackerUpdate(10); setTime(5, SECONDS); // should still query host1 queryTracker.query(session, 1, UnauthorizedException.class, sCluster.address(1)); // InvalidQueryException evaluates to true, so it *should* be considered increment the count prime(1, invalid); queryTracker.query(session, 2, InvalidQueryException.class, sCluster.address(1)); awaitTrackerUpdate(13); // Advance time so that the rolling count updates the next time we query. // The first errors that were considered were at t = 5 seconds and this is when the rolling count was // initialized, so we want to be at t > 5 + 5 for the rolling count to update setTime(10, SECONDS); // The next query should succeed and hit node 2 since node 1 is now ignored. queryTracker.query(session, 1, sCluster.address(2)); } finally { cluster.close(); } } /** * Validates that an {@link ErrorAwarePolicy} configured with its defaults behaves as documented, that being that * the maximum number of errors is 1 and the retry period is 120 seconds. * * @jira_ticket JAVA-1055 * @test_category load_balancing:error_aware * @since 3.1.0 */ @Test(groups = "short") public void should_regard_defaults() throws InterruptedException { LoadBalancingPolicy lbp = ErrorAwarePolicy.builder(new SortingLoadBalancingPolicy()) .withClock(clock) .build(); Cluster cluster = builder(lbp).build(); try { Session session = cluster.connect(); cluster.register(latencyTracker); setTime(0, SECONDS); // Make 2 queries producing a count higher than the threshold queryTracker.query(session, 2, UnauthorizedException.class, sCluster.address(1)); awaitTrackerUpdate(2); // Advance time so the rolling count ticks, next query should go to host 2 setTime(5, SECONDS); queryTracker.query(session, 5, sCluster.address(2)); // Advance clock 30 seconds, this is within the retry period so host 1 should still be excluded. setTime(35, SECONDS); queryTracker.query(session, 5, sCluster.address(2)); // At this point 120 seconds have elapsed, the load balancing policy should see that the retry period has // elapsed and resurrect node 1 which will be used and fail. setTime(125, SECONDS); queryTracker.query(session, 1, UnauthorizedException.class, sCluster.address(1)); } finally { cluster.close(); } } }