/* * Copyright (C) 2012-2015 DataStax Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.datastax.driver.core; import com.datastax.driver.core.exceptions.OperationTimedOutException; import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.Uninterruptibles; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.testng.annotations.Test; import java.util.List; import java.util.Random; import java.util.concurrent.CountDownLatch; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import static com.datastax.driver.core.CreateCCM.TestMode.PER_METHOD; import static org.assertj.core.api.Assertions.assertThat; import static org.testng.Assert.fail; @CreateCCM(PER_METHOD) public class ReusedStreamIdTest extends CCMTestsSupport { static Logger logger = LoggerFactory.getLogger(ReusedStreamIdTest.class); @SuppressWarnings("unused") public Cluster.Builder createClusterLowReadTimeout() { // Low Read Timeout to trigger retry behavior. return Cluster.builder().withSocketOptions(new SocketOptions().setReadTimeoutMillis(1000)); } /** * Ensures that if activity tied to future completion blocks netty io threads that this does not cause the * driver to possibly invoke the wrong handler for a response, as described in JAVA-1179. * <p/> * This is accomplished by setting up a 2 node cluster and setting a low read timeout (1 second). The test * submits 10 concurrent requests repeatedly for up to 500 queries and on completion of each request may block * in a callback randomly (25% of the time) for 1 second, causing a retry on the next host to trigger. If a new * stream id is not allocated on the retry, its possible it could use an already used stream id and cause the driver * to invoke the wrong handlers for a response. The test checks for this by ensuring that the column returned in a * response matches the one queried. If the column received does not match, the test fails. In cases where this * bug is present, it should be detected within 10 seconds. * * @jira_ticket JAVA-1179 * @test_category queries:async_callback */ @Test(groups = "long") @CCMConfig(numberOfNodes = 2, clusterProvider = "createClusterLowReadTimeout") public void should_not_receive_wrong_response_when_callbacks_block_io_thread() { int concurrency = 10; final Semaphore semaphore = new Semaphore(concurrency); // RNG to determine sleep times. final Random random = new Random(); try { // Use the system.local table and alternate between columns that are queried. List<ColumnMetadata> columnsToGrab = cluster().getMetadata().getKeyspace("system").getTable("local").getColumns(); assertThat(columnsToGrab.size()).isGreaterThan(1); final CountDownLatch errorTrigger = new CountDownLatch(1); long start = System.currentTimeMillis(); // 500 iterations will take roughly 1 minute. int iterations = 500; final AtomicInteger completed = new AtomicInteger(0); for (int i = 1; i <= iterations; i++) { try { if (errorTrigger.getCount() == 0) { fail(String.format("Error triggered at or before %d of %d requests after %dms.", i, iterations, System.currentTimeMillis() - start)); } semaphore.acquire(); final String column = columnsToGrab.get(i % columnsToGrab.size()).getName(); String query = String.format("select %s from system.local", column); ResultSetFuture future = session().executeAsync(query); Futures.addCallback(future, new FutureCallback<ResultSet>() { @Override public void onSuccess(ResultSet result) { semaphore.release(); // Expect the column that you queried to be present, if its not we got the wrong response // back. int columnIndex = result.getColumnDefinitions().getIndexOf(column); if (columnIndex == -1) { logger.error("Got response without column {}, got columns {} from Host {}.", column, result.getColumnDefinitions(), result.getExecutionInfo().getQueriedHost()); errorTrigger.countDown(); return; } completed.incrementAndGet(); // Block netty io thread 25% of the time. int num = random.nextInt(1); if (num == 0) { // Sleep exactly one second, should trigger retry. Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS); } } @Override public void onFailure(Throwable t) { semaphore.release(); // Timeouts are inevitable because of low query timeouts and blocked threads. if (!(t instanceof OperationTimedOutException)) { logger.error("Unexpected error encountered.", t); errorTrigger.countDown(); } } }); } catch (InterruptedException e) { fail("Test interrupted", e); } if (i % (iterations / 10) == 0) { logger.info("Submitted {} of {} requests. ({} completed successfully)", i, iterations, completed.get()); } } // Wait for 10 seconds for any remaining requests to possibly trigger an error, its likely // that if we get to this point this will not happen. Uninterruptibles.awaitUninterruptibly(errorTrigger, 10, TimeUnit.SECONDS); if (errorTrigger.getCount() == 0) { fail(String.format("Error triggered after %dms.", System.currentTimeMillis() - start)); } // Sanity check to ensure that at least some requests succeeded, we expect some failures if both // hosts timeout as its likely they could be blocked on the event loop. assertThat(completed.get()).isGreaterThan(0); } finally { try { // Acquire all permits to make sure all inflight requests complete. if (!semaphore.tryAcquire(concurrency, 10, TimeUnit.SECONDS)) { fail("Could not acquire all permits within 10 seconds of completion."); } } catch (InterruptedException e) { fail("Interrupted.", e); } } } }