/**
* Copyright 2016 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
package com.github.ambry.router;
import com.github.ambry.clustermap.DataNodeId;
import com.github.ambry.clustermap.MockClusterMap;
import com.github.ambry.clustermap.ReplicaId;
import com.github.ambry.commons.ByteBufferReadableStreamChannel;
import com.github.ambry.commons.LoggingNotificationSystem;
import com.github.ambry.commons.ResponseHandler;
import com.github.ambry.commons.ServerErrorCode;
import com.github.ambry.config.RouterConfig;
import com.github.ambry.config.VerifiableProperties;
import com.github.ambry.messageformat.BlobProperties;
import com.github.ambry.network.NetworkClient;
import com.github.ambry.network.NetworkClientErrorCode;
import com.github.ambry.network.RequestInfo;
import com.github.ambry.network.ResponseInfo;
import com.github.ambry.utils.MockTime;
import com.github.ambry.utils.SystemTime;
import com.github.ambry.utils.TestUtils;
import com.github.ambry.utils.Utils;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
/**
* Class to test the {@link NonBlockingRouter}
*/
public class NonBlockingRouterTest {
// Connection pool limits per data node used by the mock network client factory.
private static final int MAX_PORTS_PLAIN_TEXT = 3;
private static final int MAX_PORTS_SSL = 3;
// Timeout for checking out a connection from the pool.
private static final int CHECKOUT_TIMEOUT_MS = 1000;
// Timeout after which an in-flight request is considered failed by the router.
private static final int REQUEST_TIMEOUT_MS = 1000;
// Parallelism and success targets for put/get/delete operations (fed into RouterConfig below).
private static final int PUT_REQUEST_PARALLELISM = 3;
private static final int PUT_SUCCESS_TARGET = 2;
private static final int GET_REQUEST_PARALLELISM = 2;
private static final int GET_SUCCESS_TARGET = 1;
private static final int DELETE_REQUEST_PARALLELISM = 3;
private static final int DELETE_SUCCESS_TARGET = 2;
// Maximum time tests wait for asynchronous events (latches, responses, background ops).
private static final int AWAIT_TIMEOUT_MS = 2000;
// Size of the blob content used by put operations in these tests.
private static final int PUT_CONTENT_SIZE = 1000;
// Chunk size for puts; tests lower this (e.g. PUT_CONTENT_SIZE / 4) to force composite blobs.
private int maxPutChunkSize = PUT_CONTENT_SIZE;
private final Random random = new Random();
private NonBlockingRouter router;
private PutManager putManager;
private GetManager getManager;
private DeleteManager deleteManager;
// Shared selector state that lets tests inject network-level failures into the mock client.
private AtomicReference<MockSelectorState> mockSelectorState = new AtomicReference<MockSelectorState>();
private final MockTime mockTime;
private final MockClusterMap mockClusterMap;
// Request params populated by setOperationParams() before each put.
BlobProperties putBlobProperties;
byte[] putUserMetadata;
byte[] putContent;
ReadableStreamChannel putChannel;
/**
 * Initialize parameters common to all tests.
 * @throws Exception if instantiating the {@link MockClusterMap} fails.
 */
public NonBlockingRouterTest() throws Exception {
mockTime = new MockTime();
mockClusterMap = new MockClusterMap();
// Reset the global operations counter so state leaked by a previous test cannot affect this one.
NonBlockingRouter.currentOperationsCount.set(0);
}
// Runs after every test: verifies that no operation leaked, i.e. every submitted operation completed.
@After
public void after() {
Assert.assertEquals(0, NonBlockingRouter.currentOperationsCount.get());
}
/**
 * Constructs and returns a {@link Properties} instance with the defaults required for instantiating
 * the {@link NonBlockingRouter}.
 * @param routerDataCenter the datacenter name to configure as the router's local datacenter.
 * @return the created Properties instance.
 */
private Properties getNonBlockingRouterProperties(String routerDataCenter) {
  Properties props = new Properties();
  // Router identity.
  props.setProperty("router.hostname", "localhost");
  props.setProperty("router.datacenter.name", routerDataCenter);
  // Put configuration.
  props.setProperty("router.put.request.parallelism", String.valueOf(PUT_REQUEST_PARALLELISM));
  props.setProperty("router.put.success.target", String.valueOf(PUT_SUCCESS_TARGET));
  props.setProperty("router.max.put.chunk.size.bytes", String.valueOf(maxPutChunkSize));
  // Get configuration.
  props.setProperty("router.get.request.parallelism", String.valueOf(GET_REQUEST_PARALLELISM));
  props.setProperty("router.get.success.target", String.valueOf(GET_SUCCESS_TARGET));
  // Delete configuration.
  props.setProperty("router.delete.request.parallelism", String.valueOf(DELETE_REQUEST_PARALLELISM));
  props.setProperty("router.delete.success.target", String.valueOf(DELETE_SUCCESS_TARGET));
  // Timeouts.
  props.setProperty("router.connection.checkout.timeout.ms", String.valueOf(CHECKOUT_TIMEOUT_MS));
  props.setProperty("router.request.timeout.ms", String.valueOf(REQUEST_TIMEOUT_MS));
  // Cluster map configuration.
  props.setProperty("clustermap.cluster.name", "test");
  props.setProperty("clustermap.datacenter.name", "dc1");
  props.setProperty("clustermap.host.name", "localhost");
  return props;
}
/**
 * Construct {@link Properties} and {@link MockServerLayout} and initialize and set the
 * router with them.
 * @throws IOException if the router could not be instantiated.
 */
private void setRouter() throws IOException {
// Default to "DC1" as the local datacenter with a fresh mock server layout.
setRouter(getNonBlockingRouterProperties("DC1"), new MockServerLayout(mockClusterMap));
}
/**
 * Initialize and set the router with the given {@link Properties} and {@link MockServerLayout}.
 * @param props the {@link Properties} to build the router config from.
 * @param mockServerLayout the {@link MockServerLayout} representing the servers in the cluster.
 * @throws IOException if the router could not be instantiated.
 */
private void setRouter(Properties props, MockServerLayout mockServerLayout) throws IOException {
  VerifiableProperties verifiableProperties = new VerifiableProperties(props);
  RouterConfig routerConfig = new RouterConfig(verifiableProperties);
  NonBlockingRouterMetrics routerMetrics = new NonBlockingRouterMetrics(mockClusterMap);
  // A null selector state means the mock network client behaves normally (no injected failures).
  MockNetworkClientFactory networkClientFactory =
      new MockNetworkClientFactory(verifiableProperties, null, MAX_PORTS_PLAIN_TEXT, MAX_PORTS_SSL,
          CHECKOUT_TIMEOUT_MS, mockServerLayout, mockTime);
  router = new NonBlockingRouter(routerConfig, routerMetrics, networkClientFactory,
      new LoggingNotificationSystem(), mockClusterMap, mockTime);
}
/**
 * Set up the request parameters (blob properties, random user metadata, random content and a
 * readable channel over that content) used by the next put operation.
 */
private void setOperationParams() {
  putBlobProperties = new BlobProperties(-1, "serviceId", "memberId", "contentType", false, Utils.Infinite_Time);
  byte[] userMetadata = new byte[10];
  random.nextBytes(userMetadata);
  putUserMetadata = userMetadata;
  byte[] content = new byte[PUT_CONTENT_SIZE];
  random.nextBytes(content);
  putContent = content;
  putChannel = new ByteBufferReadableStreamChannel(ByteBuffer.wrap(putContent));
}
/**
 * Test the {@link NonBlockingRouterFactory}.
 * Instantiation must fail when the configured router datacenter is not in the cluster map, and
 * must succeed (and yield a closeable router) when it is.
 */
@Test
public void testNonBlockingRouterFactory() throws Exception {
  // A datacenter name absent from the cluster map must be rejected.
  Properties props = getNonBlockingRouterProperties("NotInClusterMap");
  VerifiableProperties verifiableProperties = new VerifiableProperties(props);
  try {
    router = (NonBlockingRouter) new NonBlockingRouterFactory(verifiableProperties, mockClusterMap,
        new LoggingNotificationSystem(), null).getRouter();
    Assert.fail("NonBlockingRouterFactory instantiation should have failed because the router datacenter is not in "
        + "the cluster map");
  } catch (IllegalStateException expected) {
    // Expected: the factory validates the datacenter against the cluster map.
  }
  // A valid datacenter must yield a working router.
  props = getNonBlockingRouterProperties("DC1");
  verifiableProperties = new VerifiableProperties(props);
  router = (NonBlockingRouter) new NonBlockingRouterFactory(verifiableProperties, mockClusterMap,
      new LoggingNotificationSystem(), null).getRouter();
  assertExpectedThreadCounts(2, 1);
  router.close();
  assertExpectedThreadCounts(0, 0);
}
/**
 * Test Router with a single scaling unit.
 * Exercises the basic put/get/getBlobInfo/delete flow end to end, then verifies that closing the
 * router stops its threads and that subsequent submissions complete immediately.
 */
@Test
public void testRouterBasic() throws Exception {
setRouter();
assertExpectedThreadCounts(2, 1);
setOperationParams();
// More extensive test for puts present elsewhere - these statements are here just to exercise the flow within the
// NonBlockingRouter class, and to ensure that operations submitted to a router eventually completes.
String blobId = router.putBlob(putBlobProperties, putUserMetadata, putChannel).get();
// Fetch the full blob, then only its blob info, using the same id.
router.getBlob(blobId, new GetBlobOptionsBuilder().build()).get();
router.getBlob(blobId, new GetBlobOptionsBuilder().operationType(GetBlobOptions.OperationType.BlobInfo).build())
.get();
router.deleteBlob(blobId, null).get();
router.close();
assertExpectedThreadCounts(0, 0);
//submission after closing should return a future that is already done.
assertClosed();
}
/**
 * Test behavior with various null inputs to router methods.
 * Each required argument, when null, must be rejected synchronously with
 * {@link IllegalArgumentException}; null user metadata is explicitly allowed.
 * @throws Exception
 */
@Test
public void testNullArguments() throws Exception {
setRouter();
assertExpectedThreadCounts(2, 1);
setOperationParams();
try {
router.getBlob(null, new GetBlobOptionsBuilder().build());
Assert.fail("null blobId should have resulted in IllegalArgumentException");
} catch (IllegalArgumentException expected) {
}
try {
router.getBlob("", null);
Assert.fail("null options should have resulted in IllegalArgumentException");
} catch (IllegalArgumentException expected) {
}
try {
router.putBlob(putBlobProperties, putUserMetadata, null);
Assert.fail("null channel should have resulted in IllegalArgumentException");
} catch (IllegalArgumentException expected) {
}
try {
router.putBlob(null, putUserMetadata, putChannel);
Assert.fail("null blobProperties should have resulted in IllegalArgumentException");
} catch (IllegalArgumentException expected) {
}
try {
router.deleteBlob(null, null);
Assert.fail("null blobId should have resulted in IllegalArgumentException");
} catch (IllegalArgumentException expected) {
}
// null user metadata should work.
router.putBlob(putBlobProperties, null, putChannel).get();
router.close();
assertExpectedThreadCounts(0, 0);
//submission after closing should return a future that is already done.
assertClosed();
}
/**
 * Test router put operation in a scenario where there are no partitions available.
 * The put must fail with {@link RouterErrorCode#AmbryUnavailable}.
 */
@Test
public void testRouterPartitionsUnavailable() throws Exception {
  setRouter();
  setOperationParams();
  mockClusterMap.markAllPartitionsUnavailable();
  try {
    router.putBlob(putBlobProperties, putUserMetadata, putChannel).get();
    Assert.fail("Put should have failed if there are no partitions");
  } catch (ExecutionException e) {
    // Catch only ExecutionException so unrelated failures surface directly instead of causing a
    // misleading ClassCastException/NullPointerException on the cast below.
    RouterException r = (RouterException) e.getCause();
    Assert.assertEquals("Should have received AmbryUnavailable error", RouterErrorCode.AmbryUnavailable,
        r.getErrorCode());
  }
  router.close();
  assertExpectedThreadCounts(0, 0);
  assertClosed();
}
/**
 * Test router put operation in a scenario where there are partitions, but none in the local DC.
 * This should not ideally happen unless there is a bad config, but the router should be resilient and
 * just error out these operations with {@link RouterErrorCode#UnexpectedInternalError}.
 */
@Test
public void testRouterNoPartitionInLocalDC() throws Exception {
  // set the local DC to invalid, so that for puts, no partitions are available locally.
  Properties props = getNonBlockingRouterProperties("invalidDC");
  setRouter(props, new MockServerLayout(mockClusterMap));
  setOperationParams();
  try {
    router.putBlob(putBlobProperties, putUserMetadata, putChannel).get();
    Assert.fail("Put should have failed if there are no partitions");
  } catch (ExecutionException e) {
    // Catch only ExecutionException so unrelated failures surface directly instead of causing a
    // misleading ClassCastException/NullPointerException on the cast below.
    RouterException r = (RouterException) e.getCause();
    Assert.assertEquals(RouterErrorCode.UnexpectedInternalError, r.getErrorCode());
  }
  router.close();
  assertExpectedThreadCounts(0, 0);
  assertClosed();
}
/**
 * Test RequestResponseHandler thread exit flow. If the RequestResponseHandlerThread exits on its own (due to a
 * Throwable), then the router gets closed immediately along with the completion of all the operations.
 */
@Test
public void testRequestResponseHandlerThreadExitFlow() throws Exception {
  Properties props = getNonBlockingRouterProperties("DC1");
  VerifiableProperties verifiableProperties = new VerifiableProperties(props);
  // Deliberately shadow the instance fields: this test drives its own time source (advanced manually
  // below) and kills the router's handler thread, so it needs isolated instances.
  MockClusterMap mockClusterMap = new MockClusterMap();
  MockTime mockTime = new MockTime();
  router = new NonBlockingRouter(new RouterConfig(verifiableProperties), new NonBlockingRouterMetrics(mockClusterMap),
      new MockNetworkClientFactory(verifiableProperties, mockSelectorState, MAX_PORTS_PLAIN_TEXT, MAX_PORTS_SSL,
          CHECKOUT_TIMEOUT_MS, new MockServerLayout(mockClusterMap), mockTime), new LoggingNotificationSystem(),
      mockClusterMap, mockTime);
  assertExpectedThreadCounts(2, 1);
  setOperationParams();
  // Scenario 1: every poll throws an exception - the put should fail with OperationTimedOut.
  mockSelectorState.set(MockSelectorState.ThrowExceptionOnAllPoll);
  // Use the parameterized Future<String> (putBlob returns the blob id) instead of a raw Future.
  Future<String> future = router.putBlob(putBlobProperties, putUserMetadata, putChannel);
  try {
    while (!future.isDone()) {
      // Advance mock time so the request timeout can fire.
      mockTime.sleep(1000);
      Thread.yield();
    }
    future.get();
    Assert.fail("The operation should have failed");
  } catch (ExecutionException e) {
    Assert.assertEquals(RouterErrorCode.OperationTimedOut, ((RouterException) e.getCause()).getErrorCode());
  }
  setOperationParams();
  // Scenario 2: a Throwable on send kills the RequestResponseHandlerThread, which must close the
  // router and complete all pending operations with RouterClosed.
  mockSelectorState.set(MockSelectorState.ThrowThrowableOnSend);
  future = router.putBlob(putBlobProperties, putUserMetadata, putChannel);
  Thread requestResponseHandlerThread = TestUtils.getThreadByThisName("RequestResponseHandlerThread");
  // If the thread is still running, wait until it dies
  if (requestResponseHandlerThread != null) {
    requestResponseHandlerThread.join();
  }
  try {
    future.get();
    Assert.fail("The operation should have failed");
  } catch (ExecutionException e) {
    Assert.assertEquals(RouterErrorCode.RouterClosed, ((RouterException) e.getCause()).getErrorCode());
  }
  assertClosed();
  // Ensure that both operations failed and with the right exceptions.
  Assert.assertEquals("No ChunkFiller Thread should be running after the router is closed", 0,
      TestUtils.numThreadsByThisName("ChunkFillerThread"));
  Assert.assertEquals("No RequestResponseHandler should be running after the router is closed", 0,
      TestUtils.numThreadsByThisName("RequestResponseHandlerThread"));
  Assert.assertEquals("All operations should have completed", 0, router.getOperationsCount());
}
/**
 * Test that if a composite blob put fails, the successfully put data chunks are deleted.
 */
@Test
public void testUnsuccessfulPutDataChunkDelete() throws Exception {
  // Ensure there are 4 chunks.
  maxPutChunkSize = PUT_CONTENT_SIZE / 4;
  Properties props = getNonBlockingRouterProperties("DC1");
  VerifiableProperties verifiableProperties = new VerifiableProperties(props);
  MockClusterMap mockClusterMap = new MockClusterMap();
  MockTime mockTime = new MockTime();
  MockServerLayout mockServerLayout = new MockServerLayout(mockClusterMap);
  // Since this test wants to ensure that successfully put data chunks are deleted when the overall put operation
  // fails, it uses a notification system to track the deletions. Two chunks are expected to succeed and
  // therefore to be deleted afterwards, hence the latch count of 2.
  final CountDownLatch deletesDoneLatch = new CountDownLatch(2);
  final Map<String, String> blobsThatAreDeleted = new HashMap<>();
  LoggingNotificationSystem deleteTrackingNotificationSystem = new LoggingNotificationSystem() {
    @Override
    public void onBlobDeleted(String blobId, String serviceId) {
      blobsThatAreDeleted.put(blobId, serviceId);
      deletesDoneLatch.countDown();
    }
  };
  router = new NonBlockingRouter(new RouterConfig(verifiableProperties), new NonBlockingRouterMetrics(mockClusterMap),
      new MockNetworkClientFactory(verifiableProperties, mockSelectorState, MAX_PORTS_PLAIN_TEXT, MAX_PORTS_SSL,
          CHECKOUT_TIMEOUT_MS, mockServerLayout, mockTime), deleteTrackingNotificationSystem, mockClusterMap,
      mockTime);
  setOperationParams();
  List<DataNodeId> dataNodeIds = mockClusterMap.getDataNodeIds();
  List<ServerErrorCode> serverErrorList = new ArrayList<>();
  // There are 4 chunks for this blob.
  // All put operations make one request to each local server as there are 3 servers overall in the local DC.
  // Set the state of the mock servers so that they return success for the first 2 requests in order to succeed
  // the first two chunks.
  serverErrorList.add(ServerErrorCode.No_Error);
  serverErrorList.add(ServerErrorCode.No_Error);
  // fail requests for third and fourth data chunks including the slipped put attempts:
  serverErrorList.add(ServerErrorCode.Unknown_Error);
  serverErrorList.add(ServerErrorCode.Unknown_Error);
  serverErrorList.add(ServerErrorCode.Unknown_Error);
  serverErrorList.add(ServerErrorCode.Unknown_Error);
  // all subsequent requests (no more puts, but there will be deletes) will succeed.
  for (DataNodeId dataNodeId : dataNodeIds) {
    MockServer server = mockServerLayout.getMockServer(dataNodeId.getHostname(), dataNodeId.getPort());
    server.setServerErrors(serverErrorList);
  }
  // Submit the put operation and wait for it to fail.
  try {
    router.putBlob(putBlobProperties, putUserMetadata, putChannel).get();
    // Previously a (wrongly) successful put would let the test pass without exercising the delete path.
    Assert.fail("Put should have failed because the third and fourth chunk puts were set up to fail");
  } catch (ExecutionException e) {
    Assert.assertEquals(RouterErrorCode.AmbryUnavailable, ((RouterException) e.getCause()).getErrorCode());
  }
  // Now, wait until the deletes of the successfully put blobs are complete.
  Assert.assertTrue("Deletes should not take longer than " + AWAIT_TIMEOUT_MS,
      deletesDoneLatch.await(AWAIT_TIMEOUT_MS, TimeUnit.MILLISECONDS));
  // Background deletes must carry the service ID prefix on top of the original put's service ID.
  for (Map.Entry<String, String> blobIdAndServiceId : blobsThatAreDeleted.entrySet()) {
    Assert.assertEquals("Unexpected service ID for deleted blob",
        BackgroundDeleteRequest.SERVICE_ID_PREFIX + putBlobProperties.getServiceId(), blobIdAndServiceId.getValue());
  }
  router.close();
  assertClosed();
  Assert.assertEquals("All operations should have completed", 0, router.getOperationsCount());
}
/**
 * Test that if a composite blob is deleted, the data chunks are eventually deleted. Also check the service IDs used
 * for delete operations.
 */
@Test
public void testCompositeBlobDataChunksDelete() throws Exception {
// Ensure there are 4 chunks.
maxPutChunkSize = PUT_CONTENT_SIZE / 4;
Properties props = getNonBlockingRouterProperties("DC1");
VerifiableProperties verifiableProperties = new VerifiableProperties((props));
RouterConfig routerConfig = new RouterConfig(verifiableProperties);
MockClusterMap mockClusterMap = new MockClusterMap();
MockTime mockTime = new MockTime();
MockServerLayout mockServerLayout = new MockServerLayout(mockClusterMap);
// metadata blob + data chunks. The latch is reset per iteration, hence the AtomicReference.
final AtomicReference<CountDownLatch> deletesDoneLatch = new AtomicReference<>();
final Map<String, String> blobsThatAreDeleted = new HashMap<>();
LoggingNotificationSystem deleteTrackingNotificationSystem = new LoggingNotificationSystem() {
@Override
public void onBlobDeleted(String blobId, String serviceId) {
blobsThatAreDeleted.put(blobId, serviceId);
deletesDoneLatch.get().countDown();
}
};
router = new NonBlockingRouter(routerConfig, new NonBlockingRouterMetrics(mockClusterMap),
new MockNetworkClientFactory(verifiableProperties, mockSelectorState, MAX_PORTS_PLAIN_TEXT, MAX_PORTS_SSL,
CHECKOUT_TIMEOUT_MS, mockServerLayout, mockTime), deleteTrackingNotificationSystem, mockClusterMap,
mockTime);
setOperationParams();
String blobId = router.putBlob(putBlobProperties, putUserMetadata, putChannel).get();
String deleteServiceId = "delete-service";
Set<String> blobsToBeDeleted = getBlobsInServers(mockServerLayout);
// The second iteration is to test the case where the blob was already deleted.
// The third iteration is to test the case where the blob has expired.
for (int i = 0; i < 3; i++) {
if (i == 2) {
// Create a clean cluster and put another blob that immediate expires.
setOperationParams();
putBlobProperties = new BlobProperties(-1, "serviceId", "memberId", "contentType", false, 0);
blobId = router.putBlob(putBlobProperties, putUserMetadata, putChannel).get();
Set<String> allBlobsInServer = getBlobsInServers(mockServerLayout);
allBlobsInServer.removeAll(blobsToBeDeleted);
blobsToBeDeleted = allBlobsInServer;
}
blobsThatAreDeleted.clear();
// Expect 5 deletes: the metadata blob plus the 4 data chunks.
deletesDoneLatch.set(new CountDownLatch(5));
router.deleteBlob(blobId, deleteServiceId, null).get();
Assert.assertTrue("Deletes should not take longer than " + AWAIT_TIMEOUT_MS,
deletesDoneLatch.get().await(AWAIT_TIMEOUT_MS, TimeUnit.MILLISECONDS));
Assert.assertTrue("All blobs in server are deleted", blobsThatAreDeleted.keySet().containsAll(blobsToBeDeleted));
Assert.assertTrue("Only blobs in server are deleted", blobsToBeDeleted.containsAll(blobsThatAreDeleted.keySet()));
// The directly deleted (metadata) blob carries the caller's service ID; background deletes of the
// data chunks carry the background-delete prefix on top of it.
for (Map.Entry<String, String> blobIdAndServiceId : blobsThatAreDeleted.entrySet()) {
String expectedServiceId = blobIdAndServiceId.getKey().equals(blobId) ? deleteServiceId
: BackgroundDeleteRequest.SERVICE_ID_PREFIX + deleteServiceId;
Assert.assertEquals("Unexpected service ID for deleted blob", expectedServiceId, blobIdAndServiceId.getValue());
}
}
// Deleting again with a null service ID should still trigger all 5 deletes.
deletesDoneLatch.set(new CountDownLatch(5));
router.deleteBlob(blobId, null, null).get();
Assert.assertTrue("Deletes should not take longer than " + AWAIT_TIMEOUT_MS,
deletesDoneLatch.get().await(AWAIT_TIMEOUT_MS, TimeUnit.MILLISECONDS));
router.close();
assertClosed();
Assert.assertEquals("All operations should have completed", 0, router.getOperationsCount());
}
/**
 * Return the blob ids of all the blobs hosted by the servers in the given layout.
 * @param mockServerLayout the {@link MockServerLayout} representing the cluster.
 * @return a Set of blob id strings of the blobs in the servers in the cluster.
 */
private Set<String> getBlobsInServers(MockServerLayout mockServerLayout) {
  Set<String> allBlobIds = new HashSet<>();
  // Union the blob id sets of every server in the layout.
  for (MockServer server : mockServerLayout.getMockServers()) {
    allBlobIds.addAll(server.getBlobs().keySet());
  }
  return allBlobIds;
}
/**
 * Test to ensure that for simple blob deletions, no additional background delete operations
 * are initiated.
 */
@Test
public void testSimpleBlobDelete() throws Exception {
// Set the chunk size equal to the content size so the put results in a simple (single-chunk) blob.
maxPutChunkSize = PUT_CONTENT_SIZE;
Properties props = getNonBlockingRouterProperties("DC1");
VerifiableProperties verifiableProperties = new VerifiableProperties((props));
MockClusterMap mockClusterMap = new MockClusterMap();
MockTime mockTime = new MockTime();
MockServerLayout mockServerLayout = new MockServerLayout(mockClusterMap);
String deleteServiceId = "delete-service";
// Track delete notifications; for a simple blob exactly one delete should ever be initiated.
final AtomicInteger deletesInitiated = new AtomicInteger();
final AtomicReference<String> receivedDeleteServiceId = new AtomicReference<>();
LoggingNotificationSystem deleteTrackingNotificationSystem = new LoggingNotificationSystem() {
@Override
public void onBlobDeleted(String blobId, String serviceId) {
deletesInitiated.incrementAndGet();
receivedDeleteServiceId.set(serviceId);
}
};
router = new NonBlockingRouter(new RouterConfig(verifiableProperties), new NonBlockingRouterMetrics(mockClusterMap),
new MockNetworkClientFactory(verifiableProperties, mockSelectorState, MAX_PORTS_PLAIN_TEXT, MAX_PORTS_SSL,
CHECKOUT_TIMEOUT_MS, mockServerLayout, mockTime), deleteTrackingNotificationSystem, mockClusterMap,
mockTime);
setOperationParams();
String blobId = router.putBlob(putBlobProperties, putUserMetadata, putChannel).get();
router.deleteBlob(blobId, deleteServiceId, null).get();
// Wait (bounded by AWAIT_TIMEOUT_MS of wall-clock time) for any background operations to drain.
long waitStart = SystemTime.getInstance().milliseconds();
while (router.getBackgroundOperationsCount() != 0
&& SystemTime.getInstance().milliseconds() < waitStart + AWAIT_TIMEOUT_MS) {
Thread.sleep(1000);
}
Assert.assertEquals("All background operations should be complete ", 0, router.getBackgroundOperationsCount());
Assert.assertEquals("Only the original blob deletion should have been initiated", 1, deletesInitiated.get());
Assert.assertEquals("The delete service ID should match the expected value", deleteServiceId,
receivedDeleteServiceId.get());
router.close();
assertClosed();
Assert.assertEquals("All operations should have completed", 0, router.getOperationsCount());
}
/**
 * Test that multiple scaling units can be instantiated, exercised and closed.
 */
@Test
public void testMultipleScalingUnit() throws Exception {
  final int scalingUnitCount = 3;
  Properties props = getNonBlockingRouterProperties("DC1");
  props.setProperty("router.scaling.unit.count", String.valueOf(scalingUnitCount));
  setRouter(props, new MockServerLayout(mockClusterMap));
  assertExpectedThreadCounts(scalingUnitCount + 1, scalingUnitCount);
  // Submit enough puts that every scaling unit gets exercised.
  int jobCount = scalingUnitCount * 10;
  for (int i = 0; i < jobCount; i++) {
    setOperationParams();
    router.putBlob(putBlobProperties, putUserMetadata, putChannel).get();
  }
  router.close();
  assertExpectedThreadCounts(0, 0);
  // Submission after closing should return a future that is already done.
  setOperationParams();
  assertClosed();
}
/**
 * Response handling related tests for all operation managers.
 * Uses a mock {@link ResponseHandler} to verify failure-detector notifications for put, get and
 * delete operations under various orderings of successful and failed responses.
 */
@Test
public void testResponseHandling() throws Exception {
Properties props = getNonBlockingRouterProperties("DC1");
VerifiableProperties verifiableProperties = new VerifiableProperties((props));
setOperationParams();
// Collectors populated by the mock response handler below and inspected by the helper methods.
final List<ReplicaId> failedReplicaIds = new ArrayList<>();
final AtomicInteger successfulResponseCount = new AtomicInteger(0);
final AtomicBoolean invalidResponse = new AtomicBoolean(false);
ResponseHandler mockResponseHandler = new ResponseHandler(mockClusterMap) {
@Override
public void onEvent(ReplicaId replicaId, Object e) {
if (e instanceof ServerErrorCode) {
if (e == ServerErrorCode.No_Error) {
successfulResponseCount.incrementAndGet();
} else {
invalidResponse.set(true);
}
} else {
failedReplicaIds.add(replicaId);
}
}
};
// Instantiate a router just to put a blob successfully.
MockServerLayout mockServerLayout = new MockServerLayout(mockClusterMap);
setRouter(props, mockServerLayout);
setOperationParams();
// More extensive test for puts present elsewhere - these statements are here just to exercise the flow within the
// NonBlockingRouter class, and to ensure that operations submitted to a router eventually completes.
String blobId = router.putBlob(putBlobProperties, putUserMetadata, putChannel).get();
router.close();
for (MockServer mockServer : mockServerLayout.getMockServers()) {
mockServer.setServerErrorForAllRequests(ServerErrorCode.No_Error);
}
// Drive the operation managers directly (no router) through a standalone network client.
NetworkClient networkClient =
new MockNetworkClientFactory(verifiableProperties, mockSelectorState, MAX_PORTS_PLAIN_TEXT, MAX_PORTS_SSL,
CHECKOUT_TIMEOUT_MS, mockServerLayout, mockTime).getNetworkClient();
putManager = new PutManager(mockClusterMap, mockResponseHandler, new LoggingNotificationSystem(),
new RouterConfig(verifiableProperties), new NonBlockingRouterMetrics(mockClusterMap),
new RouterCallback(networkClient, new ArrayList<BackgroundDeleteRequest>()), "0", mockTime);
OperationHelper opHelper = new OperationHelper(OperationType.PUT);
// indexToFail = -1: no failures injected; all responses should be successful notifications.
testFailureDetectorNotification(opHelper, networkClient, failedReplicaIds, null, successfulResponseCount,
invalidResponse, -1);
// Test that if a failed response comes before the operation is completed, failure detector is notified.
testFailureDetectorNotification(opHelper, networkClient, failedReplicaIds, null, successfulResponseCount,
invalidResponse, 0);
// Test that if a failed response comes after the operation is completed, failure detector is notified.
testFailureDetectorNotification(opHelper, networkClient, failedReplicaIds, null, successfulResponseCount,
invalidResponse, PUT_REQUEST_PARALLELISM - 1);
testNoResponseNoNotification(opHelper, failedReplicaIds, null, successfulResponseCount, invalidResponse);
testResponseDeserializationError(opHelper, networkClient, null);
opHelper = new OperationHelper(OperationType.GET);
getManager = new GetManager(mockClusterMap, mockResponseHandler, new RouterConfig(verifiableProperties),
new NonBlockingRouterMetrics(mockClusterMap),
new RouterCallback(networkClient, new ArrayList<BackgroundDeleteRequest>()), mockTime);
testFailureDetectorNotification(opHelper, networkClient, failedReplicaIds, blobId, successfulResponseCount,
invalidResponse, -1);
// Test that if a failed response comes before the operation is completed, failure detector is notified.
testFailureDetectorNotification(opHelper, networkClient, failedReplicaIds, blobId, successfulResponseCount,
invalidResponse, 0);
// Test that if a failed response comes after the operation is completed, failure detector is notified.
testFailureDetectorNotification(opHelper, networkClient, failedReplicaIds, blobId, successfulResponseCount,
invalidResponse, GET_REQUEST_PARALLELISM - 1);
testNoResponseNoNotification(opHelper, failedReplicaIds, blobId, successfulResponseCount, invalidResponse);
testResponseDeserializationError(opHelper, networkClient, blobId);
opHelper = new OperationHelper(OperationType.DELETE);
deleteManager = new DeleteManager(mockClusterMap, mockResponseHandler, new LoggingNotificationSystem(),
new RouterConfig(verifiableProperties), new NonBlockingRouterMetrics(mockClusterMap),
new RouterCallback(null, new ArrayList<BackgroundDeleteRequest>()), mockTime);
testFailureDetectorNotification(opHelper, networkClient, failedReplicaIds, blobId, successfulResponseCount,
invalidResponse, -1);
// Test that if a failed response comes before the operation is completed, failure detector is notified.
testFailureDetectorNotification(opHelper, networkClient, failedReplicaIds, blobId, successfulResponseCount,
invalidResponse, 0);
// Test that if a failed response comes after the operation is completed, failure detector is notified.
testFailureDetectorNotification(opHelper, networkClient, failedReplicaIds, blobId, successfulResponseCount,
invalidResponse, DELETE_REQUEST_PARALLELISM - 1);
testNoResponseNoNotification(opHelper, failedReplicaIds, blobId, successfulResponseCount, invalidResponse);
testResponseDeserializationError(opHelper, networkClient, blobId);
putManager.close();
getManager.close();
deleteManager.close();
}
/**
 * Test that failure detector is correctly notified for all responses regardless of the order in which successful
 * and failed responses arrive.
 * @param opHelper the {@link OperationHelper}
 * @param networkClient the {@link NetworkClient}
 * @param failedReplicaIds the list that will contain all the replicas for which failure was notified.
 * @param blobId the id of the blob to get/delete. For puts, this will be null.
 * @param successfulResponseCount the AtomicInteger that will contain the count of replicas for which success was
 * notified.
 * @param invalidResponse the AtomicBoolean that will contain whether an unexpected failure was notified.
 * @param indexToFail if 0 or greater, the index representing which response for which failure is to be simulated.
 * For example, if index is 0, then the first response will be failed.
 * If the index is -1, no responses will be failed, and successful responses will be returned to
 * the operation managers.
 */
private void testFailureDetectorNotification(OperationHelper opHelper, NetworkClient networkClient,
List<ReplicaId> failedReplicaIds, String blobId, AtomicInteger successfulResponseCount,
AtomicBoolean invalidResponse, int indexToFail) throws Exception {
// Reset the collectors populated by the mock response handler.
failedReplicaIds.clear();
successfulResponseCount.set(0);
invalidResponse.set(false);
mockSelectorState.set(MockSelectorState.Good);
FutureResult futureResult = opHelper.submitOperation(blobId);
int requestParallelism = opHelper.requestParallelism;
List<RequestInfo> allRequests = new ArrayList<>();
// Poll the operation manager (bounded by AWAIT_TIMEOUT_MS) until it has handed out one request per
// degree of parallelism.
long loopStartTimeMs = SystemTime.getInstance().milliseconds();
while (allRequests.size() < requestParallelism) {
if (loopStartTimeMs + AWAIT_TIMEOUT_MS < SystemTime.getInstance().milliseconds()) {
Assert.fail("Waited too long for requests.");
}
opHelper.pollOpManager(allRequests);
}
ReplicaId replicaIdToFail =
indexToFail == -1 ? null : ((RouterRequestInfo) allRequests.get(indexToFail)).getReplicaId();
for (RequestInfo requestInfo : allRequests) {
ResponseInfo responseInfo;
if (replicaIdToFail != null && replicaIdToFail.equals(((RouterRequestInfo) requestInfo).getReplicaId())) {
// Simulate a network-level failure for the chosen replica instead of sending its request.
responseInfo = new ResponseInfo(requestInfo, NetworkClientErrorCode.NetworkError, null);
} else {
// Send the request for real and wait (bounded) for its response to arrive.
List<RequestInfo> requestInfoListToSend = new ArrayList<>();
requestInfoListToSend.add(requestInfo);
List<ResponseInfo> responseInfoList;
loopStartTimeMs = SystemTime.getInstance().milliseconds();
do {
if (loopStartTimeMs + AWAIT_TIMEOUT_MS < SystemTime.getInstance().milliseconds()) {
Assert.fail("Waited too long for the response.");
}
responseInfoList = networkClient.sendAndPoll(requestInfoListToSend, 10);
requestInfoListToSend.clear();
} while (responseInfoList.size() == 0);
responseInfo = responseInfoList.get(0);
}
opHelper.handleResponse(responseInfo);
}
// Poll once again so that the operation gets a chance to complete.
allRequests.clear();
opHelper.pollOpManager(allRequests);
futureResult.get(AWAIT_TIMEOUT_MS, TimeUnit.MILLISECONDS);
if (indexToFail == -1) {
Assert.assertEquals("Successful notification should have arrived for replicas that were up",
opHelper.requestParallelism, successfulResponseCount.get());
Assert.assertEquals("Failure detector should not have been notified", 0, failedReplicaIds.size());
Assert.assertFalse("There should be no notifications of any other kind", invalidResponse.get());
} else {
Assert.assertEquals("Failure detector should have been notified", 1, failedReplicaIds.size());
Assert.assertEquals("Failed notification should have arrived for the failed replica", replicaIdToFail,
failedReplicaIds.get(0));
Assert.assertEquals("Successful notification should have arrived for replicas that were up",
opHelper.requestParallelism - 1, successfulResponseCount.get());
Assert.assertFalse("There should be no notifications of any other kind", invalidResponse.get());
}
}
/**
* Test that failure detector is not notified when the router times out requests.
* @param opHelper the {@link OperationHelper}
* @param failedReplicaIds the list that will contain all the replicas for which failure was notified.
* @param blobId the id of the blob to get/delete. For puts, this will be null.
* @param successfulResponseCount the AtomicInteger that will contain the count of replicas for which success was
* notified.
* @param invalidResponse the AtomicBoolean that will contain whether an unexpected failure was notified.
*/
private void testNoResponseNoNotification(OperationHelper opHelper, List<ReplicaId> failedReplicaIds, String blobId,
AtomicInteger successfulResponseCount, AtomicBoolean invalidResponse) throws Exception {
failedReplicaIds.clear();
successfulResponseCount.set(0);
invalidResponse.set(false);
FutureResult futureResult = opHelper.submitOperation(blobId);
List<RequestInfo> allRequests = new ArrayList<>();
long loopStartTimeMs = SystemTime.getInstance().milliseconds();
while (!futureResult.isDone()) {
if (loopStartTimeMs + AWAIT_TIMEOUT_MS < SystemTime.getInstance().milliseconds()) {
Assert.fail("Waited too long for requests.");
}
opHelper.pollOpManager(allRequests);
mockTime.sleep(REQUEST_TIMEOUT_MS + 1);
}
Assert.assertEquals("Successful notification should not have arrived for replicas that were up", 0,
successfulResponseCount.get());
Assert.assertEquals("Failure detector should not have been notified", 0, failedReplicaIds.size());
Assert.assertFalse("There should be no notifications of any other kind", invalidResponse.get());
}
/**
* Test that operations succeed even in the presence of responses that are corrupt and fail to deserialize.
* @param opHelper the {@link OperationHelper}
* @param networkClient the {@link NetworkClient}
* @param blobId the id of the blob to get/delete. For puts, this will be null.
* @throws Exception
*/
private void testResponseDeserializationError(OperationHelper opHelper, NetworkClient networkClient, String blobId)
throws Exception {
mockSelectorState.set(MockSelectorState.Good);
FutureResult futureResult = opHelper.submitOperation(blobId);
int requestParallelism = opHelper.requestParallelism;
List<RequestInfo> allRequests = new ArrayList<>();
long loopStartTimeMs = SystemTime.getInstance().milliseconds();
while (allRequests.size() < requestParallelism) {
if (loopStartTimeMs + AWAIT_TIMEOUT_MS < SystemTime.getInstance().milliseconds()) {
Assert.fail("Waited too long for requests.");
}
opHelper.pollOpManager(allRequests);
}
List<ResponseInfo> responseInfoList = new ArrayList<>();
loopStartTimeMs = SystemTime.getInstance().milliseconds();
do {
if (loopStartTimeMs + AWAIT_TIMEOUT_MS < SystemTime.getInstance().milliseconds()) {
Assert.fail("Waited too long for the response.");
}
responseInfoList.addAll(networkClient.sendAndPoll(allRequests, 10));
allRequests.clear();
} while (responseInfoList.size() < requestParallelism);
// corrupt the first response.
ByteBuffer response = responseInfoList.get(0).getResponse();
byte b = response.get(response.limit() - 1);
response.put(response.limit() - 1, (byte) ~b);
for (ResponseInfo responseInfo : responseInfoList) {
opHelper.handleResponse(responseInfo);
}
allRequests.clear();
opHelper.pollOpManager(allRequests);
try {
futureResult.get(AWAIT_TIMEOUT_MS, TimeUnit.MILLISECONDS);
} catch (ExecutionException e) {
Assert.fail("Operation should have succeeded with one corrupt response");
}
}
/**
* Assert that the number of ChunkFiller and RequestResponseHandler threads running are as expected.
* @param expectedRequestResponseHandlerCount the expected number of ChunkFiller and RequestResponseHandler threads.
* @param expectedChunkFillerCount the expected number of ChunkFiller threads.
*/
private void assertExpectedThreadCounts(int expectedRequestResponseHandlerCount, int expectedChunkFillerCount) {
Assert.assertEquals("Number of RequestResponseHandler threads running should be as expected",
expectedRequestResponseHandlerCount, TestUtils.numThreadsByThisName("RequestResponseHandlerThread"));
Assert.assertEquals("Number of chunkFiller threads running should be as expected", expectedChunkFillerCount,
TestUtils.numThreadsByThisName("ChunkFillerThread"));
if (expectedRequestResponseHandlerCount == 0) {
Assert.assertFalse("Router should be closed if there are no worker threads running", router.isOpen());
Assert.assertEquals("All operations should have completed if the router is closed", 0,
router.getOperationsCount());
}
}
/**
* Assert that submission after closing the router returns a future that is already done and an appropriate
* exception.
*/
private void assertClosed() {
Future<String> future = router.putBlob(putBlobProperties, putUserMetadata, putChannel);
Assert.assertTrue(future.isDone());
RouterException e = (RouterException) ((FutureResult<String>) future).error();
Assert.assertEquals(e.getErrorCode(), RouterErrorCode.RouterClosed);
}
/**
* Enum for the three operation types.
*/
private enum OperationType {
PUT, GET, DELETE,
}
/**
* A helper class to abstract away the details about specific operation manager.
*/
private class OperationHelper {
final OperationType opType;
int requestParallelism = 0;
/**
* Construct an OperationHelper object with the associated type.
* @param opType the type of operation.
*/
OperationHelper(OperationType opType) {
this.opType = opType;
switch (opType) {
case PUT:
requestParallelism = PUT_REQUEST_PARALLELISM;
break;
case GET:
requestParallelism = GET_REQUEST_PARALLELISM;
break;
case DELETE:
requestParallelism = DELETE_REQUEST_PARALLELISM;
break;
}
}
/**
* Submit a put, get or delete operation based on the associated {@link OperationType} of this object.
* @param blobId the blobId to get or delete. For puts, this is ignored.
* @return the {@link FutureResult} associated with the submitted operation.
*/
FutureResult submitOperation(String blobId) {
FutureResult futureResult = null;
switch (opType) {
case PUT:
futureResult = new FutureResult<String>();
ReadableStreamChannel putChannel = new ByteBufferReadableStreamChannel(ByteBuffer.wrap(putContent));
putManager.submitPutBlobOperation(putBlobProperties, putUserMetadata, putChannel, futureResult, null);
break;
case GET:
final FutureResult getFutureResult = new FutureResult<GetBlobResultInternal>();
getManager.submitGetBlobOperation(blobId, new GetBlobOptionsInternal(
new GetBlobOptionsBuilder().operationType(GetBlobOptions.OperationType.BlobInfo).build(), false),
new Callback<GetBlobResultInternal>() {
@Override
public void onCompletion(GetBlobResultInternal result, Exception exception) {
getFutureResult.done(result, exception);
}
});
futureResult = getFutureResult;
break;
case DELETE:
futureResult = new FutureResult<Void>();
deleteManager.submitDeleteBlobOperation(blobId, null, futureResult, null);
break;
}
NonBlockingRouter.currentOperationsCount.incrementAndGet();
return futureResult;
}
/**
* Poll the associated operation manager.
* @param requestInfos the list of {@link RequestInfo} to pass in the poll call.
*/
void pollOpManager(List<RequestInfo> requestInfos) {
switch (opType) {
case PUT:
putManager.poll(requestInfos);
break;
case GET:
getManager.poll(requestInfos);
break;
case DELETE:
deleteManager.poll(requestInfos);
break;
}
}
/**
* Hand over a responseInfo to the operation manager.
* @param responseInfo the {@link ResponseInfo} to hand over.
*/
void handleResponse(ResponseInfo responseInfo) {
switch (opType) {
case PUT:
putManager.handleResponse(responseInfo);
break;
case GET:
getManager.handleResponse(responseInfo);
break;
case DELETE:
deleteManager.handleResponse(responseInfo);
break;
}
}
}
}