/*
* Copyright 2017 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
package com.github.ambry.router;
import com.codahale.metrics.Counter;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import com.github.ambry.clustermap.MockDataNodeId;
import com.github.ambry.clustermap.MockPartitionId;
import com.github.ambry.clustermap.MockReplicaId;
import com.github.ambry.clustermap.ReplicaId;
import com.github.ambry.network.Port;
import com.github.ambry.network.PortType;
import com.github.ambry.utils.MockTime;
import com.github.ambry.utils.Pair;
import com.github.ambry.utils.TestUtils;
import com.github.ambry.utils.Time;
import com.github.ambry.utils.Utils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Unit test for {@link AdaptiveOperationTracker} that tests the adaptability based on request latencies. This class
 * only tests features not already tested in {@link OperationTrackerTest}.
 *
 * The status of an operation is represented in the following format:
 *
 * local unsent count-local inflight count-local succeeded count-local failed count;
 * remote unsent count-remote inflight count-remote succeeded count-remote failed count
 *
 * For example: 3-0-0-0; 9-0-0-0
 */
public class AdaptiveOperationTrackerTest {
  private static final int REPLICA_COUNT = 6;
  private static final int PORT = 6666;
  private static final double QUANTILE = 0.9;
  // each range is (lowest latency, highest latency) in ms; see primeTracker() for how values are drawn from it
  private static final Pair<Long, Long> LOCAL_COLO_LATENCY_RANGE = new Pair<>(0L, 100L);
  private static final Pair<Long, Long> CROSS_COLO_LATENCY_RANGE = new Pair<>(120L, 220L);

  private final List<MockDataNodeId> datanodes;
  private final MockPartitionId mockPartition;
  private final String localDcName;
  // replicas to which a request has been "sent" and for which no response has been fed to the tracker yet
  private final LinkedList<ReplicaId> inflightReplicas = new LinkedList<>();
  // every replica ever handed out by a tracker's iterator; used to detect a replica being returned twice
  private final Set<ReplicaId> repetitionTracker = new HashSet<>();
  private final Time time = new MockTime();
  private final MetricRegistry registry = new MetricRegistry();
  private final Histogram localColoTracker = registry.histogram("LocalColoTracker");
  private final Histogram crossColoTracker = registry.histogram("CrossColoTracker");
  private final Counter pastDueCounter = registry.counter("PastDueCounter");

  /**
   * Constructor that sets up state: two datanodes (in "dc-0" and "dc-1") and a partition with {@link #REPLICA_COUNT}
   * replicas that are assigned to the two datanodes alternately. The datacenter of the first datanode is treated as
   * the local datacenter.
   */
  public AdaptiveOperationTrackerTest() {
    List<Port> portList = Collections.singletonList(new Port(PORT, PortType.PLAINTEXT));
    List<String> mountPaths = Collections.singletonList("mockMountPath");
    datanodes = new ArrayList<>(Arrays.asList(new MockDataNodeId(portList, mountPaths, "dc-0"),
        new MockDataNodeId(portList, mountPaths, "dc-1")));
    localDcName = datanodes.get(0).getDatacenterName();
    mockPartition = new MockPartitionId();
    for (int i = 0; i < REPLICA_COUNT; i++) {
      mockPartition.replicaIds.add(new MockReplicaId(PORT, mockPartition, datanodes.get(i % datanodes.size()), 0));
    }
  }

  /**
   * Tests that requests are discounted from the parallelism count once they move beyond the tolerance quantile.
   * @throws InterruptedException if the sleep on the mock time is interrupted.
   */
  @Test
  public void adaptationTest() throws InterruptedException {
    primeTracker(localColoTracker, AdaptiveOperationTracker.MIN_DATA_POINTS_REQUIRED, LOCAL_COLO_LATENCY_RANGE);
    primeTracker(crossColoTracker, AdaptiveOperationTracker.MIN_DATA_POINTS_REQUIRED, CROSS_COLO_LATENCY_RANGE);
    double localColoCutoff = localColoTracker.getSnapshot().getValue(QUANTILE);
    double crossColoCutoff = crossColoTracker.getSnapshot().getValue(QUANTILE);

    OperationTracker ot = getOperationTracker(true, REPLICA_COUNT, 2);
    // 3-0-0-0; 3-0-0-0
    sendRequests(ot, 2);
    // 1-2-0-0; 3-0-0-0
    // sleep for less than the cutoff
    time.sleep((long) localColoCutoff - 2);
    sendRequests(ot, 0);
    // push it over the edge
    time.sleep(5);
    // should send two requests because both of the oldest requests are past their due times
    // the second of the two requests is a cross colo request
    sendRequests(ot, 2);
    // 0-3-0-0; 2-1-0-0
    time.sleep((long) localColoCutoff + 2);
    // second cross colo request sent (local colo request is past due but the first cross colo request is not past due).
    sendRequests(ot, 1);
    // 0-3-0-0; 1-2-0-0
    long sleepTime = (long) localColoCutoff + 2;
    time.sleep(sleepTime);
    // no requests should be sent. At this point the first cross colo request has been outstanding for
    // 2 * ((long) localColoCutoff + 2) ms, so this step relies on that being less than crossColoCutoff.
    sendRequests(ot, 0);
    // 0-3-0-0; 1-2-0-0
    sleepTime = (long) (crossColoCutoff - localColoCutoff) + 2;
    time.sleep(sleepTime);
    // third cross colo request sent (first cross colo request is past due)
    sendRequests(ot, 1);
    // 0-3-0-0; 0-3-0-0
    time.sleep((long) crossColoCutoff + 2);
    // no more replicas left to send requests to
    sendRequests(ot, 0);

    // generate a response for every request and make sure there are no errors
    for (int i = 0; i < REPLICA_COUNT; i++) {
      assertFalse("Operation should not be done", ot.isDone());
      ot.onResponse(inflightReplicas.poll(), true);
    }
    assertTrue("Operation should have succeeded", ot.hasSucceeded());
    // past due counter should be REPLICA_COUNT - 2
    assertEquals("Past due counter is inconsistent", REPLICA_COUNT - 2, pastDueCounter.getCount());
  }

  /**
   * Tests that the {@link Histogram} instances used by {@link AdaptiveOperationTracker} are updated correctly on
   * successful requests.
   * @throws InterruptedException if the sleep on the mock time is interrupted.
   */
  @Test
  public void trackerUpdateOnSuccessTest() throws InterruptedException {
    doTrackerUpdateTest(true);
  }

  /**
   * Tests that the {@link Histogram} instances used by {@link AdaptiveOperationTracker} are updated correctly on failed
   * requests.
   * @throws InterruptedException if the sleep on the mock time is interrupted.
   */
  @Test
  public void trackerUpdateOnFailureTest() throws InterruptedException {
    doTrackerUpdateTest(false);
  }

  /**
   * Tests the case where there are no unexpired requests because the only unexpired request returned a failure. In
   * that case, the tracker must allow sending more requests.
   * @throws InterruptedException if the sleep on the mock time is interrupted.
   */
  @Test
  public void noUnexpiredRequestsTest() throws InterruptedException {
    primeTracker(localColoTracker, AdaptiveOperationTracker.MIN_DATA_POINTS_REQUIRED, LOCAL_COLO_LATENCY_RANGE);
    primeTracker(crossColoTracker, AdaptiveOperationTracker.MIN_DATA_POINTS_REQUIRED, CROSS_COLO_LATENCY_RANGE);
    double localColoCutoff = localColoTracker.getSnapshot().getValue(QUANTILE);

    OperationTracker ot = getOperationTracker(false, 1, 1);
    // 3-0-0-0
    sendRequests(ot, 1);
    // 2-1-0-0
    // sleep for a time greater than cutoff
    time.sleep((long) localColoCutoff + 2);
    sendRequests(ot, 1);
    // 1-2-0-0
    // provide a response to the second request that is not a success
    ot.onResponse(inflightReplicas.pollLast(), false);
    // 1-1-0-1
    assertFalse("Operation should not be done", ot.isDone());
    // should now be able to send one more request
    sendRequests(ot, 1);
    // 0-2-0-1
    ot.onResponse(inflightReplicas.pollLast(), true);
    // 0-1-1-1
    assertTrue("Operation should have succeeded", ot.hasSucceeded());
    // past due counter should be 1
    assertEquals("Past due counter is inconsistent", 1, pastDueCounter.getCount());
  }

  /**
   * Tests the case where the tracker is updated b/w the {@link Iterator#hasNext()} and {@link Iterator#next()} calls.
   * @throws InterruptedException if the sleep on the mock time is interrupted.
   */
  @Test
  public void trackerUpdateBetweenHasNextAndNextTest() throws InterruptedException {
    primeTracker(localColoTracker, AdaptiveOperationTracker.MIN_DATA_POINTS_REQUIRED, LOCAL_COLO_LATENCY_RANGE);
    primeTracker(crossColoTracker, AdaptiveOperationTracker.MIN_DATA_POINTS_REQUIRED, CROSS_COLO_LATENCY_RANGE);
    // a quantile of 1 is used (both here and in the tracker below) so that a single larger data point inserted later
    // is enough to move the cutoff
    double localColoCutoff = localColoTracker.getSnapshot().getValue(1);

    OperationTracker ot =
        new AdaptiveOperationTracker(localDcName, mockPartition, false, 1, 1, time, localColoTracker, null,
            pastDueCounter, 1);
    // 3-0-0-0
    sendRequests(ot, 1);
    // 2-1-0-0
    // sleep for a time greater than cutoff
    time.sleep((long) localColoCutoff + 2);
    // now get an iterator and call hasNext() on it
    Iterator<ReplicaId> replicaIterator = ot.getReplicaIterator();
    assertTrue("There should be a replica to send to", replicaIterator.hasNext());
    // now insert a value in the tracker such that it is the max value. However, the return value of hasNext() must
    // not change even though the tracker has changed its return value for getSnapshot().getValue(1).
    long valueToInsert = 2 * (long) localColoCutoff;
    localColoTracker.update(valueToInsert);
    assertEquals("Tracker's snapshot should return the max value", valueToInsert,
        (long) localColoTracker.getSnapshot().getValue(1));
    // hasNext() should not change its return value
    assertTrue("There should be a replica to send to", replicaIterator.hasNext());

    sendRequests(ot, 1);
    // 1-2-0-0
    ot.onResponse(inflightReplicas.pollLast(), true);
    // 1-1-1-0
    assertTrue("Operation should have succeeded", ot.hasSucceeded());
    // past due counter should be 1
    assertEquals("Past due counter is inconsistent", 1, pastDueCounter.getCount());
  }

  // helpers
  // general

  /**
   * Returns an instance of {@link AdaptiveOperationTracker} that uses {@link #QUANTILE} as its quantile and the
   * histograms and past due counter of this test. The cross colo histogram is passed only if cross colo is enabled
   * ({@code null} is passed otherwise).
   * @param crossColoEnabled {@code true} if cross colo needs to be enabled. {@code false} otherwise.
   * @param successTarget the number of successful responses required for the operation to succeed.
   * @param parallelism the number of parallel requests that can be in flight.
   * @return an instance of {@link AdaptiveOperationTracker} with the given parameters.
   */
  private OperationTracker getOperationTracker(boolean crossColoEnabled, int successTarget, int parallelism) {
    return new AdaptiveOperationTracker(localDcName, mockPartition, crossColoEnabled, successTarget, parallelism, time,
        localColoTracker, crossColoEnabled ? crossColoTracker : null, pastDueCounter, QUANTILE);
  }

  /**
   * Updates the {@code tracker} to mimic {@code numRequests} requests, each taking a randomly chosen latency that
   * lies within {@code latencyRange}.
   * @param tracker the {@link Histogram} to update
   * @param numRequests the number of requests (data points)
   * @param latencyRange the range of latencies (in ms) to generate and record, as (lowest latency, highest latency).
   */
  private void primeTracker(Histogram tracker, long numRequests, Pair<Long, Long> latencyRange) {
    long lowestLatency = latencyRange.getFirst();
    // the random offset must be bounded by the width of the range (and not by its upper end), otherwise the
    // generated latencies overshoot the upper end of the range by as much as the lower end.
    long rangeWidth = latencyRange.getSecond() - lowestLatency;
    for (long i = 0; i < numRequests; i++) {
      tracker.update(lowestLatency + Utils.getRandomLong(TestUtils.RANDOM, rangeWidth));
    }
  }

  /**
   * Send requests to all replicas provided by the {@link OperationTracker#getReplicaIterator()}. Every replica that
   * is returned is checked to have never been returned before, is recorded as inflight and is removed from the
   * iterator.
   * @param operationTracker the {@link OperationTracker} that provides replicas.
   * @param numRequestsExpected the number of requests expected to be sent out.
   */
  private void sendRequests(OperationTracker operationTracker, int numRequestsExpected) {
    int sent = 0;
    Iterator<ReplicaId> replicaIdIterator = operationTracker.getReplicaIterator();
    while (replicaIdIterator.hasNext()) {
      ReplicaId nextReplica = replicaIdIterator.next();
      assertNotNull("There should be a replica to send a request to", nextReplica);
      assertFalse("Replica that was used for a request returned by iterator again",
          repetitionTracker.contains(nextReplica));
      inflightReplicas.offer(nextReplica);
      repetitionTracker.add(nextReplica);
      replicaIdIterator.remove();
      sent++;
    }
    assertEquals("Did not send expected number of requests", numRequestsExpected, sent);
  }

  // trackerUpdateTest() helpers

  /**
   * Tests that the {@link Histogram} instances used by {@link AdaptiveOperationTracker} are updated correctly.
   * @param succeedRequests {@code true} if the requests should receive successful responses. {@code false} otherwise.
   * @throws InterruptedException if the sleep on the mock time is interrupted.
   */
  private void doTrackerUpdateTest(boolean succeedRequests) throws InterruptedException {
    long timeIncrement = 10;
    OperationTracker ot = getOperationTracker(true, REPLICA_COUNT, REPLICA_COUNT);
    // 3-0-0-0; 3-0-0-0
    sendRequests(ot, REPLICA_COUNT);
    // 0-3-0-0; 0-3-0-0
    // All requests are sent at the same time and the i-th response (1-based) is recorded i * timeIncrement ms later.
    // The expected values are the running means of those latencies, written as floating point multiples so that
    // they stay correct even if timeIncrement is odd.
    double[] localColoExpectedAverages = {1.0 * timeIncrement, 1.5 * timeIncrement, 2.0 * timeIncrement};
    verifyHistogramRecording(ot, succeedRequests, timeIncrement, localColoExpectedAverages, localColoTracker);
    double[] crossColoExpectedAverages = {4.0 * timeIncrement, 4.5 * timeIncrement, 5.0 * timeIncrement};
    verifyHistogramRecording(ot, succeedRequests, timeIncrement, crossColoExpectedAverages, crossColoTracker);
    assertEquals("Operation success state is unexpected", succeedRequests, ot.hasSucceeded());
    assertTrue("Operation should be done", ot.isDone());
  }

  /**
   * Verifies that the {@code tracker} is updated correctly when responses are received. One response is recorded
   * (for the oldest inflight replica) per entry in {@code expectedAverages}.
   * @param ot the {@link OperationTracker} to use.
   * @param succeedRequests {@code true} if the requests should receive successful responses. {@code false} otherwise.
   * @param timeIncrement increment of time (in ms) before responses are recorded for each request.
   * @param expectedAverages the expected averages after every response that is recorded.
   * @param tracker the {@link Histogram} that should be updated.
   * @throws InterruptedException if the sleep on the mock time is interrupted.
   */
  private void verifyHistogramRecording(OperationTracker ot, boolean succeedRequests, long timeIncrement,
      double[] expectedAverages, Histogram tracker) throws InterruptedException {
    for (double expectedAverage : expectedAverages) {
      time.sleep(timeIncrement);
      ot.onResponse(inflightReplicas.poll(), succeedRequests);
      assertEquals("Average does not match. Histogram recording may be incorrect", expectedAverage,
          tracker.getSnapshot().getMean(), 0.001);
    }
  }
}