package com.spotify.heroic.cluster;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import com.clearspring.analytics.util.Lists;
import com.google.common.collect.ImmutableList;
import com.spotify.heroic.AbstractLocalClusterIT;
import com.spotify.heroic.HeroicCoreInstance;
import eu.toolchain.async.AsyncFuture;
import eu.toolchain.async.StreamCollector;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.junit.Test;
@Slf4j
public class CoreClusterManagerIT extends AbstractLocalClusterIT {
private final ControlledNodeMetadataFactory metadata1 = new ControlledNodeMetadataFactory();
private final ControlledNodeMetadataFactory metadata2 = new ControlledNodeMetadataFactory();
private final List<ControlledNodeMetadataFactory> metadataFactories =
ImmutableList.of(metadata1, metadata2);
private static final Random random = new Random();
@Override
protected String protocol() {
return "grpc";
}
@Override
protected List<NodeMetadataFactory> metadataFactories() {
return ImmutableList.of(metadata1, metadata2);
}
/**
* This tests the assumption that it is safe to perform refreshes, even when there are
* requests pending, or about to be fired.
*
* The test will continue to run, while the refresh thread is running.
*/
@Test
public void testSafeRefresh() throws Exception {
final int numberOfRefreshes = 100;
final int requestsPerIteration = 100;
final HeroicCoreInstance a = instances.get(0);
final ClusterManager clusterManager = a.inject(ClusterComponent::clusterManager);
/* setup a thread that performs the given number of refresh iterations */
final ClusterRefreshThread t = setupRefreshThread(clusterManager, numberOfRefreshes);
final DataStreamCollector collector = new DataStreamCollector();
while (!t.shutdown.get()) {
final List<Callable<AsyncFuture<Void>>> operations = new ArrayList<>();
for (int i = 0; i < requestsPerIteration; i++) {
operations.add(() -> {
final List<AsyncFuture<Void>> pongs = new ArrayList<>();
for (final ClusterShard shard : clusterManager.useDefaultGroup()) {
for (int p = 0; p < 10; p++) {
pongs.addAll(pingAllNodesInShard(clusterManager, shard));
}
}
return async.collectAndDiscard(pongs);
});
}
async.eventuallyCollect(operations, collector, 20).get();
}
t.join();
assertNull("no errors during refreshes", t.refreshError.get());
assertTrue("number of refreshes are non-zero", t.refreshes.get() > 0);
assertTrue("number of resolved requests are non-zero", collector.resolved.get() > 0);
assertEquals("expect no cancelled requests", 0, collector.cancelled.get());
assertEquals("expect no failed requests", 0, collector.failed.get());
assertTrue(collector.ended);
}
private List<AsyncFuture<Void>> pingAllNodesInShard(
ClusterManager clusterManager, ClusterShard shard
) {
final List<AsyncFuture<Void>> futures = Lists.newArrayList();
final List<ClusterNode> excludeIds = Lists.newArrayList();
while (true) {
Optional<ClusterManager.NodeResult<AsyncFuture<Void>>> ret =
clusterManager.withNodeInShardButNotWithId(shard.getShard(), excludeIds::contains,
ClusterNode.Group::ping);
if (!ret.isPresent()) {
// No more nodes available in shard, we're done
return futures;
}
ClusterManager.NodeResult<AsyncFuture<Void>> result = ret.get();
futures.add(result.getReturnValue());
excludeIds.add(result.getNode());
}
}
private ClusterRefreshThread setupRefreshThread(
final ClusterManager clusterManager, final int iterations
) {
final ClusterRefreshThread t = new ClusterRefreshThread(clusterManager, iterations);
t.setName("refresh-test-" + UUID.randomUUID());
t.start();
return t;
}
@RequiredArgsConstructor
private class ClusterRefreshThread extends Thread {
private final ClusterManager clusterManager;
private final int iterations;
private final AtomicReference<Exception> refreshError = new AtomicReference<>();
private final AtomicInteger refreshes = new AtomicInteger();
private final AtomicBoolean shutdown = new AtomicBoolean(false);
@Override
public void run() {
for (int i = 0; i < iterations; i++) {
/* randomize metadata state to cause refreshes/failures */
for (final ControlledNodeMetadataFactory factory : metadataFactories) {
switch (random.nextInt(2)) {
case 0:
factory.setId(UUID.randomUUID());
break;
case 1:
factory.setFail(!factory.fail);
break;
}
}
try {
clusterManager.refresh().get();
} catch (final Exception e) {
refreshError.set(e);
break;
}
refreshes.addAndGet(1);
}
shutdown.set(true);
}
}
private static class DataStreamCollector implements StreamCollector<Void, Void> {
private final AtomicInteger resolved = new AtomicInteger();
private final AtomicInteger failed = new AtomicInteger();
private final AtomicInteger cancelled = new AtomicInteger();
private boolean ended = false;
@Override
public void resolved(final Void result) throws Exception {
resolved.getAndAdd(1);
}
@Override
public void failed(final Throwable cause) throws Exception {
failed.getAndAdd(1);
}
@Override
public void cancelled() throws Exception {
cancelled.getAndAdd(1);
}
@Override
public Void end(final int resolved, final int failed, final int cancelled)
throws Exception {
ended = true;
return null;
}
}
private static class ControlledNodeMetadataFactory implements NodeMetadataFactory {
private Optional<UUID> id = Optional.empty();
private boolean fail = false;
public void setId(UUID id) {
this.id = Optional.of(id);
}
public void setFail(boolean fail) {
this.fail = fail;
}
@Override
public NodeMetadataProvider buildProvider(
final NodeMetadata localMetadata
) {
return () -> {
if (fail) {
throw new RuntimeException("a failure");
}
NodeMetadata node = localMetadata;
if (id.isPresent()) {
node = new NodeMetadata(node.getVersion(), id.get(), node.getTags(),
node.getService());
}
return node;
};
}
}
}