/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.cluster;
import com.google.common.base.Predicate;
import com.google.common.util.concurrent.ListenableFuture;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.admin.cluster.tasks.PendingClusterTasksResponse;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.service.InternalClusterService;
import org.elasticsearch.cluster.service.PendingClusterTask;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.component.AbstractLifecycleComponent;
import org.elasticsearch.common.component.LifecycleComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.Singleton;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
import org.elasticsearch.test.MockLogAppender;
import org.elasticsearch.test.junit.annotations.TestLogging;
import org.elasticsearch.threadpool.ThreadPool;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import static org.elasticsearch.common.settings.Settings.settingsBuilder;
import static org.elasticsearch.test.ESIntegTestCase.Scope;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.notNullValue;
/**
 * Integration tests for cluster state update tasks submitted through the {@link ClusterService}:
 * timeouts, acking, master awareness, pending task reporting, prioritization, batching and logging.
 */
@ClusterScope(scope = Scope.TEST, numDataNodes = 0)
@ESIntegTestCase.SuppressLocalMode
public class ClusterServiceIT extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return pluginList(TestPlugin.class);
}
@Test
public void testTimeoutUpdateTask() throws Exception {
Settings settings = settingsBuilder()
.put("discovery.type", "local")
.build();
internalCluster().startNode(settings);
ClusterService clusterService1 = internalCluster().getInstance(ClusterService.class);
final CountDownLatch block = new CountDownLatch(1);
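// cluster state updates are applied by a single thread (see the note on testPrioritizedTasks),
// so this blocking task holds up every task submitted after it until the latch is released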
clusterService1.submitStateUpdateTask("test1", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
try {
block.await();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
return currentState;
}
@Override
public void onFailure(String source, Throwable t) {
throw new RuntimeException(t);
}
});
final CountDownLatch timedOut = new CountDownLatch(1);
final AtomicBoolean executeCalled = new AtomicBoolean();
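// while the update thread is blocked, this task's 2ms timeout elapses in the queue:
// onFailure should be called with a timeout and execute() must never run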
clusterService1.submitStateUpdateTask("test2", new ClusterStateUpdateTask() {
@Override
public TimeValue timeout() {
return TimeValue.timeValueMillis(2);
}
@Override
public void onFailure(String source, Throwable t) {
timedOut.countDown();
}
@Override
public ClusterState execute(ClusterState currentState) {
executeCalled.set(true);
return currentState;
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
}
});
timedOut.await();
block.countDown();
final CountDownLatch allProcessed = new CountDownLatch(1);
clusterService1.submitStateUpdateTask("test3", new ClusterStateUpdateTask() {
@Override
public void onFailure(String source, Throwable t) {
throw new RuntimeException(t);
}
@Override
public ClusterState execute(ClusterState currentState) {
allProcessed.countDown();
return currentState;
}
});
allProcessed.await(); // execute another task to double-check that execute() on the timed-out update task is never called
assertThat(executeCalled.get(), equalTo(false));
}
@Test
public void testAckedUpdateTask() throws Exception {
Settings settings = settingsBuilder()
.put("discovery.type", "local")
.build();
internalCluster().startNode(settings);
ClusterService clusterService = internalCluster().getInstance(ClusterService.class);
final AtomicBoolean allNodesAcked = new AtomicBoolean(false);
final AtomicBoolean ackTimeout = new AtomicBoolean(false);
final AtomicBoolean onFailure = new AtomicBoolean(false);
final AtomicBoolean executed = new AtomicBoolean(false);
final CountDownLatch latch = new CountDownLatch(1);
final CountDownLatch processedLatch = new CountDownLatch(1);
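// an acked task publishes the new state and then waits for acknowledgements from all nodes
// for which mustAck() returns true; onAllNodesAcked is expected to fire well within the 10s ack timeout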
clusterService.submitStateUpdateTask("test", new AckedClusterStateUpdateTask<Void>(null, null) {
@Override
protected Void newResponse(boolean acknowledged) {
return null;
}
@Override
public boolean mustAck(DiscoveryNode discoveryNode) {
return true;
}
@Override
public void onAllNodesAcked(@Nullable Throwable t) {
allNodesAcked.set(true);
latch.countDown();
}
@Override
public void onAckTimeout() {
ackTimeout.set(true);
latch.countDown();
}
@Override
public TimeValue ackTimeout() {
return TimeValue.timeValueSeconds(10);
}
@Override
public TimeValue timeout() {
return TimeValue.timeValueSeconds(10);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
processedLatch.countDown();
}
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
executed.set(true);
return ClusterState.builder(currentState).build();
}
@Override
public void onFailure(String source, Throwable t) {
logger.error("failed to execute callback in test {}", t, source);
onFailure.set(true);
latch.countDown();
}
});
ensureGreen();
assertThat(latch.await(1, TimeUnit.SECONDS), equalTo(true));
assertThat(allNodesAcked.get(), equalTo(true));
assertThat(ackTimeout.get(), equalTo(false));
assertThat(executed.get(), equalTo(true));
assertThat(onFailure.get(), equalTo(false));
assertThat(processedLatch.await(1, TimeUnit.SECONDS), equalTo(true));
}
@Test
public void testAckedUpdateTaskSameClusterState() throws Exception {
Settings settings = settingsBuilder()
.put("discovery.type", "local")
.build();
internalCluster().startNode(settings);
ClusterService clusterService = internalCluster().getInstance(ClusterService.class);
final AtomicBoolean allNodesAcked = new AtomicBoolean(false);
final AtomicBoolean ackTimeout = new AtomicBoolean(false);
final AtomicBoolean onFailure = new AtomicBoolean(false);
final AtomicBoolean executed = new AtomicBoolean(false);
final CountDownLatch latch = new CountDownLatch(1);
final CountDownLatch processedLatch = new CountDownLatch(1);
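// same flow as above, except execute() returns the identical ClusterState instance; the task
// should still complete as fully acknowledged even though there is no changed state to publish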
clusterService.submitStateUpdateTask("test", new AckedClusterStateUpdateTask<Void>(null, null) {
@Override
protected Void newResponse(boolean acknowledged) {
return null;
}
@Override
public void onAllNodesAcked(@Nullable Throwable t) {
allNodesAcked.set(true);
latch.countDown();
}
@Override
public void onAckTimeout() {
ackTimeout.set(true);
latch.countDown();
}
@Override
public TimeValue ackTimeout() {
return TimeValue.timeValueSeconds(10);
}
@Override
public TimeValue timeout() {
return TimeValue.timeValueSeconds(10);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
processedLatch.countDown();
}
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
executed.set(true);
return currentState;
}
@Override
public void onFailure(String source, Throwable t) {
logger.error("failed to execute callback in test {}", t, source);
onFailure.set(true);
latch.countDown();
}
});
ensureGreen();
assertThat(latch.await(1, TimeUnit.SECONDS), equalTo(true));
assertThat(allNodesAcked.get(), equalTo(true));
assertThat(ackTimeout.get(), equalTo(false));
assertThat(executed.get(), equalTo(true));
assertThat(onFailure.get(), equalTo(false));
assertThat(processedLatch.await(1, TimeUnit.SECONDS), equalTo(true));
}
@Test
public void testMasterAwareExecution() throws Exception {
Settings settings = settingsBuilder()
.put("discovery.type", "local")
.build();
ListenableFuture<String> master = internalCluster().startNodeAsync(settings);
ListenableFuture<String> nonMaster = internalCluster().startNodeAsync(settingsBuilder().put(settings).put("node.master", false).build());
master.get();
ensureGreen(); // make sure we have a cluster
ClusterService clusterService = internalCluster().getInstance(ClusterService.class, nonMaster.get());
final boolean[] taskFailed = {false};
final CountDownLatch latch1 = new CountDownLatch(1);
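// a plain ClusterStateUpdateTask only runs on the elected master, so submitting it
// on the non-master node should end up in onFailure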
clusterService.submitStateUpdateTask("test", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
latch1.countDown();
return currentState;
}
@Override
public void onFailure(String source, Throwable t) {
taskFailed[0] = true;
latch1.countDown();
}
});
latch1.await();
assertTrue("cluster state update task was executed on a non-master", taskFailed[0]);
taskFailed[0] = true;
final CountDownLatch latch2 = new CountDownLatch(1);
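// a ClusterStateNonMasterUpdateTask, by contrast, is allowed to execute on a non-master node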
clusterService.submitStateUpdateTask("test", new ClusterStateNonMasterUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
taskFailed[0] = false;
latch2.countDown();
return currentState;
}
@Override
public void onFailure(String source, Throwable t) {
taskFailed[0] = true;
latch2.countDown();
}
});
latch2.await();
assertFalse("non-master cluster state update task was not executed", taskFailed[0]);
}
@Test
public void testAckedUpdateTaskNoAckExpected() throws Exception {
Settings settings = settingsBuilder()
.put("discovery.type", "local")
.build();
internalCluster().startNode(settings);
ClusterService clusterService = internalCluster().getInstance(ClusterService.class);
final AtomicBoolean allNodesAcked = new AtomicBoolean(false);
final AtomicBoolean ackTimeout = new AtomicBoolean(false);
final AtomicBoolean onFailure = new AtomicBoolean(false);
final AtomicBoolean executed = new AtomicBoolean(false);
final CountDownLatch latch = new CountDownLatch(1);
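// mustAck() returns false for every node, so no acknowledgements are awaited and
// onAllNodesAcked should fire right after the new state is published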
clusterService.submitStateUpdateTask("test", new AckedClusterStateUpdateTask<Void>(null, null) {
@Override
protected Void newResponse(boolean acknowledged) {
return null;
}
@Override
public boolean mustAck(DiscoveryNode discoveryNode) {
return false;
}
@Override
public void onAllNodesAcked(@Nullable Throwable t) {
allNodesAcked.set(true);
latch.countDown();
}
@Override
public void onAckTimeout() {
ackTimeout.set(true);
latch.countDown();
}
@Override
public TimeValue ackTimeout() {
return TimeValue.timeValueSeconds(10);
}
@Override
public TimeValue timeout() {
return TimeValue.timeValueSeconds(10);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
}
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
executed.set(true);
return ClusterState.builder(currentState).build();
}
@Override
public void onFailure(String source, Throwable t) {
logger.error("failed to execute callback in test {}", t, source);
onFailure.set(true);
latch.countDown();
}
});
ensureGreen();
assertThat(latch.await(1, TimeUnit.SECONDS), equalTo(true));
assertThat(allNodesAcked.get(), equalTo(true));
assertThat(ackTimeout.get(), equalTo(false));
assertThat(executed.get(), equalTo(true));
assertThat(onFailure.get(), equalTo(false));
}
@Test
public void testAckedUpdateTaskTimeoutZero() throws Exception {
Settings settings = settingsBuilder()
.put("discovery.type", "local")
.build();
internalCluster().startNode(settings);
ClusterService clusterService = internalCluster().getInstance(ClusterService.class);
final AtomicBoolean allNodesAcked = new AtomicBoolean(false);
final AtomicBoolean ackTimeout = new AtomicBoolean(false);
final AtomicBoolean onFailure = new AtomicBoolean(false);
final AtomicBoolean executed = new AtomicBoolean(false);
final CountDownLatch latch = new CountDownLatch(1);
final CountDownLatch processedLatch = new CountDownLatch(1);
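// with an ack timeout of zero we never wait for acknowledgements: the task still executes
// and the state is processed, but onAckTimeout is expected instead of onAllNodesAcked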
clusterService.submitStateUpdateTask("test", new AckedClusterStateUpdateTask<Void>(null, null) {
@Override
protected Void newResponse(boolean acknowledged) {
return null;
}
@Override
public boolean mustAck(DiscoveryNode discoveryNode) {
return false;
}
@Override
public void onAllNodesAcked(@Nullable Throwable t) {
allNodesAcked.set(true);
latch.countDown();
}
@Override
public void onAckTimeout() {
ackTimeout.set(true);
latch.countDown();
}
@Override
public TimeValue ackTimeout() {
return TimeValue.timeValueSeconds(0);
}
@Override
public TimeValue timeout() {
return TimeValue.timeValueSeconds(10);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
processedLatch.countDown();
}
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
executed.set(true);
return ClusterState.builder(currentState).build();
}
@Override
public void onFailure(String source, Throwable t) {
logger.error("failed to execute callback in test {}", t, source);
onFailure.set(true);
latch.countDown();
}
});
ensureGreen();
assertThat(latch.await(1, TimeUnit.SECONDS), equalTo(true));
assertThat(allNodesAcked.get(), equalTo(false));
assertThat(ackTimeout.get(), equalTo(true));
assertThat(executed.get(), equalTo(true));
assertThat(onFailure.get(), equalTo(false));
assertThat(processedLatch.await(1, TimeUnit.SECONDS), equalTo(true));
}
@Test
public void testPendingUpdateTask() throws Exception {
Settings settings = settingsBuilder()
.put("discovery.type", "local")
.build();
String node_0 = internalCluster().startNode(settings);
internalCluster().startNodeClient(settings);
final ClusterService clusterService = internalCluster().getInstance(ClusterService.class, node_0);
final CountDownLatch block1 = new CountDownLatch(1);
final CountDownLatch invoked1 = new CountDownLatch(1);
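// occupy the update thread with task "1" so that tasks "2".."10" pile up in the
// pending queue where we can inspect them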
clusterService.submitStateUpdateTask("1", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
invoked1.countDown();
try {
block1.await();
} catch (InterruptedException e) {
fail();
}
return currentState;
}
@Override
public void onFailure(String source, Throwable t) {
invoked1.countDown();
fail();
}
});
invoked1.await();
final CountDownLatch invoked2 = new CountDownLatch(9);
for (int i = 2; i <= 10; i++) {
clusterService.submitStateUpdateTask(Integer.toString(i), new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
return currentState;
}
@Override
public void onFailure(String source, Throwable t) {
fail();
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
invoked2.countDown();
}
});
}
// there might be other tasks on this node, so make sure to only take the ones we added in this test into account
// The tasks can be re-ordered, so we need to check out-of-order
Set<String> controlSources = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"));
List<PendingClusterTask> pendingClusterTasks = clusterService.pendingTasks();
assertThat(pendingClusterTasks.size(), greaterThanOrEqualTo(10));
assertThat(pendingClusterTasks.get(0).getSource().string(), equalTo("1"));
assertThat(pendingClusterTasks.get(0).isExecuting(), equalTo(true));
for (PendingClusterTask task : pendingClusterTasks) {
controlSources.remove(task.getSource().string());
}
assertTrue(controlSources.isEmpty());
controlSources = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"));
PendingClusterTasksResponse response = internalCluster().clientNodeClient().admin().cluster().preparePendingClusterTasks().execute().actionGet();
assertThat(response.pendingTasks().size(), greaterThanOrEqualTo(10));
assertThat(response.pendingTasks().get(0).getSource().string(), equalTo("1"));
assertThat(response.pendingTasks().get(0).isExecuting(), equalTo(true));
for (PendingClusterTask task : response) {
controlSources.remove(task.getSource().string());
}
assertTrue(controlSources.isEmpty());
block1.countDown();
invoked2.await();
// whenever we test for no tasks, we need to awaitBusy since this is a live node
assertTrue(awaitBusy(new Predicate<Object>() {
@Override
public boolean apply(Object input) {
return clusterService.pendingTasks().isEmpty();
}
}));
waitNoPendingTasksOnAll();
final CountDownLatch block2 = new CountDownLatch(1);
final CountDownLatch invoked3 = new CountDownLatch(1);
clusterService.submitStateUpdateTask("1", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
invoked3.countDown();
try {
block2.await();
} catch (InterruptedException e) {
fail();
}
return currentState;
}
@Override
public void onFailure(String source, Throwable t) {
invoked3.countDown();
fail();
}
});
invoked3.await();
for (int i = 2; i <= 5; i++) {
clusterService.submitStateUpdateTask(Integer.toString(i), new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
return currentState;
}
@Override
public void onFailure(String source, Throwable t) {
fail();
}
});
}
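// give the just-submitted tasks a moment to register in the pending queue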
Thread.sleep(100);
pendingClusterTasks = clusterService.pendingTasks();
assertThat(pendingClusterTasks.size(), greaterThanOrEqualTo(5));
controlSources = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5"));
for (PendingClusterTask task : pendingClusterTasks) {
controlSources.remove(task.getSource().string());
}
assertTrue(controlSources.isEmpty());
response = internalCluster().clientNodeClient().admin().cluster().preparePendingClusterTasks().get();
assertThat(response.pendingTasks().size(), greaterThanOrEqualTo(5));
controlSources = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5"));
for (PendingClusterTask task : response) {
if (controlSources.remove(task.getSource().string())) {
assertThat(task.getTimeInQueueInMillis(), greaterThan(0L));
}
}
assertTrue(controlSources.isEmpty());
block2.countDown();
}
@Test
public void testLocalNodeMasterListenerCallbacks() throws Exception {
Settings settings = settingsBuilder()
.put("discovery.type", "zen")
.put("discovery.zen.minimum_master_nodes", 1)
.put("discovery.zen.ping_timeout", "400ms")
.put("discovery.initial_state_timeout", "500ms")
.build();
String node_0 = internalCluster().startNode(settings);
ClusterService clusterService = internalCluster().getInstance(ClusterService.class);
MasterAwareService testService = internalCluster().getInstance(MasterAwareService.class);
ClusterHealthResponse clusterHealth = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForNodes("1").get();
assertThat(clusterHealth.isTimedOut(), equalTo(false));
// the first node should be elected master, as minimum_master_nodes is set to 1
assertThat(clusterService.state().nodes().masterNode(), notNullValue());
assertThat(clusterService.state().nodes().localNodeMaster(), is(true));
assertThat(testService.master(), is(true));
String node_1 = internalCluster().startNode(settings);
final ClusterService clusterService1 = internalCluster().getInstance(ClusterService.class, node_1);
MasterAwareService testService1 = internalCluster().getInstance(MasterAwareService.class, node_1);
clusterHealth = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForNodes("2").get();
assertThat(clusterHealth.isTimedOut(), equalTo(false));
// the second node should not become master, as the first node is already the master
assertThat(clusterService1.state().nodes().localNodeMaster(), is(false));
assertThat(testService1.master(), is(false));
internalCluster().stopCurrentMasterNode();
clusterHealth = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForNodes("1").get();
assertThat(clusterHealth.isTimedOut(), equalTo(false));
// now that node0 is closed, node1 should be elected as master
assertThat(clusterService1.state().nodes().localNodeMaster(), is(true));
assertThat(testService1.master(), is(true));
// start another node and set minimum_master_nodes to 2
internalCluster().startNode(Settings.builder().put(settings));
assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
Settings transientSettings = settingsBuilder()
.put("discovery.zen.minimum_master_nodes", 2)
.build();
client().admin().cluster().prepareUpdateSettings().setTransientSettings(transientSettings).get();
// and shutdown the second node
internalCluster().stopRandomNonMasterNode();
// there should not be any master, as the minimum number of master-eligible nodes is not met
awaitBusy(new Predicate<Object>() {
@Override
public boolean apply(Object obj) {
return clusterService1.state().nodes().masterNode() == null && clusterService1.state().status() == ClusterState.ClusterStateStatus.APPLIED;
}
});
assertThat(testService1.master(), is(false));
// bring the node back up
String node_2 = internalCluster().startNode(Settings.builder().put(settings).put(transientSettings));
ClusterService clusterService2 = internalCluster().getInstance(ClusterService.class, node_2);
MasterAwareService testService2 = internalCluster().getInstance(MasterAwareService.class, node_2);
// make sure both nodes see each other, otherwise the master node below could be null if node_2 is master and node_1 didn't receive the updated cluster state yet
assertThat(internalCluster().client(node_1).admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setLocal(true).setWaitForNodes("2").get().isTimedOut(), is(false));
assertThat(internalCluster().client(node_2).admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setLocal(true).setWaitForNodes("2").get().isTimedOut(), is(false));
// now that a second master-eligible node has joined, a new master should be elected
assertThat(clusterService2.state().nodes().masterNode(), is(notNullValue()));
if (node_2.equals(clusterService2.state().nodes().masterNode().name())) {
assertThat(testService1.master(), is(false));
assertThat(testService2.master(), is(true));
} else {
assertThat(testService1.master(), is(true));
assertThat(testService2.master(), is(false));
}
}
/**
* Note, this test can only work as long as we have a single thread executor executing the state update tasks!
*/
@Test
public void testPrioritizedTasks() throws Exception {
Settings settings = settingsBuilder()
.put("discovery.type", "local")
.build();
internalCluster().startNode(settings);
ClusterService clusterService = internalCluster().getInstance(ClusterService.class);
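// park the single update thread behind an IMMEDIATE priority task so the randomly
// prioritized tasks below all queue up first; once released they must be drained in priority order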
BlockingTask block = new BlockingTask(Priority.IMMEDIATE);
clusterService.submitStateUpdateTask("test", block);
int taskCount = randomIntBetween(5, 20);
Priority[] priorities = Priority.values();
// will hold all the tasks in the order in which they were executed
List<PrioritizedTask> tasks = new ArrayList<>(taskCount);
CountDownLatch latch = new CountDownLatch(taskCount);
for (int i = 0; i < taskCount; i++) {
Priority priority = priorities[randomIntBetween(0, priorities.length - 1)];
clusterService.submitStateUpdateTask("test", new PrioritizedTask(priority, latch, tasks));
}
block.release();
latch.await();
Priority prevPriority = null;
for (PrioritizedTask task : tasks) {
if (prevPriority == null) {
prevPriority = task.priority();
} else {
assertThat(task.priority().sameOrAfter(prevPriority), is(true));
prevPriority = task.priority();
}
}
}
/*
* test that a listener throwing an exception while handling a
* notification does not prevent publication notification to the
* executor
*/
public void testClusterStateTaskListenerThrowingExceptionIsOkay() throws InterruptedException {
Settings settings = settingsBuilder()
.put("discovery.type", "local")
.build();
internalCluster().startNode(settings);
ClusterService clusterService = internalCluster().getInstance(ClusterService.class);
final CountDownLatch latch = new CountDownLatch(1);
final AtomicBoolean published = new AtomicBoolean();
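// the listener below deliberately throws from clusterStateProcessed; the executor's
// clusterStatePublished callback must still be invoked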
clusterService.submitStateUpdateTask(
"testClusterStateTaskListenerThrowingExceptionIsOkay",
new Object(),
BasicClusterStateTaskConfig.create(Priority.NORMAL),
new ClusterStateTaskExecutor<Object>() {
@Override
public boolean runOnlyOnMaster() {
return false;
}
@Override
public BatchResult<Object> execute(ClusterState currentState, List<Object> tasks) throws Exception {
ClusterState newClusterState = ClusterState.builder(currentState).build();
return BatchResult.<Object>builder().successes(tasks).build(newClusterState, true);
}
@Override
public void clusterStatePublished(ClusterState newClusterState) {
published.set(true);
latch.countDown();
}
},
new AbstractClusterStateTaskListener() {
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
throw new IllegalStateException(source);
}
@Override
public void onFailure(String source, Throwable t) {
}
}
);
latch.await();
assertTrue(published.get());
}
public void testClusterStateBatchedUpdates() throws InterruptedException {
Settings settings = settingsBuilder()
.put("discovery.type", "local")
.build();
internalCluster().startNode(settings);
final ClusterService clusterService = internalCluster().getInstance(ClusterService.class);
final AtomicInteger counter = new AtomicInteger();
class Task {
private AtomicBoolean state = new AtomicBoolean();
public void execute() {
if (!state.compareAndSet(false, true)) {
throw new IllegalStateException();
} else {
counter.incrementAndGet();
}
}
}
int numberOfThreads = randomIntBetween(2, 8);
final int tasksSubmittedPerThread = randomIntBetween(1, 1024);
int numberOfExecutors = Math.max(1, numberOfThreads / 4);
final Semaphore semaphore = new Semaphore(numberOfExecutors);
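// one permit per executor: execute() acquires a permit whenever it produces a changed
// cluster state and clusterStatePublished() releases it, so acquiring all permits at the
// end waits for every publication callback to complete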
class TaskExecutor extends ClusterStateTaskExecutor<Task> {
private AtomicInteger counter = new AtomicInteger();
private AtomicInteger batches = new AtomicInteger();
private AtomicInteger published = new AtomicInteger();
@Override
public BatchResult<Task> execute(ClusterState currentState, List<Task> tasks) throws Exception {
for (Task task : tasks) {
task.execute();
}
counter.addAndGet(tasks.size());
ClusterState maybeUpdatedClusterState = currentState;
if (randomBoolean()) {
maybeUpdatedClusterState = ClusterState.builder(currentState).build();
batches.incrementAndGet();
semaphore.acquire();
}
return BatchResult.<Task>builder().successes(tasks).build(maybeUpdatedClusterState, true);
}
@Override
public boolean runOnlyOnMaster() {
return false;
}
@Override
public void clusterStatePublished(ClusterState newClusterState) {
published.incrementAndGet();
semaphore.release();
}
}
final ConcurrentMap<String, AtomicInteger> counters = new ConcurrentHashMap<>();
final CountDownLatch updateLatch = new CountDownLatch(numberOfThreads * tasksSubmittedPerThread);
final ClusterStateTaskListener listener = new AbstractClusterStateTaskListener() {
@Override
public void onFailure(String source, Throwable t) {
fail();
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
counters.putIfAbsent(source, new AtomicInteger());
counters.get(source).incrementAndGet();
updateLatch.countDown();
}
};
List<TaskExecutor> executors = new ArrayList<>();
for (int i = 0; i < numberOfExecutors; i++) {
executors.add(new TaskExecutor());
}
// randomly assign tasks to executors
final List<TaskExecutor> assignments = new ArrayList<>();
for (int i = 0; i < numberOfThreads; i++) {
for (int j = 0; j < tasksSubmittedPerThread; j++) {
assignments.add(randomFrom(executors));
}
}
Map<TaskExecutor, Integer> counts = new HashMap<>();
for (TaskExecutor executor : assignments) {
Integer count = counts.get(executor);
if (count == null) {
counts.put(executor, 0);
}
counts.put(executor, counts.get(executor) + 1);
}
final CountDownLatch startGate = new CountDownLatch(1);
final CountDownLatch endGate = new CountDownLatch(numberOfThreads);
final AtomicBoolean interrupted = new AtomicBoolean();
for (int i = 0; i < numberOfThreads; i++) {
final int index = i;
Thread thread = new Thread(new Runnable() {
@Override
public void run() {
try {
try {
startGate.await();
} catch (InterruptedException e) {
interrupted.set(true);
return;
}
for (int j = 0; j < tasksSubmittedPerThread; j++) {
ClusterStateTaskExecutor<Task> executor = assignments.get(index * tasksSubmittedPerThread + j);
clusterService.submitStateUpdateTask(
Thread.currentThread().getName(),
new Task(),
BasicClusterStateTaskConfig.create(randomFrom(Priority.values())),
executor,
listener);
}
} finally {
endGate.countDown();
}
}
});
thread.start();
}
startGate.countDown();
endGate.await();
// wait until all the cluster state updates have been processed
updateLatch.await();
// and until all of the publication callbacks have completed
semaphore.acquire(numberOfExecutors);
// assert the number of executed tasks is correct
assertEquals(numberOfThreads * tasksSubmittedPerThread, counter.get());
// assert each executor executed the correct number of tasks
for (TaskExecutor executor : executors) {
if (counts.containsKey(executor)) {
assertEquals((int) counts.get(executor), executor.counter.get());
assertEquals(executor.batches.get(), executor.published.get());
}
}
// assert the correct number of clusterStateProcessed events were triggered
for (Map.Entry<String, AtomicInteger> entry : counters.entrySet()) {
assertEquals(tasksSubmittedPerThread, entry.getValue().get());
}
}
@TestLogging("cluster:TRACE") // To ensure that we log cluster state events on TRACE level
public void testClusterStateUpdateLogging() throws Exception {
Settings settings = settingsBuilder()
.put("discovery.type", "local")
.build();
internalCluster().startNode(settings);
ClusterService clusterService1 = internalCluster().getInstance(ClusterService.class);
MockLogAppender mockAppender = new MockLogAppender();
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test1", "cluster.service", Level.DEBUG, "*processing [test1]: took * no change in cluster_state"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test2", "cluster.service", Level.TRACE, "*failed to execute cluster state update in *"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test3", "cluster.service", Level.DEBUG, "*processing [test3]: took * done applying updated cluster_state (version: *, uuid: *)"));
Logger rootLogger = Logger.getRootLogger();
rootLogger.addAppender(mockAppender);
try {
final CountDownLatch latch = new CountDownLatch(4);
clusterService1.submitStateUpdateTask("test1", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
return currentState;
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
latch.countDown();
}
@Override
public void onFailure(String source, Throwable t) {
fail();
}
});
clusterService1.submitStateUpdateTask("test2", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
throw new IllegalArgumentException("Testing handling of exceptions in the cluster state task");
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
fail();
}
@Override
public void onFailure(String source, Throwable t) {
latch.countDown();
}
});
clusterService1.submitStateUpdateTask("test3", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
return ClusterState.builder(currentState).incrementVersion().build();
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
latch.countDown();
}
@Override
public void onFailure(String source, Throwable t) {
fail();
}
});
// Additional update task to make sure all previous logging made it to the logger
// We don't check logging for this one since there is no guarantee that it will occur before our check
clusterService1.submitStateUpdateTask("test4", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
return currentState;
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
latch.countDown();
}
@Override
public void onFailure(String source, Throwable t) {
fail();
}
});
assertThat(latch.await(1, TimeUnit.SECONDS), equalTo(true));
} finally {
rootLogger.removeAppender(mockAppender);
}
mockAppender.assertAllExpectationsMatched();
}
@Test
@TestLogging("cluster:WARN") // To ensure that we log cluster state events on WARN level
public void testLongClusterStateUpdateLogging() throws Exception {
Settings settings = settingsBuilder()
.put("discovery.type", "local")
.put(InternalClusterService.SETTING_CLUSTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD, "10s")
.build();
internalCluster().startNode(settings);
ClusterService clusterService1 = internalCluster().getInstance(ClusterService.class);
MockLogAppender mockAppender = new MockLogAppender();
mockAppender.addExpectation(new MockLogAppender.UnseenEventExpectation("test1 shouldn't see because setting is too low", "cluster.service", Level.WARN, "*cluster state update task [test1] took * above the warn threshold of *"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test2", "cluster.service", Level.WARN, "*cluster state update task [test2] took * above the warn threshold of 10ms"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test3", "cluster.service", Level.WARN, "*cluster state update task [test3] took * above the warn threshold of 10ms"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test4", "cluster.service", Level.WARN, "*cluster state update task [test4] took * above the warn threshold of 10ms"));
Logger rootLogger = Logger.getRootLogger();
rootLogger.addAppender(mockAppender);
try {
final CountDownLatch latch = new CountDownLatch(5);
final CountDownLatch processedFirstTask = new CountDownLatch(1);
clusterService1.submitStateUpdateTask("test1", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
Thread.sleep(100);
return currentState;
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
latch.countDown();
processedFirstTask.countDown();
}
@Override
public void onFailure(String source, Throwable t) {
fail();
}
});
processedFirstTask.await(1, TimeUnit.SECONDS);
assertAcked(client().admin().cluster().prepareUpdateSettings().setTransientSettings(settingsBuilder()
.put(InternalClusterService.SETTING_CLUSTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD, "10ms")));
clusterService1.submitStateUpdateTask("test2", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
Thread.sleep(100);
throw new IllegalArgumentException("Testing handling of exceptions in the cluster state task");
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
fail();
}
@Override
public void onFailure(String source, Throwable t) {
latch.countDown();
}
});
clusterService1.submitStateUpdateTask("test3", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
Thread.sleep(100);
return ClusterState.builder(currentState).incrementVersion().build();
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
latch.countDown();
}
@Override
public void onFailure(String source, Throwable t) {
fail();
}
});
clusterService1.submitStateUpdateTask("test4", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
Thread.sleep(100);
return currentState;
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
latch.countDown();
}
@Override
public void onFailure(String source, Throwable t) {
fail();
}
});
// Additional update task to make sure all previous logging made it to the logger
// We don't check logging for this one since there is no guarantee that it will occur before our check
clusterService1.submitStateUpdateTask("test5", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
return currentState;
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
latch.countDown();
}
@Override
public void onFailure(String source, Throwable t) {
fail();
}
});
assertThat(latch.await(5, TimeUnit.SECONDS), equalTo(true));
} finally {
rootLogger.removeAppender(mockAppender);
}
mockAppender.assertAllExpectationsMatched();
}
private static class BlockingTask extends ClusterStateUpdateTask {
private final CountDownLatch latch = new CountDownLatch(1);
public BlockingTask(Priority priority) {
super(priority);
}
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
latch.await();
return currentState;
}
@Override
public void onFailure(String source, Throwable t) {
}
public void release() {
latch.countDown();
}
}
private static class PrioritizedTask extends ClusterStateUpdateTask {
private final CountDownLatch latch;
private final List<PrioritizedTask> tasks;
private PrioritizedTask(Priority priority, CountDownLatch latch, List<PrioritizedTask> tasks) {
super(priority);
this.latch = latch;
this.tasks = tasks;
}
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
tasks.add(this);
latch.countDown();
return currentState;
}
@Override
public void onFailure(String source, Throwable t) {
latch.countDown();
}
}
public static class TestPlugin extends Plugin {
@Override
public String name() {
return "test plugin";
}
@Override
public String description() {
return "test plugin";
}
@Override
public Collection<Class<? extends LifecycleComponent>> nodeServices() {
List<Class<? extends LifecycleComponent>> services = new ArrayList<>(1);
services.add(MasterAwareService.class);
return services;
}
}
@Singleton
public static class MasterAwareService extends AbstractLifecycleComponent<MasterAwareService> implements LocalNodeMasterListener {
private final ClusterService clusterService;
private volatile boolean master;
@Inject
public MasterAwareService(Settings settings, ClusterService clusterService) {
super(settings);
clusterService.add(this);
this.clusterService = clusterService;
logger.info("initialized test service");
}
@Override
public void onMaster() {
logger.info("on master [" + clusterService.localNode() + "]");
master = true;
}
@Override
public void offMaster() {
logger.info("off master [" + clusterService.localNode() + "]");
master = false;
}
public boolean master() {
return master;
}
@Override
protected void doStart() {
}
@Override
protected void doStop() {
}
@Override
protected void doClose() {
}
@Override
public String executorName() {
return ThreadPool.Names.SAME;
}
}
}