/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.action.admin.cluster.node.tasks; import org.elasticsearch.ElasticsearchTimeoutException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.FailedNodeException; import org.elasticsearch.action.TaskOperationFailure; import org.elasticsearch.action.admin.cluster.health.ClusterHealthAction; import org.elasticsearch.action.admin.cluster.node.tasks.cancel.CancelTasksResponse; import org.elasticsearch.action.admin.cluster.node.tasks.get.GetTaskResponse; import org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksAction; import org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksResponse; import org.elasticsearch.action.admin.indices.refresh.RefreshAction; import org.elasticsearch.action.admin.indices.upgrade.post.UpgradeAction; import org.elasticsearch.action.admin.indices.validate.query.ValidateQueryAction; import org.elasticsearch.action.bulk.BulkAction; import org.elasticsearch.action.fieldstats.FieldStatsAction; import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.index.IndexAction; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.search.SearchAction; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchTransportService; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.action.support.replication.ReplicationResponse; import org.elasticsearch.action.support.replication.TransportReplicationActionTests; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Strings; import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.tasks.Task; import org.elasticsearch.tasks.TaskId; import org.elasticsearch.tasks.TaskInfo; import org.elasticsearch.tasks.TaskResult; import org.elasticsearch.tasks.TaskResultsService; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.tasks.MockTaskManager; import org.elasticsearch.test.tasks.MockTaskManagerListener; import org.elasticsearch.test.transport.MockTransportService; import org.elasticsearch.transport.ReceiveTimeoutTransportException; import org.elasticsearch.transport.TransportService; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.CyclicBarrier; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; import java.util.function.Function; import static java.util.Collections.emptyList; import static java.util.Collections.singleton; import static org.elasticsearch.common.unit.TimeValue.timeValueMillis; import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThrows; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.emptyCollectionOf; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.startsWith; /** * Integration tests for task management API * <p> * We need at least 2 nodes so we have a master node a non-master node */ @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, minNumDataNodes = 2, transportClientRatio = 0.0) public class TasksIT extends ESIntegTestCase { private Map<Tuple<String, String>, RecordingTaskManagerListener> listeners = new HashMap<>(); @Override protected Collection<Class<? extends Plugin>> getMockPlugins() { Collection<Class<? extends Plugin>> mockPlugins = new ArrayList<>(super.getMockPlugins()); mockPlugins.remove(MockTransportService.TestPlugin.class); return mockPlugins; } @Override protected Collection<Class<? extends Plugin>> nodePlugins() { return Arrays.asList(MockTransportService.TestPlugin.class, TestTaskPlugin.class); } @Override protected Collection<Class<? extends Plugin>> transportClientPlugins() { return nodePlugins(); } @Override protected Settings nodeSettings(int nodeOrdinal) { return Settings.builder() .put(super.nodeSettings(nodeOrdinal)) .put(MockTaskManager.USE_MOCK_TASK_MANAGER_SETTING.getKey(), true) .build(); } public void testTaskCounts() { // Run only on data nodes ListTasksResponse response = client().admin().cluster().prepareListTasks("data:true").setActions(ListTasksAction.NAME + "[n]") .get(); assertThat(response.getTasks().size(), greaterThanOrEqualTo(cluster().numDataNodes())); } public void testMasterNodeOperationTasks() { registerTaskManageListeners(ClusterHealthAction.NAME); // First run the health on the master node - should produce only one task on the master node internalCluster().masterClient().admin().cluster().prepareHealth().get(); assertEquals(1, numberOfEvents(ClusterHealthAction.NAME, Tuple::v1)); // counting only registration events assertEquals(1, numberOfEvents(ClusterHealthAction.NAME, event -> event.v1() == false)); // counting only unregistration events resetTaskManageListeners(ClusterHealthAction.NAME); // Now run the health on a non-master node - should produce one task on master and one task on another node internalCluster().nonMasterClient().admin().cluster().prepareHealth().get(); assertEquals(2, numberOfEvents(ClusterHealthAction.NAME, Tuple::v1)); // counting only registration events assertEquals(2, numberOfEvents(ClusterHealthAction.NAME, event -> event.v1() == false)); // counting only unregistration events List<TaskInfo> tasks = findEvents(ClusterHealthAction.NAME, Tuple::v1); // Verify that one of these tasks is a parent of another task if (tasks.get(0).getParentTaskId().isSet()) { assertParentTask(Collections.singletonList(tasks.get(0)), tasks.get(1)); } else { assertParentTask(Collections.singletonList(tasks.get(1)), tasks.get(0)); } } public void testTransportReplicationAllShardsTasks() { registerTaskManageListeners(FieldStatsAction.NAME); // main task registerTaskManageListeners(FieldStatsAction.NAME + "[s]"); // shard level tasks createIndex("test"); ensureGreen("test"); // Make sure all shards are allocated client().prepareFieldStats().setFields("field").get(); // the field stats operation should produce one main task NumShards numberOfShards = getNumShards("test"); assertEquals(1, numberOfEvents(FieldStatsAction.NAME, Tuple::v1)); // and then one operation per shard assertEquals(numberOfShards.numPrimaries, numberOfEvents(FieldStatsAction.NAME + "[s]", Tuple::v1)); // the shard level tasks should have the main task as a parent assertParentTask(findEvents(FieldStatsAction.NAME + "[s]", Tuple::v1), findEvents(FieldStatsAction.NAME, Tuple::v1).get(0)); } public void testTransportBroadcastByNodeTasks() { registerTaskManageListeners(UpgradeAction.NAME); // main task registerTaskManageListeners(UpgradeAction.NAME + "[n]"); // node level tasks createIndex("test"); ensureGreen("test"); // Make sure all shards are allocated client().admin().indices().prepareUpgrade("test").get(); // the percolate operation should produce one main task assertEquals(1, numberOfEvents(UpgradeAction.NAME, Tuple::v1)); // and then one operation per each node where shards are located assertEquals(internalCluster().nodesInclude("test").size(), numberOfEvents(UpgradeAction.NAME + "[n]", Tuple::v1)); // all node level tasks should have the main task as a parent assertParentTask(findEvents(UpgradeAction.NAME + "[n]", Tuple::v1), findEvents(UpgradeAction.NAME, Tuple::v1).get(0)); } public void testTransportReplicationSingleShardTasks() { registerTaskManageListeners(ValidateQueryAction.NAME); // main task registerTaskManageListeners(ValidateQueryAction.NAME + "[s]"); // shard level tasks createIndex("test"); ensureGreen("test"); // Make sure all shards are allocated client().admin().indices().prepareValidateQuery("test").get(); // the validate operation should produce one main task assertEquals(1, numberOfEvents(ValidateQueryAction.NAME, Tuple::v1)); // and then one operation assertEquals(1, numberOfEvents(ValidateQueryAction.NAME + "[s]", Tuple::v1)); // the shard level operation should have the main task as its parent assertParentTask(findEvents(ValidateQueryAction.NAME + "[s]", Tuple::v1), findEvents(ValidateQueryAction.NAME, Tuple::v1).get(0)); } public void testTransportBroadcastReplicationTasks() { registerTaskManageListeners(RefreshAction.NAME); // main task registerTaskManageListeners(RefreshAction.NAME + "[s]"); // shard level tasks registerTaskManageListeners(RefreshAction.NAME + "[s][*]"); // primary and replica shard tasks createIndex("test"); ensureGreen("test"); // Make sure all shards are allocated client().admin().indices().prepareRefresh("test").get(); // the refresh operation should produce one main task NumShards numberOfShards = getNumShards("test"); logger.debug("number of shards, total: [{}], primaries: [{}] ", numberOfShards.totalNumShards, numberOfShards.numPrimaries); logger.debug("main events {}", numberOfEvents(RefreshAction.NAME, Tuple::v1)); logger.debug("main event node {}", findEvents(RefreshAction.NAME, Tuple::v1).get(0).getTaskId().getNodeId()); logger.debug("[s] events {}", numberOfEvents(RefreshAction.NAME + "[s]", Tuple::v1)); logger.debug("[s][*] events {}", numberOfEvents(RefreshAction.NAME + "[s][*]", Tuple::v1)); logger.debug("nodes with the index {}", internalCluster().nodesInclude("test")); assertEquals(1, numberOfEvents(RefreshAction.NAME, Tuple::v1)); // Because it's broadcast replication action we will have as many [s] level requests // as we have primary shards on the coordinating node plus we will have one task per primary outside of the // coordinating node due to replication. // If all primaries are on the coordinating node, the number of tasks should be equal to the number of primaries // If all primaries are not on the coordinating node, the number of tasks should be equal to the number of primaries times 2 assertThat(numberOfEvents(RefreshAction.NAME + "[s]", Tuple::v1), greaterThanOrEqualTo(numberOfShards.numPrimaries)); assertThat(numberOfEvents(RefreshAction.NAME + "[s]", Tuple::v1), lessThanOrEqualTo(numberOfShards.numPrimaries * 2)); // Verify that all [s] events have the proper parent // This is complicated because if the shard task runs on the same node it has main task as a parent // but if it runs on non-coordinating node it would have another intermediate [s] task on the coordinating node as a parent TaskInfo mainTask = findEvents(RefreshAction.NAME, Tuple::v1).get(0); List<TaskInfo> sTasks = findEvents(RefreshAction.NAME + "[s]", Tuple::v1); for (TaskInfo taskInfo : sTasks) { if (mainTask.getTaskId().getNodeId().equals(taskInfo.getTaskId().getNodeId())) { // This shard level task runs on the same node as a parent task - it should have the main task as a direct parent assertParentTask(Collections.singletonList(taskInfo), mainTask); } else { String description = taskInfo.getDescription(); // This shard level task runs on another node - it should have a corresponding shard level task on the node where main task // is running List<TaskInfo> sTasksOnRequestingNode = findEvents(RefreshAction.NAME + "[s]", event -> event.v1() && mainTask.getTaskId().getNodeId().equals(event.v2().getTaskId().getNodeId()) && description.equals(event.v2().getDescription())); // There should be only one parent task assertEquals(1, sTasksOnRequestingNode.size()); assertParentTask(Collections.singletonList(taskInfo), sTasksOnRequestingNode.get(0)); } } // we will have as many [s][p] and [s][r] tasks as we have primary and replica shards assertEquals(numberOfShards.totalNumShards, numberOfEvents(RefreshAction.NAME + "[s][*]", Tuple::v1)); // we the [s][p] and [s][r] tasks should have a corresponding [s] task on the same node as a parent List<TaskInfo> spEvents = findEvents(RefreshAction.NAME + "[s][*]", Tuple::v1); for (TaskInfo taskInfo : spEvents) { List<TaskInfo> sTask; if (taskInfo.getAction().endsWith("[s][p]")) { // A [s][p] level task should have a corresponding [s] level task on the same node sTask = findEvents(RefreshAction.NAME + "[s]", event -> event.v1() && taskInfo.getTaskId().getNodeId().equals(event.v2().getTaskId().getNodeId()) && taskInfo.getDescription().equals(event.v2().getDescription())); } else { // A [s][r] level task should have a corresponding [s] level task on the a different node (where primary is located) sTask = findEvents(RefreshAction.NAME + "[s]", event -> event.v1() && taskInfo.getParentTaskId().getNodeId().equals(event.v2().getTaskId().getNodeId()) && taskInfo .getDescription() .equals(event.v2().getDescription())); } // There should be only one parent task assertEquals(1, sTask.size()); assertParentTask(Collections.singletonList(taskInfo), sTask.get(0)); } } public void testTransportBulkTasks() { registerTaskManageListeners(BulkAction.NAME); // main task registerTaskManageListeners(BulkAction.NAME + "[s]"); // shard task registerTaskManageListeners(BulkAction.NAME + "[s][p]"); // shard task on primary registerTaskManageListeners(BulkAction.NAME + "[s][r]"); // shard task on replica createIndex("test"); ensureGreen("test"); // Make sure all shards are allocated to catch replication tasks client().prepareBulk().add(client().prepareIndex("test", "doc", "test_id") .setSource("{\"foo\": \"bar\"}", XContentType.JSON)).get(); // the bulk operation should produce one main task List<TaskInfo> topTask = findEvents(BulkAction.NAME, Tuple::v1); assertEquals(1, topTask.size()); assertEquals("requests[1], indices[test]", topTask.get(0).getDescription()); // we should also get 1 or 2 [s] operation with main operation as a parent // in case the primary is located on the coordinating node we will have 1 operation, otherwise - 2 List<TaskInfo> shardTasks = findEvents(BulkAction.NAME + "[s]", Tuple::v1); assertThat(shardTasks.size(), allOf(lessThanOrEqualTo(2), greaterThanOrEqualTo(1))); // Select the effective shard task TaskInfo shardTask; if (shardTasks.size() == 1) { // we have only one task - it's going to be the parent task for all [s][p] and [s][r] tasks shardTask = shardTasks.get(0); // and it should have the main task as a parent assertParentTask(shardTask, findEvents(BulkAction.NAME, Tuple::v1).get(0)); assertEquals("requests[1], index[test]", shardTask.getDescription()); } else { if (shardTasks.get(0).getParentTaskId().equals(shardTasks.get(1).getTaskId())) { // task 1 is the parent of task 0, that means that task 0 will control [s][p] and [s][r] tasks shardTask = shardTasks.get(0); // in turn the parent of the task 1 should be the main task assertParentTask(shardTasks.get(1), findEvents(BulkAction.NAME, Tuple::v1).get(0)); assertEquals("requests[1], index[test]", shardTask.getDescription()); } else { // otherwise task 1 will control [s][p] and [s][r] tasks shardTask = shardTasks.get(1); // in turn the parent of the task 0 should be the main task assertParentTask(shardTasks.get(0), findEvents(BulkAction.NAME, Tuple::v1).get(0)); assertEquals("requests[1], index[test]", shardTask.getDescription()); } } // we should also get one [s][p] operation with shard operation as a parent assertEquals(1, numberOfEvents(BulkAction.NAME + "[s][p]", Tuple::v1)); assertParentTask(findEvents(BulkAction.NAME + "[s][p]", Tuple::v1), shardTask); // we should get as many [s][r] operations as we have replica shards // they all should have the same shard task as a parent assertEquals(getNumShards("test").numReplicas, numberOfEvents(BulkAction.NAME + "[s][r]", Tuple::v1)); assertParentTask(findEvents(BulkAction.NAME + "[s][r]", Tuple::v1), shardTask); } public void testSearchTaskDescriptions() { registerTaskManageListeners(SearchAction.NAME); // main task registerTaskManageListeners(SearchAction.NAME + "[*]"); // shard task createIndex("test"); ensureGreen("test"); // Make sure all shards are allocated to catch replication tasks client().prepareIndex("test", "doc", "test_id").setSource("{\"foo\": \"bar\"}", XContentType.JSON) .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE).get(); assertSearchResponse(client().prepareSearch("test").setTypes("doc").setQuery(QueryBuilders.matchAllQuery()).get()); // the search operation should produce one main task List<TaskInfo> mainTask = findEvents(SearchAction.NAME, Tuple::v1); assertEquals(1, mainTask.size()); assertThat(mainTask.get(0).getDescription(), startsWith("indices[test], types[doc], search_type[")); assertThat(mainTask.get(0).getDescription(), containsString("\"query\":{\"match_all\"")); // check that if we have any shard-level requests they all have non-zero length description List<TaskInfo> shardTasks = findEvents(SearchAction.NAME + "[*]", Tuple::v1); for (TaskInfo taskInfo : shardTasks) { assertThat(taskInfo.getParentTaskId(), notNullValue()); assertEquals(mainTask.get(0).getTaskId(), taskInfo.getParentTaskId()); switch (taskInfo.getAction()) { case SearchTransportService.QUERY_ACTION_NAME: case SearchTransportService.DFS_ACTION_NAME: assertTrue(taskInfo.getDescription(), Regex.simpleMatch("shardId[[test][*]]", taskInfo.getDescription())); break; case SearchTransportService.QUERY_ID_ACTION_NAME: assertTrue(taskInfo.getDescription(), Regex.simpleMatch("id[*], indices[test]", taskInfo.getDescription())); break; case SearchTransportService.FETCH_ID_ACTION_NAME: assertTrue(taskInfo.getDescription(), Regex.simpleMatch("id[*], size[1], lastEmittedDoc[null]", taskInfo.getDescription())); break; default: fail("Unexpected action [" + taskInfo.getAction() + "] with description [" + taskInfo.getDescription() + "]"); } // assert that all task descriptions have non-zero length assertThat(taskInfo.getDescription().length(), greaterThan(0)); } } /** * Very basic "is it plugged in" style test that indexes a document and makes sure that you can fetch the status of the process. The * goal here is to verify that the large moving parts that make fetching task status work fit together rather than to verify any * particular status results from indexing. For that, look at {@link TransportReplicationActionTests}. We intentionally don't use the * task recording mechanism used in other places in this test so we can make sure that the status fetching works properly over the wire. */ public void testCanFetchIndexStatus() throws Exception { // First latch waits for the task to start, second on blocks it from finishing. CountDownLatch taskRegistered = new CountDownLatch(1); CountDownLatch letTaskFinish = new CountDownLatch(1); Thread index = null; try { for (TransportService transportService : internalCluster().getInstances(TransportService.class)) { ((MockTaskManager) transportService.getTaskManager()).addListener(new MockTaskManagerListener() { @Override public void onTaskRegistered(Task task) { if (task.getAction().startsWith(IndexAction.NAME)) { taskRegistered.countDown(); logger.debug("Blocking [{}] starting", task); try { assertTrue(letTaskFinish.await(10, TimeUnit.SECONDS)); } catch (InterruptedException e) { throw new RuntimeException(e); } } } @Override public void onTaskUnregistered(Task task) { } @Override public void waitForTaskCompletion(Task task) { } }); } // Need to run the task in a separate thread because node client's .execute() is blocked by our task listener index = new Thread(() -> { IndexResponse indexResponse = client().prepareIndex("test", "test").setSource("test", "test").get(); assertArrayEquals(ReplicationResponse.EMPTY, indexResponse.getShardInfo().getFailures()); }); index.start(); assertTrue(taskRegistered.await(10, TimeUnit.SECONDS)); // waiting for at least one task to be registered ListTasksResponse listResponse = client().admin().cluster().prepareListTasks().setActions("indices:data/write/index*") .setDetailed(true).get(); assertThat(listResponse.getTasks(), not(empty())); for (TaskInfo task : listResponse.getTasks()) { assertNotNull(task.getStatus()); GetTaskResponse getResponse = client().admin().cluster().prepareGetTask(task.getTaskId()).get(); assertFalse("task should still be running", getResponse.getTask().isCompleted()); TaskInfo fetchedWithGet = getResponse.getTask().getTask(); assertEquals(task.getId(), fetchedWithGet.getId()); assertEquals(task.getType(), fetchedWithGet.getType()); assertEquals(task.getAction(), fetchedWithGet.getAction()); assertEquals(task.getDescription(), fetchedWithGet.getDescription()); assertEquals(task.getStatus(), fetchedWithGet.getStatus()); assertEquals(task.getStartTime(), fetchedWithGet.getStartTime()); assertThat(fetchedWithGet.getRunningTimeNanos(), greaterThanOrEqualTo(task.getRunningTimeNanos())); assertEquals(task.isCancellable(), fetchedWithGet.isCancellable()); assertEquals(task.getParentTaskId(), fetchedWithGet.getParentTaskId()); } } finally { letTaskFinish.countDown(); if (index != null) { index.join(); } assertBusy(() -> { assertEquals(emptyList(), client().admin().cluster().prepareListTasks().setActions("indices:data/write/index*").get().getTasks()); }); } } public void testTasksCancellation() throws Exception { // Start blocking test task // Get real client (the plugin is not registered on transport nodes) ActionFuture<TestTaskPlugin.NodesResponse> future = TestTaskPlugin.TestTaskAction.INSTANCE.newRequestBuilder(client()).execute(); logger.info("--> started test tasks"); // Wait for the task to start on all nodes assertBusy(() -> assertEquals(internalCluster().size(), client().admin().cluster().prepareListTasks().setActions(TestTaskPlugin.TestTaskAction.NAME + "[n]").get().getTasks().size())); logger.info("--> cancelling the main test task"); CancelTasksResponse cancelTasksResponse = client().admin().cluster().prepareCancelTasks() .setActions(TestTaskPlugin.TestTaskAction.NAME).get(); assertEquals(1, cancelTasksResponse.getTasks().size()); future.get(); logger.info("--> checking that test tasks are not running"); assertEquals(0, client().admin().cluster().prepareListTasks().setActions(TestTaskPlugin.TestTaskAction.NAME + "*").get().getTasks().size()); } public void testTasksUnblocking() throws Exception { // Start blocking test task ActionFuture<TestTaskPlugin.NodesResponse> future = TestTaskPlugin.TestTaskAction.INSTANCE.newRequestBuilder(client()).execute(); // Wait for the task to start on all nodes assertBusy(() -> assertEquals(internalCluster().size(), client().admin().cluster().prepareListTasks().setActions(TestTaskPlugin.TestTaskAction.NAME + "[n]").get().getTasks().size())); TestTaskPlugin.UnblockTestTasksAction.INSTANCE.newRequestBuilder(client()).get(); future.get(); assertEquals(0, client().admin().cluster().prepareListTasks().setActions(TestTaskPlugin.TestTaskAction.NAME + "[n]").get() .getTasks().size()); } public void testListTasksWaitForCompletion() throws Exception { waitForCompletionTestCase(randomBoolean(), id -> client().admin().cluster().prepareListTasks().setActions(TestTaskPlugin.TestTaskAction.NAME) .setWaitForCompletion(true).execute(), response -> { assertThat(response.getNodeFailures(), empty()); assertThat(response.getTaskFailures(), empty()); assertThat(response.getTasks(), hasSize(1)); TaskInfo task = response.getTasks().get(0); assertEquals(TestTaskPlugin.TestTaskAction.NAME, task.getAction()); } ); } public void testGetTaskWaitForCompletionWithoutStoringResult() throws Exception { waitForCompletionTestCase(false, id -> client().admin().cluster().prepareGetTask(id).setWaitForCompletion(true).execute(), response -> { assertTrue(response.getTask().isCompleted()); //We didn't store the result so it won't come back when we wait assertNull(response.getTask().getResponse()); //But the task's details should still be there because we grabbed a reference to the task before waiting for it to complete assertNotNull(response.getTask().getTask()); assertEquals(TestTaskPlugin.TestTaskAction.NAME, response.getTask().getTask().getAction()); } ); } public void testGetTaskWaitForCompletionWithStoringResult() throws Exception { waitForCompletionTestCase(true, id -> client().admin().cluster().prepareGetTask(id).setWaitForCompletion(true).execute(), response -> { assertTrue(response.getTask().isCompleted()); // We stored the task so we should get its results assertEquals(0, response.getTask().getResponseAsMap().get("failure_count")); // The task's details should also be there assertNotNull(response.getTask().getTask()); assertEquals(TestTaskPlugin.TestTaskAction.NAME, response.getTask().getTask().getAction()); } ); } /** * Test wait for completion. * @param storeResult should the task store its results * @param wait start waiting for a task. Accepts that id of the task to wait for and returns a future waiting for it. * @param validator validate the response and return the task ids that were found */ private <T> void waitForCompletionTestCase(boolean storeResult, Function<TaskId, ActionFuture<T>> wait, Consumer<T> validator) throws Exception { // Start blocking test task ActionFuture<TestTaskPlugin.NodesResponse> future = TestTaskPlugin.TestTaskAction.INSTANCE.newRequestBuilder(client()) .setShouldStoreResult(storeResult).execute(); ActionFuture<T> waitResponseFuture; TaskId taskId; try { taskId = waitForTestTaskStartOnAllNodes(); // Wait for the task to start assertBusy(() -> client().admin().cluster().prepareGetTask(taskId).get()); // Register listeners so we can be sure the waiting started CountDownLatch waitForWaitingToStart = new CountDownLatch(1); for (TransportService transportService : internalCluster().getInstances(TransportService.class)) { ((MockTaskManager) transportService.getTaskManager()).addListener(new MockTaskManagerListener() { @Override public void waitForTaskCompletion(Task task) { waitForWaitingToStart.countDown(); } @Override public void onTaskRegistered(Task task) { } @Override public void onTaskUnregistered(Task task) { } }); } // Spin up a request to wait for the test task to finish waitResponseFuture = wait.apply(taskId); /* Wait for the wait to start. This should count down just *before* we wait for completion but after the list/get has got a * reference to the running task. Because we unblock immediately after this the task may no longer be running for us to wait * on which is fine. */ waitForWaitingToStart.await(); } finally { // Unblock the request so the wait for completion request can finish TestTaskPlugin.UnblockTestTasksAction.INSTANCE.newRequestBuilder(client()).get(); } // Now that the task is unblocked the list response will come back T waitResponse = waitResponseFuture.get(); validator.accept(waitResponse); TestTaskPlugin.NodesResponse response = future.get(); assertEquals(emptyList(), response.failures()); } public void testListTasksWaitForTimeout() throws Exception { waitForTimeoutTestCase(id -> { ListTasksResponse response = client().admin().cluster().prepareListTasks() .setActions(TestTaskPlugin.TestTaskAction.NAME).setWaitForCompletion(true).setTimeout(timeValueMillis(100)) .get(); assertThat(response.getNodeFailures(), not(empty())); return response.getNodeFailures(); }); } public void testGetTaskWaitForTimeout() throws Exception { waitForTimeoutTestCase(id -> { Exception e = expectThrows(Exception.class, () -> client().admin().cluster().prepareGetTask(id).setWaitForCompletion(true).setTimeout(timeValueMillis(100)).get()); return singleton(e); }); } /** * Test waiting for a task that times out. * @param wait wait for the running task and return all the failures you accumulated waiting for it */ private void waitForTimeoutTestCase(Function<TaskId, ? extends Iterable<? extends Throwable>> wait) throws Exception { // Start blocking test task ActionFuture<TestTaskPlugin.NodesResponse> future = TestTaskPlugin.TestTaskAction.INSTANCE.newRequestBuilder(client()).execute(); try { TaskId taskId = waitForTestTaskStartOnAllNodes(); // Wait for the task to start assertBusy(() -> client().admin().cluster().prepareGetTask(taskId).get()); // Spin up a request that should wait for those tasks to finish // It will timeout because we haven't unblocked the tasks Iterable<? extends Throwable> failures = wait.apply(taskId); for (Throwable failure : failures) { assertNotNull( ExceptionsHelper.unwrap(failure, ElasticsearchTimeoutException.class, ReceiveTimeoutTransportException.class)); } } finally { // Now we can unblock those requests TestTaskPlugin.UnblockTestTasksAction.INSTANCE.newRequestBuilder(client()).get(); } future.get(); } /** * Wait for the test task to be running on all nodes and return the TaskId of the primary task. */ private TaskId waitForTestTaskStartOnAllNodes() throws Exception { assertBusy(() -> { List<TaskInfo> tasks = client().admin().cluster().prepareListTasks().setActions(TestTaskPlugin.TestTaskAction.NAME + "[n]") .get().getTasks(); assertEquals(internalCluster().size(), tasks.size()); }); List<TaskInfo> task = client().admin().cluster().prepareListTasks().setActions(TestTaskPlugin.TestTaskAction.NAME).get().getTasks(); assertThat(task, hasSize(1)); return task.get(0).getTaskId(); } public void testTasksListWaitForNoTask() throws Exception { // Spin up a request to wait for no matching tasks ActionFuture<ListTasksResponse> waitResponseFuture = client().admin().cluster().prepareListTasks() .setActions(TestTaskPlugin.TestTaskAction.NAME + "[n]").setWaitForCompletion(true).setTimeout(timeValueMillis(10)) .execute(); // It should finish quickly and without complaint assertThat(waitResponseFuture.get().getTasks(), empty()); } public void testTasksGetWaitForNoTask() throws Exception { // Spin up a request to wait for no matching tasks ActionFuture<GetTaskResponse> waitResponseFuture = client().admin().cluster().prepareGetTask("notfound:1") .setWaitForCompletion(true).setTimeout(timeValueMillis(10)) .execute(); // It should finish quickly and without complaint expectNotFound(waitResponseFuture::get); } public void testTasksWaitForAllTask() throws Exception { // Spin up a request to wait for all tasks in the cluster to make sure it doesn't cause an infinite loop ListTasksResponse response = client().admin().cluster().prepareListTasks().setWaitForCompletion(true) .setTimeout(timeValueSeconds(10)).get(); // It should finish quickly and without complaint and list the list tasks themselves assertThat(response.getNodeFailures(), emptyCollectionOf(FailedNodeException.class)); assertThat(response.getTaskFailures(), emptyCollectionOf(TaskOperationFailure.class)); assertThat(response.getTasks().size(), greaterThanOrEqualTo(1)); } public void testTaskStoringSuccesfulResult() throws Exception { // Randomly create an empty index to make sure the type is created automatically if (randomBoolean()) { logger.info("creating an empty results index with custom settings"); assertAcked(client().admin().indices().prepareCreate(TaskResultsService.TASK_INDEX)); } registerTaskManageListeners(TestTaskPlugin.TestTaskAction.NAME); // we need this to get task id of the process // Start non-blocking test task TestTaskPlugin.TestTaskAction.INSTANCE.newRequestBuilder(client()).setShouldStoreResult(true).setShouldBlock(false).get(); List<TaskInfo> events = findEvents(TestTaskPlugin.TestTaskAction.NAME, Tuple::v1); assertEquals(1, events.size()); TaskInfo taskInfo = events.get(0); TaskId taskId = taskInfo.getTaskId(); GetResponse resultDoc = client() .prepareGet(TaskResultsService.TASK_INDEX, TaskResultsService.TASK_TYPE, taskId.toString()).get(); assertTrue(resultDoc.isExists()); Map<String, Object> source = resultDoc.getSource(); @SuppressWarnings("unchecked") Map<String, Object> task = (Map<String, Object>) source.get("task"); assertEquals(taskInfo.getTaskId().getNodeId(), task.get("node")); assertEquals(taskInfo.getAction(), task.get("action")); assertEquals(Long.toString(taskInfo.getId()), task.get("id").toString()); @SuppressWarnings("unchecked") Map<String, Object> result = (Map<String, Object>) source.get("response"); assertEquals("0", result.get("failure_count").toString()); assertNull(source.get("failure")); assertNoFailures(client().admin().indices().prepareRefresh(TaskResultsService.TASK_INDEX).get()); SearchResponse searchResponse = client().prepareSearch(TaskResultsService.TASK_INDEX) .setTypes(TaskResultsService.TASK_TYPE) .setSource(SearchSourceBuilder.searchSource().query(QueryBuilders.termQuery("task.action", taskInfo.getAction()))) .get(); assertEquals(1L, searchResponse.getHits().getTotalHits()); searchResponse = client().prepareSearch(TaskResultsService.TASK_INDEX).setTypes(TaskResultsService.TASK_TYPE) .setSource(SearchSourceBuilder.searchSource().query(QueryBuilders.termQuery("task.node", taskInfo.getTaskId().getNodeId()))) .get(); assertEquals(1L, searchResponse.getHits().getTotalHits()); GetTaskResponse getResponse = expectFinishedTask(taskId); assertEquals(result, getResponse.getTask().getResponseAsMap()); assertNull(getResponse.getTask().getError()); } public void testTaskStoringFailureResult() throws Exception { registerTaskManageListeners(TestTaskPlugin.TestTaskAction.NAME); // we need this to get task id of the process // Start non-blocking test task that should fail assertThrows( TestTaskPlugin.TestTaskAction.INSTANCE.newRequestBuilder(client()) .setShouldFail(true) .setShouldStoreResult(true) .setShouldBlock(false), IllegalStateException.class ); List<TaskInfo> events = findEvents(TestTaskPlugin.TestTaskAction.NAME, Tuple::v1); assertEquals(1, events.size()); TaskInfo failedTaskInfo = events.get(0); TaskId failedTaskId = failedTaskInfo.getTaskId(); GetResponse failedResultDoc = client() .prepareGet(TaskResultsService.TASK_INDEX, TaskResultsService.TASK_TYPE, failedTaskId.toString()) .get(); assertTrue(failedResultDoc.isExists()); Map<String, Object> source = failedResultDoc.getSource(); @SuppressWarnings("unchecked") Map<String, Object> task = (Map<String, Object>) source.get("task"); assertEquals(failedTaskInfo.getTaskId().getNodeId(), task.get("node")); assertEquals(failedTaskInfo.getAction(), task.get("action")); assertEquals(Long.toString(failedTaskInfo.getId()), task.get("id").toString()); @SuppressWarnings("unchecked") Map<String, Object> error = (Map<String, Object>) source.get("error"); assertEquals("Simulating operation failure", error.get("reason")); assertEquals("illegal_state_exception", error.get("type")); assertNull(source.get("result")); GetTaskResponse getResponse = expectFinishedTask(failedTaskId); assertNull(getResponse.getTask().getResponse()); assertEquals(error, getResponse.getTask().getErrorAsMap()); } public void testGetTaskNotFound() throws Exception { // Node isn't found, tasks index doesn't even exist expectNotFound(() -> client().admin().cluster().prepareGetTask("not_a_node:1").get()); // Node exists but the task still isn't found expectNotFound(() -> client().admin().cluster().prepareGetTask(new TaskId(internalCluster().getNodeNames()[0], 1)).get()); } public void testNodeNotFoundButTaskFound() throws Exception { // Save a fake task that looks like it is from a node that isn't part of the cluster CyclicBarrier b = new CyclicBarrier(2); TaskResultsService resultsService = internalCluster().getInstance(TaskResultsService.class); resultsService.storeResult( new TaskResult(new TaskInfo(new TaskId("fake", 1), "test", "test", "", null, 0, 0, false, TaskId.EMPTY_TASK_ID), new RuntimeException("test")), new ActionListener<Void>() { @Override public void onResponse(Void response) { try { b.await(); } catch (InterruptedException | BrokenBarrierException e) { onFailure(e); } } @Override public void onFailure(Exception e) { throw new RuntimeException(e); } }); b.await(); // Now we can find it! GetTaskResponse response = expectFinishedTask(new TaskId("fake:1")); assertEquals("test", response.getTask().getTask().getAction()); assertNotNull(response.getTask().getError()); assertNull(response.getTask().getResponse()); } @Override public void tearDown() throws Exception { for (Map.Entry<Tuple<String, String>, RecordingTaskManagerListener> entry : listeners.entrySet()) { ((MockTaskManager) internalCluster().getInstance(TransportService.class, entry.getKey().v1()).getTaskManager()) .removeListener(entry.getValue()); } listeners.clear(); super.tearDown(); } /** * Registers recording task event listeners with the given action mask on all nodes */ private void registerTaskManageListeners(String actionMasks) { for (String nodeName : internalCluster().getNodeNames()) { DiscoveryNode node = internalCluster().getInstance(ClusterService.class, nodeName).localNode(); RecordingTaskManagerListener listener = new RecordingTaskManagerListener(node.getId(), actionMasks.split(",")); ((MockTaskManager) internalCluster().getInstance(TransportService.class, nodeName).getTaskManager()).addListener(listener); RecordingTaskManagerListener oldListener = listeners.put(new Tuple<>(node.getName(), actionMasks), listener); assertNull(oldListener); } } /** * Resets all recording task event listeners with the given action mask on all nodes */ private void resetTaskManageListeners(String actionMasks) { for (Map.Entry<Tuple<String, String>, RecordingTaskManagerListener> entry : listeners.entrySet()) { if (actionMasks == null || entry.getKey().v2().equals(actionMasks)) { entry.getValue().reset(); } } } /** * Returns the number of events that satisfy the criteria across all nodes * * @param actionMasks action masks to match * @return number of events that satisfy the criteria */ private int numberOfEvents(String actionMasks, Function<Tuple<Boolean, TaskInfo>, Boolean> criteria) { return findEvents(actionMasks, criteria).size(); } /** * Returns all events that satisfy the criteria across all nodes * * @param actionMasks action masks to match * @return number of events that satisfy the criteria */ private List<TaskInfo> findEvents(String actionMasks, Function<Tuple<Boolean, TaskInfo>, Boolean> criteria) { List<TaskInfo> events = new ArrayList<>(); for (Map.Entry<Tuple<String, String>, RecordingTaskManagerListener> entry : listeners.entrySet()) { if (actionMasks == null || entry.getKey().v2().equals(actionMasks)) { for (Tuple<Boolean, TaskInfo> taskEvent : entry.getValue().getEvents()) { if (criteria.apply(taskEvent)) { events.add(taskEvent.v2()); } } } } return events; } /** * Asserts that all tasks in the tasks list have the same parentTask */ private void assertParentTask(List<TaskInfo> tasks, TaskInfo parentTask) { for (TaskInfo task : tasks) { assertParentTask(task, parentTask); } } private void assertParentTask(TaskInfo task, TaskInfo parentTask) { assertTrue(task.getParentTaskId().isSet()); assertEquals(parentTask.getTaskId().getNodeId(), task.getParentTaskId().getNodeId()); assertTrue(Strings.hasLength(task.getParentTaskId().getNodeId())); assertEquals(parentTask.getId(), task.getParentTaskId().getId()); } private ResourceNotFoundException expectNotFound(ThrowingRunnable r) { Exception e = expectThrows(Exception.class, r); ResourceNotFoundException notFound = (ResourceNotFoundException) ExceptionsHelper.unwrap(e, ResourceNotFoundException.class); if (notFound == null) throw new RuntimeException("Expected ResourceNotFoundException", e); return notFound; } /** * Fetch the task status from the list tasks API using it's "fallback to get from the task index" behavior. Asserts some obvious stuff * about the fetched task and returns a map of it's status. */ private GetTaskResponse expectFinishedTask(TaskId taskId) throws IOException { GetTaskResponse response = client().admin().cluster().prepareGetTask(taskId).get(); assertTrue("the task should have been completed before fetching", response.getTask().isCompleted()); TaskInfo info = response.getTask().getTask(); assertEquals(taskId, info.getTaskId()); assertNull(info.getStatus()); // The test task doesn't have any status return response; } }