/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.cluster.routing; import org.elasticsearch.Version; import org.elasticsearch.cluster.ClusterChangedEvent; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateUpdateTask; import org.elasticsearch.cluster.ESAllocationTestCase; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.routing.allocation.AllocationService; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; import org.junit.After; import org.junit.Before; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import static java.util.Collections.singleton; import static org.elasticsearch.cluster.routing.DelayedAllocationService.CLUSTER_UPDATE_TASK_SOURCE; import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED; import static org.elasticsearch.common.unit.TimeValue.timeValueMillis; import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds; import static org.hamcrest.Matchers.equalTo; import static org.mockito.Matchers.any; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoMoreInteractions; public class DelayedAllocationServiceTests extends ESAllocationTestCase { private TestDelayAllocationService delayedAllocationService; private MockAllocationService allocationService; private ClusterService clusterService; private ThreadPool threadPool; @Before public void createDelayedAllocationService() { threadPool = new TestThreadPool(getTestName()); clusterService = mock(ClusterService.class); allocationService = createAllocationService(Settings.EMPTY, new DelayedShardsMockGatewayAllocator()); delayedAllocationService = new TestDelayAllocationService(Settings.EMPTY, threadPool, clusterService, allocationService); verify(clusterService).addListener(delayedAllocationService); } @After public void shutdownThreadPool() throws Exception { terminate(threadPool); } public void testNoDelayedUnassigned() throws Exception { MetaData metaData = MetaData.builder() .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT) .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "0")) .numberOfShards(1).numberOfReplicas(1)) .build(); ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) .metaData(metaData) .routingTable(RoutingTable.builder().addAsNew(metaData.index("test")).build()).build(); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")).localNodeId("node1").masterNodeId("node1")) .build(); clusterState = allocationService.reroute(clusterState, "reroute"); // starting primaries clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); // starting replicas clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); assertThat(clusterState.getRoutingNodes().unassigned().size() > 0, equalTo(false)); ClusterState prevState = clusterState; // remove node2 and reroute DiscoveryNodes.Builder nodes = DiscoveryNodes.builder(clusterState.nodes()).remove("node2"); boolean nodeAvailableForAllocation = randomBoolean(); if (nodeAvailableForAllocation) { nodes.add(newNode("node3")); } clusterState = ClusterState.builder(clusterState).nodes(nodes).build(); clusterState = allocationService.deassociateDeadNodes(clusterState, true, "reroute"); ClusterState newState = clusterState; List<ShardRouting> unassignedShards = newState.getRoutingTable().shardsWithState(ShardRoutingState.UNASSIGNED); if (nodeAvailableForAllocation) { assertThat(unassignedShards.size(), equalTo(0)); } else { assertThat(unassignedShards.size(), equalTo(1)); assertThat(unassignedShards.get(0).unassignedInfo().isDelayed(), equalTo(false)); } delayedAllocationService.clusterChanged(new ClusterChangedEvent("test", newState, prevState)); verifyNoMoreInteractions(clusterService); assertNull(delayedAllocationService.delayedRerouteTask.get()); } public void testDelayedUnassignedScheduleReroute() throws Exception { TimeValue delaySetting = timeValueMillis(100); MetaData metaData = MetaData.builder() .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT) .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delaySetting)) .numberOfShards(1).numberOfReplicas(1)) .build(); ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) .metaData(metaData) .routingTable(RoutingTable.builder().addAsNew(metaData.index("test")).build()).build(); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")).localNodeId("node1").masterNodeId("node1")) .build(); final long baseTimestampNanos = System.nanoTime(); allocationService.setNanoTimeOverride(baseTimestampNanos); clusterState = allocationService.reroute(clusterState, "reroute"); // starting primaries clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); // starting replicas clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); assertFalse("no shards should be unassigned", clusterState.getRoutingNodes().unassigned().size() > 0); String nodeId = null; final List<ShardRouting> allShards = clusterState.getRoutingTable().allShards("test"); // we need to find the node with the replica otherwise we will not reroute for (ShardRouting shardRouting : allShards) { if (shardRouting.primary() == false) { nodeId = shardRouting.currentNodeId(); break; } } assertNotNull(nodeId); // remove node that has replica and reroute clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove(nodeId)).build(); clusterState = allocationService.deassociateDeadNodes(clusterState, true, "reroute"); ClusterState stateWithDelayedShard = clusterState; // make sure the replica is marked as delayed (i.e. not reallocated) assertEquals(1, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithDelayedShard)); ShardRouting delayedShard = stateWithDelayedShard.getRoutingNodes().unassigned().iterator().next(); assertEquals(baseTimestampNanos, delayedShard.unassignedInfo().getUnassignedTimeInNanos()); // mock ClusterService.submitStateUpdateTask() method CountDownLatch latch = new CountDownLatch(1); AtomicReference<ClusterStateUpdateTask> clusterStateUpdateTask = new AtomicReference<>(); doAnswer(invocationOnMock -> { clusterStateUpdateTask.set((ClusterStateUpdateTask)invocationOnMock.getArguments()[1]); latch.countDown(); return null; }).when(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), any(ClusterStateUpdateTask.class)); assertNull(delayedAllocationService.delayedRerouteTask.get()); long delayUntilClusterChangeEvent = TimeValue.timeValueNanos(randomInt((int)delaySetting.nanos() - 1)).nanos(); long clusterChangeEventTimestampNanos = baseTimestampNanos + delayUntilClusterChangeEvent; delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos); delayedAllocationService.clusterChanged(new ClusterChangedEvent("fake node left", stateWithDelayedShard, clusterState)); // check that delayed reroute task was created and registered with the proper settings DelayedAllocationService.DelayedRerouteTask delayedRerouteTask = delayedAllocationService.delayedRerouteTask.get(); assertNotNull(delayedRerouteTask); assertFalse(delayedRerouteTask.cancelScheduling.get()); assertThat(delayedRerouteTask.baseTimestampNanos, equalTo(clusterChangeEventTimestampNanos)); assertThat(delayedRerouteTask.nextDelay.nanos(), equalTo(delaySetting.nanos() - (clusterChangeEventTimestampNanos - baseTimestampNanos))); // check that submitStateUpdateTask() was invoked on the cluster service mock assertTrue(latch.await(30, TimeUnit.SECONDS)); verify(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), eq(clusterStateUpdateTask.get())); // advance the time on the allocation service to a timestamp that happened after the delayed scheduling long nanoTimeForReroute = clusterChangeEventTimestampNanos + delaySetting.nanos() + timeValueMillis(randomInt(200)).nanos(); allocationService.setNanoTimeOverride(nanoTimeForReroute); // apply cluster state ClusterState stateWithRemovedDelay = clusterStateUpdateTask.get().execute(stateWithDelayedShard); // check that shard is not delayed anymore assertEquals(0, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithRemovedDelay)); // check that task is now removed assertNull(delayedAllocationService.delayedRerouteTask.get()); // simulate calling listener (cluster change event) delayedAllocationService.setNanoTimeOverride(nanoTimeForReroute + timeValueMillis(randomInt(200)).nanos()); delayedAllocationService.clusterChanged( new ClusterChangedEvent(CLUSTER_UPDATE_TASK_SOURCE, stateWithRemovedDelay, stateWithDelayedShard)); // check that no new task is scheduled assertNull(delayedAllocationService.delayedRerouteTask.get()); // check that no further cluster state update was submitted verifyNoMoreInteractions(clusterService); } /** * This tests that a new delayed reroute is scheduled right after a delayed reroute was run */ public void testDelayedUnassignedScheduleRerouteAfterDelayedReroute() throws Exception { TimeValue shortDelaySetting = timeValueMillis(100); TimeValue longDelaySetting = TimeValue.timeValueSeconds(1); MetaData metaData = MetaData.builder() .put(IndexMetaData.builder("short_delay") .settings(settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), shortDelaySetting)) .numberOfShards(1).numberOfReplicas(1)) .put(IndexMetaData.builder("long_delay") .settings(settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), longDelaySetting)) .numberOfShards(1).numberOfReplicas(1)) .build(); ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData) .routingTable(RoutingTable.builder().addAsNew(metaData.index("short_delay")).addAsNew(metaData.index("long_delay")).build()) .nodes(DiscoveryNodes.builder() .add(newNode("node0", singleton(DiscoveryNode.Role.MASTER))).localNodeId("node0").masterNodeId("node0") .add(newNode("node1")).add(newNode("node2")).add(newNode("node3")).add(newNode("node4"))).build(); // allocate shards clusterState = allocationService.reroute(clusterState, "reroute"); // start primaries clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); // start replicas clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); assertThat("all shards should be started", clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(4)); // find replica of short_delay ShardRouting shortDelayReplica = null; for (ShardRouting shardRouting : clusterState.getRoutingTable().allShards("short_delay")) { if (shardRouting.primary() == false) { shortDelayReplica = shardRouting; break; } } assertNotNull(shortDelayReplica); // find replica of long_delay ShardRouting longDelayReplica = null; for (ShardRouting shardRouting : clusterState.getRoutingTable().allShards("long_delay")) { if (shardRouting.primary() == false) { longDelayReplica = shardRouting; break; } } assertNotNull(longDelayReplica); final long baseTimestampNanos = System.nanoTime(); // remove node of shortDelayReplica and node of longDelayReplica and reroute ClusterState clusterStateBeforeNodeLeft = clusterState; clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder(clusterState.nodes()) .remove(shortDelayReplica.currentNodeId()) .remove(longDelayReplica.currentNodeId())) .build(); // make sure both replicas are marked as delayed (i.e. not reallocated) allocationService.setNanoTimeOverride(baseTimestampNanos); clusterState = allocationService.deassociateDeadNodes(clusterState, true, "reroute"); final ClusterState stateWithDelayedShards = clusterState; assertEquals(2, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithDelayedShards)); RoutingNodes.UnassignedShards.UnassignedIterator iter = stateWithDelayedShards.getRoutingNodes().unassigned().iterator(); assertEquals(baseTimestampNanos, iter.next().unassignedInfo().getUnassignedTimeInNanos()); assertEquals(baseTimestampNanos, iter.next().unassignedInfo().getUnassignedTimeInNanos()); // mock ClusterService.submitStateUpdateTask() method CountDownLatch latch1 = new CountDownLatch(1); AtomicReference<ClusterStateUpdateTask> clusterStateUpdateTask1 = new AtomicReference<>(); doAnswer(invocationOnMock -> { clusterStateUpdateTask1.set((ClusterStateUpdateTask)invocationOnMock.getArguments()[1]); latch1.countDown(); return null; }).when(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), any(ClusterStateUpdateTask.class)); assertNull(delayedAllocationService.delayedRerouteTask.get()); long delayUntilClusterChangeEvent = TimeValue.timeValueNanos(randomInt((int)shortDelaySetting.nanos() - 1)).nanos(); long clusterChangeEventTimestampNanos = baseTimestampNanos + delayUntilClusterChangeEvent; delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos); delayedAllocationService.clusterChanged( new ClusterChangedEvent("fake node left", stateWithDelayedShards, clusterStateBeforeNodeLeft)); // check that delayed reroute task was created and registered with the proper settings DelayedAllocationService.DelayedRerouteTask firstDelayedRerouteTask = delayedAllocationService.delayedRerouteTask.get(); assertNotNull(firstDelayedRerouteTask); assertFalse(firstDelayedRerouteTask.cancelScheduling.get()); assertThat(firstDelayedRerouteTask.baseTimestampNanos, equalTo(clusterChangeEventTimestampNanos)); assertThat(firstDelayedRerouteTask.nextDelay.nanos(), equalTo(UnassignedInfo.findNextDelayedAllocation(clusterChangeEventTimestampNanos, stateWithDelayedShards))); assertThat(firstDelayedRerouteTask.nextDelay.nanos(), equalTo(shortDelaySetting.nanos() - (clusterChangeEventTimestampNanos - baseTimestampNanos))); // check that submitStateUpdateTask() was invoked on the cluster service mock assertTrue(latch1.await(30, TimeUnit.SECONDS)); verify(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), eq(clusterStateUpdateTask1.get())); // advance the time on the allocation service to a timestamp that happened after the delayed scheduling long nanoTimeForReroute = clusterChangeEventTimestampNanos + shortDelaySetting.nanos() + timeValueMillis(randomInt(50)).nanos(); allocationService.setNanoTimeOverride(nanoTimeForReroute); // apply cluster state ClusterState stateWithOnlyOneDelayedShard = clusterStateUpdateTask1.get().execute(stateWithDelayedShards); // check that shard is not delayed anymore assertEquals(1, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithOnlyOneDelayedShard)); // check that task is now removed assertNull(delayedAllocationService.delayedRerouteTask.get()); // mock ClusterService.submitStateUpdateTask() method again CountDownLatch latch2 = new CountDownLatch(1); AtomicReference<ClusterStateUpdateTask> clusterStateUpdateTask2 = new AtomicReference<>(); doAnswer(invocationOnMock -> { clusterStateUpdateTask2.set((ClusterStateUpdateTask)invocationOnMock.getArguments()[1]); latch2.countDown(); return null; }).when(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), any(ClusterStateUpdateTask.class)); // simulate calling listener (cluster change event) delayUntilClusterChangeEvent = timeValueMillis(randomInt(50)).nanos(); clusterChangeEventTimestampNanos = nanoTimeForReroute + delayUntilClusterChangeEvent; delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos); delayedAllocationService.clusterChanged( new ClusterChangedEvent(CLUSTER_UPDATE_TASK_SOURCE, stateWithOnlyOneDelayedShard, stateWithDelayedShards)); // check that new delayed reroute task was created and registered with the proper settings DelayedAllocationService.DelayedRerouteTask secondDelayedRerouteTask = delayedAllocationService.delayedRerouteTask.get(); assertNotNull(secondDelayedRerouteTask); assertFalse(secondDelayedRerouteTask.cancelScheduling.get()); assertThat(secondDelayedRerouteTask.baseTimestampNanos, equalTo(clusterChangeEventTimestampNanos)); assertThat(secondDelayedRerouteTask.nextDelay.nanos(), equalTo(UnassignedInfo.findNextDelayedAllocation(clusterChangeEventTimestampNanos, stateWithOnlyOneDelayedShard))); assertThat(secondDelayedRerouteTask.nextDelay.nanos(), equalTo(longDelaySetting.nanos() - (clusterChangeEventTimestampNanos - baseTimestampNanos))); // check that submitStateUpdateTask() was invoked on the cluster service mock assertTrue(latch2.await(30, TimeUnit.SECONDS)); verify(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), eq(clusterStateUpdateTask2.get())); // advance the time on the allocation service to a timestamp that happened after the delayed scheduling nanoTimeForReroute = clusterChangeEventTimestampNanos + longDelaySetting.nanos() + timeValueMillis(randomInt(50)).nanos(); allocationService.setNanoTimeOverride(nanoTimeForReroute); // apply cluster state ClusterState stateWithNoDelayedShards = clusterStateUpdateTask2.get().execute(stateWithOnlyOneDelayedShard); // check that shard is not delayed anymore assertEquals(0, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithNoDelayedShards)); // check that task is now removed assertNull(delayedAllocationService.delayedRerouteTask.get()); // simulate calling listener (cluster change event) delayedAllocationService.setNanoTimeOverride(nanoTimeForReroute + timeValueMillis(randomInt(50)).nanos()); delayedAllocationService.clusterChanged( new ClusterChangedEvent(CLUSTER_UPDATE_TASK_SOURCE, stateWithNoDelayedShards, stateWithOnlyOneDelayedShard)); // check that no new task is scheduled assertNull(delayedAllocationService.delayedRerouteTask.get()); // check that no further cluster state update was submitted verifyNoMoreInteractions(clusterService); } public void testDelayedUnassignedScheduleRerouteRescheduledOnShorterDelay() throws Exception { TimeValue delaySetting = timeValueSeconds(30); TimeValue shorterDelaySetting = timeValueMillis(100); MetaData metaData = MetaData.builder() .put(IndexMetaData.builder("foo").settings(settings(Version.CURRENT) .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delaySetting)) .numberOfShards(1).numberOfReplicas(1)) .put(IndexMetaData.builder("bar").settings(settings(Version.CURRENT) .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), shorterDelaySetting)) .numberOfShards(1).numberOfReplicas(1)) .build(); ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) .metaData(metaData) .routingTable(RoutingTable.builder() .addAsNew(metaData.index("foo")) .addAsNew(metaData.index("bar")) .build()).build(); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder() .add(newNode("node1")).add(newNode("node2")).add(newNode("node3")).add(newNode("node4")) .localNodeId("node1").masterNodeId("node1")) .build(); final long nodeLeftTimestampNanos = System.nanoTime(); allocationService.setNanoTimeOverride(nodeLeftTimestampNanos); clusterState = allocationService.reroute(clusterState, "reroute"); // starting primaries clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); // starting replicas clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); assertFalse("no shards should be unassigned", clusterState.getRoutingNodes().unassigned().size() > 0); String nodeIdOfFooReplica = null; for (ShardRouting shardRouting : clusterState.getRoutingTable().allShards("foo")) { if (shardRouting.primary() == false) { nodeIdOfFooReplica = shardRouting.currentNodeId(); break; } } assertNotNull(nodeIdOfFooReplica); // remove node that has replica and reroute clusterState = ClusterState.builder(clusterState).nodes( DiscoveryNodes.builder(clusterState.nodes()).remove(nodeIdOfFooReplica)).build(); clusterState = allocationService.deassociateDeadNodes(clusterState, true, "fake node left"); ClusterState stateWithDelayedShard = clusterState; // make sure the replica is marked as delayed (i.e. not reallocated) assertEquals(1, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithDelayedShard)); ShardRouting delayedShard = stateWithDelayedShard.getRoutingNodes().unassigned().iterator().next(); assertEquals(nodeLeftTimestampNanos, delayedShard.unassignedInfo().getUnassignedTimeInNanos()); assertNull(delayedAllocationService.delayedRerouteTask.get()); long delayUntilClusterChangeEvent = TimeValue.timeValueNanos(randomInt((int)shorterDelaySetting.nanos() - 1)).nanos(); long clusterChangeEventTimestampNanos = nodeLeftTimestampNanos + delayUntilClusterChangeEvent; delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos); delayedAllocationService.clusterChanged(new ClusterChangedEvent("fake node left", stateWithDelayedShard, clusterState)); // check that delayed reroute task was created and registered with the proper settings DelayedAllocationService.DelayedRerouteTask delayedRerouteTask = delayedAllocationService.delayedRerouteTask.get(); assertNotNull(delayedRerouteTask); assertFalse(delayedRerouteTask.cancelScheduling.get()); assertThat(delayedRerouteTask.baseTimestampNanos, equalTo(clusterChangeEventTimestampNanos)); assertThat(delayedRerouteTask.nextDelay.nanos(), equalTo(delaySetting.nanos() - (clusterChangeEventTimestampNanos - nodeLeftTimestampNanos))); if (randomBoolean()) { // update settings with shorter delay ClusterState stateWithShorterDelay = ClusterState.builder(stateWithDelayedShard).metaData(MetaData.builder( stateWithDelayedShard.metaData()).updateSettings(Settings.builder().put( UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), shorterDelaySetting).build(), "foo")).build(); delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos); delayedAllocationService.clusterChanged( new ClusterChangedEvent("apply shorter delay", stateWithShorterDelay, stateWithDelayedShard)); } else { // node leaves with replica shard of index bar that has shorter delay String nodeIdOfBarReplica = null; for (ShardRouting shardRouting : stateWithDelayedShard.getRoutingTable().allShards("bar")) { if (shardRouting.primary() == false) { nodeIdOfBarReplica = shardRouting.currentNodeId(); break; } } assertNotNull(nodeIdOfBarReplica); // remove node that has replica and reroute clusterState = ClusterState.builder(stateWithDelayedShard).nodes( DiscoveryNodes.builder(stateWithDelayedShard.nodes()).remove(nodeIdOfBarReplica)).build(); ClusterState stateWithShorterDelay = allocationService.deassociateDeadNodes(clusterState, true, "fake node left"); delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos); delayedAllocationService.clusterChanged( new ClusterChangedEvent("fake node left", stateWithShorterDelay, stateWithDelayedShard)); } // check that delayed reroute task was replaced by shorter reroute task DelayedAllocationService.DelayedRerouteTask shorterDelayedRerouteTask = delayedAllocationService.delayedRerouteTask.get(); assertNotNull(shorterDelayedRerouteTask); assertNotEquals(shorterDelayedRerouteTask, delayedRerouteTask); assertTrue(delayedRerouteTask.cancelScheduling.get()); // existing task was cancelled assertFalse(shorterDelayedRerouteTask.cancelScheduling.get()); assertThat(delayedRerouteTask.baseTimestampNanos, equalTo(clusterChangeEventTimestampNanos)); assertThat(shorterDelayedRerouteTask.nextDelay.nanos(), equalTo(shorterDelaySetting.nanos() - (clusterChangeEventTimestampNanos - nodeLeftTimestampNanos))); } private static class TestDelayAllocationService extends DelayedAllocationService { private volatile long nanoTimeOverride = -1L; TestDelayAllocationService(Settings settings, ThreadPool threadPool, ClusterService clusterService, AllocationService allocationService) { super(settings, threadPool, clusterService, allocationService); } @Override protected void assertClusterOrMasterStateThread() { // do not check this in the unit tests } public void setNanoTimeOverride(long nanoTime) { this.nanoTimeOverride = nanoTime; } @Override protected long currentNanoTime() { return nanoTimeOverride == -1L ? super.currentNanoTime() : nanoTimeOverride; } } }