/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.cluster.allocation; import org.apache.logging.log4j.Logger; import org.apache.lucene.util.IOUtils; import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; import org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteResponse; import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.health.ClusterHealthStatus; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.ShardRoutingState; import org.elasticsearch.cluster.routing.allocation.RerouteExplanation; import org.elasticsearch.cluster.routing.allocation.RoutingExplanations; import org.elasticsearch.cluster.routing.allocation.command.AllocateEmptyPrimaryAllocationCommand; import org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand; import org.elasticsearch.cluster.routing.allocation.decider.Decision; import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider.Allocation; import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; import org.elasticsearch.common.Priority; import org.elasticsearch.common.io.FileSystemUtils; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.index.Index; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESIntegTestCase.ClusterScope; import org.elasticsearch.test.ESIntegTestCase.Scope; import org.elasticsearch.test.InternalTestCluster; import java.nio.file.Path; import java.util.Arrays; import java.util.List; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_BLOCKS_METADATA; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_BLOCKS_READ; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_BLOCKS_WRITE; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_READ_ONLY; import static org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertBlocked; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; @ClusterScope(scope = Scope.TEST, numDataNodes = 0) public class ClusterRerouteIT extends ESIntegTestCase { private final Logger logger = Loggers.getLogger(ClusterRerouteIT.class); public void testRerouteWithCommands_disableAllocationSettings() throws Exception { Settings commonSettings = Settings.builder() .put(EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(), "none") .put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none") .build(); rerouteWithCommands(commonSettings); } public void testRerouteWithCommands_enableAllocationSettings() throws Exception { Settings commonSettings = Settings.builder() .put(CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(), Allocation.NONE.name()) .build(); rerouteWithCommands(commonSettings); } private void rerouteWithCommands(Settings commonSettings) throws Exception { List<String> nodesIds = internalCluster().startNodes(2, commonSettings); final String node_1 = nodesIds.get(0); final String node_2 = nodesIds.get(1); logger.info("--> create an index with 1 shard, 1 replica, nothing should allocate"); client().admin().indices().prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE) .setSettings(Settings.builder().put("index.number_of_shards", 1)) .execute().actionGet(); ClusterState state = client().admin().cluster().prepareState().execute().actionGet().getState(); assertThat(state.getRoutingNodes().unassigned().size(), equalTo(2)); logger.info("--> explicitly allocate shard 1, *under dry_run*"); state = client().admin().cluster().prepareReroute() .setExplain(randomBoolean()) .add(new AllocateEmptyPrimaryAllocationCommand("test", 0, node_1, true)) .setDryRun(true) .execute().actionGet().getState(); assertThat(state.getRoutingNodes().unassigned().size(), equalTo(1)); assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_1).getId()).iterator().next().state(), equalTo(ShardRoutingState.INITIALIZING)); logger.info("--> get the state, verify nothing changed because of the dry run"); state = client().admin().cluster().prepareState().execute().actionGet().getState(); assertThat(state.getRoutingNodes().unassigned().size(), equalTo(2)); logger.info("--> explicitly allocate shard 1, actually allocating, no dry run"); state = client().admin().cluster().prepareReroute() .setExplain(randomBoolean()) .add(new AllocateEmptyPrimaryAllocationCommand("test", 0, node_1, true)) .execute().actionGet().getState(); assertThat(state.getRoutingNodes().unassigned().size(), equalTo(1)); assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_1).getId()).iterator().next().state(), equalTo(ShardRoutingState.INITIALIZING)); ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForYellowStatus().execute().actionGet(); assertThat(healthResponse.isTimedOut(), equalTo(false)); logger.info("--> get the state, verify shard 1 primary allocated"); state = client().admin().cluster().prepareState().execute().actionGet().getState(); assertThat(state.getRoutingNodes().unassigned().size(), equalTo(1)); assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_1).getId()).iterator().next().state(), equalTo(ShardRoutingState.STARTED)); logger.info("--> move shard 1 primary from node1 to node2"); state = client().admin().cluster().prepareReroute() .setExplain(randomBoolean()) .add(new MoveAllocationCommand("test", 0, node_1, node_2)) .execute().actionGet().getState(); assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_1).getId()).iterator().next().state(), equalTo(ShardRoutingState.RELOCATING)); assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_2).getId()).iterator().next().state(), equalTo(ShardRoutingState.INITIALIZING)); healthResponse = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForYellowStatus().setWaitForNoRelocatingShards(true).execute().actionGet(); assertThat(healthResponse.isTimedOut(), equalTo(false)); logger.info("--> get the state, verify shard 1 primary moved from node1 to node2"); state = client().admin().cluster().prepareState().execute().actionGet().getState(); assertThat(state.getRoutingNodes().unassigned().size(), equalTo(1)); assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_2).getId()).iterator().next().state(), equalTo(ShardRoutingState.STARTED)); } public void testRerouteWithAllocateLocalGateway_disableAllocationSettings() throws Exception { Settings commonSettings = Settings.builder() .put(EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(), "none") .put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none") .build(); rerouteWithAllocateLocalGateway(commonSettings); } public void testRerouteWithAllocateLocalGateway_enableAllocationSettings() throws Exception { Settings commonSettings = Settings.builder() .put(CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(), Allocation.NONE.name()) .build(); rerouteWithAllocateLocalGateway(commonSettings); } public void testDelayWithALargeAmountOfShards() throws Exception { Settings commonSettings = Settings.builder() .put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING.getKey(), 1) .put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_OUTGOING_RECOVERIES_SETTING.getKey(), 1) .build(); logger.info("--> starting 4 nodes"); String node_1 = internalCluster().startNode(commonSettings); internalCluster().startNode(commonSettings); internalCluster().startNode(commonSettings); internalCluster().startNode(commonSettings); assertThat(cluster().size(), equalTo(4)); ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth().setWaitForNodes("4").execute().actionGet(); assertThat(healthResponse.isTimedOut(), equalTo(false)); logger.info("--> create indices"); for (int i = 0; i < 25; i++) { client().admin().indices().prepareCreate("test" + i) .setSettings(Settings.builder() .put("index.number_of_shards", 5).put("index.number_of_replicas", 1) .put("index.unassigned.node_left.delayed_timeout", randomIntBetween(250, 1000) + "ms")) .execute().actionGet(); } ensureGreen(TimeValue.timeValueMinutes(1)); logger.info("--> stopping node1"); internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node_1)); // This might run slowly on older hardware ensureGreen(TimeValue.timeValueMinutes(2)); } private void rerouteWithAllocateLocalGateway(Settings commonSettings) throws Exception { logger.info("--> starting 2 nodes"); String node_1 = internalCluster().startNode(commonSettings); internalCluster().startNode(commonSettings); assertThat(cluster().size(), equalTo(2)); ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth().setWaitForNodes("2").execute().actionGet(); assertThat(healthResponse.isTimedOut(), equalTo(false)); logger.info("--> create an index with 1 shard, 1 replica, nothing should allocate"); client().admin().indices().prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE) .setSettings(Settings.builder().put("index.number_of_shards", 1)) .execute().actionGet(); ClusterState state = client().admin().cluster().prepareState().execute().actionGet().getState(); assertThat(state.getRoutingNodes().unassigned().size(), equalTo(2)); logger.info("--> explicitly allocate shard 1, actually allocating, no dry run"); state = client().admin().cluster().prepareReroute() .setExplain(randomBoolean()) .add(new AllocateEmptyPrimaryAllocationCommand("test", 0, node_1, true)) .execute().actionGet().getState(); assertThat(state.getRoutingNodes().unassigned().size(), equalTo(1)); assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_1).getId()).iterator().next().state(), equalTo(ShardRoutingState.INITIALIZING)); healthResponse = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForYellowStatus().execute().actionGet(); assertThat(healthResponse.isTimedOut(), equalTo(false)); logger.info("--> get the state, verify shard 1 primary allocated"); state = client().admin().cluster().prepareState().execute().actionGet().getState(); assertThat(state.getRoutingNodes().unassigned().size(), equalTo(1)); assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_1).getId()).iterator().next().state(), equalTo(ShardRoutingState.STARTED)); client().prepareIndex("test", "type", "1").setSource("field", "value").setRefreshPolicy(RefreshPolicy.IMMEDIATE).get(); final Index index = resolveIndex("test"); logger.info("--> closing all nodes"); Path[] shardLocation = internalCluster().getInstance(NodeEnvironment.class, node_1).availableShardPaths(new ShardId(index, 0)); assertThat(FileSystemUtils.exists(shardLocation), equalTo(true)); // make sure the data is there! internalCluster().closeNonSharedNodes(false); // don't wipe data directories the index needs to be there! logger.info("--> deleting the shard data [{}] ", Arrays.toString(shardLocation)); assertThat(FileSystemUtils.exists(shardLocation), equalTo(true)); // verify again after cluster was shut down IOUtils.rm(shardLocation); logger.info("--> starting nodes back, will not allocate the shard since it has no data, but the index will be there"); node_1 = internalCluster().startNode(commonSettings); internalCluster().startNode(commonSettings); // wait a bit for the cluster to realize that the shard is not there... // TODO can we get around this? the cluster is RED, so what do we wait for? client().admin().cluster().prepareReroute().get(); assertThat(client().admin().cluster().prepareHealth().setWaitForNodes("2").execute().actionGet().getStatus(), equalTo(ClusterHealthStatus.RED)); logger.info("--> explicitly allocate primary"); state = client().admin().cluster().prepareReroute() .setExplain(randomBoolean()) .add(new AllocateEmptyPrimaryAllocationCommand("test", 0, node_1, true)) .execute().actionGet().getState(); assertThat(state.getRoutingNodes().unassigned().size(), equalTo(1)); assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_1).getId()).iterator().next().state(), equalTo(ShardRoutingState.INITIALIZING)); logger.info("--> get the state, verify shard 1 primary allocated"); final String nodeToCheck = node_1; assertBusy(() -> { ClusterState clusterState = client().admin().cluster().prepareState().execute().actionGet().getState(); String nodeId = clusterState.nodes().resolveNode(nodeToCheck).getId(); assertThat(clusterState.getRoutingNodes().node(nodeId).iterator().next().state(), equalTo(ShardRoutingState.STARTED)); }); } public void testRerouteExplain() { Settings commonSettings = Settings.builder().build(); logger.info("--> starting a node"); String node_1 = internalCluster().startNode(commonSettings); assertThat(cluster().size(), equalTo(1)); ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth().setWaitForNodes("1").execute().actionGet(); assertThat(healthResponse.isTimedOut(), equalTo(false)); logger.info("--> create an index with 1 shard"); client().admin().indices().prepareCreate("test") .setSettings(Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", 0)) .execute().actionGet(); ensureGreen("test"); logger.info("--> disable allocation"); Settings newSettings = Settings.builder() .put(CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(), Allocation.NONE.name()) .build(); client().admin().cluster().prepareUpdateSettings().setTransientSettings(newSettings).execute().actionGet(); logger.info("--> starting a second node"); String node_2 = internalCluster().startNode(commonSettings); assertThat(cluster().size(), equalTo(2)); healthResponse = client().admin().cluster().prepareHealth().setWaitForNodes("2").execute().actionGet(); assertThat(healthResponse.isTimedOut(), equalTo(false)); logger.info("--> try to move the shard from node1 to node2"); MoveAllocationCommand cmd = new MoveAllocationCommand("test", 0, node_1, node_2); ClusterRerouteResponse resp = client().admin().cluster().prepareReroute().add(cmd).setExplain(true).execute().actionGet(); RoutingExplanations e = resp.getExplanations(); assertThat(e.explanations().size(), equalTo(1)); RerouteExplanation explanation = e.explanations().get(0); assertThat(explanation.command().name(), equalTo(cmd.name())); assertThat(((MoveAllocationCommand)explanation.command()).shardId(), equalTo(cmd.shardId())); assertThat(((MoveAllocationCommand)explanation.command()).fromNode(), equalTo(cmd.fromNode())); assertThat(((MoveAllocationCommand)explanation.command()).toNode(), equalTo(cmd.toNode())); assertThat(explanation.decisions().type(), equalTo(Decision.Type.YES)); } public void testClusterRerouteWithBlocks() throws Exception { List<String> nodesIds = internalCluster().startNodes(2); logger.info("--> create an index with 1 shard and 0 replicas"); assertAcked(prepareCreate("test-blocks").setSettings(Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", 0))); ensureGreen("test-blocks"); logger.info("--> check that the index has 1 shard"); ClusterState state = client().admin().cluster().prepareState().execute().actionGet().getState(); List<ShardRouting> shards = state.routingTable().allShards("test-blocks"); assertThat(shards, hasSize(1)); logger.info("--> check that the shard is allocated"); ShardRouting shard = shards.get(0); assertThat(shard.assignedToNode(), equalTo(true)); logger.info("--> retrieve the node where the shard is allocated"); DiscoveryNode node = state.nodes().resolveNode(shard.currentNodeId()); assertNotNull(node); // toggle is used to mve the shard from one node to another int toggle = nodesIds.indexOf(node.getName()); // Rerouting shards is not blocked for (String blockSetting : Arrays.asList(SETTING_BLOCKS_READ, SETTING_BLOCKS_WRITE, SETTING_READ_ONLY, SETTING_BLOCKS_METADATA)) { try { enableIndexBlock("test-blocks", blockSetting); assertAcked(client().admin().cluster().prepareReroute() .add(new MoveAllocationCommand("test-blocks", 0, nodesIds.get(toggle % 2), nodesIds.get(++toggle % 2)))); ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth().setWaitForYellowStatus().setWaitForNoRelocatingShards(true).execute().actionGet(); assertThat(healthResponse.isTimedOut(), equalTo(false)); } finally { disableIndexBlock("test-blocks", blockSetting); } } // Rerouting shards is blocked when the cluster is read only try { setClusterReadOnly(true); assertBlocked(client().admin().cluster().prepareReroute() .add(new MoveAllocationCommand("test-blocks", 1, nodesIds.get(toggle % 2), nodesIds.get(++toggle % 2)))); } finally { setClusterReadOnly(false); } } }