/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.cluster.routing.allocation;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ESAllocationTestCase;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.RoutingNodes;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.allocation.command.AllocationCommands;
import org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand;
import org.elasticsearch.cluster.routing.allocation.decider.ClusterRebalanceAllocationDecider;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.shard.ShardId;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import static org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING;
import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING;
import static org.elasticsearch.cluster.routing.ShardRoutingState.RELOCATING;
import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED;
import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED;
import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.lessThan;
import static org.hamcrest.Matchers.not;
import static org.hamcrest.Matchers.nullValue;
public class FailedShardsRoutingTests extends ESAllocationTestCase {
private final Logger logger = Loggers.getLogger(FailedShardsRoutingTests.class);
public void testFailedShardPrimaryRelocatingToAndFrom() {
AllocationService allocation = createAllocationService(Settings.builder()
.put("cluster.routing.allocation.node_concurrent_recoveries", 10)
.put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always")
.build());
logger.info("--> building initial routing table");
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
.build();
RoutingTable routingTable = RoutingTable.builder()
.addAsNew(metaData.index("test"))
.build();
ClusterState clusterState = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(routingTable).build();
logger.info("--> adding 2 nodes on same rack and do rerouting");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder()
.add(newNode("node1"))
.add(newNode("node2"))
).build();
clusterState = allocation.reroute(clusterState, "reroute");
// starting primaries
clusterState = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
// starting replicas
clusterState = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
logger.info("--> verifying all is allocated");
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().node("node1").iterator().next().state(), equalTo(STARTED));
assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().node("node2").iterator().next().state(), equalTo(STARTED));
logger.info("--> adding additional node");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes())
.add(newNode("node3"))
).build();
clusterState = allocation.reroute(clusterState, "reroute");
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().node("node1").iterator().next().state(), equalTo(STARTED));
assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().node("node2").iterator().next().state(), equalTo(STARTED));
assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0));
String origPrimaryNodeId = clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId();
String origReplicaNodeId = clusterState.routingTable().index("test").shard(0).replicaShards().get(0).currentNodeId();
logger.info("--> moving primary shard to node3");
AllocationService.CommandsResult commandsResult = allocation.reroute(clusterState, new AllocationCommands(
new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")),
false, false);
assertThat(commandsResult.getClusterState(), not(equalTo(clusterState)));
clusterState = commandsResult.getClusterState();
assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING));
assertThat(clusterState.getRoutingNodes().node("node3").iterator().next().state(), equalTo(INITIALIZING));
logger.info("--> fail primary shard recovering instance on node3 being initialized");
clusterState = allocation.applyFailedShard(clusterState, clusterState.getRoutingNodes().node("node3").iterator().next());
assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(STARTED));
assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0));
logger.info("--> moving primary shard to node3");
commandsResult = allocation.reroute(clusterState, new AllocationCommands(
new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")),
false, false);
assertThat(commandsResult.getClusterState(), not(equalTo(clusterState)));
clusterState = commandsResult.getClusterState();
assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING));
assertThat(clusterState.getRoutingNodes().node("node3").iterator().next().state(), equalTo(INITIALIZING));
logger.info("--> fail primary shard recovering instance on node1 being relocated");
clusterState = allocation.applyFailedShard(clusterState, clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next());
// check promotion of replica to primary
assertThat(clusterState.getRoutingNodes().node(origReplicaNodeId).iterator().next().state(), equalTo(STARTED));
assertThat(clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), equalTo(origReplicaNodeId));
assertThat(clusterState.routingTable().index("test").shard(0).replicaShards().get(0).currentNodeId(), anyOf(equalTo(origPrimaryNodeId), equalTo("node3")));
}
public void testFailPrimaryStartedCheckReplicaElected() {
AllocationService strategy = createAllocationService(Settings.builder()
.put("cluster.routing.allocation.node_concurrent_recoveries", 10)
.put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always")
.build());
logger.info("Building initial routing table");
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
.build();
RoutingTable initialRoutingTable = RoutingTable.builder()
.addAsNew(metaData.index("test"))
.build();
ClusterState clusterState = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(initialRoutingTable).build();
logger.info("Adding two nodes and performing rerouting");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
logger.info("Start the shards (primaries)");
RoutingNodes routingNodes = clusterState.getRoutingNodes();
ClusterState newState = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING));
assertThat(newState, not(equalTo(clusterState)));
clusterState = newState;
assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(1));
for (int i = 0; i < clusterState.routingTable().index("test").shards().size(); i++) {
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(STARTED));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().currentNodeId(), anyOf(equalTo("node1"), equalTo("node2")));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().size(), equalTo(1));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).state(), equalTo(INITIALIZING));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).currentNodeId(), anyOf(equalTo("node2"), equalTo("node1")));
}
logger.info("Start the shards (backups)");
routingNodes = clusterState.getRoutingNodes();
newState = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING));
assertThat(newState, not(equalTo(clusterState)));
clusterState = newState;
assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(1));
for (int i = 0; i < clusterState.routingTable().index("test").shards().size(); i++) {
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(STARTED));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().currentNodeId(), anyOf(equalTo("node1"), equalTo("node2")));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().size(), equalTo(1));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).state(), equalTo(STARTED));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).currentNodeId(), anyOf(equalTo("node2"), equalTo("node1")));
}
logger.info("fail the primary shard, will have no place to be rerouted to (single node), so stays unassigned");
ShardRouting shardToFail = clusterState.routingTable().index("test").shard(0).primaryShard();
newState = strategy.applyFailedShard(clusterState, shardToFail);
assertThat(newState, not(equalTo(clusterState)));
clusterState = newState;
assertThat(clusterState.routingTable().index("test").shard(0).size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(0).size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), not(equalTo(shardToFail.currentNodeId())));
assertThat(clusterState.routingTable().index("test").shard(0).primaryShard().state(), equalTo(STARTED));
assertThat(clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), anyOf(equalTo("node1"), equalTo("node2")));
assertThat(clusterState.routingTable().index("test").shard(0).replicaShards().size(), equalTo(1));
assertThat(clusterState.routingTable().index("test").shard(0).replicaShards().get(0).state(), equalTo(UNASSIGNED));
}
public void testFirstAllocationFailureSingleNode() {
AllocationService strategy = createAllocationService(Settings.builder()
.put("cluster.routing.allocation.node_concurrent_recoveries", 10)
.put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always")
.build());
logger.info("Building initial routing table");
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
.build();
RoutingTable initialRoutingTable = RoutingTable.builder()
.addAsNew(metaData.index("test"))
.build();
ClusterState clusterState = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(initialRoutingTable).build();
logger.info("Adding single node and performing rerouting");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1"))).build();
ClusterState newState = strategy.reroute(clusterState, "reroute");
assertThat(newState, not(equalTo(clusterState)));
clusterState = newState;
assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(1));
for (int i = 0; i < clusterState.routingTable().index("test").shards().size(); i++) {
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).shards().size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(INITIALIZING));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().currentNodeId(), equalTo("node1"));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().size(), equalTo(1));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).state(), equalTo(UNASSIGNED));
}
logger.info("fail the first shard, will have no place to be rerouted to (single node), so stays unassigned");
ShardRouting firstShard = clusterState.getRoutingNodes().node("node1").iterator().next();
newState = strategy.applyFailedShard(clusterState, firstShard);
assertThat(newState, not(equalTo(clusterState)));
clusterState = newState;
assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(1));
for (int i = 0; i < clusterState.routingTable().index("test").shards().size(); i++) {
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).shards().size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(UNASSIGNED));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().currentNodeId(), nullValue());
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().size(), equalTo(1));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).state(), equalTo(UNASSIGNED));
}
}
public void testSingleShardMultipleAllocationFailures() {
AllocationService strategy = createAllocationService(Settings.builder()
.put("cluster.routing.allocation.node_concurrent_recoveries", 10)
.put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always")
.build());
logger.info("Building initial routing table");
int numberOfReplicas = scaledRandomIntBetween(2, 10);
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(numberOfReplicas))
.build();
RoutingTable initialRoutingTable = RoutingTable.builder()
.addAsNew(metaData.index("test"))
.build();
ClusterState clusterState = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(initialRoutingTable).build();
logger.info("Adding {} nodes and performing rerouting", numberOfReplicas + 1);
DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder();
for (int i = 0; i < numberOfReplicas + 1; i++) {
nodeBuilder.add(newNode("node" + Integer.toString(i)));
}
clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build();
while (!clusterState.routingTable().shardsWithState(UNASSIGNED).isEmpty()) {
// start all initializing
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
// and assign more unassigned
clusterState = strategy.reroute(clusterState, "reroute");
}
int shardsToFail = randomIntBetween(1, numberOfReplicas);
ArrayList<FailedShard> failedShards = new ArrayList<>();
RoutingNodes routingNodes = clusterState.getRoutingNodes();
Set<String> failedNodes = new HashSet<>();
Set<ShardRouting> shardRoutingsToFail = new HashSet<>();
for (int i = 0; i < shardsToFail; i++) {
String failedNode = "node" + Integer.toString(randomInt(numberOfReplicas));
logger.info("failing shard on node [{}]", failedNode);
ShardRouting shardToFail = routingNodes.node(failedNode).iterator().next();
if (shardRoutingsToFail.contains(shardToFail) == false) {
failedShards.add(new FailedShard(shardToFail, null, null));
failedNodes.add(failedNode);
shardRoutingsToFail.add(shardToFail);
}
}
clusterState = strategy.applyFailedShards(clusterState, failedShards);
routingNodes = clusterState.getRoutingNodes();
for (FailedShard failedShard : failedShards) {
if (routingNodes.getByAllocationId(failedShard.getRoutingEntry().shardId(),
failedShard.getRoutingEntry().allocationId().getId()) != null) {
fail("shard " + failedShard + " was not failed");
}
}
for (String failedNode : failedNodes) {
if (!routingNodes.node(failedNode).isEmpty()) {
fail("shard was re-assigned to failed node " + failedNode);
}
}
}
public void testFirstAllocationFailureTwoNodes() {
AllocationService strategy = createAllocationService(Settings.builder()
.put("cluster.routing.allocation.node_concurrent_recoveries", 10)
.put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always")
.build());
logger.info("Building initial routing table");
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
.build();
RoutingTable initialRoutingTable = RoutingTable.builder()
.addAsNew(metaData.index("test"))
.build();
ClusterState clusterState = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(initialRoutingTable).build();
logger.info("Adding two nodes and performing rerouting");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))).build();
ClusterState newState = strategy.reroute(clusterState, "reroute");
assertThat(newState, not(clusterState));
clusterState = newState;
final String nodeHoldingPrimary = clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId();
assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(1));
for (int i = 0; i < clusterState.routingTable().index("test").shards().size(); i++) {
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).shards().size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(INITIALIZING));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().currentNodeId(), equalTo(nodeHoldingPrimary));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().size(), equalTo(1));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).state(), equalTo(UNASSIGNED));
}
logger.info("fail the first shard, will start INITIALIZING on the second node");
final ShardRouting firstShard = clusterState.getRoutingNodes().node(nodeHoldingPrimary).iterator().next();
newState = strategy.applyFailedShard(clusterState, firstShard);
assertThat(newState, not(equalTo(clusterState)));
clusterState = newState;
final String nodeHoldingPrimary2 = clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId();
assertThat(nodeHoldingPrimary2, not(equalTo(nodeHoldingPrimary)));
assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(1));
for (int i = 0; i < clusterState.routingTable().index("test").shards().size(); i++) {
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).shards().size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(INITIALIZING));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().currentNodeId(), not(equalTo(nodeHoldingPrimary)));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().size(), equalTo(1));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).state(), equalTo(UNASSIGNED));
}
}
public void testRebalanceFailure() {
AllocationService strategy = createAllocationService(Settings.builder()
.put("cluster.routing.allocation.node_concurrent_recoveries", 10)
.put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always")
.build());
logger.info("Building initial routing table");
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(2).numberOfReplicas(1))
.build();
RoutingTable initialRoutingTable = RoutingTable.builder()
.addAsNew(metaData.index("test"))
.build();
ClusterState clusterState = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(initialRoutingTable).build();
logger.info("Adding two nodes and performing rerouting");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
logger.info("Start the shards (primaries)");
RoutingNodes routingNodes = clusterState.getRoutingNodes();
ClusterState newState = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING));
assertThat(newState, not(equalTo(clusterState)));
clusterState = newState;
assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(2));
for (int i = 0; i < clusterState.routingTable().index("test").shards().size(); i++) {
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).shards().size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(STARTED));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().currentNodeId(), anyOf(equalTo("node1"), equalTo("node2")));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().size(), equalTo(1));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).state(), equalTo(INITIALIZING));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).currentNodeId(), anyOf(equalTo("node2"), equalTo("node1")));
}
logger.info("Start the shards (backups)");
routingNodes = clusterState.getRoutingNodes();
newState = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING));
assertThat(newState, not(equalTo(clusterState)));
clusterState = newState;
assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(2));
for (int i = 0; i < clusterState.routingTable().index("test").shards().size(); i++) {
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).shards().size(), equalTo(2));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(STARTED));
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().currentNodeId(), anyOf(equalTo("node1"), equalTo("node2")));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().size(), equalTo(1));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).state(), equalTo(STARTED));
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).currentNodeId(), anyOf(equalTo("node2"), equalTo("node1")));
}
logger.info("Adding third node and reroute");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node3"))).build();
newState = strategy.reroute(clusterState, "reroute");
assertThat(newState, not(equalTo(clusterState)));
clusterState = newState;
routingNodes = clusterState.getRoutingNodes();
assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(2));
assertThat(routingNodes.node("node1").numberOfShardsWithState(STARTED, RELOCATING), equalTo(2));
assertThat(routingNodes.node("node1").numberOfShardsWithState(STARTED), lessThan(3));
assertThat(routingNodes.node("node2").numberOfShardsWithState(STARTED, RELOCATING), equalTo(2));
assertThat(routingNodes.node("node2").numberOfShardsWithState(STARTED), lessThan(3));
assertThat(routingNodes.node("node3").numberOfShardsWithState(INITIALIZING), equalTo(1));
logger.info("Fail the shards on node 3");
ShardRouting shardToFail = routingNodes.node("node3").iterator().next();
newState = strategy.applyFailedShard(clusterState, shardToFail);
assertThat(newState, not(equalTo(clusterState)));
clusterState = newState;
routingNodes = clusterState.getRoutingNodes();
assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(2));
assertThat(routingNodes.node("node1").numberOfShardsWithState(STARTED, RELOCATING), equalTo(2));
assertThat(routingNodes.node("node1").numberOfShardsWithState(STARTED), lessThan(3));
assertThat(routingNodes.node("node2").numberOfShardsWithState(STARTED, RELOCATING), equalTo(2));
assertThat(routingNodes.node("node2").numberOfShardsWithState(STARTED), lessThan(3));
assertThat(routingNodes.node("node3").numberOfShardsWithState(INITIALIZING), equalTo(1));
// make sure the failedShard is not INITIALIZING again on node3
assertThat(routingNodes.node("node3").iterator().next().shardId(), not(equalTo(shardToFail.shardId())));
}
public void testFailAllReplicasInitializingOnPrimaryFail() {
AllocationService allocation = createAllocationService(Settings.builder()
.build());
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(2))
.build();
RoutingTable routingTable = RoutingTable.builder()
.addAsNew(metaData.index("test"))
.build();
ClusterState clusterState = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
.metaData(metaData).routingTable(routingTable).build();
ShardId shardId = new ShardId(metaData.index("test").getIndex(), 0);
// add 4 nodes
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")).add(newNode("node3")).add(newNode("node4"))).build();
clusterState = ClusterState.builder(clusterState).routingTable(allocation.reroute(clusterState, "reroute").routingTable()).build();
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(2));
// start primary shards
clusterState = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(2));
// start one replica so it can take over.
clusterState = allocation.applyStartedShards(clusterState,
Collections.singletonList(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).get(0)));
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2));
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
ShardRouting startedReplica = clusterState.getRoutingNodes().activeReplica(shardId);
// fail the primary shard, check replicas get removed as well...
ShardRouting primaryShardToFail = clusterState.routingTable().index("test").shard(0).primaryShard();
ClusterState newState = allocation.applyFailedShard(clusterState, primaryShardToFail);
assertThat(newState, not(equalTo(clusterState)));
clusterState = newState;
// the primary gets allocated on another node, replicas are initializing
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(2));
ShardRouting newPrimaryShard = clusterState.routingTable().index("test").shard(0).primaryShard();
assertThat(newPrimaryShard, not(equalTo(primaryShardToFail)));
assertThat(newPrimaryShard.allocationId(), equalTo(startedReplica.allocationId()));
}
public void testFailAllReplicasInitializingOnPrimaryFailWhileHavingAReplicaToElect() {
AllocationService allocation = createAllocationService(Settings.builder()
.build());
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(2))
.build();
RoutingTable routingTable = RoutingTable.builder()
.addAsNew(metaData.index("test"))
.build();
ClusterState clusterState = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(routingTable).build();
// add 4 nodes
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")).add(newNode("node3")).add(newNode("node4"))).build();
clusterState = allocation.reroute(clusterState, "reroute");
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(2));
// start primary shards
clusterState = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(2));
// start another replica shard, while keep one initializing
clusterState = allocation.applyStartedShards(clusterState, Collections.singletonList(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).get(0)));
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2));
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
// fail the primary shard, check one replica gets elected to primary, others become INITIALIZING (from it)
ShardRouting primaryShardToFail = clusterState.routingTable().index("test").shard(0).primaryShard();
ClusterState newState = allocation.applyFailedShard(clusterState, primaryShardToFail);
assertThat(newState, not(equalTo(clusterState)));
clusterState = newState;
assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(2));
ShardRouting newPrimaryShard = clusterState.routingTable().index("test").shard(0).primaryShard();
assertThat(newPrimaryShard, not(equalTo(primaryShardToFail)));
}
}