/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.cluster.routing.allocation;
import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ESAllocationTestCase;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.RecoverySource;
import org.elasticsearch.cluster.routing.RecoverySource.SnapshotRecoverySource;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.ShardRoutingHelper;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.cluster.routing.allocation.command.AllocationCommands;
import org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand;
import org.elasticsearch.cluster.routing.allocation.decider.Decision;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.snapshots.Snapshot;
import org.elasticsearch.snapshots.SnapshotId;
import org.elasticsearch.test.gateway.TestGatewayAllocator;
import java.util.Collections;
import static org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING;
import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING;
import static org.elasticsearch.cluster.routing.ShardRoutingState.RELOCATING;
import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED;
import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED;
import static org.hamcrest.Matchers.equalTo;
public class ThrottlingAllocationTests extends ESAllocationTestCase {
private final Logger logger = Loggers.getLogger(ThrottlingAllocationTests.class);
public void testPrimaryRecoveryThrottling() {
TestGatewayAllocator gatewayAllocator = new TestGatewayAllocator();
AllocationService strategy = createAllocationService(Settings.builder()
.put("cluster.routing.allocation.node_concurrent_recoveries", 3)
.put("cluster.routing.allocation.node_initial_primaries_recoveries", 3)
.build(), gatewayAllocator);
logger.info("Building initial routing table");
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(10).numberOfReplicas(1))
.build();
ClusterState clusterState = createRecoveryStateAndInitalizeAllocations(metaData, gatewayAllocator);
logger.info("start one node, do reroute, only 3 should initialize");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(0));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(3));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(17));
logger.info("start initializing, another 3 should initialize");
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(3));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(3));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(14));
logger.info("start initializing, another 3 should initialize");
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(6));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(3));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(11));
logger.info("start initializing, another 1 should initialize");
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(9));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(1));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(10));
logger.info("start initializing, all primaries should be started");
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(10));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(0));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(10));
}
public void testReplicaAndPrimaryRecoveryThrottling() {
TestGatewayAllocator gatewayAllocator = new TestGatewayAllocator();
AllocationService strategy = createAllocationService(Settings.builder()
.put("cluster.routing.allocation.node_concurrent_recoveries", 3)
.put("cluster.routing.allocation.concurrent_source_recoveries", 3)
.put("cluster.routing.allocation.node_initial_primaries_recoveries", 3)
.build(),
gatewayAllocator);
logger.info("Building initial routing table");
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(5).numberOfReplicas(1))
.build();
ClusterState clusterState = createRecoveryStateAndInitalizeAllocations(metaData, gatewayAllocator);
logger.info("with one node, do reroute, only 3 should initialize");
clusterState = strategy.reroute(clusterState, "reroute");
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(0));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(3));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(7));
logger.info("start initializing, another 2 should initialize");
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(3));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(2));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(5));
logger.info("start initializing, all primaries should be started");
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(5));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(0));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(5));
logger.info("start another node, replicas should start being allocated");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node2"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(5));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(3));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(2));
logger.info("start initializing replicas");
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(8));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(2));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(0));
logger.info("start initializing replicas, all should be started");
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(10));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(0));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(0));
}
public void testThrottleIncomingAndOutgoing() {
TestGatewayAllocator gatewayAllocator = new TestGatewayAllocator();
Settings settings = Settings.builder()
.put("cluster.routing.allocation.node_concurrent_recoveries", 5)
.put("cluster.routing.allocation.node_initial_primaries_recoveries", 5)
.put("cluster.routing.allocation.cluster_concurrent_rebalance", 5)
.build();
AllocationService strategy = createAllocationService(settings, gatewayAllocator);
logger.info("Building initial routing table");
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(9).numberOfReplicas(0))
.build();
ClusterState clusterState = createRecoveryStateAndInitalizeAllocations(metaData, gatewayAllocator);
logger.info("with one node, do reroute, only 5 should initialize");
clusterState = strategy.reroute(clusterState, "reroute");
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(0));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(5));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(4));
assertEquals(clusterState.getRoutingNodes().getIncomingRecoveries("node1"), 5);
logger.info("start initializing, all primaries should be started");
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(5));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(4));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(0));
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
logger.info("start another 2 nodes, 5 shards should be relocating - at most 5 are allowed per node");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node2")).add(newNode("node3"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(4));
assertThat(clusterState.routingTable().shardsWithState(RELOCATING).size(), equalTo(5));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(5));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(0));
assertEquals(clusterState.getRoutingNodes().getIncomingRecoveries("node2"), 3);
assertEquals(clusterState.getRoutingNodes().getIncomingRecoveries("node3"), 2);
assertEquals(clusterState.getRoutingNodes().getIncomingRecoveries("node1"), 0);
assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node1"), 5);
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
logger.info("start the relocating shards, one more shard should relocate away from node1");
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(8));
assertThat(clusterState.routingTable().shardsWithState(RELOCATING).size(), equalTo(1));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(1));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(0));
assertEquals(clusterState.getRoutingNodes().getIncomingRecoveries("node2"), 0);
assertEquals(clusterState.getRoutingNodes().getIncomingRecoveries("node3"), 1);
assertEquals(clusterState.getRoutingNodes().getIncomingRecoveries("node1"), 0);
assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node1"), 1);
}
public void testOutgoingThrottlesAllocation() {
TestGatewayAllocator gatewayAllocator = new TestGatewayAllocator();
AllocationService strategy = createAllocationService(Settings.builder()
.put("cluster.routing.allocation.node_concurrent_outgoing_recoveries", 1)
.build(), gatewayAllocator);
logger.info("Building initial routing table");
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(2))
.build();
ClusterState clusterState = createRecoveryStateAndInitalizeAllocations(metaData, gatewayAllocator);
logger.info("with one node, do reroute, only 1 should initialize");
clusterState = strategy.reroute(clusterState, "reroute");
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(0));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(1));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(2));
logger.info("start initializing");
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(1));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(0));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(2));
logger.info("start one more node, first non-primary should start being allocated");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node2"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(1));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(1));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(1));
assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node1"), 1);
logger.info("start initializing non-primary");
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().shardsWithState(INITIALIZING));
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(2));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(0));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(1));
assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node1"), 0);
logger.info("start one more node, initializing second non-primary");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node3"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(2));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(1));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(0));
assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node1"), 1);
logger.info("start one more node");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node4"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node1"), 1);
logger.info("move started non-primary to new node");
AllocationService.CommandsResult commandsResult = strategy.reroute(clusterState, new AllocationCommands(
new MoveAllocationCommand("test", 0, "node2", "node4")), true, false);
assertEquals(commandsResult.explanations().explanations().size(), 1);
assertEquals(commandsResult.explanations().explanations().get(0).decisions().type(), Decision.Type.THROTTLE);
// even though it is throttled, move command still forces allocation
clusterState = commandsResult.getClusterState();
assertThat(clusterState.routingTable().shardsWithState(STARTED).size(), equalTo(1));
assertThat(clusterState.routingTable().shardsWithState(RELOCATING).size(), equalTo(1));
assertThat(clusterState.routingTable().shardsWithState(INITIALIZING).size(), equalTo(2));
assertThat(clusterState.routingTable().shardsWithState(UNASSIGNED).size(), equalTo(0));
assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node1"), 2);
assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node2"), 0);
}
private ClusterState createRecoveryStateAndInitalizeAllocations(MetaData metaData, TestGatewayAllocator gatewayAllocator) {
DiscoveryNode node1 = newNode("node1");
MetaData.Builder metaDataBuilder = new MetaData.Builder(metaData);
RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
for (ObjectCursor<IndexMetaData> cursor: metaData.indices().values()) {
Index index = cursor.value.getIndex();
IndexMetaData.Builder indexMetaDataBuilder = IndexMetaData.builder(cursor.value);
final int recoveryType = randomInt(5);
if (recoveryType <= 4) {
addInSyncAllocationIds(index, indexMetaDataBuilder, gatewayAllocator, node1);
}
IndexMetaData indexMetaData = indexMetaDataBuilder.build();
metaDataBuilder.put(indexMetaData, false);
switch (recoveryType) {
case 0:
routingTableBuilder.addAsRecovery(indexMetaData);
break;
case 1:
routingTableBuilder.addAsFromCloseToOpen(indexMetaData);
break;
case 2:
routingTableBuilder.addAsFromDangling(indexMetaData);
break;
case 3:
routingTableBuilder.addAsNewRestore(indexMetaData,
new SnapshotRecoverySource(new Snapshot("repo", new SnapshotId("snap", "randomId")), Version.CURRENT,
indexMetaData.getIndex().getName()), new IntHashSet());
break;
case 4:
routingTableBuilder.addAsRestore(indexMetaData,
new SnapshotRecoverySource(new Snapshot("repo", new SnapshotId("snap", "randomId")), Version.CURRENT,
indexMetaData.getIndex().getName()));
break;
case 5:
routingTableBuilder.addAsNew(indexMetaData);
break;
default:
throw new IndexOutOfBoundsException();
}
}
return ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
.nodes(DiscoveryNodes.builder().add(node1))
.metaData(metaDataBuilder.build())
.routingTable(routingTableBuilder.build()).build();
}
private void addInSyncAllocationIds(Index index, IndexMetaData.Builder indexMetaData,
TestGatewayAllocator gatewayAllocator, DiscoveryNode node1) {
for (int shard = 0; shard < indexMetaData.numberOfShards(); shard++) {
final boolean primary = randomBoolean();
final ShardRouting unassigned = ShardRouting.newUnassigned(new ShardId(index, shard), primary,
primary ?
RecoverySource.StoreRecoverySource.EMPTY_STORE_INSTANCE :
RecoverySource.PeerRecoverySource.INSTANCE,
new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "test")
);
ShardRouting started = ShardRoutingHelper.moveToStarted(ShardRoutingHelper.initialize(unassigned, node1.getId()));
indexMetaData.putInSyncAllocationIds(shard, Collections.singleton(started.allocationId().getId()));
gatewayAllocator.addKnownAllocation(started);
}
}
}