/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.cluster.routing.allocation; import org.elasticsearch.Version; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ESAllocationTestCase; import org.elasticsearch.cluster.EmptyClusterInfoService; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.routing.RoutingTable; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; import org.elasticsearch.cluster.routing.allocation.command.AllocationCommands; import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; import org.elasticsearch.cluster.routing.allocation.decider.Decision; import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.gateway.TestGatewayAllocator; import java.util.Collections; import java.util.List; import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED; import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.not; public class MaxRetryAllocationDeciderTests extends ESAllocationTestCase { private AllocationService strategy; @Override public void setUp() throws Exception { super.setUp(); strategy = new AllocationService(Settings.builder().build(), new AllocationDeciders(Settings.EMPTY, Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))), new TestGatewayAllocator(), new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); } private ClusterState createInitialClusterState() { MetaData.Builder metaBuilder = MetaData.builder(); metaBuilder.put(IndexMetaData.builder("idx").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0)); MetaData metaData = metaBuilder.build(); RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); routingTableBuilder.addAsNew(metaData.index("idx")); RoutingTable routingTable = routingTableBuilder.build(); ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) .metaData(metaData).routingTable(routingTable).build(); clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))) .build(); RoutingTable prevRoutingTable = routingTable; routingTable = strategy.reroute(clusterState, "reroute", false).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(prevRoutingTable.index("idx").shards().size(), 1); assertEquals(prevRoutingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); return clusterState; } public void testSingleRetryOnIgnore() { ClusterState clusterState = createInitialClusterState(); RoutingTable routingTable = clusterState.routingTable(); final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); // now fail it N-1 times for (int i = 0; i < retries-1; i++) { List<FailedShard> failedShards = Collections.singletonList( new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, new UnsupportedOperationException())); ClusterState newState = strategy.applyFailedShards(clusterState, failedShards); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; routingTable = newState.routingTable(); assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i+1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom" + i); } // now we go and check that we are actually stick to unassigned on the next failure List<FailedShard> failedShards = Collections.singletonList( new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException())); ClusterState newState = strategy.applyFailedShards(clusterState, failedShards); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; routingTable = newState.routingTable(); assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); // manual reroute should retry once newState = strategy.reroute(clusterState, new AllocationCommands(), false, true).getClusterState(); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; routingTable = newState.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); // now we go and check that we are actually stick to unassigned on the next failure ie. no retry failedShards = Collections.singletonList( new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException())); newState = strategy.applyFailedShards(clusterState, failedShards); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; routingTable = newState.routingTable(); assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries+1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); } public void testFailedAllocation() { ClusterState clusterState = createInitialClusterState(); RoutingTable routingTable = clusterState.routingTable(); final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); // now fail it N-1 times for (int i = 0; i < retries-1; i++) { List<FailedShard> failedShards = Collections.singletonList( new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, new UnsupportedOperationException())); ClusterState newState = strategy.applyFailedShards(clusterState, failedShards); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; routingTable = newState.routingTable(); assertEquals(routingTable.index("idx").shards().size(), 1); ShardRouting unassignedPrimary = routingTable.index("idx").shard(0).shards().get(0); assertEquals(unassignedPrimary.state(), INITIALIZING); assertEquals(unassignedPrimary.unassignedInfo().getNumFailedAllocations(), i+1); assertEquals(unassignedPrimary.unassignedInfo().getMessage(), "boom" + i); // MaxRetryAllocationDecider#canForceAllocatePrimary should return YES decisions because canAllocate returns YES here assertEquals(Decision.YES, new MaxRetryAllocationDecider(Settings.EMPTY).canForceAllocatePrimary( unassignedPrimary, null, new RoutingAllocation(null, null, clusterState, null, 0, false))); } // now we go and check that we are actually stick to unassigned on the next failure { List<FailedShard> failedShards = Collections.singletonList( new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException())); ClusterState newState = strategy.applyFailedShards(clusterState, failedShards); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; routingTable = newState.routingTable(); assertEquals(routingTable.index("idx").shards().size(), 1); ShardRouting unassignedPrimary = routingTable.index("idx").shard(0).shards().get(0); assertEquals(unassignedPrimary.unassignedInfo().getNumFailedAllocations(), retries); assertEquals(unassignedPrimary.state(), UNASSIGNED); assertEquals(unassignedPrimary.unassignedInfo().getMessage(), "boom"); // MaxRetryAllocationDecider#canForceAllocatePrimary should return a NO decision because canAllocate returns NO here assertEquals(Decision.NO, new MaxRetryAllocationDecider(Settings.EMPTY).canForceAllocatePrimary( unassignedPrimary, null, new RoutingAllocation(null, null, clusterState, null, 0, false))); } // change the settings and ensure we can do another round of allocation for that index. clusterState = ClusterState.builder(clusterState).routingTable(routingTable) .metaData(MetaData.builder(clusterState.metaData()) .put(IndexMetaData.builder(clusterState.metaData().index("idx")).settings( Settings.builder().put(clusterState.metaData().index("idx").getSettings()).put("index.allocation.max_retries", retries+1).build() ).build(), true).build()).build(); ClusterState newState = strategy.reroute(clusterState, "settings changed", false); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; routingTable = newState.routingTable(); // good we are initializing and we are maintaining failure information assertEquals(routingTable.index("idx").shards().size(), 1); ShardRouting unassignedPrimary = routingTable.index("idx").shard(0).shards().get(0); assertEquals(unassignedPrimary.unassignedInfo().getNumFailedAllocations(), retries); assertEquals(unassignedPrimary.state(), INITIALIZING); assertEquals(unassignedPrimary.unassignedInfo().getMessage(), "boom"); // bumped up the max retry count, so canForceAllocatePrimary should return a YES decision assertEquals(Decision.YES, new MaxRetryAllocationDecider(Settings.EMPTY).canForceAllocatePrimary( routingTable.index("idx").shard(0).shards().get(0), null, new RoutingAllocation(null, null, clusterState, null, 0, false))); // now we start the shard clusterState = strategy.applyStartedShards(clusterState, Collections.singletonList( routingTable.index("idx").shard(0).shards().get(0))); routingTable = clusterState.routingTable(); // all counters have been reset to 0 ie. no unassigned info assertEquals(routingTable.index("idx").shards().size(), 1); assertNull(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo()); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), STARTED); // now fail again and see if it has a new counter List<FailedShard> failedShards = Collections.singletonList( new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "ZOOOMG", new UnsupportedOperationException())); newState = strategy.applyFailedShards(clusterState, failedShards); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; routingTable = newState.routingTable(); assertEquals(routingTable.index("idx").shards().size(), 1); unassignedPrimary = routingTable.index("idx").shard(0).shards().get(0); assertEquals(unassignedPrimary.unassignedInfo().getNumFailedAllocations(), 1); assertEquals(unassignedPrimary.state(), UNASSIGNED); assertEquals(unassignedPrimary.unassignedInfo().getMessage(), "ZOOOMG"); // Counter reset, so MaxRetryAllocationDecider#canForceAllocatePrimary should return a YES decision assertEquals(Decision.YES, new MaxRetryAllocationDecider(Settings.EMPTY).canForceAllocatePrimary( unassignedPrimary, null, new RoutingAllocation(null, null, clusterState, null, 0, false))); } }