/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.cluster.routing;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ESAllocationTestCase;
import org.elasticsearch.cluster.health.ClusterStateHealth;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.node.DiscoveryNodes.Builder;
import org.elasticsearch.cluster.routing.allocation.AllocationService;
import org.elasticsearch.cluster.routing.allocation.FailedShard;
import org.elasticsearch.common.settings.Settings;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.not;
public class PrimaryTermsTests extends ESAllocationTestCase {
private static final String TEST_INDEX_1 = "test1";
private static final String TEST_INDEX_2 = "test2";
private int numberOfShards;
private int numberOfReplicas;
private static final Settings DEFAULT_SETTINGS = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
private AllocationService allocationService;
private ClusterState clusterState;
private final Map<String, long[]> primaryTermsPerIndex = new HashMap<>();
@Override
public void setUp() throws Exception {
super.setUp();
this.allocationService = createAllocationService(Settings.builder()
.put("cluster.routing.allocation.node_concurrent_recoveries", Integer.MAX_VALUE) // don't limit recoveries
.put("cluster.routing.allocation.node_initial_primaries_recoveries", Integer.MAX_VALUE)
.build());
this.numberOfShards = randomIntBetween(1, 5);
this.numberOfReplicas = randomIntBetween(0, 5);
logger.info("Setup test with {} shards and {} replicas.", this.numberOfShards, this.numberOfReplicas);
this.primaryTermsPerIndex.clear();
MetaData metaData = MetaData.builder()
.put(createIndexMetaData(TEST_INDEX_1))
.put(createIndexMetaData(TEST_INDEX_2))
.build();
RoutingTable routingTable = new RoutingTable.Builder()
.add(new IndexRoutingTable.Builder(metaData.index(TEST_INDEX_1).getIndex()).initializeAsNew(metaData.index(TEST_INDEX_1))
.build())
.add(new IndexRoutingTable.Builder(metaData.index(TEST_INDEX_2).getIndex()).initializeAsNew(metaData.index(TEST_INDEX_2))
.build())
.build();
this.clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
.metaData(metaData).routingTable(routingTable).build();
}
/**
* puts primary shard routings into initializing state
*/
private void initPrimaries() {
logger.info("adding {} nodes and performing rerouting", this.numberOfReplicas + 1);
Builder discoBuilder = DiscoveryNodes.builder();
for (int i = 0; i < this.numberOfReplicas + 1; i++) {
discoBuilder = discoBuilder.add(newNode("node" + i));
}
this.clusterState = ClusterState.builder(clusterState).nodes(discoBuilder).build();
ClusterState rerouteResult = allocationService.reroute(clusterState, "reroute");
assertThat(rerouteResult, not(equalTo(this.clusterState)));
applyRerouteResult(rerouteResult);
primaryTermsPerIndex.keySet().forEach(this::incrementPrimaryTerm);
}
private void incrementPrimaryTerm(String index) {
final long[] primaryTerms = primaryTermsPerIndex.get(index);
for (int i = 0; i < primaryTerms.length; i++) {
primaryTerms[i]++;
}
}
private void incrementPrimaryTerm(String index, int shard) {
primaryTermsPerIndex.get(index)[shard]++;
}
private boolean startInitializingShards(String index) {
final List<ShardRouting> startedShards = this.clusterState.getRoutingNodes().shardsWithState(index, INITIALIZING);
logger.info("start primary shards for index [{}]: {} ", index, startedShards);
ClusterState rerouteResult = allocationService.applyStartedShards(this.clusterState, startedShards);
boolean changed = rerouteResult.equals(this.clusterState) == false;
applyRerouteResult(rerouteResult);
return changed;
}
private void applyRerouteResult(ClusterState newClusterState) {
ClusterState previousClusterState = this.clusterState;
ClusterState.Builder builder = ClusterState.builder(newClusterState).incrementVersion();
if (previousClusterState.routingTable() != newClusterState.routingTable()) {
builder.routingTable(RoutingTable.builder(newClusterState.routingTable()).version(newClusterState.routingTable().version() + 1)
.build());
}
if (previousClusterState.metaData() != newClusterState.metaData()) {
builder.metaData(MetaData.builder(newClusterState.metaData()).version(newClusterState.metaData().version() + 1));
}
this.clusterState = builder.build();
final ClusterStateHealth clusterHealth = new ClusterStateHealth(clusterState);
logger.info("applied reroute. active shards: p [{}], t [{}], init shards: [{}], relocating: [{}]",
clusterHealth.getActivePrimaryShards(), clusterHealth.getActiveShards(),
clusterHealth.getInitializingShards(), clusterHealth.getRelocatingShards());
}
private void failSomePrimaries(String index) {
final IndexRoutingTable indexShardRoutingTable = clusterState.routingTable().index(index);
Set<Integer> shardIdsToFail = new HashSet<>();
for (int i = 1 + randomInt(numberOfShards - 1); i > 0; i--) {
shardIdsToFail.add(randomInt(numberOfShards - 1));
}
logger.info("failing primary shards {} for index [{}]", shardIdsToFail, index);
List<FailedShard> failedShards = new ArrayList<>();
for (int shard : shardIdsToFail) {
failedShards.add(new FailedShard(indexShardRoutingTable.shard(shard).primaryShard(), "test", null));
incrementPrimaryTerm(index, shard); // the primary failure should increment the primary term;
}
applyRerouteResult(allocationService.applyFailedShards(this.clusterState, failedShards,Collections.emptyList()));
}
private void addNodes() {
DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(clusterState.nodes());
final int newNodes = randomInt(10);
logger.info("adding [{}] nodes", newNodes);
for (int i = 0; i < newNodes; i++) {
nodesBuilder.add(newNode("extra_" + i));
}
this.clusterState = ClusterState.builder(clusterState).nodes(nodesBuilder).build();
applyRerouteResult(allocationService.reroute(this.clusterState, "nodes added"));
}
private IndexMetaData.Builder createIndexMetaData(String indexName) {
primaryTermsPerIndex.put(indexName, new long[numberOfShards]);
final IndexMetaData.Builder builder = new IndexMetaData.Builder(indexName)
.settings(DEFAULT_SETTINGS)
.numberOfReplicas(this.numberOfReplicas)
.numberOfShards(this.numberOfShards);
for (int i = 0; i < numberOfShards; i++) {
builder.primaryTerm(i, randomInt(200));
primaryTermsPerIndex.get(indexName)[i] = builder.primaryTerm(i);
}
return builder;
}
private void assertAllPrimaryTerm() {
primaryTermsPerIndex.keySet().forEach(this::assertPrimaryTerm);
}
private void assertPrimaryTerm(String index) {
final long[] terms = primaryTermsPerIndex.get(index);
final IndexMetaData indexMetaData = clusterState.metaData().index(index);
for (IndexShardRoutingTable shardRoutingTable : this.clusterState.routingTable().index(index)) {
final int shard = shardRoutingTable.shardId().id();
assertThat("primary term mismatch between indexMetaData of [" + index + "] and shard [" + shard + "]'s routing",
indexMetaData.primaryTerm(shard), equalTo(terms[shard]));
}
}
public void testPrimaryTermMetaDataSync() {
assertAllPrimaryTerm();
initPrimaries();
assertAllPrimaryTerm();
startInitializingShards(TEST_INDEX_1);
assertAllPrimaryTerm();
startInitializingShards(TEST_INDEX_2);
assertAllPrimaryTerm();
// now start all replicas too
startInitializingShards(TEST_INDEX_1);
startInitializingShards(TEST_INDEX_2);
assertAllPrimaryTerm();
// relocations shouldn't change much
addNodes();
assertAllPrimaryTerm();
boolean changed = true;
while (changed) {
changed = startInitializingShards(TEST_INDEX_1);
assertAllPrimaryTerm();
changed |= startInitializingShards(TEST_INDEX_2);
assertAllPrimaryTerm();
}
// primary promotion
failSomePrimaries(TEST_INDEX_1);
assertAllPrimaryTerm();
// stablize cluster
changed = true;
while (changed) {
changed = startInitializingShards(TEST_INDEX_1);
assertAllPrimaryTerm();
changed |= startInitializingShards(TEST_INDEX_2);
assertAllPrimaryTerm();
}
}
}