/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.cluster.allocation; import com.carrotsearch.hppc.ObjectIntHashMap; import org.apache.logging.log4j.Logger; import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.routing.IndexShardRoutingTable; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.allocation.decider.AwarenessAllocationDecider; import org.elasticsearch.common.Priority; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.discovery.zen.ZenDiscovery; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESIntegTestCase.ClusterScope; import java.util.List; import java.util.concurrent.TimeUnit; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.equalTo; @ClusterScope(scope= ESIntegTestCase.Scope.TEST, numDataNodes =0, minNumDataNodes = 2) public class AwarenessAllocationIT extends ESIntegTestCase { private final Logger logger = Loggers.getLogger(AwarenessAllocationIT.class); @Override protected int numberOfReplicas() { return 1; } public void testSimpleAwareness() throws Exception { Settings commonSettings = Settings.builder() .put("cluster.routing.allocation.awareness.attributes", "rack_id") .build(); logger.info("--> starting 2 nodes on the same rack"); internalCluster().startNodes(2, Settings.builder().put(commonSettings).put("node.attr.rack_id", "rack_1").build()); createIndex("test1"); createIndex("test2"); NumShards test1 = getNumShards("test1"); NumShards test2 = getNumShards("test2"); //no replicas will be allocated as both indices end up on a single node final int totalPrimaries = test1.numPrimaries + test2.numPrimaries; ensureGreen(); logger.info("--> starting 1 node on a different rack"); final String node3 = internalCluster().startNode(Settings.builder().put(commonSettings).put("node.attr.rack_id", "rack_2").build()); // On slow machines the initial relocation might be delayed assertThat(awaitBusy( () -> { logger.info("--> waiting for no relocation"); ClusterHealthResponse clusterHealth = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("3").setWaitForNoRelocatingShards(true).get(); if (clusterHealth.isTimedOut()) { return false; } logger.info("--> checking current state"); ClusterState clusterState = client().admin().cluster().prepareState().execute().actionGet().getState(); // verify that we have all the primaries on node3 ObjectIntHashMap<String> counts = new ObjectIntHashMap<>(); for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) { for (ShardRouting shardRouting : indexShardRoutingTable) { counts.addTo(clusterState.nodes().get(shardRouting.currentNodeId()).getName(), 1); } } } return counts.get(node3) == totalPrimaries; }, 10, TimeUnit.SECONDS ), equalTo(true)); } public void testAwarenessZones() throws Exception { Settings commonSettings = Settings.builder() .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.getKey() + "zone.values", "a,b") .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey(), "zone") .put(ZenDiscovery.JOIN_TIMEOUT_SETTING.getKey(), "10s") .build(); logger.info("--> starting 4 nodes on different zones"); List<String> nodes = internalCluster().startNodes( Settings.builder().put(commonSettings).put("node.attr.zone", "a").build(), Settings.builder().put(commonSettings).put("node.attr.zone", "b").build(), Settings.builder().put(commonSettings).put("node.attr.zone", "b").build(), Settings.builder().put(commonSettings).put("node.attr.zone", "a").build() ); String A_0 = nodes.get(0); String B_0 = nodes.get(1); String B_1 = nodes.get(2); String A_1 = nodes.get(3); logger.info("--> waiting for nodes to form a cluster"); ClusterHealthResponse health = client().admin().cluster().prepareHealth().setWaitForNodes("4").execute().actionGet(); assertThat(health.isTimedOut(), equalTo(false)); client().admin().indices().prepareCreate("test") .setSettings(Settings.builder().put("index.number_of_shards", 5) .put("index.number_of_replicas", 1)).execute().actionGet(); logger.info("--> waiting for shards to be allocated"); health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNoRelocatingShards(true).execute().actionGet(); assertThat(health.isTimedOut(), equalTo(false)); ClusterState clusterState = client().admin().cluster().prepareState().execute().actionGet().getState(); ObjectIntHashMap<String> counts = new ObjectIntHashMap<>(); for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) { for (ShardRouting shardRouting : indexShardRoutingTable) { counts.addTo(clusterState.nodes().get(shardRouting.currentNodeId()).getName(), 1); } } } assertThat(counts.get(A_1), anyOf(equalTo(2),equalTo(3))); assertThat(counts.get(B_1), anyOf(equalTo(2),equalTo(3))); assertThat(counts.get(A_0), anyOf(equalTo(2),equalTo(3))); assertThat(counts.get(B_0), anyOf(equalTo(2),equalTo(3))); } public void testAwarenessZonesIncrementalNodes() throws Exception { Settings commonSettings = Settings.builder() .put("cluster.routing.allocation.awareness.force.zone.values", "a,b") .put("cluster.routing.allocation.awareness.attributes", "zone") .build(); logger.info("--> starting 2 nodes on zones 'a' & 'b'"); List<String> nodes = internalCluster().startNodes( Settings.builder().put(commonSettings).put("node.attr.zone", "a").build(), Settings.builder().put(commonSettings).put("node.attr.zone", "b").build() ); String A_0 = nodes.get(0); String B_0 = nodes.get(1); client().admin().indices().prepareCreate("test") .setSettings(Settings.builder().put("index.number_of_shards", 5) .put("index.number_of_replicas", 1)).execute().actionGet(); ClusterHealthResponse health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("2").setWaitForNoRelocatingShards(true).execute().actionGet(); assertThat(health.isTimedOut(), equalTo(false)); ClusterState clusterState = client().admin().cluster().prepareState().execute().actionGet().getState(); ObjectIntHashMap<String> counts = new ObjectIntHashMap<>(); for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) { for (ShardRouting shardRouting : indexShardRoutingTable) { counts.addTo(clusterState.nodes().get(shardRouting.currentNodeId()).getName(), 1); } } } assertThat(counts.get(A_0), equalTo(5)); assertThat(counts.get(B_0), equalTo(5)); logger.info("--> starting another node in zone 'b'"); String B_1 = internalCluster().startNode(Settings.builder().put(commonSettings).put("node.attr.zone", "b").build()); health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("3").execute().actionGet(); assertThat(health.isTimedOut(), equalTo(false)); client().admin().cluster().prepareReroute().get(); health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("3").setWaitForActiveShards(10).setWaitForNoRelocatingShards(true).execute().actionGet(); assertThat(health.isTimedOut(), equalTo(false)); clusterState = client().admin().cluster().prepareState().execute().actionGet().getState(); counts = new ObjectIntHashMap<>(); for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) { for (ShardRouting shardRouting : indexShardRoutingTable) { counts.addTo(clusterState.nodes().get(shardRouting.currentNodeId()).getName(), 1); } } } assertThat(counts.get(A_0), equalTo(5)); assertThat(counts.get(B_0), equalTo(3)); assertThat(counts.get(B_1), equalTo(2)); String noZoneNode = internalCluster().startNode(); health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("4").execute().actionGet(); assertThat(health.isTimedOut(), equalTo(false)); client().admin().cluster().prepareReroute().get(); health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("4").setWaitForActiveShards(10).setWaitForNoRelocatingShards(true).execute().actionGet(); assertThat(health.isTimedOut(), equalTo(false)); clusterState = client().admin().cluster().prepareState().execute().actionGet().getState(); counts = new ObjectIntHashMap<>(); for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) { for (ShardRouting shardRouting : indexShardRoutingTable) { counts.addTo(clusterState.nodes().get(shardRouting.currentNodeId()).getName(), 1); } } } assertThat(counts.get(A_0), equalTo(5)); assertThat(counts.get(B_0), equalTo(3)); assertThat(counts.get(B_1), equalTo(2)); assertThat(counts.containsKey(noZoneNode), equalTo(false)); client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder().put("cluster.routing.allocation.awareness.attributes", "").build()).get(); health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("4").setWaitForActiveShards(10).setWaitForNoRelocatingShards(true).execute().actionGet(); assertThat(health.isTimedOut(), equalTo(false)); clusterState = client().admin().cluster().prepareState().execute().actionGet().getState(); counts = new ObjectIntHashMap<>(); for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) { for (ShardRouting shardRouting : indexShardRoutingTable) { counts.addTo(clusterState.nodes().get(shardRouting.currentNodeId()).getName(), 1); } } } assertThat(counts.get(A_0), equalTo(3)); assertThat(counts.get(B_0), equalTo(3)); assertThat(counts.get(B_1), equalTo(2)); assertThat(counts.get(noZoneNode), equalTo(2)); } }