/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.indices.state;

import org.elasticsearch.Version;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.cluster.ClusterInfo;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateUpdateTask;
import org.elasticsearch.cluster.block.ClusterBlocks;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MappingMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.RoutingNodes;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.allocation.AllocationService;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.discovery.DiscoverySettings;
import org.elasticsearch.gateway.GatewayAllocator;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.discovery.TestZenDiscovery;
import org.elasticsearch.test.disruption.BlockClusterStateProcessing;
import org.elasticsearch.test.junit.annotations.TestLogging;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;

import static java.util.Collections.emptyMap;
import static java.util.Collections.emptySet;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasItem;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.instanceOf;

@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, numClientNodes = 0, transportClientRatio = 0)
@TestLogging("_root:DEBUG")
public class RareClusterStateIT extends ESIntegTestCase {

    @Override
    protected Settings nodeSettings(int nodeOrdinal) {
        return Settings.builder().put(super.nodeSettings(nodeOrdinal))
                .put(TestZenDiscovery.USE_MOCK_PINGS.getKey(), false).build();
    }

    @Override
    protected int numberOfShards() {
        return 1;
    }

    @Override
    protected int numberOfReplicas() {
        return 0;
    }

    public void testUnassignedShardAndEmptyNodesInRoutingTable() throws Exception {
        internalCluster().startNode();
        createIndex("a");
        ensureSearchable("a");
        ClusterState current = clusterService().state();
        GatewayAllocator allocator = internalCluster().getInstance(GatewayAllocator.class);

        AllocationDeciders allocationDeciders = new AllocationDeciders(Settings.EMPTY, Collections.emptyList());
        RoutingNodes routingNodes = new RoutingNodes(
                ClusterState.builder(current)
                        .routingTable(RoutingTable.builder(current.routingTable()).remove("a")
                                .addAsRecovery(current.metaData().index("a")).build())
                        .nodes(DiscoveryNodes.EMPTY_NODES)
                        .build(), false
        );
        RoutingAllocation routingAllocation = new RoutingAllocation(allocationDeciders, routingNodes, current,
                ClusterInfo.EMPTY, System.nanoTime(), false);
        allocator.allocateUnassigned(routingAllocation);
    }

    public void testAssignmentWithJustAddedNodes() throws Exception {
        internalCluster().startNode();
        final String index = "index";
        prepareCreate(index).setSettings(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1,
                IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0).get();
        ensureGreen(index);

        // close the index to have some unassigned started shards
        client().admin().indices().prepareClose(index).get();

        final String masterName = internalCluster().getMasterName();
        final ClusterService clusterService = internalCluster().clusterService(masterName);
        final AllocationService allocationService = internalCluster().getInstance(AllocationService.class, masterName);
        clusterService.submitStateUpdateTask("test-inject-node-and-reroute", new ClusterStateUpdateTask() {
            @Override
            public ClusterState execute(ClusterState currentState) throws Exception {
                // inject a node
                ClusterState.Builder builder = ClusterState.builder(currentState);
                builder.nodes(DiscoveryNodes.builder(currentState.nodes()).add(new DiscoveryNode("_non_existent",
                        buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT)));

                // open index
                final IndexMetaData indexMetaData = IndexMetaData.builder(currentState.metaData().index(index))
                        .state(IndexMetaData.State.OPEN).build();

                builder.metaData(MetaData.builder(currentState.metaData()).put(indexMetaData, true));
                builder.blocks(ClusterBlocks.builder().blocks(currentState.blocks()).removeIndexBlocks(index));
                ClusterState updatedState = builder.build();

                RoutingTable.Builder routingTable = RoutingTable.builder(updatedState.routingTable());
                routingTable.addAsRecovery(updatedState.metaData().index(index));
                updatedState = ClusterState.builder(updatedState).routingTable(routingTable.build()).build();

                return allocationService.reroute(updatedState, "reroute");
            }

            @Override
            public void onFailure(String source, Exception e) {
            }
        });
        ensureGreen(index);
        // remove the extra node
        clusterService.submitStateUpdateTask("test-remove-injected-node", new ClusterStateUpdateTask() {
            @Override
            public ClusterState execute(ClusterState currentState) throws Exception {
                ClusterState.Builder builder = ClusterState.builder(currentState);
                builder.nodes(DiscoveryNodes.builder(currentState.nodes()).remove("_non_existent"));

                currentState = builder.build();
                return allocationService.deassociateDeadNodes(currentState, true, "reroute");
            }

            @Override
            public void onFailure(String source, Exception e) {
            }
        });
    }

    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/14932")
    public void testDeleteCreateInOneBulk() throws Exception {
        internalCluster().startNodes(2);
        assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
        prepareCreate("test").setSettings(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, true).addMapping("type").get();
        ensureGreen("test");

        // now that the cluster is stable, remove the publishing timeout
        assertAcked(client().admin().cluster().prepareUpdateSettings().setTransientSettings(
                Settings.builder().put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "0")));

        Set<String> nodes = new HashSet<>(Arrays.asList(internalCluster().getNodeNames()));
        nodes.remove(internalCluster().getMasterName());

        // block cluster state processing on a non-master node
        BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(nodes.iterator().next(), random());
        internalCluster().setDisruptionScheme(disruption);
        logger.info("--> indexing a doc");
        index("test", "type", "1");
        refresh();
        disruption.startDisrupting();
        logger.info("--> delete index and recreate it");
        assertFalse(client().admin().indices().prepareDelete("test").setTimeout("200ms").get().isAcknowledged());
        assertFalse(prepareCreate("test").setTimeout("200ms")
                .setSettings(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, true).get().isAcknowledged());
        logger.info("--> letting cluster proceed");
        disruption.stopDisrupting();
        ensureGreen(TimeValue.timeValueMinutes(30), "test");
        assertHitCount(client().prepareSearch("test").get(), 0);
    }

    public void testDelayedMappingPropagationOnPrimary() throws Exception {
        // Here we want to test that things go well if there is a first request
        // that adds mappings but before mappings are propagated to all nodes
        // another index request introduces the same mapping.
        // The master node will reply immediately since it did not change the
        // cluster state, but the change might not be on the node that performed
        // the indexing operation yet.

        Settings settings = Settings.builder()
                .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") // explicitly set so it won't default to publish timeout
                .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "0s") // don't wait post commit as we are blocking things by design
                .build();
        final List<String> nodeNames = internalCluster().startNodes(2, settings);
        assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());

        final String master = internalCluster().getMasterName();
        assertThat(nodeNames, hasItem(master));
        String otherNode = null;
        for (String node : nodeNames) {
            if (node.equals(master) == false) {
                otherNode = node;
                break;
            }
        }
        assertNotNull(otherNode);

        // Don't allocate the shard on the master node
        assertAcked(prepareCreate("index").setSettings(Settings.builder()
                .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
                .put("index.routing.allocation.exclude._name", master)).get());
        ensureGreen();

        // Check routing tables
        ClusterState state = client().admin().cluster().prepareState().get().getState();
        assertEquals(master, state.nodes().getMasterNode().getName());
        List<ShardRouting> shards = state.routingTable().allShards("index");
        assertThat(shards, hasSize(1));
        for (ShardRouting shard : shards) {
            if (shard.primary()) {
                // primary must not be on the master node
                assertFalse(state.nodes().getMasterNodeId().equals(shard.currentNodeId()));
            } else {
                fail(); // only primaries
            }
        }

        // Block cluster state processing where our shard is
        BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(otherNode, random());
        internalCluster().setDisruptionScheme(disruption);
        disruption.startDisrupting();

        // Add a new mapping...
        final AtomicReference<Object> putMappingResponse = new AtomicReference<>();
        client().admin().indices().preparePutMapping("index").setType("type").setSource("field", "type=long")
                .execute(new ActionListener<PutMappingResponse>() {
            @Override
            public void onResponse(PutMappingResponse response) {
                putMappingResponse.set(response);
            }

            @Override
            public void onFailure(Exception e) {
                putMappingResponse.set(e);
            }
        });
        // ...and wait for mappings to be available on master
        assertBusy(new Runnable() {
            @Override
            public void run() {
                ImmutableOpenMap<String, MappingMetaData> indexMappings =
                        client().admin().indices().prepareGetMappings("index").get().getMappings().get("index");
                assertNotNull(indexMappings);
                MappingMetaData typeMappings = indexMappings.get("type");
                assertNotNull(typeMappings);

                Object properties;
                try {
                    properties = typeMappings.getSourceAsMap().get("properties");
                } catch (IOException e) {
                    throw new AssertionError(e);
                }
                assertNotNull(properties);

                Object fieldMapping = ((Map<String, Object>) properties).get("field");
                assertNotNull(fieldMapping);
            }
        });

        final AtomicReference<Object> docIndexResponse = new AtomicReference<>();
        client().prepareIndex("index", "type", "1").setSource("field", 42).execute(new ActionListener<IndexResponse>() {
            @Override
            public void onResponse(IndexResponse response) {
                docIndexResponse.set(response);
            }

            @Override
            public void onFailure(Exception e) {
                docIndexResponse.set(e);
            }
        });

        // Wait a bit to make sure that the reason why we did not get a response
        // is that cluster state processing is blocked and not just that it takes
        // time to process the indexing request
        Thread.sleep(100);
        assertThat(putMappingResponse.get(), equalTo(null));
        assertThat(docIndexResponse.get(), equalTo(null));

        // Now make sure the indexing request finishes successfully
        disruption.stopDisrupting();
        assertBusy(new Runnable() {
            @Override
            public void run() {
                assertThat(putMappingResponse.get(), instanceOf(PutMappingResponse.class));
                PutMappingResponse resp = (PutMappingResponse) putMappingResponse.get();
                assertTrue(resp.isAcknowledged());
                assertThat(docIndexResponse.get(), instanceOf(IndexResponse.class));
                IndexResponse docResp = (IndexResponse) docIndexResponse.get();
                assertEquals(Arrays.toString(docResp.getShardInfo().getFailures()),
                        1, docResp.getShardInfo().getTotal());
            }
        });
    }

    public void testDelayedMappingPropagationOnReplica() throws Exception {
        // This is essentially the same thing as testDelayedMappingPropagationOnPrimary
        // but for replicas.
        // Here we want to test that everything goes well if the mappings that
        // are needed for a document are not available on the replica at the
        // time of indexing it
        final List<String> nodeNames = internalCluster().startNodes(2,
                Settings.builder()
                        .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") // explicitly set so it won't default to publish timeout
                        .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "0s") // don't wait post commit as we are blocking things by design
                        .build());
        assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());

        final String master = internalCluster().getMasterName();
        assertThat(nodeNames, hasItem(master));
        String otherNode = null;
        for (String node : nodeNames) {
            if (node.equals(master) == false) {
                otherNode = node;
                break;
            }
        }
        assertNotNull(otherNode);

        // Force allocation of the primary on the master node by first only allocating on the master
        // and then allowing all nodes so that the replica gets allocated on the other node
assertAcked(prepareCreate("index").setSettings(Settings.builder() .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1) .put("index.routing.allocation.include._name", master)).get()); assertAcked(client().admin().indices().prepareUpdateSettings("index").setSettings(Settings.builder() .put("index.routing.allocation.include._name", "")).get()); ensureGreen(); // Check routing tables ClusterState state = client().admin().cluster().prepareState().get().getState(); assertEquals(master, state.nodes().getMasterNode().getName()); List<ShardRouting> shards = state.routingTable().allShards("index"); assertThat(shards, hasSize(2)); for (ShardRouting shard : shards) { if (shard.primary()) { // primary must be on the master assertEquals(state.nodes().getMasterNodeId(), shard.currentNodeId()); } else { assertTrue(shard.active()); } } // Block cluster state processing on the replica BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(otherNode, random()); internalCluster().setDisruptionScheme(disruption); disruption.startDisrupting(); final AtomicReference<Object> putMappingResponse = new AtomicReference<>(); client().admin().indices().preparePutMapping("index").setType("type").setSource("field", "type=long").execute(new ActionListener<PutMappingResponse>() { @Override public void onResponse(PutMappingResponse response) { putMappingResponse.set(response); } @Override public void onFailure(Exception e) { putMappingResponse.set(e); } }); final Index index = resolveIndex("index"); // Wait for mappings to be available on master assertBusy(new Runnable() { @Override public void run() { final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, master); final IndexService indexService = indicesService.indexServiceSafe(index); assertNotNull(indexService); final MapperService mapperService = indexService.mapperService(); DocumentMapper mapper = mapperService.documentMapper("type"); assertNotNull(mapper); assertNotNull(mapper.mappers().getMapper("field")); } }); final AtomicReference<Object> docIndexResponse = new AtomicReference<>(); client().prepareIndex("index", "type", "1").setSource("field", 42).execute(new ActionListener<IndexResponse>() { @Override public void onResponse(IndexResponse response) { docIndexResponse.set(response); } @Override public void onFailure(Exception e) { docIndexResponse.set(e); } }); // Wait for document to be indexed on primary assertBusy(new Runnable() { @Override public void run() { assertTrue(client().prepareGet("index", "type", "1").setPreference("_primary").get().isExists()); } }); // The mappings have not been propagated to the replica yet as a consequence the document count not be indexed // We wait on purpose to make sure that the document is not indexed because the shard operation is stalled // and not just because it takes time to replicate the indexing request to the replica Thread.sleep(100); assertThat(putMappingResponse.get(), equalTo(null)); assertThat(docIndexResponse.get(), equalTo(null)); // Now make sure the indexing request finishes successfully disruption.stopDisrupting(); assertBusy(new Runnable() { @Override public void run() { assertThat(putMappingResponse.get(), instanceOf(PutMappingResponse.class)); PutMappingResponse resp = (PutMappingResponse) putMappingResponse.get(); assertTrue(resp.isAcknowledged()); assertThat(docIndexResponse.get(), instanceOf(IndexResponse.class)); IndexResponse docResp = (IndexResponse) docIndexResponse.get(); 
                assertEquals(Arrays.toString(docResp.getShardInfo().getFailures()),
                        2, docResp.getShardInfo().getTotal()); // both shards should have succeeded
            }
        });
    }

}