/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.index; import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; import static org.elasticsearch.cluster.routing.ShardRoutingState.RELOCATING; import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED; import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED; import static org.hamcrest.Matchers.equalTo; import java.util.Collection; import java.util.List; import org.elasticsearch.action.index.IndexAction; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.health.ClusterHealthStatus; import org.elasticsearch.cluster.routing.IndexShardRoutingTable; import org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.discovery.DiscoverySettings; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.transport.MockTransportService; import org.elasticsearch.transport.TransportService; import org.junit.Test; import com.google.common.base.Predicate; import com.google.common.collect.ImmutableSet; /** * Test failure when index replication actions fail mid-flight */ @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0) @ESIntegTestCase.SuppressLocalMode public class TransportIndexFailuresIT extends ESIntegTestCase { private static final Settings nodeSettings = Settings.settingsBuilder() .put("discovery.type", "zen") // <-- To override the local setting if set externally .put("discovery.zen.fd.ping_timeout", "1s") // <-- for hitting simulated network failures quickly .put("discovery.zen.fd.ping_timeout", "1") // <-- for hitting simulated network failures quickly .put(DiscoverySettings.PUBLISH_TIMEOUT, "1s") // <-- for hitting simulated network failures quickly .put("discovery.zen.minimum_master_nodes", 1) .build(); @Override protected Collection<Class<? extends Plugin>> nodePlugins() { return pluginList(MockTransportService.TestPlugin.class); } @Override protected int numberOfShards() { return 1; } @Override protected int numberOfReplicas() { return 1; } @Test public void testNetworkPartitionDuringReplicaIndexOp() throws Exception { final String INDEX = "testidx"; List<String> nodes = internalCluster().startNodesAsync(2, nodeSettings).get(); // Create index test with 1 shard, 1 replica and ensure it is green createIndex(INDEX); ensureGreen(INDEX); // Disable allocation so the replica cannot be reallocated when it fails Settings s = Settings.builder().put("cluster.routing.allocation.enable", "none").build(); client().admin().cluster().prepareUpdateSettings().setTransientSettings(s).get(); // Determine which node holds the primary shard ClusterState state = getNodeClusterState(nodes.get(0)); IndexShardRoutingTable shard = state.getRoutingTable().index(INDEX).shard(0); String primaryNode; String replicaNode; if (shard.getShards().get(0).primary()) { primaryNode = nodes.get(0); replicaNode = nodes.get(1); } else { primaryNode = nodes.get(1); replicaNode = nodes.get(0); } logger.info("--> primary shard is on {}", primaryNode); // Index a document to make sure everything works well IndexResponse resp = internalCluster().client(primaryNode).prepareIndex(INDEX, "doc").setSource("foo", "bar").get(); assertThat("document exists on primary node", internalCluster().client(primaryNode).prepareGet(INDEX, "doc", resp.getId()).setPreference("_only_local").get().isExists(), equalTo(true)); assertThat("document exists on replica node", internalCluster().client(replicaNode).prepareGet(INDEX, "doc", resp.getId()).setPreference("_only_local").get().isExists(), equalTo(true)); // Disrupt the network so indexing requests fail to replicate logger.info("--> preventing index/replica operations"); TransportService mockTransportService = internalCluster().getInstance(TransportService.class, primaryNode); ((MockTransportService) mockTransportService).addFailToSendNoConnectRule( internalCluster().getInstance(TransportService.class, replicaNode), ImmutableSet.of(IndexAction.NAME + "[r]") ); mockTransportService = internalCluster().getInstance(TransportService.class, replicaNode); ((MockTransportService) mockTransportService).addFailToSendNoConnectRule( internalCluster().getInstance(TransportService.class, primaryNode), ImmutableSet.of(IndexAction.NAME + "[r]") ); logger.info("--> indexing into primary"); // the replica shard should now be marked as failed because the replication operation will fail resp = internalCluster().client(primaryNode).prepareIndex(INDEX, "doc").setSource("foo", "baz").get(); // wait until the cluster reaches an exact yellow state, meaning replica has failed assertBusy(new Runnable() { @Override public void run() { assertThat(client().admin().cluster().prepareHealth().get().getStatus(), equalTo(ClusterHealthStatus.YELLOW)); } }); assertThat("document should still be indexed and available", client().prepareGet(INDEX, "doc", resp.getId()).get().isExists(), equalTo(true)); state = getNodeClusterState(randomFrom(nodes.toArray(Strings.EMPTY_ARRAY))); RoutingNodes rn = state.getRoutingNodes(); logger.info("--> counts: total: {}, unassigned: {}, initializing: {}, relocating: {}, started: {}", rn.shards(new Predicate<ShardRouting>() { @Override public boolean apply(ShardRouting input) { return true; } }).size(), rn.shardsWithState(UNASSIGNED).size(), rn.shardsWithState(INITIALIZING).size(), rn.shardsWithState(RELOCATING).size(), rn.shardsWithState(STARTED).size()); logger.info("--> unassigned: {}, initializing: {}, relocating: {}, started: {}", rn.shardsWithState(UNASSIGNED), rn.shardsWithState(INITIALIZING), rn.shardsWithState(RELOCATING), rn.shardsWithState(STARTED)); assertThat("only a single shard is now active (replica should be failed and not reallocated)", rn.shardsWithState(STARTED).size(), equalTo(1)); } private ClusterState getNodeClusterState(String node) { return internalCluster().client(node).admin().cluster().prepareState().setLocal(true).get().getState(); } }