/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.recovery; import org.apache.lucene.util.English; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.elasticsearch.action.admin.cluster.node.stats.NodeStats; import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.indices.recovery.PeerRecoveryTargetService; import org.elasticsearch.indices.recovery.RecoveryFileChunkRequest; import org.elasticsearch.node.RecoverySettingsChunkSizePlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.transport.MockTransportService; import org.elasticsearch.transport.ConnectionProfile; import org.elasticsearch.transport.Transport; import org.elasticsearch.transport.TransportException; import org.elasticsearch.transport.TransportRequest; import org.elasticsearch.transport.TransportRequestOptions; import org.elasticsearch.transport.TransportService; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; import static org.elasticsearch.node.RecoverySettingsChunkSizePlugin.CHUNK_SIZE_SETTING; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.hamcrest.Matchers.greaterThanOrEqualTo; @ESIntegTestCase.ClusterScope(numDataNodes = 2, numClientNodes = 0, scope = ESIntegTestCase.Scope.TEST) @SuppressCodecs("*") // test relies on exact file extensions public class TruncatedRecoveryIT extends ESIntegTestCase { @Override protected Collection<Class<? extends Plugin>> nodePlugins() { return Arrays.asList(MockTransportService.TestPlugin.class, RecoverySettingsChunkSizePlugin.class); } /** * This test tries to truncate some of larger files in the index to trigger leftovers on the recovery * target. This happens during recovery when the last chunk of the file is transferred to the replica * we just throw an exception to make sure the recovery fails and we leave some half baked files on the target. * Later we allow full recovery to ensure we can still recover and don't run into corruptions. */ public void testCancelRecoveryAndResume() throws Exception { assertTrue(client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder() .put(CHUNK_SIZE_SETTING.getKey(), new ByteSizeValue(randomIntBetween(50, 300), ByteSizeUnit.BYTES))) .get().isAcknowledged()); NodesStatsResponse nodeStats = client().admin().cluster().prepareNodesStats().get(); List<NodeStats> dataNodeStats = new ArrayList<>(); for (NodeStats stat : nodeStats.getNodes()) { if (stat.getNode().isDataNode()) { dataNodeStats.add(stat); } } assertThat(dataNodeStats.size(), greaterThanOrEqualTo(2)); Collections.shuffle(dataNodeStats, random()); // we use 2 nodes a lucky and unlucky one // the lucky one holds the primary // the unlucky one gets the replica and the truncated leftovers NodeStats primariesNode = dataNodeStats.get(0); NodeStats unluckyNode = dataNodeStats.get(1); // create the index and prevent allocation on any other nodes than the lucky one // we have no replicas so far and make sure that we allocate the primary on the lucky node assertAcked(prepareCreate("test") .addMapping("type1", "field1", "type=text", "the_id", "type=text") .setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, numberOfShards()) .put("index.routing.allocation.include._name", primariesNode.getNode().getName()))); // only allocate on the lucky node // index some docs and check if they are coming back int numDocs = randomIntBetween(100, 200); List<IndexRequestBuilder> builder = new ArrayList<>(); for (int i = 0; i < numDocs; i++) { String id = Integer.toString(i); builder.add(client().prepareIndex("test", "type1", id).setSource("field1", English.intToEnglish(i), "the_id", id)); } indexRandom(true, builder); for (int i = 0; i < numDocs; i++) { String id = Integer.toString(i); assertHitCount(client().prepareSearch().setQuery(QueryBuilders.termQuery("the_id", id)).get(), 1); } ensureGreen(); // ensure we have flushed segments and make them a big one via optimize client().admin().indices().prepareFlush().setForce(true).get(); client().admin().indices().prepareForceMerge().setMaxNumSegments(1).setFlush(true).get(); final CountDownLatch latch = new CountDownLatch(1); final AtomicBoolean truncate = new AtomicBoolean(true); for (NodeStats dataNode : dataNodeStats) { MockTransportService mockTransportService = ((MockTransportService) internalCluster().getInstance(TransportService.class, dataNode.getNode().getName())); mockTransportService.addDelegate(internalCluster().getInstance(TransportService.class, unluckyNode.getNode().getName()), new MockTransportService.DelegateTransport(mockTransportService.original()) { @Override protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException { if (action.equals(PeerRecoveryTargetService.Actions.FILE_CHUNK)) { RecoveryFileChunkRequest req = (RecoveryFileChunkRequest) request; logger.debug("file chunk [{}] lastChunk: {}", req, req.lastChunk()); if ((req.name().endsWith("cfs") || req.name().endsWith("fdt")) && req.lastChunk() && truncate.get()) { latch.countDown(); throw new RuntimeException("Caused some truncated files for fun and profit"); } } super.sendRequest(connection, requestId, action, request, options); } }); } logger.info("--> bumping replicas to 1"); // client().admin().indices().prepareUpdateSettings("test").setSettings(Settings.builder() .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1) .put("index.routing.allocation.include._name", // now allow allocation on all nodes primariesNode.getNode().getName() + "," + unluckyNode.getNode().getName())).get(); latch.await(); // at this point we got some truncated left overs on the replica on the unlucky node // now we are allowing the recovery to allocate again and finish to see if we wipe the truncated files truncate.compareAndSet(true, false); ensureGreen("test"); for (int i = 0; i < numDocs; i++) { String id = Integer.toString(i); assertHitCount(client().prepareSearch().setQuery(QueryBuilders.termQuery("the_id", id)).get(), 1); } } }