/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.snapshots; import com.carrotsearch.hppc.IntHashSet; import com.carrotsearch.hppc.IntSet; import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryResponse; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.delete.DeleteSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse; import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.client.Client; import org.elasticsearch.client.node.NodeClient; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateUpdateTask; import org.elasticsearch.cluster.NamedDiff; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.metadata.MetaDataIndexStateService; import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.CheckedFunction; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.Priority; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.SettingsFilter; import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.discovery.zen.ElectMasterService; import org.elasticsearch.env.Environment; import org.elasticsearch.indices.recovery.RecoveryState; import org.elasticsearch.node.Node; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.repositories.RepositoryMissingException; import org.elasticsearch.rest.AbstractRestChannel; import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.RestResponse; import org.elasticsearch.rest.action.admin.cluster.RestClusterStateAction; import org.elasticsearch.rest.action.admin.cluster.RestGetRepositoriesAction; import org.elasticsearch.snapshots.mockstore.MockRepository; import org.elasticsearch.test.ESIntegTestCase.ClusterScope; import org.elasticsearch.test.ESIntegTestCase.Scope; import org.elasticsearch.test.InternalTestCluster; import org.elasticsearch.test.TestCustomMetaData; import org.elasticsearch.test.rest.FakeRestRequest; import java.io.IOException; import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.EnumSet; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertBlocked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThrows; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.nullValue; import static org.mockito.Mockito.mock; @ClusterScope(scope = Scope.TEST, numDataNodes = 0, transportClientRatio = 0) public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCase { public static class TestCustomMetaDataPlugin extends Plugin { private final List<NamedWriteableRegistry.Entry> namedWritables = new ArrayList<>(); private final List<NamedXContentRegistry.Entry> namedXContents = new ArrayList<>(); public TestCustomMetaDataPlugin() { registerBuiltinWritables(); } private <T extends MetaData.Custom> void registerMetaDataCustom(String name, Writeable.Reader<T> reader, Writeable.Reader<NamedDiff> diffReader, CheckedFunction<XContentParser, T, IOException> parser) { namedWritables.add(new NamedWriteableRegistry.Entry(MetaData.Custom.class, name, reader)); namedWritables.add(new NamedWriteableRegistry.Entry(NamedDiff.class, name, diffReader)); namedXContents.add(new NamedXContentRegistry.Entry(MetaData.Custom.class, new ParseField(name), parser)); } private void registerBuiltinWritables() { registerMetaDataCustom(SnapshottableMetadata.TYPE, SnapshottableMetadata::readFrom, SnapshottableMetadata::readDiffFrom, SnapshottableMetadata::fromXContent); registerMetaDataCustom(NonSnapshottableMetadata.TYPE, NonSnapshottableMetadata::readFrom, NonSnapshottableMetadata::readDiffFrom, NonSnapshottableMetadata::fromXContent); registerMetaDataCustom(SnapshottableGatewayMetadata.TYPE, SnapshottableGatewayMetadata::readFrom, SnapshottableGatewayMetadata::readDiffFrom, SnapshottableGatewayMetadata::fromXContent); registerMetaDataCustom(NonSnapshottableGatewayMetadata.TYPE, NonSnapshottableGatewayMetadata::readFrom, NonSnapshottableGatewayMetadata::readDiffFrom, NonSnapshottableGatewayMetadata::fromXContent); registerMetaDataCustom(SnapshotableGatewayNoApiMetadata.TYPE, SnapshotableGatewayNoApiMetadata::readFrom, NonSnapshottableGatewayMetadata::readDiffFrom, SnapshotableGatewayNoApiMetadata::fromXContent); } @Override public List<NamedWriteableRegistry.Entry> getNamedWriteables() { return namedWritables; } @Override public List<NamedXContentRegistry.Entry> getNamedXContent() { return namedXContents; } } @Override protected Collection<Class<? extends Plugin>> nodePlugins() { return Arrays.asList(MockRepository.Plugin.class, TestCustomMetaDataPlugin.class); } public void testRestorePersistentSettings() throws Exception { logger.info("--> start 2 nodes"); internalCluster().startNode(); Client client = client(); String secondNode = internalCluster().startNode(); logger.info("--> wait for the second node to join the cluster"); assertThat(client.admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut(), equalTo(false)); logger.info("--> set test persistent setting"); client.admin().cluster().prepareUpdateSettings().setPersistentSettings( Settings.builder() .put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), 2)) .execute().actionGet(); assertThat(client.admin().cluster().prepareState().setRoutingTable(false).setNodes(false).execute().actionGet().getState() .getMetaData().persistentSettings().getAsInt(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), -1), equalTo(2)); logger.info("--> create repository"); PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository("test-repo") .setType("fs").setSettings(Settings.builder().put("location", randomRepoPath())).execute().actionGet(); assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); logger.info("--> start snapshot"); CreateSnapshotResponse createSnapshotResponse = client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(true).execute().actionGet(); assertThat(createSnapshotResponse.getSnapshotInfo().totalShards(), equalTo(0)); assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), equalTo(0)); assertThat(client.admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").execute().actionGet().getSnapshots().get(0).state(), equalTo(SnapshotState.SUCCESS)); logger.info("--> clean the test persistent setting"); client.admin().cluster().prepareUpdateSettings().setPersistentSettings( Settings.builder() .put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), 1)) .execute().actionGet(); assertThat(client.admin().cluster().prepareState().setRoutingTable(false).setNodes(false).execute().actionGet().getState() .getMetaData().persistentSettings().getAsInt(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), -1), equalTo(1)); stopNode(secondNode); assertThat(client.admin().cluster().prepareHealth().setWaitForNodes("1").get().isTimedOut(), equalTo(false)); logger.info("--> restore snapshot"); try { client.admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap").setRestoreGlobalState(true).setWaitForCompletion(true).execute().actionGet(); fail("can't restore minimum master nodes"); } catch (IllegalArgumentException ex) { assertEquals("illegal value can't update [discovery.zen.minimum_master_nodes] from [1] to [2]", ex.getMessage()); assertEquals("cannot set discovery.zen.minimum_master_nodes to more than the current master nodes count [1]", ex.getCause().getMessage()); } logger.info("--> ensure that zen discovery minimum master nodes wasn't restored"); assertThat(client.admin().cluster().prepareState().setRoutingTable(false).setNodes(false).execute().actionGet().getState() .getMetaData().persistentSettings().getAsInt(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), -1), not(equalTo(2))); } public void testRestoreCustomMetadata() throws Exception { Path tempDir = randomRepoPath(); logger.info("--> start node"); internalCluster().startNode(); Client client = client(); createIndex("test-idx"); logger.info("--> add custom persistent metadata"); updateClusterState(new ClusterStateUpdater() { @Override public ClusterState execute(ClusterState currentState) throws Exception { ClusterState.Builder builder = ClusterState.builder(currentState); MetaData.Builder metadataBuilder = MetaData.builder(currentState.metaData()); metadataBuilder.putCustom(SnapshottableMetadata.TYPE, new SnapshottableMetadata("before_snapshot_s")); metadataBuilder.putCustom(NonSnapshottableMetadata.TYPE, new NonSnapshottableMetadata("before_snapshot_ns")); metadataBuilder.putCustom(SnapshottableGatewayMetadata.TYPE, new SnapshottableGatewayMetadata("before_snapshot_s_gw")); metadataBuilder.putCustom(NonSnapshottableGatewayMetadata.TYPE, new NonSnapshottableGatewayMetadata("before_snapshot_ns_gw")); metadataBuilder.putCustom(SnapshotableGatewayNoApiMetadata.TYPE, new SnapshotableGatewayNoApiMetadata("before_snapshot_s_gw_noapi")); builder.metaData(metadataBuilder); return builder.build(); } }); logger.info("--> create repository"); PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository("test-repo") .setType("fs").setSettings(Settings.builder().put("location", tempDir)).execute().actionGet(); assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); logger.info("--> start snapshot"); CreateSnapshotResponse createSnapshotResponse = client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(true).execute().actionGet(); assertThat(createSnapshotResponse.getSnapshotInfo().totalShards(), greaterThan(0)); assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), equalTo(createSnapshotResponse.getSnapshotInfo().successfulShards())); assertThat(client.admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").execute().actionGet().getSnapshots().get(0).state(), equalTo(SnapshotState.SUCCESS)); logger.info("--> change custom persistent metadata"); updateClusterState(new ClusterStateUpdater() { @Override public ClusterState execute(ClusterState currentState) throws Exception { ClusterState.Builder builder = ClusterState.builder(currentState); MetaData.Builder metadataBuilder = MetaData.builder(currentState.metaData()); if (randomBoolean()) { metadataBuilder.putCustom(SnapshottableMetadata.TYPE, new SnapshottableMetadata("after_snapshot_s")); } else { metadataBuilder.removeCustom(SnapshottableMetadata.TYPE); } metadataBuilder.putCustom(NonSnapshottableMetadata.TYPE, new NonSnapshottableMetadata("after_snapshot_ns")); if (randomBoolean()) { metadataBuilder.putCustom(SnapshottableGatewayMetadata.TYPE, new SnapshottableGatewayMetadata("after_snapshot_s_gw")); } else { metadataBuilder.removeCustom(SnapshottableGatewayMetadata.TYPE); } metadataBuilder.putCustom(NonSnapshottableGatewayMetadata.TYPE, new NonSnapshottableGatewayMetadata("after_snapshot_ns_gw")); metadataBuilder.removeCustom(SnapshotableGatewayNoApiMetadata.TYPE); builder.metaData(metadataBuilder); return builder.build(); } }); logger.info("--> delete repository"); assertAcked(client.admin().cluster().prepareDeleteRepository("test-repo")); logger.info("--> create repository"); putRepositoryResponse = client.admin().cluster().preparePutRepository("test-repo-2") .setType("fs").setSettings(Settings.builder().put("location", tempDir)).execute().actionGet(); assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); logger.info("--> restore snapshot"); client.admin().cluster().prepareRestoreSnapshot("test-repo-2", "test-snap").setRestoreGlobalState(true).setIndices("-*").setWaitForCompletion(true).execute().actionGet(); logger.info("--> make sure old repository wasn't restored"); assertThrows(client.admin().cluster().prepareGetRepositories("test-repo"), RepositoryMissingException.class); assertThat(client.admin().cluster().prepareGetRepositories("test-repo-2").get().repositories().size(), equalTo(1)); logger.info("--> check that custom persistent metadata was restored"); ClusterState clusterState = client.admin().cluster().prepareState().get().getState(); logger.info("Cluster state: {}", clusterState); MetaData metaData = clusterState.getMetaData(); assertThat(((SnapshottableMetadata) metaData.custom(SnapshottableMetadata.TYPE)).getData(), equalTo("before_snapshot_s")); assertThat(((NonSnapshottableMetadata) metaData.custom(NonSnapshottableMetadata.TYPE)).getData(), equalTo("after_snapshot_ns")); assertThat(((SnapshottableGatewayMetadata) metaData.custom(SnapshottableGatewayMetadata.TYPE)).getData(), equalTo("before_snapshot_s_gw")); assertThat(((NonSnapshottableGatewayMetadata) metaData.custom(NonSnapshottableGatewayMetadata.TYPE)).getData(), equalTo("after_snapshot_ns_gw")); logger.info("--> restart all nodes"); internalCluster().fullRestart(); ensureYellow(); logger.info("--> check that gateway-persistent custom metadata survived full cluster restart"); clusterState = client().admin().cluster().prepareState().get().getState(); logger.info("Cluster state: {}", clusterState); metaData = clusterState.getMetaData(); assertThat(metaData.custom(SnapshottableMetadata.TYPE), nullValue()); assertThat(metaData.custom(NonSnapshottableMetadata.TYPE), nullValue()); assertThat(((SnapshottableGatewayMetadata) metaData.custom(SnapshottableGatewayMetadata.TYPE)).getData(), equalTo("before_snapshot_s_gw")); assertThat(((NonSnapshottableGatewayMetadata) metaData.custom(NonSnapshottableGatewayMetadata.TYPE)).getData(), equalTo("after_snapshot_ns_gw")); // Shouldn't be returned as part of API response assertThat(metaData.custom(SnapshotableGatewayNoApiMetadata.TYPE), nullValue()); // But should still be in state metaData = internalCluster().getInstance(ClusterService.class).state().metaData(); assertThat(((SnapshotableGatewayNoApiMetadata) metaData.custom(SnapshotableGatewayNoApiMetadata.TYPE)).getData(), equalTo("before_snapshot_s_gw_noapi")); } private void updateClusterState(final ClusterStateUpdater updater) throws InterruptedException { final CountDownLatch countDownLatch = new CountDownLatch(1); final ClusterService clusterService = internalCluster().getInstance(ClusterService.class); clusterService.submitStateUpdateTask("test", new ClusterStateUpdateTask() { @Override public ClusterState execute(ClusterState currentState) throws Exception { return updater.execute(currentState); } @Override public void onFailure(String source, @Nullable Exception e) { countDownLatch.countDown(); } @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { countDownLatch.countDown(); } }); countDownLatch.await(); } private interface ClusterStateUpdater { ClusterState execute(ClusterState currentState) throws Exception; } public void testSnapshotDuringNodeShutdown() throws Exception { logger.info("--> start 2 nodes"); Client client = client(); assertAcked(prepareCreate("test-idx", 2, Settings.builder().put("number_of_shards", 2).put("number_of_replicas", 0))); ensureGreen(); logger.info("--> indexing some data"); for (int i = 0; i < 100; i++) { index("test-idx", "doc", Integer.toString(i), "foo", "bar" + i); } refresh(); assertThat(client.prepareSearch("test-idx").setSize(0).get().getHits().getTotalHits(), equalTo(100L)); logger.info("--> create repository"); logger.info("--> creating repository"); PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository("test-repo") .setType("mock").setSettings( Settings.builder() .put("location", randomRepoPath()) .put("random", randomAlphaOfLength(10)) .put("wait_after_unblock", 200) ).get(); assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); // Pick one node and block it String blockedNode = blockNodeWithIndex("test-repo", "test-idx"); logger.info("--> snapshot"); client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get(); logger.info("--> waiting for block to kick in"); waitForBlock(blockedNode, "test-repo", TimeValue.timeValueSeconds(60)); logger.info("--> execution was blocked on node [{}], shutting it down", blockedNode); unblockNode("test-repo", blockedNode); logger.info("--> stopping node [{}]", blockedNode); stopNode(blockedNode); logger.info("--> waiting for completion"); SnapshotInfo snapshotInfo = waitForCompletion("test-repo", "test-snap", TimeValue.timeValueSeconds(60)); logger.info("Number of failed shards [{}]", snapshotInfo.shardFailures().size()); logger.info("--> done"); } public void testSnapshotWithStuckNode() throws Exception { logger.info("--> start 2 nodes"); ArrayList<String> nodes = new ArrayList<>(); nodes.add(internalCluster().startNode()); nodes.add(internalCluster().startNode()); Client client = client(); assertAcked(prepareCreate("test-idx", 2, Settings.builder().put("number_of_shards", 2).put("number_of_replicas", 0))); ensureGreen(); logger.info("--> indexing some data"); for (int i = 0; i < 100; i++) { index("test-idx", "doc", Integer.toString(i), "foo", "bar" + i); } refresh(); assertThat(client.prepareSearch("test-idx").setSize(0).get().getHits().getTotalHits(), equalTo(100L)); logger.info("--> creating repository"); Path repo = randomRepoPath(); PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository("test-repo") .setType("mock").setSettings( Settings.builder() .put("location", repo) .put("random", randomAlphaOfLength(10)) .put("wait_after_unblock", 200) ).get(); assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); // Pick one node and block it String blockedNode = blockNodeWithIndex("test-repo", "test-idx"); // Remove it from the list of available nodes nodes.remove(blockedNode); int numberOfFilesBeforeSnapshot = numberOfFiles(repo); logger.info("--> snapshot"); client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get(); logger.info("--> waiting for block to kick in"); waitForBlock(blockedNode, "test-repo", TimeValue.timeValueSeconds(60)); logger.info("--> execution was blocked on node [{}], aborting snapshot", blockedNode); ActionFuture<DeleteSnapshotResponse> deleteSnapshotResponseFuture = internalCluster().client(nodes.get(0)).admin().cluster().prepareDeleteSnapshot("test-repo", "test-snap").execute(); // Make sure that abort makes some progress Thread.sleep(100); unblockNode("test-repo", blockedNode); logger.info("--> stopping node [{}]", blockedNode); stopNode(blockedNode); try { DeleteSnapshotResponse deleteSnapshotResponse = deleteSnapshotResponseFuture.actionGet(); assertThat(deleteSnapshotResponse.isAcknowledged(), equalTo(true)); } catch (SnapshotMissingException ex) { // When master node is closed during this test, it sometime manages to delete the snapshot files before // completely stopping. In this case the retried delete snapshot operation on the new master can fail // with SnapshotMissingException } logger.info("--> making sure that snapshot no longer exists"); assertThrows(client().admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").execute(), SnapshotMissingException.class); // Subtract four files that will remain in the repository: // (1) index-1 // (2) index-0 (because we keep the previous version) and // (3) index-latest // (4) incompatible-snapshots assertThat("not all files were deleted during snapshot cancellation", numberOfFilesBeforeSnapshot, equalTo(numberOfFiles(repo) - 4)); logger.info("--> done"); } public void testRestoreIndexWithMissingShards() throws Exception { logger.info("--> start 2 nodes"); internalCluster().startNode(); internalCluster().startNode(); cluster().wipeIndices("_all"); logger.info("--> create an index that will have some unallocated shards"); assertAcked(prepareCreate("test-idx-some", 2, Settings.builder().put("number_of_shards", 6) .put("number_of_replicas", 0))); ensureGreen(); logger.info("--> indexing some data into test-idx-some"); for (int i = 0; i < 100; i++) { index("test-idx-some", "doc", Integer.toString(i), "foo", "bar" + i); } refresh(); assertThat(client().prepareSearch("test-idx-some").setSize(0).get().getHits().getTotalHits(), equalTo(100L)); logger.info("--> shutdown one of the nodes"); internalCluster().stopRandomDataNode(); assertThat(client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setTimeout("1m").setWaitForNodes("<2").execute().actionGet().isTimedOut(), equalTo(false)); logger.info("--> create an index that will have all allocated shards"); assertAcked(prepareCreate("test-idx-all", 1, Settings.builder().put("number_of_shards", 6) .put("number_of_replicas", 0))); ensureGreen("test-idx-all"); logger.info("--> create an index that will be closed"); assertAcked(prepareCreate("test-idx-closed", 1, Settings.builder().put("number_of_shards", 4).put("number_of_replicas", 0))); ensureGreen("test-idx-closed"); logger.info("--> indexing some data into test-idx-all"); for (int i = 0; i < 100; i++) { index("test-idx-all", "doc", Integer.toString(i), "foo", "bar" + i); index("test-idx-closed", "doc", Integer.toString(i), "foo", "bar" + i); } refresh("test-idx-closed", "test-idx-all"); // don't refresh test-idx-some it will take 30 sec until it times out... assertThat(client().prepareSearch("test-idx-all").setSize(0).get().getHits().getTotalHits(), equalTo(100L)); assertAcked(client().admin().indices().prepareClose("test-idx-closed")); logger.info("--> create an index that will have no allocated shards"); assertAcked(prepareCreate("test-idx-none", 1, Settings.builder().put("number_of_shards", 6) .put("index.routing.allocation.include.tag", "nowhere") .put("number_of_replicas", 0)).setWaitForActiveShards(ActiveShardCount.NONE).get()); assertTrue(client().admin().indices().prepareExists("test-idx-none").get().isExists()); logger.info("--> creating repository"); PutRepositoryResponse putRepositoryResponse = client().admin().cluster().preparePutRepository("test-repo") .setType("fs").setSettings(Settings.builder().put("location", randomRepoPath())).execute().actionGet(); assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); logger.info("--> start snapshot with default settings and closed index - should be blocked"); assertBlocked(client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap-1") .setIndices("test-idx-all", "test-idx-none", "test-idx-some", "test-idx-closed") .setWaitForCompletion(true), MetaDataIndexStateService.INDEX_CLOSED_BLOCK); logger.info("--> start snapshot with default settings without a closed index - should fail"); CreateSnapshotResponse createSnapshotResponse = client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap-1") .setIndices("test-idx-all", "test-idx-none", "test-idx-some") .setWaitForCompletion(true).execute().actionGet(); assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.FAILED)); assertThat(createSnapshotResponse.getSnapshotInfo().reason(), containsString("Indices don't have primary shards")); if (randomBoolean()) { logger.info("checking snapshot completion using status"); client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap-2") .setIndices("test-idx-all", "test-idx-none", "test-idx-some") .setWaitForCompletion(false).setPartial(true).execute().actionGet(); assertBusy(new Runnable() { @Override public void run() { SnapshotsStatusResponse snapshotsStatusResponse = client().admin().cluster().prepareSnapshotStatus("test-repo").setSnapshots("test-snap-2").get(); List<SnapshotStatus> snapshotStatuses = snapshotsStatusResponse.getSnapshots(); assertEquals(snapshotStatuses.size(), 1); logger.trace("current snapshot status [{}]", snapshotStatuses.get(0)); assertTrue(snapshotStatuses.get(0).getState().completed()); } }, 1, TimeUnit.MINUTES); SnapshotsStatusResponse snapshotsStatusResponse = client().admin().cluster().prepareSnapshotStatus("test-repo").setSnapshots("test-snap-2").get(); List<SnapshotStatus> snapshotStatuses = snapshotsStatusResponse.getSnapshots(); assertThat(snapshotStatuses.size(), equalTo(1)); SnapshotStatus snapshotStatus = snapshotStatuses.get(0); logger.info("State: [{}], Reason: [{}]", createSnapshotResponse.getSnapshotInfo().state(), createSnapshotResponse.getSnapshotInfo().reason()); assertThat(snapshotStatus.getShardsStats().getTotalShards(), equalTo(18)); assertThat(snapshotStatus.getShardsStats().getDoneShards(), lessThan(12)); assertThat(snapshotStatus.getShardsStats().getDoneShards(), greaterThan(6)); // There is slight delay between snapshot being marked as completed in the cluster state and on the file system // After it was marked as completed in the cluster state - we need to check if it's completed on the file system as well assertBusy(new Runnable() { @Override public void run() { GetSnapshotsResponse response = client().admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap-2").get(); assertThat(response.getSnapshots().size(), equalTo(1)); SnapshotInfo snapshotInfo = response.getSnapshots().get(0); assertTrue(snapshotInfo.state().completed()); assertEquals(SnapshotState.PARTIAL, snapshotInfo.state()); } }, 1, TimeUnit.MINUTES); } else { logger.info("checking snapshot completion using wait_for_completion flag"); createSnapshotResponse = client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap-2") .setIndices("test-idx-all", "test-idx-none", "test-idx-some") .setWaitForCompletion(true).setPartial(true).execute().actionGet(); logger.info("State: [{}], Reason: [{}]", createSnapshotResponse.getSnapshotInfo().state(), createSnapshotResponse.getSnapshotInfo().reason()); assertThat(createSnapshotResponse.getSnapshotInfo().totalShards(), equalTo(18)); assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), lessThan(12)); assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(6)); assertThat(client().admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap-2").execute().actionGet().getSnapshots().get(0).state(), equalTo(SnapshotState.PARTIAL)); } assertAcked(client().admin().indices().prepareClose("test-idx-some", "test-idx-all").execute().actionGet()); logger.info("--> restore incomplete snapshot - should fail"); assertThrows(client().admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap-2").setRestoreGlobalState(false).setWaitForCompletion(true).execute(), SnapshotRestoreException.class); logger.info("--> restore snapshot for the index that was snapshotted completely"); RestoreSnapshotResponse restoreSnapshotResponse = client().admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap-2").setRestoreGlobalState(false).setIndices("test-idx-all").setWaitForCompletion(true).execute().actionGet(); assertThat(restoreSnapshotResponse.getRestoreInfo(), notNullValue()); assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), equalTo(6)); assertThat(restoreSnapshotResponse.getRestoreInfo().successfulShards(), equalTo(6)); assertThat(restoreSnapshotResponse.getRestoreInfo().failedShards(), equalTo(0)); assertThat(client().prepareSearch("test-idx-all").setSize(0).get().getHits().getTotalHits(), equalTo(100L)); logger.info("--> restore snapshot for the partial index"); cluster().wipeIndices("test-idx-some"); restoreSnapshotResponse = client().admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap-2") .setRestoreGlobalState(false).setIndices("test-idx-some").setPartial(true).setWaitForCompletion(true).get(); assertThat(restoreSnapshotResponse.getRestoreInfo(), notNullValue()); assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), equalTo(6)); assertThat(restoreSnapshotResponse.getRestoreInfo().successfulShards(), allOf(greaterThan(0), lessThan(6))); assertThat(restoreSnapshotResponse.getRestoreInfo().failedShards(), greaterThan(0)); assertThat(client().prepareSearch("test-idx-some").setSize(0).get().getHits().getTotalHits(), allOf(greaterThan(0L), lessThan(100L))); logger.info("--> restore snapshot for the index that didn't have any shards snapshotted successfully"); cluster().wipeIndices("test-idx-none"); restoreSnapshotResponse = client().admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap-2") .setRestoreGlobalState(false).setIndices("test-idx-none").setPartial(true).setWaitForCompletion(true).get(); assertThat(restoreSnapshotResponse.getRestoreInfo(), notNullValue()); assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), equalTo(6)); assertThat(restoreSnapshotResponse.getRestoreInfo().successfulShards(), equalTo(0)); assertThat(restoreSnapshotResponse.getRestoreInfo().failedShards(), equalTo(6)); assertThat(client().prepareSearch("test-idx-some").setSize(0).get().getHits().getTotalHits(), allOf(greaterThan(0L), lessThan(100L))); } public void testRestoreIndexWithShardsMissingInLocalGateway() throws Exception { logger.info("--> start 2 nodes"); Settings nodeSettings = Settings.builder() .put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), EnableAllocationDecider.Rebalance.NONE) .build(); internalCluster().startNode(nodeSettings); internalCluster().startNode(nodeSettings); cluster().wipeIndices("_all"); logger.info("--> create repository"); PutRepositoryResponse putRepositoryResponse = client().admin().cluster().preparePutRepository("test-repo") .setType("fs").setSettings(Settings.builder().put("location", randomRepoPath())).execute().actionGet(); assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); int numberOfShards = 6; logger.info("--> create an index that will have some unallocated shards"); assertAcked(prepareCreate("test-idx", 2, Settings.builder().put("number_of_shards", numberOfShards) .put("number_of_replicas", 0))); ensureGreen(); logger.info("--> indexing some data into test-idx"); for (int i = 0; i < 100; i++) { index("test-idx", "doc", Integer.toString(i), "foo", "bar" + i); } refresh(); assertThat(client().prepareSearch("test-idx").setSize(0).get().getHits().getTotalHits(), equalTo(100L)); logger.info("--> start snapshot"); assertThat(client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap-1").setIndices("test-idx").setWaitForCompletion(true).get().getSnapshotInfo().state(), equalTo(SnapshotState.SUCCESS)); logger.info("--> close the index"); assertAcked(client().admin().indices().prepareClose("test-idx")); logger.info("--> shutdown one of the nodes that should make half of the shards unavailable"); internalCluster().restartRandomDataNode(new InternalTestCluster.RestartCallback() { @Override public boolean clearData(String nodeName) { return true; } }); assertThat(client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setTimeout("1m").setWaitForNodes("2").execute().actionGet().isTimedOut(), equalTo(false)); logger.info("--> restore index snapshot"); assertThat(client().admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap-1").setRestoreGlobalState(false).setWaitForCompletion(true).get().getRestoreInfo().successfulShards(), equalTo(6)); ensureGreen("test-idx"); assertThat(client().prepareSearch("test-idx").setSize(0).get().getHits().getTotalHits(), equalTo(100L)); IntSet reusedShards = new IntHashSet(); for (RecoveryState recoveryState : client().admin().indices().prepareRecoveries("test-idx").get().shardRecoveryStates().get("test-idx")) { if (recoveryState.getIndex().reusedBytes() > 0) { reusedShards.add(recoveryState.getShardId().getId()); } } logger.info("--> check that at least half of the shards had some reuse: [{}]", reusedShards); assertThat(reusedShards.size(), greaterThanOrEqualTo(numberOfShards / 2)); } public void testRegistrationFailure() { logger.info("--> start first node"); internalCluster().startNode(); logger.info("--> start second node"); // Make sure the first node is elected as master internalCluster().startNode(Settings.builder().put(Node.NODE_MASTER_SETTING.getKey(), false)); // Register mock repositories for (int i = 0; i < 5; i++) { client().admin().cluster().preparePutRepository("test-repo" + i) .setType("mock").setSettings(Settings.builder() .put("location", randomRepoPath())).setVerify(false).get(); } logger.info("--> make sure that properly setup repository can be registered on all nodes"); client().admin().cluster().preparePutRepository("test-repo-0") .setType("fs").setSettings(Settings.builder() .put("location", randomRepoPath())).get(); } public void testThatSensitiveRepositorySettingsAreNotExposed() throws Exception { Settings nodeSettings = Settings.builder().put().build(); logger.info("--> start two nodes"); internalCluster().startNodes(2, nodeSettings); // Register mock repositories client().admin().cluster().preparePutRepository("test-repo") .setType("mock").setSettings(Settings.builder() .put("location", randomRepoPath()) .put(MockRepository.Plugin.USERNAME_SETTING.getKey(), "notsecretusername") .put(MockRepository.Plugin.PASSWORD_SETTING.getKey(), "verysecretpassword") ).get(); NodeClient nodeClient = internalCluster().getInstance(NodeClient.class); RestGetRepositoriesAction getRepoAction = new RestGetRepositoriesAction(nodeSettings, mock(RestController.class), internalCluster().getInstance(SettingsFilter.class)); RestRequest getRepoRequest = new FakeRestRequest(); getRepoRequest.params().put("repository", "test-repo"); final CountDownLatch getRepoLatch = new CountDownLatch(1); final AtomicReference<AssertionError> getRepoError = new AtomicReference<>(); getRepoAction.handleRequest(getRepoRequest, new AbstractRestChannel(getRepoRequest, true) { @Override public void sendResponse(RestResponse response) { try { assertThat(response.content().utf8ToString(), containsString("notsecretusername")); assertThat(response.content().utf8ToString(), not(containsString("verysecretpassword"))); } catch (AssertionError ex) { getRepoError.set(ex); } getRepoLatch.countDown(); } }, nodeClient); assertTrue(getRepoLatch.await(1, TimeUnit.SECONDS)); if (getRepoError.get() != null) { throw getRepoError.get(); } RestClusterStateAction clusterStateAction = new RestClusterStateAction(nodeSettings, mock(RestController.class), internalCluster().getInstance(SettingsFilter.class)); RestRequest clusterStateRequest = new FakeRestRequest(); final CountDownLatch clusterStateLatch = new CountDownLatch(1); final AtomicReference<AssertionError> clusterStateError = new AtomicReference<>(); clusterStateAction.handleRequest(clusterStateRequest, new AbstractRestChannel(clusterStateRequest, true) { @Override public void sendResponse(RestResponse response) { try { assertThat(response.content().utf8ToString(), containsString("notsecretusername")); assertThat(response.content().utf8ToString(), not(containsString("verysecretpassword"))); } catch (AssertionError ex) { clusterStateError.set(ex); } clusterStateLatch.countDown(); } }, nodeClient); assertTrue(clusterStateLatch.await(1, TimeUnit.SECONDS)); if (clusterStateError.get() != null) { throw clusterStateError.get(); } } public void testMasterShutdownDuringSnapshot() throws Exception { logger.info("--> starting two master nodes and two data nodes"); internalCluster().startMasterOnlyNodes(2); internalCluster().startDataOnlyNodes(2); final Client client = client(); logger.info("--> creating repository"); assertAcked(client.admin().cluster().preparePutRepository("test-repo") .setType("fs").setSettings(Settings.builder() .put("location", randomRepoPath()) .put("compress", randomBoolean()) .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES))); assertAcked(prepareCreate("test-idx", 0, Settings.builder().put("number_of_shards", between(1, 20)) .put("number_of_replicas", 0))); ensureGreen(); logger.info("--> indexing some data"); final int numdocs = randomIntBetween(10, 100); IndexRequestBuilder[] builders = new IndexRequestBuilder[numdocs]; for (int i = 0; i < builders.length; i++) { builders[i] = client().prepareIndex("test-idx", "type1", Integer.toString(i)).setSource("field1", "bar " + i); } indexRandom(true, builders); flushAndRefresh(); final int numberOfShards = getNumShards("test-idx").numPrimaries; logger.info("number of shards: {}", numberOfShards); dataNodeClient().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get(); logger.info("--> stopping master node"); internalCluster().stopCurrentMasterNode(); logger.info("--> wait until the snapshot is done"); assertBusy(() -> { GetSnapshotsResponse snapshotsStatusResponse = client().admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").get(); SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots().get(0); assertTrue(snapshotInfo.state().completed()); }, 1, TimeUnit.MINUTES); logger.info("--> verify that snapshot was succesful"); GetSnapshotsResponse snapshotsStatusResponse = client().admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").get(); SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots().get(0); assertEquals(SnapshotState.SUCCESS, snapshotInfo.state()); assertEquals(snapshotInfo.totalShards(), snapshotInfo.successfulShards()); assertEquals(0, snapshotInfo.failedShards()); } /** * Tests that a shrunken index (created via the shrink APIs) and subsequently snapshotted * can be restored when the node the shrunken index was created on is no longer part of * the cluster. */ public void testRestoreShrinkIndex() throws Exception { logger.info("--> starting a master node and a data node"); internalCluster().startMasterOnlyNode(); internalCluster().startDataOnlyNode(); final Client client = client(); final String repo = "test-repo"; final String snapshot = "test-snap"; final String sourceIdx = "test-idx"; final String shrunkIdx = "test-idx-shrunk"; logger.info("--> creating repository"); assertAcked(client.admin().cluster().preparePutRepository(repo).setType("fs") .setSettings(Settings.builder().put("location", randomRepoPath()) .put("compress", randomBoolean()))); assertAcked(prepareCreate(sourceIdx, 0, Settings.builder() .put("number_of_shards", between(2, 10)).put("number_of_replicas", 0))); ensureGreen(); logger.info("--> indexing some data"); IndexRequestBuilder[] builders = new IndexRequestBuilder[randomIntBetween(10, 100)]; for (int i = 0; i < builders.length; i++) { builders[i] = client().prepareIndex(sourceIdx, "type1", Integer.toString(i)).setSource("field1", "bar " + i); } indexRandom(true, builders); flushAndRefresh(); logger.info("--> shrink the index"); assertAcked(client.admin().indices().prepareUpdateSettings(sourceIdx) .setSettings(Settings.builder().put("index.blocks.write", true)).get()); assertAcked(client.admin().indices().prepareShrinkIndex(sourceIdx, shrunkIdx).get()); logger.info("--> snapshot the shrunk index"); CreateSnapshotResponse createResponse = client.admin().cluster() .prepareCreateSnapshot(repo, snapshot) .setWaitForCompletion(true).setIndices(shrunkIdx).get(); assertEquals(SnapshotState.SUCCESS, createResponse.getSnapshotInfo().state()); logger.info("--> delete index and stop the data node"); assertAcked(client.admin().indices().prepareDelete(sourceIdx).get()); assertAcked(client.admin().indices().prepareDelete(shrunkIdx).get()); internalCluster().stopRandomDataNode(); client().admin().cluster().prepareHealth().setTimeout("30s").setWaitForNodes("1"); logger.info("--> start a new data node"); final Settings dataSettings = Settings.builder() .put(Node.NODE_NAME_SETTING.getKey(), randomAlphaOfLength(5)) .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) // to get a new node id .build(); internalCluster().startDataOnlyNode(dataSettings); client().admin().cluster().prepareHealth().setTimeout("30s").setWaitForNodes("2"); logger.info("--> restore the shrunk index and ensure all shards are allocated"); RestoreSnapshotResponse restoreResponse = client().admin().cluster() .prepareRestoreSnapshot(repo, snapshot).setWaitForCompletion(true) .setIndices(shrunkIdx).get(); assertEquals(restoreResponse.getRestoreInfo().totalShards(), restoreResponse.getRestoreInfo().successfulShards()); ensureYellow(); } public static class SnapshottableMetadata extends TestCustomMetaData { public static final String TYPE = "test_snapshottable"; public SnapshottableMetadata(String data) { super(data); } @Override public String getWriteableName() { return TYPE; } public static SnapshottableMetadata readFrom(StreamInput in) throws IOException { return readFrom(SnapshottableMetadata::new, in); } public static NamedDiff<MetaData.Custom> readDiffFrom(StreamInput in) throws IOException { return readDiffFrom(TYPE, in); } public static SnapshottableMetadata fromXContent(XContentParser parser) throws IOException { return fromXContent(SnapshottableMetadata::new, parser); } @Override public EnumSet<MetaData.XContentContext> context() { return MetaData.API_AND_SNAPSHOT; } } public static class NonSnapshottableMetadata extends TestCustomMetaData { public static final String TYPE = "test_non_snapshottable"; public NonSnapshottableMetadata(String data) { super(data); } @Override public String getWriteableName() { return TYPE; } public static NonSnapshottableMetadata readFrom(StreamInput in) throws IOException { return readFrom(NonSnapshottableMetadata::new, in); } public static NamedDiff<MetaData.Custom> readDiffFrom(StreamInput in) throws IOException { return readDiffFrom(TYPE, in); } public static NonSnapshottableMetadata fromXContent(XContentParser parser) throws IOException { return fromXContent(NonSnapshottableMetadata::new, parser); } @Override public EnumSet<MetaData.XContentContext> context() { return MetaData.API_ONLY; } } public static class SnapshottableGatewayMetadata extends TestCustomMetaData { public static final String TYPE = "test_snapshottable_gateway"; public SnapshottableGatewayMetadata(String data) { super(data); } @Override public String getWriteableName() { return TYPE; } public static SnapshottableGatewayMetadata readFrom(StreamInput in) throws IOException { return readFrom(SnapshottableGatewayMetadata::new, in); } public static NamedDiff<MetaData.Custom> readDiffFrom(StreamInput in) throws IOException { return readDiffFrom(TYPE, in); } public static SnapshottableGatewayMetadata fromXContent(XContentParser parser) throws IOException { return fromXContent(SnapshottableGatewayMetadata::new, parser); } @Override public EnumSet<MetaData.XContentContext> context() { return EnumSet.of(MetaData.XContentContext.API, MetaData.XContentContext.SNAPSHOT, MetaData.XContentContext.GATEWAY); } } public static class NonSnapshottableGatewayMetadata extends TestCustomMetaData { public static final String TYPE = "test_non_snapshottable_gateway"; public NonSnapshottableGatewayMetadata(String data) { super(data); } @Override public String getWriteableName() { return TYPE; } public static NonSnapshottableGatewayMetadata readFrom(StreamInput in) throws IOException { return readFrom(NonSnapshottableGatewayMetadata::new, in); } public static NamedDiff<MetaData.Custom> readDiffFrom(StreamInput in) throws IOException { return readDiffFrom(TYPE, in); } public static NonSnapshottableGatewayMetadata fromXContent(XContentParser parser) throws IOException { return fromXContent(NonSnapshottableGatewayMetadata::new, parser); } @Override public EnumSet<MetaData.XContentContext> context() { return MetaData.API_AND_GATEWAY; } } public static class SnapshotableGatewayNoApiMetadata extends TestCustomMetaData { public static final String TYPE = "test_snapshottable_gateway_no_api"; public SnapshotableGatewayNoApiMetadata(String data) { super(data); } @Override public String getWriteableName() { return TYPE; } public static SnapshotableGatewayNoApiMetadata readFrom(StreamInput in) throws IOException { return readFrom(SnapshotableGatewayNoApiMetadata::new, in); } public static SnapshotableGatewayNoApiMetadata fromXContent(XContentParser parser) throws IOException { return fromXContent(SnapshotableGatewayNoApiMetadata::new, parser); } @Override public EnumSet<MetaData.XContentContext> context() { return EnumSet.of(MetaData.XContentContext.GATEWAY, MetaData.XContentContext.SNAPSHOT); } } }