/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.translog;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.NativeFSLockFactory;
import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.cli.MockTerminal;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.routing.GroupShardsIterator;
import org.elasticsearch.cluster.routing.ShardIterator;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.MockEngineFactoryPlugin;
import org.elasticsearch.monitor.fs.FsInfo;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.engine.MockEngineSupport;
import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
import org.elasticsearch.test.transport.MockTransportService;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.common.util.CollectionUtils.iterableAsArrayList;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.notNullValue;

@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, numDataNodes = 0)
public class TruncateTranslogIT extends ESIntegTestCase {
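    /**
     * Registers the mock transport service and the mock engine factory; the mock engine
     * honors {@code MockEngineSupport.DISABLE_FLUSH_ON_CLOSE}, which this test sets so
     * that recovery always has to replay the translog.
     */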
    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return Arrays.asList(MockTransportService.TestPlugin.class, MockEngineFactoryPlugin.class);
    }

    public void testCorruptTranslogTruncation() throws Exception {
        internalCluster().startNodes(1, Settings.EMPTY);

        assertAcked(prepareCreate("test").setSettings(Settings.builder()
            .put("index.number_of_shards", 1)
            .put("index.number_of_replicas", 0)
            .put("index.refresh_interval", "-1")
            .put(MockEngineSupport.DISABLE_FLUSH_ON_CLOSE.getKey(), true) // never flush - always recover from translog
        ));
        ensureYellow();

        // Index some documents
        int numDocs = scaledRandomIntBetween(100, 1000);
        IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
        for (int i = 0; i < builders.length; i++) {
            builders[i] = client().prepareIndex("test", "type").setSource("foo", "bar");
        }
        disableTranslogFlush("test");
        indexRandom(false, false, false, Arrays.asList(builders));

        Set<Path> translogDirs = getTranslogDirs("test");

        TruncateTranslogCommand ttc = new TruncateTranslogCommand();
        MockTerminal t = new MockTerminal();
        OptionParser parser = ttc.getParser();

        for (Path translogDir : translogDirs) {
            OptionSet options = parser.parse("-d", translogDir.toAbsolutePath().toString(), "-b");
            // Try running it before the shard is closed; it should fail because it cannot acquire the shard lock
            try {
                logger.info("--> running truncate while index is open on [{}]", translogDir.toAbsolutePath());
                ttc.execute(t, options, null /* TODO: env should be real here, and ttc should actually use it... */);
                fail("expected the truncate command to fail when unable to acquire the shard lock");
            } catch (Exception e) {
                assertThat(e.getMessage(), containsString("Failed to lock shard's directory"));
            }
        }
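        // From here the test walks the full repair path: corrupt the translog on disk,
        // restart the node so the shard fails to recover, close the index, truncate the
        // translog, and finally verify the index reopens and is searchable again.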
        // Corrupt the translog file(s)
        logger.info("--> corrupting translog");
        corruptRandomTranslogFiles("test");

        // Restart the single node
        logger.info("--> restarting node");
        internalCluster().fullRestart();
        client().admin().cluster().prepareHealth().setWaitForYellowStatus()
            .setTimeout(new TimeValue(1000, TimeUnit.MILLISECONDS))
            .setWaitForEvents(Priority.LANGUID)
            .get();

        try {
            client().prepareSearch("test").setQuery(matchAllQuery()).get();
            fail("all shards should be failed due to a corrupted translog");
        } catch (SearchPhaseExecutionException e) {
            // Good, all shards should be failed because there is only a
            // single shard and its translog is corrupt
        }

        // Close the index so we can actually truncate the translog
        logger.info("--> closing 'test' index");
        client().admin().indices().prepareClose("test").get();

        for (Path translogDir : translogDirs) {
            final Path idxLocation = translogDir.getParent().resolve("index");
            assertBusy(() -> {
                logger.info("--> checking that lock has been released for {}", idxLocation);
                try (Directory dir = FSDirectory.open(idxLocation, NativeFSLockFactory.INSTANCE);
                     Lock writeLock = dir.obtainLock(IndexWriter.WRITE_LOCK_NAME)) {
                    // Great, do nothing, we just wanted to obtain the lock
                } catch (LockObtainFailedException lofe) {
                    logger.info("--> failed acquiring lock for {}", idxLocation);
                    fail("still waiting for lock release at [" + idxLocation + "]");
                } catch (IOException ioe) {
                    fail("Got an IOException: " + ioe);
                }
            });

            OptionSet options = parser.parse("-d", translogDir.toAbsolutePath().toString(), "-b");
            logger.info("--> running truncate translog command for [{}]", translogDir.toAbsolutePath());
            ttc.execute(t, options, null /* TODO: env should be real here, and ttc should actually use it... */);
            logger.info("--> output:\n{}", t.getOutput());
        }

        // Re-open index
        logger.info("--> opening 'test' index");
        client().admin().indices().prepareOpen("test").get();
        ensureYellow("test");

        // Run a search and make sure it succeeds
        SearchResponse resp = client().prepareSearch("test").setQuery(matchAllQuery()).get();
        ElasticsearchAssertions.assertNoFailures(resp);
    }

    private Set<Path> getTranslogDirs(String indexName) throws IOException {
        ClusterState state = client().admin().cluster().prepareState().get().getState();
        GroupShardsIterator shardIterators = state.getRoutingTable().activePrimaryShardsGrouped(new String[]{indexName}, false);
        final Index idx = state.metaData().index(indexName).getIndex();
        List<ShardIterator> iterators = iterableAsArrayList(shardIterators);
        ShardIterator shardIterator = RandomPicks.randomFrom(random(), iterators);
        ShardRouting shardRouting = shardIterator.nextOrNull();
        assertNotNull(shardRouting);
        assertTrue(shardRouting.primary());
        assertTrue(shardRouting.assignedToNode());
        String nodeId = shardRouting.currentNodeId();
        NodesStatsResponse nodeStats = client().admin().cluster().prepareNodesStats(nodeId).setFs(true).get();
        Set<Path> translogDirs = new TreeSet<>(); // TreeSet makes sure iteration order is deterministic
        for (FsInfo.Path fsPath : nodeStats.getNodes().get(0).getFs()) {
            String path = fsPath.getPath();
            final String relativeDataLocationPath = "indices/" + idx.getUUID() + "/" + Integer.toString(shardRouting.getId()) + "/translog";
            Path translogPath = PathUtils.get(path).resolve(relativeDataLocationPath);
            if (Files.isDirectory(translogPath)) {
                translogDirs.add(translogPath);
            }
        }
        return translogDirs;
    }

    private void corruptRandomTranslogFiles(String indexName) throws IOException {
        Set<Path> translogDirs = getTranslogDirs(indexName);
        Set<Path> files = new TreeSet<>(); // TreeSet makes sure iteration order is deterministic
        for (Path translogDir : translogDirs) {
            if (Files.isDirectory(translogDir)) {
                logger.info("--> path: {}", translogDir);
                try (DirectoryStream<Path> stream = Files.newDirectoryStream(translogDir)) {
                    for (Path item : stream) {
                        logger.info("--> File: {}", item);
                        if (Files.isRegularFile(item) && item.getFileName().toString().startsWith("translog-")) {
                            files.add(item);
                        }
                    }
                }
            }
        }
        Path fileToCorrupt = null;
        if (!files.isEmpty()) {
            int corruptions = randomIntBetween(5, 20);
            for (int i = 0; i < corruptions; i++) {
                fileToCorrupt = RandomPicks.randomFrom(random(), files);
                try (FileChannel raf = FileChannel.open(fileToCorrupt, StandardOpenOption.READ, StandardOpenOption.WRITE)) {
                    // read
                    raf.position(randomIntBetween(0, (int) Math.min(Integer.MAX_VALUE, raf.size() - 1)));
                    long filePointer = raf.position();
                    ByteBuffer bb = ByteBuffer.wrap(new byte[1]);
                    raf.read(bb);
                    bb.flip();

                    // corrupt
                    byte oldValue = bb.get(0);
                    byte newValue = (byte) (oldValue + 1);
                    bb.put(0, newValue);

                    // rewrite
                    raf.position(filePointer);
                    raf.write(bb);
                    logger.info("--> corrupting file {} -- flipping at position {} from {} to {}",
                        fileToCorrupt, filePointer, Integer.toHexString(oldValue), Integer.toHexString(newValue));
                }
            }
        }
        assertThat("no file corrupted", fileToCorrupt, notNullValue());
    }

    /** Disables translog flushing for the specified index */
    private static void disableTranslogFlush(String index) {
        Settings settings = Settings.builder()
            .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(1, ByteSizeUnit.PB))
            .build();
        client().admin().indices().prepareUpdateSettings(index).setSettings(settings).get();
    }
}
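// For reference: outside of tests the same truncation is exposed through a CLI wrapper
// (named `elasticsearch-translog truncate` in the 5.x line; the script name may differ
// by version), which takes the same `-d <translog dir>` and `-b` (batch, don't prompt)
// options parsed above. A sketch of a manual invocation, with an illustrative path:
//
//   bin/elasticsearch-translog truncate -d /var/data/nodes/0/indices/<uuid>/0/translog -b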