/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.spy;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.OpInstanceCache;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.test.PathUtils;
import org.apache.hadoop.util.StringUtils;
import org.junit.Test;

import com.google.common.collect.Sets;

/**
 * This tests data recovery mode for the NameNode.
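 *
 * Recovery mode lets an operator salvage a NameNode whose edit log is
 * corrupt, by skipping over (or stopping at) unreadable transactions.
 * Outside of tests it is normally entered from the command line; a sketch
 * of the usual invocation (exact flags may vary between releases) is:
 * <pre>
 *   hdfs namenode -recover
 * </pre>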
 */
public class TestNameNodeRecovery {
  private static final Log LOG = LogFactory.getLog(TestNameNodeRecovery.class);
  private static final StartupOption recoverStartOpt = StartupOption.RECOVER;
  private static final File TEST_DIR =
      PathUtils.getTestDir(TestNameNodeRecovery.class);

  static {
    recoverStartOpt.setForce(MetaRecoveryContext.FORCE_ALL);
    EditLogFileOutputStream.setShouldSkipFsyncForTesting(true);
  }

  static void runEditLogTest(EditLogTestSetup elts) throws IOException {
    final File TEST_LOG_NAME = new File(TEST_DIR, "test_edit_log");
    final OpInstanceCache cache = new OpInstanceCache();
    EditLogFileOutputStream elfos = null;
    EditLogFileInputStream elfis = null;
    try {
      elfos = new EditLogFileOutputStream(new Configuration(),
          TEST_LOG_NAME, 0);
      elfos.create(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);

      elts.addTransactionsToLog(elfos, cache);
      elfos.setReadyToFlush();
      elfos.flushAndSync(true);
      elfos.close();
      elfos = null;

      elfis = new EditLogFileInputStream(TEST_LOG_NAME);
      elfis.setMaxOpSize(elts.getMaxOpSize());

      // Reading through normally should throw an exception at the first
      // invalid transaction.
      Set<Long> validTxIds = elts.getValidTxIds();
      FSEditLogOp op = null;
      long prevTxId = 0;
      try {
        while (true) {
          op = elfis.nextOp();
          if (op == null) {
            break;
          }
          LOG.debug("read txid " + op.txid);
          if (!validTxIds.contains(op.getTransactionId())) {
            fail("read txid " + op.getTransactionId() +
                ", which we did not expect to find.");
          }
          validTxIds.remove(op.getTransactionId());
          prevTxId = op.getTransactionId();
        }
        if (elts.getLastValidTxId() != -1) {
          fail("failed to throw IOException as expected");
        }
      } catch (IOException e) {
        if (elts.getLastValidTxId() == -1) {
          fail("expected all transactions to be valid, but got exception " +
              "on txid " + prevTxId);
        } else {
          assertEquals(prevTxId, elts.getLastValidTxId());
        }
      }

      if (elts.getLastValidTxId() != -1) {
        // Now skip over the bad transaction, the way recovery mode does,
        // and check that we still pick up the remaining valid ones.
        op = null;
        prevTxId = 0;
        try {
          while (true) {
            op = elfis.nextValidOp();
            if (op == null) {
              break;
            }
            prevTxId = op.getTransactionId();
            assertTrue(validTxIds.remove(op.getTransactionId()));
          }
        } catch (Throwable e) {
          fail("caught IOException while trying to skip over bad " +
              "transaction.  message was " + e.getMessage() +
              "\nstack trace\n" + StringUtils.stringifyException(e));
        }
      }
      // We should have read every valid transaction.
      assertTrue(validTxIds.isEmpty());
    } finally {
      IOUtils.cleanup(LOG, elfos, elfis);
    }
  }

  /**
   * A test scenario for the edit log.
   */
  private static abstract class EditLogTestSetup {
    /**
     * Set up the edit log.
     */
    abstract public void addTransactionsToLog(EditLogOutputStream elos,
        OpInstanceCache cache) throws IOException;

    /**
     * Get the transaction ID right before the transaction which causes the
     * normal edit log loading process to bail out, or -1 if the first
     * transaction should be bad.
     */
    abstract public long getLastValidTxId();

    /**
     * Get the transaction IDs which should exist and be valid in this
     * edit log.
     */
    abstract public Set<Long> getValidTxIds();

    /**
     * Return the maximum opcode size we will use for input.
     */
    public int getMaxOpSize() {
      return DFSConfigKeys.DFS_NAMENODE_MAX_OP_SIZE_DEFAULT;
    }
  }
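  /**
   * Pad the edit log with the given number of 0xff fill bytes, similar to
   * what stream preallocation leaves at the end of a log; the reader is
   * expected to treat trailing fill bytes as padding, not as corruption.
   */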
  static void padEditLog(EditLogOutputStream elos, int paddingLength)
      throws IOException {
    if (paddingLength <= 0) {
      return;
    }
    byte buf[] = new byte[4096];
    for (int i = 0; i < buf.length; i++) {
      buf[i] = (byte)-1;
    }
    int pad = paddingLength;
    while (pad > 0) {
      int toWrite = pad > buf.length ? buf.length : pad;
      elos.writeRaw(buf, 0, toWrite);
      pad -= toWrite;
    }
  }

  static void addDeleteOpcode(EditLogOutputStream elos,
      OpInstanceCache cache, long txId, String path) throws IOException {
    DeleteOp op = DeleteOp.getInstance(cache);
    op.setTransactionId(txId);
    op.setPath(path);
    op.setTimestamp(0);
    elos.write(op);
  }

  /**
   * Test the scenario where we have an empty edit log.
   *
   * This class is also useful in testing whether we can correctly handle
   * various amounts of padding bytes at the end of the log.  We should be
   * able to handle any amount of padding (including no padding) without
   * throwing an exception.
   */
  private static class EltsTestEmptyLog extends EditLogTestSetup {
    private final int paddingLength;

    public EltsTestEmptyLog(int paddingLength) {
      this.paddingLength = paddingLength;
    }

    @Override
    public void addTransactionsToLog(EditLogOutputStream elos,
        OpInstanceCache cache) throws IOException {
      padEditLog(elos, paddingLength);
    }

    @Override
    public long getLastValidTxId() {
      return -1;
    }

    @Override
    public Set<Long> getValidTxIds() {
      return new HashSet<Long>();
    }
  }

  /** Test an empty edit log */
  @Test(timeout=180000)
  public void testEmptyLog() throws IOException {
    runEditLogTest(new EltsTestEmptyLog(0));
  }

  /** Test an empty edit log with padding */
  @Test(timeout=180000)
  public void testEmptyPaddedLog() throws IOException {
    runEditLogTest(new EltsTestEmptyLog(
        EditLogFileOutputStream.MIN_PREALLOCATION_LENGTH));
  }

  /** Test an empty edit log with extra-long padding */
  @Test(timeout=180000)
  public void testEmptyExtraPaddedLog() throws IOException {
    runEditLogTest(new EltsTestEmptyLog(
        3 * EditLogFileOutputStream.MIN_PREALLOCATION_LENGTH));
  }
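  /*
   * The reader refuses to deserialize any single opcode larger than the
   * configured maximum op size, treating it as corruption.  The scenario
   * below shrinks the limit to 40 bytes so that an otherwise valid DeleteOp
   * with a long path trips it; the default limit exists so that a corrupt
   * length field cannot make the reader allocate an enormous buffer.
   */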
  /**
   * Test using a non-default maximum opcode length.
   */
  private static class EltsTestNonDefaultMaxOpSize extends EditLogTestSetup {
    public EltsTestNonDefaultMaxOpSize() {
    }

    @Override
    public void addTransactionsToLog(EditLogOutputStream elos,
        OpInstanceCache cache) throws IOException {
      addDeleteOpcode(elos, cache, 0, "/foo");
      addDeleteOpcode(elos, cache, 1,
          "/supercalifragalisticexpialadocius.supercalifragalisticexpialadocius");
    }

    @Override
    public long getLastValidTxId() {
      return 0;
    }

    @Override
    public Set<Long> getValidTxIds() {
      return Sets.newHashSet(0L);
    }

    @Override
    public int getMaxOpSize() {
      return 40;
    }
  }

  /** Test reading an edit log with a non-default maximum opcode size. */
  @Test(timeout=180000)
  public void testNonDefaultMaxOpSize() throws IOException {
    runEditLogTest(new EltsTestNonDefaultMaxOpSize());
  }

  /**
   * Test the scenario where an edit log contains some padding (0xff) bytes
   * followed by valid opcode data.
   *
   * These edit logs are corrupt, but all the opcodes should be recoverable
   * with recovery mode.
   */
  private static class EltsTestOpcodesAfterPadding extends EditLogTestSetup {
    private final int paddingLength;

    public EltsTestOpcodesAfterPadding(int paddingLength) {
      this.paddingLength = paddingLength;
    }

    @Override
    public void addTransactionsToLog(EditLogOutputStream elos,
        OpInstanceCache cache) throws IOException {
      padEditLog(elos, paddingLength);
      addDeleteOpcode(elos, cache, 0, "/foo");
    }

    @Override
    public long getLastValidTxId() {
      return 0;
    }

    @Override
    public Set<Long> getValidTxIds() {
      return Sets.newHashSet(0L);
    }
  }

  @Test(timeout=180000)
  public void testOpcodesAfterPadding() throws IOException {
    runEditLogTest(new EltsTestOpcodesAfterPadding(
        EditLogFileOutputStream.MIN_PREALLOCATION_LENGTH));
  }

  @Test(timeout=180000)
  public void testOpcodesAfterExtraPadding() throws IOException {
    runEditLogTest(new EltsTestOpcodesAfterPadding(
        3 * EditLogFileOutputStream.MIN_PREALLOCATION_LENGTH));
  }

  private static class EltsTestGarbageInEditLog extends EditLogTestSetup {
    private final long BAD_TXID = 4;
    private final long MAX_TXID = 10;

    @Override
    public void addTransactionsToLog(EditLogOutputStream elos,
        OpInstanceCache cache) throws IOException {
      for (long txid = 1; txid <= MAX_TXID; txid++) {
        if (txid == BAD_TXID) {
          byte garbage[] = { 0x1, 0x2, 0x3 };
          elos.writeRaw(garbage, 0, garbage.length);
        } else {
          DeleteOp op;
          op = DeleteOp.getInstance(cache);
          op.setTransactionId(txid);
          op.setPath("/foo." + txid);
          op.setTimestamp(txid);
          elos.write(op);
        }
      }
    }

    @Override
    public long getLastValidTxId() {
      return BAD_TXID - 1;
    }

    @Override
    public Set<Long> getValidTxIds() {
      return Sets.newHashSet(1L, 2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L);
    }
  }

  /**
   * Test that we can successfully recover from a situation where there is
   * garbage in the middle of the edit log file output stream.
   */
  @Test(timeout=180000)
  public void testSkipEdit() throws IOException {
    runEditLogTest(new EltsTestGarbageInEditLog());
  }

  /**
   * An algorithm for corrupting an edit log.
   */
  static interface Corruptor {
    /**
     * Corrupt an edit log file.
     *
     * @param editFile   The edit log file
     */
    public void corrupt(File editFile) throws IOException;

    /**
     * Explain whether we need to read the log in recovery mode.
     *
     * @param finalized  True if the edit log in question is finalized.
     *                   We're a little more lax about reading unfinalized
     *                   logs.  We will allow a small amount of garbage at
     *                   the end.  In a finalized log, every byte must be
     *                   perfect.
     *
     * @return           Whether we need to read the log in recovery mode
     */
    public boolean needRecovery(boolean finalized);

    /**
     * Get the name of this corruptor.
     *
     * @return           The Corruptor name
     */
    public String getName();
  }

  static class TruncatingCorruptor implements Corruptor {
    @Override
    public void corrupt(File editFile) throws IOException {
      // Corrupt the last edit by truncating the file's final byte.
      long fileLen = editFile.length();
      RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
      rwf.setLength(fileLen - 1);
      rwf.close();
    }

    @Override
    public boolean needRecovery(boolean finalized) {
      // A finalized log must be byte-perfect, so truncation requires
      // recovery; an in-progress log is allowed to end abruptly.
      return finalized;
    }

    @Override
    public String getName() {
      return "truncated";
    }
  }

  static class PaddingCorruptor implements Corruptor {
    @Override
    public void corrupt(File editFile) throws IOException {
      // Add junk to the end of the file: a run of zeros followed by
      // 0xd, 0xe, 0xa, 0xd, which cannot be mistaken for padding.
      RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
      rwf.seek(editFile.length());
      for (int i = 0; i < 129; i++) {
        rwf.write((byte)0);
      }
      rwf.write(0xd);
      rwf.write(0xe);
      rwf.write(0xa);
      rwf.write(0xd);
      rwf.close();
    }

    @Override
    public boolean needRecovery(boolean finalized) {
      // With finalized edit logs, we ignore what's at the end as long as we
      // can make it to the correct transaction ID.
      // With unfinalized edit logs, the finalization process ignores garbage
      // at the end.
      return false;
    }

    @Override
    public String getName() {
      return "padFatal";
    }
  }
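  /**
   * A corruptor that appends only padding bytes (0x00 or 0xff) that the log
   * reader already recognizes as end-of-log filler, so no recovery should
   * be needed afterwards.
   */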
  static class SafePaddingCorruptor implements Corruptor {
    private final byte padByte;

    public SafePaddingCorruptor(byte padByte) {
      this.padByte = padByte;
      assert ((this.padByte == 0) || (this.padByte == -1));
    }

    @Override
    public void corrupt(File editFile) throws IOException {
      // Add junk to the end of the file
      RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
      rwf.seek(editFile.length());
      rwf.write((byte)-1);
      for (int i = 0; i < 1024; i++) {
        rwf.write(padByte);
      }
      rwf.close();
    }

    @Override
    public boolean needRecovery(boolean finalized) {
      return false;
    }

    @Override
    public String getName() {
      return "pad" + ((int)padByte);
    }
  }

  /**
   * Create a test configuration that will exercise the initializeGenericKeys
   * code path.  This is a regression test for HDFS-4279.
   */
  static void setupRecoveryTestConf(Configuration conf) throws IOException {
    conf.set(DFSConfigKeys.DFS_NAMESERVICES, "ns1");
    conf.set(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, "nn1");
    conf.set(DFSUtil.addKeySuffixes(DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX,
        "ns1"), "nn1,nn2");
    String baseDir = System.getProperty(
        MiniDFSCluster.PROP_TEST_BUILD_DATA, "build/test/data") + "/dfs/";
    File nameDir = new File(baseDir, "nameR");
    File secondaryDir = new File(baseDir, "namesecondaryR");
    conf.set(DFSUtil.addKeySuffixes(DFSConfigKeys.
        DFS_NAMENODE_NAME_DIR_KEY, "ns1", "nn1"),
        nameDir.getCanonicalPath());
    conf.set(DFSUtil.addKeySuffixes(DFSConfigKeys.
        DFS_NAMENODE_CHECKPOINT_DIR_KEY, "ns1", "nn1"),
        secondaryDir.getCanonicalPath());
    conf.unset(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY);
    conf.unset(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY);
    FileUtils.deleteQuietly(nameDir);
    if (!nameDir.mkdirs()) {
      throw new RuntimeException("failed to make directory " +
          nameDir.getAbsolutePath());
    }
    FileUtils.deleteQuietly(secondaryDir);
    if (!secondaryDir.mkdirs()) {
      throw new RuntimeException("failed to make directory " +
          secondaryDir.getAbsolutePath());
    }
  }
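  /**
   * Run a full corrupt-then-recover cycle: build a small namespace on a
   * MiniDFSCluster, corrupt the resulting edit log with the given corruptor,
   * verify that a normal restart fails exactly when the corruptor says it
   * should, run the NameNode in recovery mode, and finally confirm that a
   * normal restart sees the recovered namespace.
   *
   * @param corruptor  How to damage the edit log.
   * @param finalize   Whether the log segment should be finalized before
   *                   it is corrupted.
   */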
  static void testNameNodeRecoveryImpl(Corruptor corruptor, boolean finalize)
      throws IOException {
    final String TEST_PATH = "/test/path/dir";
    final String TEST_PATH2 = "/second/dir";
    final boolean needRecovery = corruptor.needRecovery(finalize);

    // start a cluster
    Configuration conf = new HdfsConfiguration();
    setupRecoveryTestConf(conf);
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    StorageDirectory sd = null;
    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
          .manageNameDfsDirs(false).build();
      cluster.waitActive();
      if (!finalize) {
        // Normally, the in-progress edit log would be finalized by
        // FSEditLog#endCurrentLogSegment.  For testing purposes, we
        // disable that here.
        FSEditLog spyLog =
            spy(cluster.getNameNode().getFSImage().getEditLog());
        doNothing().when(spyLog).endCurrentLogSegment(true);
        cluster.getNameNode().getFSImage().setEditLogForTesting(spyLog);
      }
      fileSys = cluster.getFileSystem();
      final FSNamesystem namesystem = cluster.getNamesystem();
      FSImage fsimage = namesystem.getFSImage();
      fileSys.mkdirs(new Path(TEST_PATH));
      fileSys.mkdirs(new Path(TEST_PATH2));
      sd = fsimage.getStorage().dirIterator(NameNodeDirType.EDITS).next();
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }

    File editFile = FSImageTestUtil.findLatestEditsLog(sd).getFile();
    assertTrue("Should exist: " + editFile, editFile.exists());

    // Corrupt the edit log
    LOG.info("corrupting edit log file '" + editFile + "'");
    corruptor.corrupt(editFile);

    // If needRecovery == true, make sure that we can't start the
    // cluster normally before recovery
    cluster = null;
    try {
      LOG.debug("trying to start normally (this should fail)...");
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
          .enableManagedDfsDirsRedundancy(false).format(false).build();
      cluster.waitActive();
      cluster.shutdown();
      if (needRecovery) {
        fail("expected the corrupted edit log to prevent normal startup");
      }
    } catch (IOException e) {
      if (!needRecovery) {
        LOG.error("Got unexpected failure with corruptor " +
            corruptor.getName(), e);
        fail("got unexpected exception " + e.getMessage());
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }

    // Perform NameNode recovery.
    // Even if there was nothing wrong previously (needRecovery == false),
    // this should still work fine.
    cluster = null;
    try {
      LOG.debug("running recovery...");
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
          .enableManagedDfsDirsRedundancy(false).format(false)
          .startupOption(recoverStartOpt).build();
    } catch (IOException e) {
      fail("caught IOException while trying to recover. " +
          "message was " + e.getMessage() +
          "\nstack trace\n" + StringUtils.stringifyException(e));
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }

    // Make sure that we can start the cluster normally after recovery
    cluster = null;
    try {
      LOG.debug("starting cluster normally after recovery...");
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
          .enableManagedDfsDirsRedundancy(false).format(false).build();
      LOG.debug("successfully recovered the " + corruptor.getName() +
          " corrupted edit log");
      cluster.waitActive();
      assertTrue(cluster.getFileSystem().exists(new Path(TEST_PATH)));
    } catch (IOException e) {
      fail("failed to recover.  Error message: " + e.getMessage());
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
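  /*
   * Each corruptor below is exercised twice: once against a finalized edit
   * log segment and once against an in-progress one, since the reader is
   * stricter about finalized logs (see Corruptor#needRecovery).
   */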
  /**
   * Test that we can successfully recover from a situation where the last
   * entry in the edit log has been truncated.
   */
  @Test(timeout=180000)
  public void testRecoverTruncatedEditLog() throws IOException {
    testNameNodeRecoveryImpl(new TruncatingCorruptor(), true);
    testNameNodeRecoveryImpl(new TruncatingCorruptor(), false);
  }

  /**
   * Test that we can successfully recover from a situation where the last
   * entry in the edit log has been padded with garbage.
   */
  @Test(timeout=180000)
  public void testRecoverPaddedEditLog() throws IOException {
    testNameNodeRecoveryImpl(new PaddingCorruptor(), true);
    testNameNodeRecoveryImpl(new PaddingCorruptor(), false);
  }

  /**
   * Test that we do not need to recover from a situation where the last
   * entry in the edit log has been padded with 0x00 bytes.
   */
  @Test(timeout=180000)
  public void testRecoverZeroPaddedEditLog() throws IOException {
    testNameNodeRecoveryImpl(new SafePaddingCorruptor((byte)0), true);
    testNameNodeRecoveryImpl(new SafePaddingCorruptor((byte)0), false);
  }

  /**
   * Test that we do not need to recover from a situation where the last
   * entry in the edit log has been padded with 0xff bytes.
   */
  @Test(timeout=180000)
  public void testRecoverNegativeOnePaddedEditLog() throws IOException {
    testNameNodeRecoveryImpl(new SafePaddingCorruptor((byte)-1), true);
    testNameNodeRecoveryImpl(new SafePaddingCorruptor((byte)-1), false);
  }
}