/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.server.namenode; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption; import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeFile; import org.apache.hadoop.util.StringUtils; import org.junit.Test; /** * This tests data recovery mode for the NameNode. */ public class TestNameNodeRecovery { private static final Log LOG = LogFactory.getLog(TestNameNodeRecovery.class); private static StartupOption recoverStartOpt = StartupOption.RECOVER; static { recoverStartOpt.setForce(MetaRecoveryContext.FORCE_ALL); } static interface Corruptor { public void corrupt(File editFile) throws IOException; public boolean fatalCorruption(); } static class TruncatingCorruptor implements Corruptor { @Override public void corrupt(File editFile) throws IOException { // Corrupt the last edit long fileLen = editFile.length(); RandomAccessFile rwf = new RandomAccessFile(editFile, "rw"); rwf.setLength(fileLen - 1); rwf.close(); } @Override public boolean fatalCorruption() { return true; } } static final void pad(RandomAccessFile rwf, byte b, int amt) throws IOException { byte buf[] = new byte[1024]; for (int i = 0; i < buf.length; i++) { buf[i] = 0; } while (amt > 0) { int len = (amt < buf.length) ? amt : buf.length; rwf.write(buf, 0, len); amt -= len; } } static class PaddingCorruptor implements Corruptor { @Override public void corrupt(File editFile) throws IOException { // Add junk to the end of the file RandomAccessFile rwf = new RandomAccessFile(editFile, "rw"); rwf.seek(editFile.length()); pad(rwf, (byte)0, 2098176); rwf.write(0x44); rwf.close(); } @Override public boolean fatalCorruption() { return true; } } static class SafePaddingCorruptor implements Corruptor { private byte padByte; public SafePaddingCorruptor(byte padByte) { this.padByte = padByte; assert ((this.padByte == 0) || (this.padByte == -1)); } @Override public void corrupt(File editFile) throws IOException { // Add junk to the end of the file RandomAccessFile rwf = new RandomAccessFile(editFile, "rw"); rwf.seek(editFile.length()); rwf.write((byte)-1); pad(rwf, padByte, 2098176); rwf.close(); } @Override public boolean fatalCorruption() { return false; } } static void testNameNodeRecoveryImpl(Corruptor corruptor) throws IOException { final String TEST_PATH = "/test/path/dir"; final String TEST_PATH2 = "/alt/test/path"; // Start up the mini dfs cluster Configuration conf = new Configuration(); conf.setInt(DFSConfigKeys.DFS_NAMENODE_EDITS_TOLERATION_LENGTH_KEY, -1); MiniDFSCluster cluster; cluster = new MiniDFSCluster(0, conf, 0, true, true, false, StartupOption.FORMAT, null, null, null); cluster.waitActive(); FileSystem fileSys = cluster.getFileSystem(); fileSys.mkdirs(new Path(TEST_PATH)); fileSys.mkdirs(new Path(TEST_PATH2)); List<File> nameEditsDirs = (List<File>)FSNamesystem.getNamespaceEditsDirs(conf); cluster.shutdown(); File dir = nameEditsDirs.get(0); //has only one File editFile = new File(new File(dir, "current"), NameNodeFile.EDITS.getName()); assertTrue("Should exist: " + editFile, editFile.exists()); corruptor.corrupt(editFile); // Check how our corruption affected NameNode startup. try { LOG.debug("trying to start normally (this should fail)..."); cluster = new MiniDFSCluster(0, conf, 0, false, true, false, StartupOption.REGULAR, null, null, null); cluster.waitActive(); if (corruptor.fatalCorruption()) { fail("expected the truncated edit log to prevent normal startup"); } } catch (IOException e) { if (!corruptor.fatalCorruption()) { fail("expected to be able to start up normally, but couldn't."); } } finally { cluster.shutdown(); } // Perform recovery try { LOG.debug("running recovery..."); cluster = new MiniDFSCluster(0, conf, 0, false, true, false, StartupOption.RECOVER, null, null, null); cluster.waitActive(); } catch (IOException e) { fail("caught IOException while trying to recover. " + "message was " + e.getMessage() + "\nstack trace\n" + StringUtils.stringifyException(e)); } finally { cluster.shutdown(); } // Make sure that we can start the cluster normally after recovery try { cluster = new MiniDFSCluster(0, conf, 0, false, true, false, StartupOption.REGULAR, null, null, null); cluster.waitActive(); assertTrue(cluster.getFileSystem().exists(new Path(TEST_PATH))); } catch (IOException e) { fail("failed to recover. Error message: " + e.getMessage()); } finally { cluster.shutdown(); } } /** Test that we can successfully recover from a situation where the last * entry in the edit log has been truncated. */ @Test(timeout=180000) public void testRecoverTruncatedEditLog() throws IOException { testNameNodeRecoveryImpl(new TruncatingCorruptor()); LOG.debug("testRecoverTruncatedEditLog: successfully recovered the " + "truncated edit log"); } /** Test that we can successfully recover from a situation where garbage * bytes have been added to the end of the file. */ @Test(timeout=180000) public void testRecoverPaddedEditLog() throws IOException { testNameNodeRecoveryImpl(new PaddingCorruptor()); LOG.debug("testRecoverPaddedEditLog: successfully recovered the " + "padded edit log"); } /** Test that we can successfully recover from a situation where 0 * bytes have been added to the end of the file. */ @Test(timeout=180000) public void testRecoverZeroPaddedEditLog() throws IOException { testNameNodeRecoveryImpl(new SafePaddingCorruptor((byte)0)); } /** Test that we can successfully recover from a situation where -1 * bytes have been added to the end of the file. */ @Test(timeout=180000) public void testRecoverNegativeOnePaddedEditLog() throws IOException { testNameNodeRecoveryImpl(new SafePaddingCorruptor((byte)-1)); } }