/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import static org.junit.Assert.*;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeFile;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Test;
/**
* Test the name node's ability to recover from partially corrupted storage
* directories.
*/
public class TestNameNodeCorruptionRecovery {
private static final Log LOG = LogFactory.getLog(
TestNameNodeCorruptionRecovery.class);
private MiniDFSCluster cluster;
@After
public void tearDownCluster() {
if (cluster != null) {
cluster.shutdown();
}
}
/**
* Test that a corrupted fstime file in a single storage directory does not
* prevent the NN from starting up.
*/
@Test
public void testFsTimeFileCorrupt() throws IOException, InterruptedException {
cluster = new MiniDFSCluster(new Configuration(), 0, true, null);
cluster.waitActive();
assertEquals(cluster.getNameDirs().size(), 2);
// Get the first fstime file and truncate it.
truncateStorageDirFile(cluster, NameNodeFile.TIME, 0);
// Make sure we can start up despite the fact the fstime file is corrupted.
cluster.restartNameNode();
}
/**
* Tests that a cluster's image is not damaged if checkpoint fails after
* writing checkpoint time to the image directory but before writing checkpoint
* time to the edits directory. This is a very rare failure scenario that can
* only occur if the namenode is configured with separate directories for image
* and edits. This test simulates the failure by forcing the fstime file for
* edits to contain 0, so that it appears the checkpoint time for edits is less
* than the checkpoint time for image.
*/
@Test
public void testEditsFsTimeLessThanImageFsTime() throws Exception {
// Create a cluster with separate directories for image and edits.
Configuration conf = new Configuration();
File testDir = new File(System.getProperty("test.build.data",
"build/test/data"), "dfs/");
conf.set("dfs.name.dir", new File(testDir, "name").getPath());
conf.set("dfs.name.edits.dir", new File(testDir, "edits").getPath());
cluster = new MiniDFSCluster(0, conf, 1, true, false, true, null, null, null,
null);
cluster.waitActive();
// Create several files to generate some edits.
createFile("one");
createFile("two");
createFile("three");
assertTrue(checkFileExists("one"));
assertTrue(checkFileExists("two"));
assertTrue(checkFileExists("three"));
// Restart to force a checkpoint.
cluster.restartNameNode();
// Shutdown so that we can safely modify the fstime file.
File[] editsFsTime = cluster.getNameNode().getFSImage().getFileNames(
NameNodeFile.TIME, NameNodeDirType.EDITS);
assertTrue("expected exactly one edits directory containing fstime file",
editsFsTime.length == 1);
cluster.shutdown();
// Write 0 into the fstime file for the edits directory.
FileOutputStream fos = null;
DataOutputStream dos = null;
try {
fos = new FileOutputStream(editsFsTime[0]);
dos = new DataOutputStream(fos);
dos.writeLong(0);
} finally {
IOUtils.cleanup(LOG, dos, fos);
}
// Restart to force another checkpoint, which should discard the old edits.
cluster = new MiniDFSCluster(0, conf, 1, false, false, true, null, null,
null, null);
cluster.waitActive();
// Restart one more time. If all of the prior checkpoints worked correctly,
// then we expect to load the image successfully and find the files.
cluster.restartNameNode();
assertTrue(checkFileExists("one"));
assertTrue(checkFileExists("two"));
assertTrue(checkFileExists("three"));
}
/**
* Checks that a file exists in the cluster.
*
* @param file String name of file to check
* @return boolean true if file exists
* @throws IOException thrown if there is an I/O error
*/
private boolean checkFileExists(String file) throws IOException {
return cluster.getFileSystem().exists(new Path(file));
}
/**
* Creates a new, empty file in the cluster.
*
* @param file String name of file to create
* @throws IOException thrown if there is an I/O error
*/
private void createFile(String file) throws IOException {
cluster.getFileSystem().create(new Path(file)).close();
}
private static void truncateStorageDirFile(MiniDFSCluster cluster,
NameNodeFile f, int storageDirIndex) throws IOException {
File currentDir = cluster.getNameNode().getFSImage()
.getStorageDir(storageDirIndex).getCurrentDir();
File nameNodeFile = new File(currentDir, f.getName());
assertTrue(nameNodeFile.isFile());
assertTrue(nameNodeFile.delete());
assertTrue(nameNodeFile.createNewFile());
}
}