/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Matchers.anyObject;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.spy;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction;
import org.junit.Test;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
/**
* Test various failure scenarios during saveNamespace() operation.
* Cases covered:
* <ol>
* <li>Recover from failure while saving into the second storage directory</li>
* <li>Recover from failure while moving current into lastcheckpoint.tmp</li>
* <li>Recover from failure while moving lastcheckpoint.tmp into
* previous.checkpoint</li>
* <li>Recover from failure while rolling edits file</li>
* </ol>
*/
public class TestSaveNamespace {
private static final Log LOG = LogFactory.getLog(TestSaveNamespace.class);
private static class FaultySaveImage implements Answer<Void> {
int count = 0;
boolean exceptionType = true;
// generate a RuntimeException
public FaultySaveImage() {
this.exceptionType = true;
}
// generate either a RuntimeException or IOException
public FaultySaveImage(boolean etype) {
this.exceptionType = etype;
}
public Void answer(InvocationOnMock invocation) throws Throwable {
Object[] args = invocation.getArguments();
File f = (File)args[0];
if (count++ == 1) {
LOG.info("Injecting fault for file: " + f);
if (exceptionType) {
throw new RuntimeException("Injected fault: saveFSImage second time");
} else {
throw new IOException("Injected fault: saveFSImage second time");
}
}
LOG.info("Not injecting fault for file: " + f);
return (Void)invocation.callRealMethod();
}
}
/**
* Verify that a saveNamespace command brings faulty directories
* in fs.name.dir and fs.edit.dir back online.
*/
@Test
public void testReinsertnamedirsInSavenamespace() throws Exception {
// create a configuration with the key to restore error
// directories in fs.name.dir
Configuration conf = getConf();
conf.setBoolean("dfs.namenode.name.dir.restore", true);
MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
cluster.waitActive();
FSNamesystem fsn = FSNamesystem.getFSNamesystem();
// Replace the FSImage with a spy
FSImage originalImage = fsn.dir.fsImage;
FSImage spyImage = spy(originalImage);
spyImage.setStorageDirectories(
FSNamesystem.getNamespaceDirs(conf),
FSNamesystem.getNamespaceEditsDirs(conf));
fsn.dir.fsImage = spyImage;
// inject fault
// The spy throws a IOException when writing to the second directory
doAnswer(new FaultySaveImage(false)).
when(spyImage).saveFSImage((File)anyObject(), anyBoolean());
try {
doAnEdit(fsn, 1);
fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
// Save namespace - this injects a fault and marks one
// directory as faulty.
LOG.info("Doing the first savenamespace.");
fsn.saveNamespace(false, false);
LOG.warn("First savenamespace sucessful.");
assertTrue("Savenamespace should have marked one directory as bad." +
" But found " + spyImage.getRemovedStorageDirs().size() +
" bad directories.",
spyImage.getRemovedStorageDirs().size() == 1);
// The next call to savenamespace should try inserting the
// erroneous directory back to fs.name.dir. This command should
// be successful.
LOG.info("Doing the second savenamespace.");
fsn.saveNamespace(false, false);
LOG.warn("Second savenamespace sucessful.");
assertTrue("Savenamespace should have been successful in removing " +
" bad directories from Image." +
" But found " + originalImage.getRemovedStorageDirs().size() +
" bad directories.",
originalImage.getRemovedStorageDirs().size() == 0);
// Now shut down and restart the namesystem
LOG.info("Shutting down fsimage.");
originalImage.close();
fsn.close();
fsn = null;
cluster.shutdown();
// Start a new namesystem, which should be able to recover
// the namespace from the previous incarnation.
LOG.info("Loading new FSmage from disk.");
cluster = new MiniDFSCluster(conf, 1, false, null);
cluster.waitActive();
fsn = FSNamesystem.getFSNamesystem();
// Make sure the image loaded including our edit.
LOG.info("Checking reloaded image.");
checkEditExists(cluster, 1);
LOG.info("Reloaded image is good.");
} finally {
fsn.close();
cluster.shutdown();
}
}
/**
* test savenamespace in the middle of a checkpoint
*/
@Test
public void testCheckpointWithSavenamespace() throws Exception {
Configuration conf = getConf();
MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
cluster.waitActive();
FSNamesystem fsn = FSNamesystem.getFSNamesystem();
// Replace the FSImage with a spy
final FSImage originalImage = fsn.dir.fsImage;
try {
doAnEdit(fsn, 1);
CheckpointSignature sig = fsn.rollEditLog();
LOG.warn("Checkpoint signature: " + sig);
// Do another edit
doAnEdit(fsn, 2);
// Save namespace
fsn.saveNamespace(true, false);
// try to do a rollFSImage, this should fail because the
// saveNamespace have already occured after the call to
// rollFSEdit
try {
fsn.rollFSImage(sig);
assertTrue("The rollFSImage immediately folloing the saveName " +
" command should fail. ", false);
} catch (IOException e) {
LOG.info("Expected exception while invoking rollFSImage " +
" after a successful call to saveNamespace." + e);
}
// Now shut down and restart the NN
originalImage.close();
fsn.close();
cluster.shutdown();
fsn = null;
// Start a new namesystem, which should be able to recover
// the namespace from the previous incarnation.
cluster = new MiniDFSCluster(conf, 1, false, null);
cluster.waitActive();
fsn = FSNamesystem.getFSNamesystem();
// Make sure the image loaded including our edits.
checkEditExists(cluster, 1);
checkEditExists(cluster, 2);
} finally {
if (fsn != null) {
fsn.close();
cluster.shutdown();
}
}
}
/**
* Test if namespace saving works while edits if rolling
* @throws Exception
*/
@Test
public void testSaveWhileEditsRolled() throws Exception {
testSaveWhileEditsRolled(true, true, false);
testSaveWhileEditsRolled(false, true, false);
testSaveWhileEditsRolled(true, true, true);
}
private void testSaveWhileEditsRolled(boolean dosafemode, boolean force,
boolean uncompressed) throws Exception {
Configuration conf = getConf();
MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
cluster.waitActive();
FSNamesystem fsn = FSNamesystem.getFSNamesystem();
// Replace the FSImage with a spy
FSImage originalImage = fsn.dir.fsImage;
FSImage spyImage = spy(originalImage);
spyImage.setStorageDirectories(
FSNamesystem.getNamespaceDirs(conf),
FSNamesystem.getNamespaceEditsDirs(conf));
fsn.dir.fsImage = spyImage;
try {
doAnEdit(fsn, 1);
CheckpointSignature sig = fsn.rollEditLog();
LOG.warn("Checkpoint signature: " + sig);
// Do another edit
doAnEdit(fsn, 2);
// Save namespace
if (dosafemode) {
fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
}
fsn.saveNamespace(force, uncompressed);
// Now shut down and restart the NN
originalImage.close();
originalImage = null;
fsn.close();
fsn = null;
cluster.shutdown();
// Start a new namesystem, which should be able to recover
// the namespace from the previous incarnation.
cluster = new MiniDFSCluster(conf, 1, false, null);
cluster.waitActive();
fsn = FSNamesystem.getFSNamesystem();
// Make sure the image loaded including our edits.
checkEditExists(cluster, 1);
checkEditExists(cluster, 2);
} finally {
if (originalImage != null) {
originalImage.close();
}
if (fsn != null) {
fsn.close();
cluster.shutdown();
}
}
}
private void doAnEdit(FSNamesystem fsn, int id) throws IOException {
// Make an edit
fsn.mkdirs(
"/test" + id,
new PermissionStatus("test", "Test",
new FsPermission((short)0777)));
}
private void checkEditExists(MiniDFSCluster cluster, int id) throws IOException {
// Make sure the image loaded including our edit.
Collection<File> editsDirs = cluster.getNameEditsDirs();
int count = 0;
for (File ed : editsDirs) {
count++;
if (count == id) {
assertTrue(new File(ed, "current/edits").exists());
}
}
}
private Configuration getConf() throws IOException {
Configuration conf = new Configuration();
FileSystem.setDefaultUri(conf, "hdfs://localhost:0");
conf.set("dfs.namenode.http-address", "0.0.0.0:0");
conf.set("dfs.namenode.secondary.http-address", "0.0.0.0:0");
conf.setBoolean("dfs.permissions.enabled", false);
return conf;
}
}