/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import org.junit.Before;
import org.junit.After;
import static org.junit.Assert.*;
import static org.mockito.Mockito.anyString;
import static org.mockito.Mockito.atLeastOnce;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.verify;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
/**
 * Test that the NN stays up as long as it has a valid storage directory and
 * exits when there are no more valid storage directories.
 *
 * Each test runs against a fresh {@link MiniDFSCluster} configured with three
 * name directories plus a {@link SecondaryNameNode} that is used to drive
 * checkpoints.
 */
public class TestStorageDirectoryFailure {

  MiniDFSCluster cluster = null;
  FileSystem fs;
  SecondaryNameNode secondaryNN;
  ArrayList<String> nameDirs;

  /**
   * Configure three name directories, start the mini cluster and create the
   * secondary NN used by the tests.
   */
  @Before
  public void setUp() throws Exception {
    Configuration conf = new Configuration();

    String baseDir = System.getProperty("test.build.data", "/tmp");
    File dfsDir = new File(baseDir, "dfs");

    nameDirs = new ArrayList<String>();
    // Have all the name dirs with the same filename: important for regression
    // testing HDFS-3652.
    nameDirs.add(new File(new File(dfsDir, "name1"), "nn").getPath());
    nameDirs.add(new File(new File(dfsDir, "name2"), "nn").getPath());
    nameDirs.add(new File(new File(dfsDir, "name3"), "nn").getPath());

    conf.set("dfs.name.dir", StringUtils.join(nameDirs, ","));
    conf.set("dfs.data.dir", new File(dfsDir, "data").getPath());
    conf.set("fs.checkpoint.dir", new File(dfsDir, "secondary").getPath());
    conf.set("fs.default.name", "hdfs://localhost:0");
    conf.set("dfs.http.address", "0.0.0.0:0");
    conf.set("dfs.secondary.http.address", "0.0.0.0:0");

    // NOTE(review): the positional arguments presumably select an ephemeral
    // port, one datanode and formatting of the configured dirs -- confirm
    // against the MiniDFSCluster constructor.
    cluster = new MiniDFSCluster(0, conf, 1, true, false, true, null, null,
        null, null);
    cluster.waitActive();
    fs = cluster.getFileSystem();
    secondaryNN = new SecondaryNameNode(conf);
  }

  /**
   * Shut down the cluster and the secondary NN. The secondary NN is shut
   * down in a finally block so that it is not leaked into later tests when
   * the cluster shutdown throws.
   */
  @After
  public void tearDown() throws Exception {
    try {
      if (cluster != null) {
        cluster.shutdown();
      }
    } finally {
      if (secondaryNN != null) {
        secondaryNN.shutdown();
      }
    }
  }

  /** @return the storage directories the NN's FSImage reports as removed */
  private List<StorageDirectory> getRemovedDirs() {
    return cluster.getNameNode().getFSImage().getRemovedStorageDirs();
  }

  /** @return the number of storage directories reported as removed */
  private int numRemovedDirs() {
    return getRemovedDirs().size();
  }

  /**
   * Create the named file and write the whole buffer to it.
   *
   * @param name path of the file to create
   * @param buff bytes to write
   * @throws IOException if the file cannot be created, written or closed
   */
  private void writeFile(String name, byte[] buff) throws IOException {
    FSDataOutputStream writeStream = fs.create(new Path(name));
    try {
      writeStream.write(buff, 0, buff.length);
    } finally {
      // Always release the stream, even when the write fails.
      writeStream.close();
    }
  }

  /**
   * Read exactly {@code len} bytes from the start of the named file.
   *
   * @param name path of the file to read
   * @param len number of bytes to read
   * @return the bytes read
   * @throws IOException if the file cannot be opened or fully read
   */
  private byte[] readFile(String name, int len) throws IOException {
    FSDataInputStream readStream = fs.open(new Path(name));
    try {
      byte[] buff = new byte[len];
      readStream.readFully(buff);
      return buff;
    } finally {
      // Always release the stream, even when the read fails.
      readStream.close();
    }
  }

  /** Assert that we can create and read a file */
  private void checkFileCreation(String name) throws IOException {
    byte[] buff = "some bytes".getBytes();
    writeFile(name, buff);
    assertTrue("Contents read back from " + name + " differ from what was written",
        Arrays.equals(buff, readFile(name, buff.length)));
  }

  /** Assert that we can read a file we created */
  private void checkFileContents(String name) throws IOException {
    byte[] buff = "some bytes".getBytes();
    assertTrue("Unexpected contents in " + name,
        Arrays.equals(buff, readFile(name, buff.length)));
  }

  /**
   * Replace the NN's edit log with a Mockito spy whose fatalExit does
   * nothing, so that a fatal storage failure does not terminate the test
   * process.
   *
   * @return the installed spy, for later verification
   */
  private FSEditLog installEditLogSpy() {
    FSEditLog spyLog = spy(cluster.getNameNode().getFSImage().getEditLog());
    doNothing().when(spyLog).fatalExit(anyString());
    cluster.getNameNode().getFSImage().setEditLog(spyLog);
    return spyLog;
  }

  /** Remove storage dirs and checkpoint to trigger detection */
  @Test
  public void testCheckpointAfterFailingFirstNamedir() throws IOException {
    assertEquals(0, numRemovedDirs());

    checkFileCreation("file0");

    // Remove the 1st storage dir
    FileUtil.fullyDelete(new File(nameDirs.get(0)));
    secondaryNN.doCheckpoint();
    assertEquals(1, numRemovedDirs());
    assertEquals(nameDirs.get(0), getRemovedDirs().get(0).getRoot().getPath());

    checkFileCreation("file1");

    // Remove the 2nd
    FileUtil.fullyDelete(new File(nameDirs.get(1)));
    secondaryNN.doCheckpoint();
    assertEquals(2, numRemovedDirs());
    assertEquals(nameDirs.get(1), getRemovedDirs().get(1).getRoot().getPath());

    checkFileCreation("file2");

    // Remove the last one. Prevent the NN from exiting the process when
    // it notices this via the checkpoint.
    FSEditLog spyLog = installEditLogSpy();

    // After the checkpoint, we should be dead. Verify fatalExit was
    // called and that eg a checkpoint fails.
    FileUtil.fullyDelete(new File(nameDirs.get(2)));
    try {
      secondaryNN.doCheckpoint();
      fail("There's no storage to retrieve an image from");
    } catch (FileNotFoundException fnf) {
      // Expected
    }
    verify(spyLog, atLeastOnce()).fatalExit(anyString());

    // Check that we can't mutate state without any edit streams
    try {
      checkFileCreation("file3");
      fail("Created a file w/o edit streams");
    } catch (IOException ioe) {
      // Expected. Guard against a null message so that a mismatch is
      // reported as an assertion failure rather than an NPE.
      String msg = ioe.getMessage();
      assertTrue("Unexpected exception message: " + msg,
          msg != null && msg.contains(
              "java.lang.AssertionError: No edit streams to log to"));
    }
  }

  /** Test that we can restart OK after removing a failed dir */
  @Test
  public void testRestartAfterFailingStorageDir() throws IOException {
    assertEquals(0, numRemovedDirs());

    checkFileCreation("file0");

    FileUtil.fullyDelete(new File(nameDirs.get(0)));
    secondaryNN.doCheckpoint();
    assertEquals(1, numRemovedDirs());
    assertEquals(nameDirs.get(0), getRemovedDirs().get(0).getRoot().getPath());

    checkFileCreation("file1");

    // Put the deleted directory back and make sure it really exists before
    // restarting; mkdir() reports failure only through its return value.
    File restoredDir = new File(nameDirs.get(0));
    restoredDir.mkdir();
    assertTrue("Could not re-create " + restoredDir, restoredDir.isDirectory());
    cluster.restartNameNode();

    // The dir was restored, is no longer considered removed
    assertEquals(0, numRemovedDirs());
    checkFileContents("file0");
    checkFileContents("file1");
  }

  /**
   * Test that we abort when there are no valid edit log directories
   * remaining.
   */
  @Test
  public void testAbortOnNoValidEditDirs() throws IOException {
    cluster.restartNameNode();
    assertEquals(0, numRemovedDirs());
    checkFileCreation("file9");

    cluster.getNameNode().getFSImage().
        removeStorageDir(new File(nameDirs.get(0)));
    cluster.getNameNode().getFSImage().
        removeStorageDir(new File(nameDirs.get(1)));

    // Install the spy before removing the last dir so that the fatal exit
    // is intercepted instead of terminating the test process.
    FSEditLog spyLog = installEditLogSpy();
    cluster.getNameNode().getFSImage().
        removeStorageDir(new File(nameDirs.get(2)));

    verify(spyLog, atLeastOnce()).fatalExit(anyString());
  }
}