/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.server.namenode; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.PrintStream; import java.io.RandomAccessFile; import java.net.InetSocketAddress; import java.nio.channels.FileChannel; import java.security.PrivilegedExceptionAction; import java.util.Random; import junit.framework.TestCase; import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.TestDatanodeBlockScanner; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.tools.DFSck; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Level; /** * A JUnit test for doing fsck */ public class TestFsck extends TestCase { static String runFsck(Configuration conf, int expectedErrCode, boolean checkErrorCode,String... path) throws Exception { PrintStream oldOut = System.out; ByteArrayOutputStream bStream = new ByteArrayOutputStream(); PrintStream newOut = new PrintStream(bStream, true); System.setOut(newOut); ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.ALL); int errCode = ToolRunner.run(new DFSck(conf), path); if (checkErrorCode) assertEquals(expectedErrCode, errCode); ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.INFO); System.setOut(oldOut); return bStream.toString(); } /** do fsck */ public void testFsck() throws Exception { DFSTestUtil util = new DFSTestUtil("TestFsck", 20, 3, 8*1024); MiniDFSCluster cluster = null; FileSystem fs = null; try { Configuration conf = new HdfsConfiguration(); final long precision = 1L; conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, precision); conf.setLong("dfs.blockreport.intervalMsec", 10000L); cluster = new MiniDFSCluster(conf, 4, true, null); fs = cluster.getFileSystem(); final String fileName = "/srcdat"; util.createFiles(fs, fileName); util.waitReplication(fs, fileName, (short)3); final Path file = new Path(fileName); long aTime = fs.getFileStatus(file).getAccessTime(); Thread.sleep(precision); String outStr = runFsck(conf, 0, true, "/"); assertEquals(aTime, fs.getFileStatus(file).getAccessTime()); assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS)); System.out.println(outStr); if (fs != null) {try{fs.close();} catch(Exception e){}} cluster.shutdown(); // restart the cluster; bring up namenode but not the data nodes cluster = new MiniDFSCluster(conf, 0, false, null); outStr = runFsck(conf, 1, true, "/"); // expect the result is corrupt assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS)); System.out.println(outStr); // bring up data nodes & cleanup cluster cluster.startDataNodes(conf, 4, true, null, null); cluster.waitActive(); cluster.waitClusterUp(); fs = cluster.getFileSystem(); util.cleanup(fs, "/srcdat"); } finally { if (fs != null) {try{fs.close();} catch(Exception e){}} if (cluster != null) { cluster.shutdown(); } } } public void testFsckNonExistent() throws Exception { DFSTestUtil util = new DFSTestUtil("TestFsck", 20, 3, 8*1024); MiniDFSCluster cluster = null; FileSystem fs = null; try { Configuration conf = new HdfsConfiguration(); conf.setLong("dfs.blockreport.intervalMsec", 10000L); cluster = new MiniDFSCluster(conf, 4, true, null); fs = cluster.getFileSystem(); util.createFiles(fs, "/srcdat"); util.waitReplication(fs, "/srcdat", (short)3); String outStr = runFsck(conf, 0, true, "/non-existent"); assertEquals(-1, outStr.indexOf(NamenodeFsck.HEALTHY_STATUS)); System.out.println(outStr); util.cleanup(fs, "/srcdat"); } finally { if (fs != null) {try{fs.close();} catch(Exception e){}} if (cluster != null) { cluster.shutdown(); } } } /** Test fsck with permission set on inodes */ public void testFsckPermission() throws Exception { final DFSTestUtil util = new DFSTestUtil(getClass().getSimpleName(), 20, 3, 8*1024); final Configuration conf = new HdfsConfiguration(); conf.setLong("dfs.blockreport.intervalMsec", 10000L); MiniDFSCluster cluster = null; try { // Create a cluster with the current user, write some files cluster = new MiniDFSCluster(conf, 4, true, null); final MiniDFSCluster c2 = cluster; final String dir = "/dfsck"; final Path dirpath = new Path(dir); final FileSystem fs = c2.getFileSystem(); util.createFiles(fs, dir); util.waitReplication(fs, dir, (short) 3); fs.setPermission(dirpath, new FsPermission((short) 0700)); // run DFSck as another user, should fail with permission issue UserGroupInformation fakeUGI = UserGroupInformation.createUserForTesting( "ProbablyNotARealUserName", new String[] { "ShangriLa" }); fakeUGI.doAs(new PrivilegedExceptionAction<Object>() { @Override public Object run() throws Exception { System.out.println(runFsck(conf, -1, true, dir)); return null; } }); // set permission and try DFSck again as the fake user, should succeed fs.setPermission(dirpath, new FsPermission((short) 0777)); fakeUGI.doAs(new PrivilegedExceptionAction<Object>() { @Override public Object run() throws Exception { final String outStr = runFsck(conf, 0, true, dir); System.out.println(outStr); assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS)); return null; } }); util.cleanup(fs, dir); } finally { if (cluster != null) { cluster.shutdown(); } } } public void testFsckMove() throws Exception { DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3, 8*1024); MiniDFSCluster cluster = null; FileSystem fs = null; try { Configuration conf = new HdfsConfiguration(); conf.setLong("dfs.blockreport.intervalMsec", 10000L); conf.setInt("dfs.datanode.directoryscan.interval", 1); cluster = new MiniDFSCluster(conf, 4, true, null); String topDir = "/srcdat"; fs = cluster.getFileSystem(); cluster.waitActive(); util.createFiles(fs, topDir); util.waitReplication(fs, topDir, (short)3); String outStr = runFsck(conf, 0, true, "/"); assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS)); // Corrupt a block by deleting it String[] fileNames = util.getFileNames(topDir); DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf); String block = dfsClient.getNamenode(). getBlockLocations(fileNames[0], 0, Long.MAX_VALUE). get(0).getBlock().getBlockName(); File baseDir = new File(System.getProperty("test.build.data", "build/test/data"),"dfs/data"); for (int i=0; i<8; i++) { File blockFile = new File(baseDir, "data" +(i+1) + MiniDFSCluster.FINALIZED_DIR_NAME + block); if(blockFile.exists()) { assertTrue(blockFile.delete()); } } // We excpect the filesystem to be corrupted outStr = runFsck(conf, 1, false, "/"); while (!outStr.contains(NamenodeFsck.CORRUPT_STATUS)) { try { Thread.sleep(100); } catch (InterruptedException ignore) { } outStr = runFsck(conf, 1, false, "/"); } // Fix the filesystem by moving corrupted files to lost+found outStr = runFsck(conf, 1, true, "/", "-move"); assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS)); // Check to make sure we have healthy filesystem outStr = runFsck(conf, 0, true, "/"); assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS)); util.cleanup(fs, topDir); if (fs != null) {try{fs.close();} catch(Exception e){}} cluster.shutdown(); } finally { if (fs != null) {try{fs.close();} catch(Exception e){}} if (cluster != null) { cluster.shutdown(); } } } public void testFsckOpenFiles() throws Exception { DFSTestUtil util = new DFSTestUtil("TestFsck", 4, 3, 8*1024); MiniDFSCluster cluster = null; FileSystem fs = null; try { Configuration conf = new HdfsConfiguration(); conf.setLong("dfs.blockreport.intervalMsec", 10000L); cluster = new MiniDFSCluster(conf, 4, true, null); String topDir = "/srcdat"; String randomString = "HADOOP "; fs = cluster.getFileSystem(); cluster.waitActive(); util.createFiles(fs, topDir); util.waitReplication(fs, topDir, (short)3); String outStr = runFsck(conf, 0, true, "/"); assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS)); // Open a file for writing and do not close for now Path openFile = new Path(topDir + "/openFile"); FSDataOutputStream out = fs.create(openFile); int writeCount = 0; while (writeCount != 100) { out.write(randomString.getBytes()); writeCount++; } // We expect the filesystem to be HEALTHY and show one open file outStr = runFsck(conf, 0, true, topDir); System.out.println(outStr); assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS)); assertFalse(outStr.contains("OPENFORWRITE")); // Use -openforwrite option to list open files outStr = runFsck(conf, 0, true, topDir, "-openforwrite"); System.out.println(outStr); assertTrue(outStr.contains("OPENFORWRITE")); assertTrue(outStr.contains("openFile")); // Close the file out.close(); // Now, fsck should show HEALTHY fs and should not show any open files outStr = runFsck(conf, 0, true, topDir); System.out.println(outStr); assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS)); assertFalse(outStr.contains("OPENFORWRITE")); util.cleanup(fs, topDir); if (fs != null) {try{fs.close();} catch(Exception e){}} cluster.shutdown(); } finally { if (fs != null) {try{fs.close();} catch(Exception e){}} if (cluster != null) { cluster.shutdown(); } } } public void testCorruptBlock() throws Exception { Configuration conf = new HdfsConfiguration(); conf.setLong("dfs.blockreport.intervalMsec", 1000); FileSystem fs = null; DFSClient dfsClient = null; LocatedBlocks blocks = null; int replicaCount = 0; Random random = new Random(); String outStr = null; MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster(conf, 3, true, null); cluster.waitActive(); fs = cluster.getFileSystem(); Path file1 = new Path("/testCorruptBlock"); DFSTestUtil.createFile(fs, file1, 1024, (short)3, 0); // Wait until file replication has completed DFSTestUtil.waitReplication(fs, file1, (short)3); String block = DFSTestUtil.getFirstBlock(fs, file1).getBlockName(); // Make sure filesystem is in healthy state outStr = runFsck(conf, 0, true, "/"); System.out.println(outStr); assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS)); // corrupt replicas File baseDir = new File(System.getProperty("test.build.data", "build/test/data"),"dfs/data"); for (int i=0; i < 6; i++) { File blockFile = new File(baseDir, "data" + (i+1) + MiniDFSCluster.FINALIZED_DIR_NAME + block); if (blockFile.exists()) { RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw"); FileChannel channel = raFile.getChannel(); String badString = "BADBAD"; int rand = random.nextInt((int)channel.size()/2); raFile.seek(rand); raFile.write(badString.getBytes()); raFile.close(); } } // Read the file to trigger reportBadBlocks try { IOUtils.copyBytes(fs.open(file1), new IOUtils.NullOutputStream(), conf, true); } catch (IOException ie) { // Ignore exception } dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf); blocks = dfsClient.getNamenode(). getBlockLocations(file1.toString(), 0, Long.MAX_VALUE); replicaCount = blocks.get(0).getLocations().length; while (replicaCount != 3) { try { Thread.sleep(100); } catch (InterruptedException ignore) { } blocks = dfsClient.getNamenode(). getBlockLocations(file1.toString(), 0, Long.MAX_VALUE); replicaCount = blocks.get(0).getLocations().length; } assertTrue (blocks.get(0).isCorrupt()); // Check if fsck reports the same outStr = runFsck(conf, 1, true, "/"); System.out.println(outStr); assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS)); assertTrue(outStr.contains("testCorruptBlock")); } finally { if (cluster != null) {cluster.shutdown();} } } /** Test if fsck can return -1 in case of failure * * @throws Exception */ public void testFsckError() throws Exception { MiniDFSCluster cluster = null; try { // bring up a one-node cluster Configuration conf = new HdfsConfiguration(); cluster = new MiniDFSCluster(conf, 1, true, null); String fileName = "/test.txt"; Path filePath = new Path(fileName); FileSystem fs = cluster.getFileSystem(); // create a one-block file DFSTestUtil.createFile(fs, filePath, 1L, (short)1, 1L); DFSTestUtil.waitReplication(fs, filePath, (short)1); // intentionally corrupt NN data structure INodeFile node = (INodeFile)cluster.getNamesystem().dir.rootDir.getNode(fileName, true); assertEquals(node.blocks.length, 1); node.blocks[0].setNumBytes(-1L); // set the block length to be negative // run fsck and expect a failure with -1 as the error code String outStr = runFsck(conf, -1, true, fileName); System.out.println(outStr); assertTrue(outStr.contains(NamenodeFsck.FAILURE_STATUS)); // clean up file system fs.delete(filePath, true); } finally { if (cluster != null) {cluster.shutdown();} } } /** * Check if NamenodeFsck.buildSummaryResultForListCorruptFiles constructs the * proper string according to the number of corrupt files */ public void testbuildResultForListCorruptFile() { assertEquals("Verifying result for zero corrupt files", "Unable to locate any corrupt files under '/'.\n\n" + "Please run a complete fsck to confirm if '/' " + NamenodeFsck.HEALTHY_STATUS, NamenodeFsck .buildSummaryResultForListCorruptFiles(0, "/")); assertEquals("Verifying result for one corrupt file", "There is at least 1 corrupt file under '/', which " + NamenodeFsck.CORRUPT_STATUS, NamenodeFsck .buildSummaryResultForListCorruptFiles(1, "/")); assertEquals("Verifying result for than one corrupt file", "There are at least 100 corrupt files under '/', which " + NamenodeFsck.CORRUPT_STATUS, NamenodeFsck .buildSummaryResultForListCorruptFiles(100, "/")); try { NamenodeFsck.buildSummaryResultForListCorruptFiles(-1, "/"); fail("NamenodeFsck.buildSummaryResultForListCorruptFiles should " + "have thrown IllegalArgumentException for non-positive argument"); } catch (IllegalArgumentException e) { // expected result } } /** check if option -list-corruptfiles of fsck command works properly */ public void testCorruptFilesOption() throws Exception { MiniDFSCluster cluster = null; try { final int FILE_SIZE = 512; // the files and directories are intentionally prefixes of each other in // order to verify if fsck can distinguish correctly whether the path // supplied by user is a file or a directory Path[] filepaths = { new Path("/audiobook"), new Path("/audio/audio1"), new Path("/audio/audio2"), new Path("/audio/audio") }; Configuration conf = new HdfsConfiguration(); conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans // directories conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends // block reports cluster = new MiniDFSCluster(conf, 1, true, null); FileSystem fs = cluster.getFileSystem(); // create files for (Path filepath : filepaths) { DFSTestUtil.createFile(fs, filepath, FILE_SIZE, (short) 1, 0L); DFSTestUtil.waitReplication(fs, filepath, (short) 1); } // verify there are not corrupt files ClientProtocol namenode = DFSClient.createNamenode(conf); FileStatus[] badFiles = namenode.getCorruptFiles(); assertTrue("There are " + badFiles.length + " corrupt files, but expecting none", badFiles.length == 0); // Check if fsck -list-corruptfiles agree String outstr = runFsck(conf, 0, true, "/", "-list-corruptfiles"); assertTrue(outstr.contains(NamenodeFsck .buildSummaryResultForListCorruptFiles(0, "/"))); // Now corrupt all the files except for the last one for (int idx = 0; idx < filepaths.length - 1; idx++) { String blockName = DFSTestUtil.getFirstBlock(fs, filepaths[idx]) .getBlockName(); TestDatanodeBlockScanner.corruptReplica(blockName, 0); // read the file so that the corrupt block is reported to NN FSDataInputStream in = fs.open(filepaths[idx]); try { in.readFully(new byte[FILE_SIZE]); } catch (ChecksumException ignored) { // checksum error is expected. } in.close(); } // verify if all corrupt files were reported to NN badFiles = namenode.getCorruptFiles(); assertTrue("Expecting 3 corrupt files, but got " + badFiles.length, badFiles.length == 3); // check the corrupt file String corruptFile = "/audiobook"; outstr = runFsck(conf, 1, true, corruptFile, "-list-corruptfiles"); assertTrue(outstr.contains(NamenodeFsck .buildSummaryResultForListCorruptFiles(1, corruptFile))); // check corrupt dir String corruptDir = "/audio"; outstr = runFsck(conf, 1, true, corruptDir, "-list-corruptfiles"); assertTrue(outstr.contains("/audio/audio1")); assertTrue(outstr.contains("/audio/audio2")); assertTrue(outstr.contains(NamenodeFsck .buildSummaryResultForListCorruptFiles(2, corruptDir))); // check healthy file String healthyFile = "/audio/audio"; outstr = runFsck(conf, 0, true, healthyFile, "-list-corruptfiles"); assertTrue(outstr.contains(NamenodeFsck .buildSummaryResultForListCorruptFiles(0, healthyFile))); // clean up for (Path filepath : filepaths) { fs.delete(filepath, false); } } finally { if (cluster != null) { cluster.shutdown(); } } } }