/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.File;
import java.io.IOException;
import junit.framework.TestCase;
import org.junit.Test;
import org.junit.After;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.TestRaidDfs;
import org.apache.hadoop.raid.RaidNode;
/**
 * Tests {@code RaidShell -fsck} against directory-RAID (RS) parity: raids a
 * source directory, corrupts various combinations of source/parity blocks,
 * and verifies the corrupt-file count that fsck reports.
 */
public class TestDirectoryRaidShellFsck extends TestCase {
  final static Log LOG =
      LogFactory.getLog("org.apache.hadoop.raid.TestDirectoryRaidShellFsck");
  final static String TEST_DIR =
      new File(System.
          getProperty("test.build.data", "build/contrib/raid/test/data")).
          getAbsolutePath();
  final static int NUM_DATANODES = 4;
  final static int STRIPE_BLOCKS = 3; // number of blocks per stripe
  final static long BASIC_BLOCK_SIZE = 8192L; // size of a block in bytes
  final static long PARITY_BLOCK_SIZE = BASIC_BLOCK_SIZE * 2;
  final short targetReplication = 1;
  final short metaReplication = 1;
  // Default layout: three files with heterogeneous sizes and block sizes;
  // the directory's logical blocks are numbered in the trailing comments.
  final long[] fileSizes =
      new long[]{BASIC_BLOCK_SIZE + BASIC_BLOCK_SIZE/2,       // block 0, 1
                 3*BASIC_BLOCK_SIZE,                          // block 2, 3
                 BASIC_BLOCK_SIZE + BASIC_BLOCK_SIZE/2 + 1};  // block 4, 5, 6, 7
  final long[] blockSizes = new long[]{BASIC_BLOCK_SIZE, 2*BASIC_BLOCK_SIZE,
                                       BASIC_BLOCK_SIZE/2};
  // Alternate layout: each file is exactly one full stripe of uniform blocks.
  final long[] fileSizes1 = new long[] {
      BASIC_BLOCK_SIZE * STRIPE_BLOCKS,
      BASIC_BLOCK_SIZE * STRIPE_BLOCKS,
      BASIC_BLOCK_SIZE * STRIPE_BLOCKS,
  };
  final long[] blockSizes1 = new long[] {
      BASIC_BLOCK_SIZE, BASIC_BLOCK_SIZE, BASIC_BLOCK_SIZE
  };
  final Path srcDir = new Path("/user/dhruba/raidtestrs");
  final Path parityFile = new Path("/destraidrs/user/dhruba/raidtestrs");
  Configuration conf = null;
  Configuration clientConf = null;
  MiniDFSCluster cluster = null;
  DistributedFileSystem dfs = null;
  RaidShell shell = null;
  String[] args = null;
  Path[] files = null;            // paths of the raided source files
  FileStatus[] srcStats = null;   // FileStatus of each source file
  FileStatus parityStat = null;   // FileStatus of the directory parity file

  /**
   * Sets up the cluster with the default file/block-size layout.
   *
   * @param rsParityLength number of RS parity blocks per stripe
   */
  public void setUpCluster(int rsParityLength)
      throws IOException, ClassNotFoundException {
    setUpCluster(rsParityLength, fileSizes, blockSizes);
  }

  /**
   * Creates a MiniDFS instance, writes the given source files under
   * {@link #srcDir}, raids the directory with the "rs" codec, and prepares a
   * RaidShell plus {@code -fsck /} arguments for the tests to run.
   *
   * @param rsParityLength number of RS parity blocks per stripe
   * @param fileSizes      sizes of the source files to create
   * @param blockSizes     HDFS block size for each source file
   */
  public void setUpCluster(int rsParityLength, long[] fileSizes,
                           long[] blockSizes) throws IOException, ClassNotFoundException {
    new File(TEST_DIR).mkdirs(); // Make sure data directory exists
    conf = new Configuration();
    Utils.loadTestCodecs(conf, STRIPE_BLOCKS, STRIPE_BLOCKS, 1, rsParityLength,
        "/destraid", "/destraidrs", false, true);
    conf.setBoolean("dfs.permissions", false);
    cluster = new MiniDFSCluster(conf, NUM_DATANODES, true, null);
    cluster.waitActive();
    dfs = (DistributedFileSystem) cluster.getFileSystem();
    TestDirectoryRaidDfs.setupStripeStore(conf, dfs);
    String namenode = dfs.getUri().toString();
    FileSystem.setDefaultUri(conf, namenode);
    Codec dirRS = Codec.getCodec("rs");
    long[] crcs = new long[fileSizes.length];
    int[] seeds = new int[fileSizes.length];
    files = TestRaidDfs.createTestFiles(srcDir, fileSizes,
        blockSizes, crcs, seeds, (FileSystem)dfs, (short)1);
    assertTrue(RaidNode.doRaid(conf, dfs.getFileStatus(srcDir),
        new Path(dirRS.parityDirectory), dirRS,
        new RaidNode.Statistics(),
        RaidUtils.NULL_PROGRESSABLE,
        false, 1, 1));
    srcStats = new FileStatus[files.length];
    for (int i = 0 ; i < files.length; i++) {
      srcStats[i] = dfs.getFileStatus(files[i]);
    }
    parityStat = dfs.getFileStatus(parityFile);
    // The shell must go through DistributedRaidFileSystem so fsck sees the
    // raided view of the namespace.
    clientConf = new Configuration(conf);
    clientConf.set("fs.hdfs.impl",
        "org.apache.hadoop.hdfs.DistributedRaidFileSystem");
    clientConf.set("fs.raid.underlyingfs.impl",
        "org.apache.hadoop.hdfs.DistributedFileSystem");
    // prepare shell and arguments
    shell = new RaidShell(clientConf);
    args = new String[2];
    args[0] = "-fsck";
    args[1] = "/";
  }

  /**
   * Removes the specified blocks of a file from MiniDFS storage and reports
   * them to the namenode as corrupt.
   *
   * @param cluster   the running MiniDFS cluster
   * @param fsStat    status of the file whose blocks are corrupted
   * @param blockNums indices of the blocks to corrupt within that file
   */
  public static void removeAndReportBlock(MiniDFSCluster cluster,
                                          FileStatus fsStat,
                                          int[] blockNums)
      throws IOException {
    DistributedFileSystem blockDfs = (DistributedFileSystem)cluster.getFileSystem();
    Path filePath = fsStat.getPath();
    LocatedBlocks lbs = blockDfs.getClient().namenode.
        getBlockLocations(filePath.toUri().getPath(), 0, fsStat.getLen());
    for (int blockNum: blockNums) {
      // Use a JUnit assertion rather than the Java `assert` keyword so the
      // check is enforced even when the JVM runs without -ea.
      assertTrue(blockNum < lbs.getLocatedBlocks().size());
      LocatedBlock block = lbs.get(blockNum);
      TestRaidDfs.corruptBlock(filePath, block.getBlock(),
          NUM_DATANODES, true, cluster);
      // report deleted block to the name node
      LocatedBlock[] toReport = { block };
      blockDfs.getClient().namenode.reportBadBlocks(toReport);
    }
  }

  /**
   * checks fsck with files whose size is one stripe
   */
  @Test
  public void testFilesWithStripeSize() throws Exception {
    LOG.info("testFilesWithStripeSize");
    int rsParityLength = 3;
    setUpCluster(rsParityLength, fileSizes1, blockSizes1);
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 0);
    LOG.info("Corrupt the last file");
    // Losing a whole stripe of source blocks plus one parity block of the
    // same stripe makes that stripe unrecoverable -> exactly one corrupt file.
    removeAndReportBlock(cluster, srcStats[2], new int[]{0, 1, 2});
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 1);
    LOG.info(" Corrupt 1 blocks of parity file in stripe 2 ");
    removeAndReportBlock(cluster, parityStat, new int[]{6});
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 2);
    assertEquals(0, ToolRunner.run(shell, args));
    int result = shell.getCorruptCount();
    assertEquals("fsck should return 1", 1, result);
  }

  /**
   * checks fsck with no missing blocks
   */
  @Test
  public void testClean() throws Exception {
    LOG.info("testClean");
    setUpCluster(3);
    assertEquals(0, ToolRunner.run(shell, args));
    int result = shell.getCorruptCount();
    assertEquals("fsck should return 0", 0, result);
  }

  /**
   * checks fsck with missing all blocks in files but not in parity files
   * Because parity stripe length is 3, all stripes remain recoverable.
   */
  @Test
  public void testFileBlockMissing() throws Exception {
    LOG.info("testFileBlockMissing");
    int rsParityLength = 3;
    setUpCluster(rsParityLength);
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 0);
    LOG.info("Corrupt all blocks in all source files");
    for (int i = 0; i < files.length; i++) {
      long blockNum = RaidNode.getNumBlocks(srcStats[i]);
      for (int j = 0; j < blockNum; j++) {
        removeAndReportBlock(cluster, srcStats[i], new int[]{j});
      }
    }
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, files.length);
    assertEquals(0, ToolRunner.run(shell, args));
    int result = shell.getCorruptCount();
    assertEquals("fsck should return 0", 0, result);
  }

  /**
   * checks fsck with missing all blocks in parity file but not in source files
   * Raid fsck actually skips all parity files, so nothing is reported.
   */
  @Test
  public void testParityBlockMissing() throws Exception {
    LOG.info("testParityBlockMissing");
    int rsParityLength = 3;
    setUpCluster(rsParityLength);
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 0);
    long blockNum = RaidNode.getNumBlocks(parityStat);
    LOG.info("Corrupt all blocks in parity file");
    for (int i = 0; i < blockNum; i++) {
      removeAndReportBlock(cluster, parityStat, new int[]{i});
    }
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 1);
    assertEquals(0, ToolRunner.run(shell, args));
    int result = shell.getCorruptCount();
    assertEquals("fsck should return 0", 0, result);
  }

  /**
   * checks fsck with missing block in both file block and parity block
   * in different stripes; each stripe loses at most rsParityLength blocks,
   * so everything is still recoverable.
   */
  @Test
  public void testFileBlockAndParityBlockMissingInDifferentStripes()
      throws Exception {
    LOG.info("testFileBlockAndParityBlockMissingInDifferentStripes");
    int rsParityLength = 3;
    setUpCluster(rsParityLength);
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 0);
    LOG.info("Corrupt all blocks of source files in the first stripe");
    removeAndReportBlock(cluster, srcStats[0], new int[]{0, 1});
    removeAndReportBlock(cluster, srcStats[1], new int[]{0});
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 2);
    LOG.info("Corrupt all blocks of parity file in the second and third stripe");
    removeAndReportBlock(cluster, parityStat, new int[]{3, 4, 5, 6, 7, 8});
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 3);
    assertEquals(0, ToolRunner.run(shell, args));
    int result = shell.getCorruptCount();
    assertEquals("fsck should return 0", 0, result);
  }

  /**
   * checks fsck with missing blocks in both file block and parity block
   * in same stripe; that stripe exceeds the parity length, so the file
   * containing its source block is corrupt.
   */
  @Test
  public void testFileBlockAndParityBlockMissingInSameStripe()
      throws Exception {
    LOG.info("testFileBlockAndParityBlockMissingInSameStripe");
    int rsParityLength = 3;
    setUpCluster(rsParityLength);
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 0);
    LOG.info("Corrupt all blocks of parity file in the first stripe");
    removeAndReportBlock(cluster, parityStat, new int[]{0, 1, 2});
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 1);
    LOG.info("Corrupt the second block in the first stripe");
    removeAndReportBlock(cluster, srcStats[0], new int[]{1});
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 2);
    LOG.info("Corrupt the first block in the second stripe");
    removeAndReportBlock(cluster, srcStats[1], new int[]{1});
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 3);
    assertEquals(0, ToolRunner.run(shell, args));
    int result = shell.getCorruptCount();
    // only the first file is corrupt
    assertEquals("fsck should return 1", 1, result);
  }

  /**
   * checks fsck with 3 missing blocks in same stripe
   * Because rsParityLength is 2, losing 3 blocks in one stripe corrupts the
   * files whose blocks are in that stripe.
   */
  @Test
  public void test3FileBlocksMissingInSameStripe()
      throws Exception {
    LOG.info("test3FileBlocksMissingInSameStripe");
    int rsParityLength = 2;
    setUpCluster(rsParityLength);
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 0);
    LOG.info("Corrupt the all blocks of source files in the second " +
        "stripe and third stripe, the second stripe is corrupt, so " +
        " file 1 and file 2 are corrupt");
    removeAndReportBlock(cluster, srcStats[1], new int[]{1});
    removeAndReportBlock(cluster, srcStats[2], new int[]{0, 1, 2, 3});
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 2);
    assertEquals(0, ToolRunner.run(shell, args));
    int result = shell.getCorruptCount();
    assertEquals("fsck should return 2", 2, result);
  }

  /**
   * Corrupt blocks in all stripes
   */
  @Test
  public void testCorruptionInAllStripes() throws Exception {
    LOG.info("testCorruptionInAllStripes");
    int rsParityLength = 3;
    setUpCluster(rsParityLength);
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 0);
    LOG.info(" Corrupt 2 blocks of source file in stripe 0, 1, 2 ");
    removeAndReportBlock(cluster, srcStats[0], new int[]{0, 1});
    removeAndReportBlock(cluster, srcStats[1], new int[]{1});
    removeAndReportBlock(cluster, srcStats[2], new int[]{2, 3});
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 3);
    LOG.info(" Corrupt 2 blocks of parity file in stripe 0, 2 ");
    removeAndReportBlock(cluster, parityStat, new int[]{0, 1, 6, 7});
    TestRaidShellFsck.waitUntilCorruptFileCount(dfs, 4);
    assertEquals(0, ToolRunner.run(shell, args));
    int result = shell.getCorruptCount();
    // the second file is not recoverable
    assertEquals("fsck should return 2", 2, result);
  }

  /** Shuts down the MiniDFS cluster after each test. */
  @After
  public void tearDown() throws Exception {
    if (cluster != null) {
      cluster.shutdown();
      cluster = null;
    }
    dfs = null;
    LOG.info("Test cluster shut down");
  }
}