/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.raid;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URI;
import java.util.Random;
import java.util.UUID;
import java.util.zip.CRC32;

import junit.framework.TestCase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.TestRaidDfs;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.raid.RaidNode;
import org.apache.hadoop.raid.tools.FastFileCheck;

public class TestRaidShell extends TestCase {
  final static Log LOG = LogFactory.getLog(
      "org.apache.hadoop.raid.TestRaidShell");
  final static String TEST_DIR = new File(System.getProperty("test.build.data",
      "build/contrib/raid/test/data")).getAbsolutePath();
  final static String CONFIG_FILE = new File(TEST_DIR,
      "test-raid.xml").getAbsolutePath();
  final static private String RAID_SRC_PATH = "/user/raidtest";
  final static private String RAID_POLICY_NAME = "RaidTest1";
  final static long RELOAD_INTERVAL = 1000;
  final static int NUM_DATANODES = 3;

  Configuration conf;
  String namenode = null;
  MiniDFSCluster dfsCluster = null;
  String hftp = null;
  FileSystem fileSys = null;
  RaidNode cnode = null;
  Random rand = new Random();

  private void doRaid(Path srcPath, Codec codec) throws IOException {
    RaidNode.doRaid(conf, fileSys.getFileStatus(srcPath),
        new Path("/raid"), codec,
        new RaidNode.Statistics(),
        RaidUtils.NULL_PROGRESSABLE,
        false, 1, 1);
  }

  public void testFileCheck() throws Exception {
    LOG.info("Test FileCheck started.");
    mySetup(3, -1);
    File fileList = null;
    try {
      MiniMRCluster mr = new MiniMRCluster(4, namenode, 3);
      String jobTrackerName = "localhost:" + mr.getJobTrackerPort();
      conf.set("mapred.job.tracker", jobTrackerName);

      Path srcPath = new Path("/user/dikang/raidtest/file0");
      TestRaidDfs.createTestFilePartialLastBlock(fileSys, srcPath,
          1, 8, 8192L);
      Codec codec = Codec.getCodec("xor");
      doRaid(srcPath, codec);
      FileStatus stat = fileSys.getFileStatus(srcPath);
      ParityFilePair pfPair = ParityFilePair.getParityFile(codec, stat, conf);
      assertNotNull(pfPair);

      // write the filelist
      fileList = new File(TEST_DIR + "/" +
          UUID.randomUUID().toString());
      BufferedWriter writer = new BufferedWriter(new FileWriter(fileList));
      // list the raided source file that fileCheck should verify
      writer.write(srcPath.toUri().getPath() + "\n");
      writer.close();

      // Create RaidShell
      RaidShell shell = new RaidShell(conf);
      String[] args = new String[4];
      args[0] = "-fileCheck";
      args[1] = "-filesPerJob";
      args[2] = "1";
      args[3] = fileList.getPath();
      assertEquals(0, ToolRunner.run(shell, args));

      // test check source only
      // delete the parity file
      fileSys.delete(pfPair.getPath(), true);
      args = new String[5];
      args[0] = "-fileCheck";
      args[1] = "-filesPerJob";
      args[2] = "1";
      args[3] = "-sourceOnly";
      args[4] = fileList.getPath();
      assertEquals(0, ToolRunner.run(shell, args));
    } finally {
      if (null != fileList) {
        fileList.delete();
      }
      myTearDown();
    }
  }

  /**
   * Test distRaid command
   * @throws Exception
   */
  public void testDistRaid() throws Exception {
    LOG.info("TestDist started.");
    // create a dfs and map-reduce cluster
    mySetup(3, -1);
    MiniMRCluster mr = new MiniMRCluster(4, namenode, 3);
    String jobTrackerName = "localhost:" + mr.getJobTrackerPort();
    conf.set("mapred.job.tracker", jobTrackerName);

    try {
      // Create files to be raided
      TestRaidNode.createTestFiles(fileSys, RAID_SRC_PATH,
          "/raid" + RAID_SRC_PATH, 1, 3, (short)3);
      String subDir = RAID_SRC_PATH + "/subdir";
      TestRaidNode.createTestFiles(
          fileSys, subDir, "/raid" + subDir, 1, 3, (short)3);

      // Create RaidShell and raid the files.
      RaidShell shell = new RaidShell(conf);
      String[] args = new String[3];
      args[0] = "-distRaid";
      args[1] = RAID_POLICY_NAME;
      args[2] = RAID_SRC_PATH;
      assertEquals(0, ToolRunner.run(shell, args));

      // Check files are raided
      checkIfFileRaided(new Path(RAID_SRC_PATH, "file0"));
      checkIfFileRaided(new Path(subDir, "file0"));
    } finally {
      mr.shutdown();
      myTearDown();
    }
  }

  // check if a file has been raided
  private void checkIfFileRaided(Path srcPath) throws IOException {
    FileStatus srcStat = fileSys.getFileStatus(srcPath);
    assertEquals(1, srcStat.getReplication());
    Path parityPath = new Path("/raid", srcPath);
    FileStatus parityStat = fileSys.getFileStatus(parityPath);
    assertEquals(1, parityStat.getReplication());
  }

  /**
   * Create a file with three stripes, corrupt a block in each stripe,
   * and wait for the file to be fixed.
   */
  public void testBlockFix() throws Exception {
    LOG.info("Test testBlockFix started.");
    long blockSize = 8192L;
    int stripeLength = 3;
    mySetup(stripeLength, -1);
    Path file1 = new Path(RAID_SRC_PATH, "file1");
    Path destPath = new Path("/raid" + RAID_SRC_PATH);
    long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
        1, 7, blockSize);
    long file1Len = fileSys.getFileStatus(file1).getLen();
    LOG.info("Test testBlockFix created test files");

    // create an instance of the RaidNode
    Configuration localConf = new Configuration(conf);
    localConf.setInt("raid.blockfix.interval", 1000);
    localConf.set("raid.blockfix.classname",
        "org.apache.hadoop.raid.LocalBlockIntegrityMonitor");
    // the RaidNode does the raiding inline (instead of submitting to map/reduce)
    conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
    // use local block fixer
    conf.set("raid.blockfix.classname",
        "org.apache.hadoop.raid.LocalBlockIntegrityMonitor");

    cnode = RaidNode.createRaidNode(null, localConf);

    try {
      TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
      cnode.stop();
      cnode.join();
      cnode = null;

      FileStatus srcStat = fileSys.getFileStatus(file1);
      LocatedBlocks locations = getBlockLocations(file1, srcStat.getLen());

      DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
      ClientProtocol namenode = dfs.getClient().namenode;

      String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
      assertEquals(corruptFiles.length, 0);

      // Corrupt one block in each of the three stripes. We can fix them.
      TestRaidDfs.corruptBlock(file1, locations.get(0).getBlock(),
          NUM_DATANODES, true, dfsCluster);  // delete block
      TestRaidDfs.corruptBlock(file1, locations.get(4).getBlock(),
          NUM_DATANODES, false, dfsCluster); // corrupt block
      TestRaidDfs.corruptBlock(file1, locations.get(6).getBlock(),
          NUM_DATANODES, true, dfsCluster);  // delete last (partial) block
      LocatedBlock[] toReport = new LocatedBlock[3];
      toReport[0] = locations.get(0);
      toReport[1] = locations.get(4);
      toReport[2] = locations.get(6);
      namenode.reportBadBlocks(toReport);

      corruptFiles = DFSUtil.getCorruptFiles(dfs);
      assertEquals(corruptFiles.length, 1);
      assertEquals(corruptFiles[0], file1.toString());

      // Create RaidShell and fix the file.
      RaidShell shell = new RaidShell(conf);
      String[] args = new String[2];
      args[0] = "-recoverBlocks";
      args[1] = file1.toUri().getPath();
      assertEquals(0, ToolRunner.run(shell, args));

      long start = System.currentTimeMillis();
      do {
        LOG.info("Test testBlockFix waiting for files to be fixed.");
        Thread.sleep(1000);
        corruptFiles = DFSUtil.getCorruptFiles(dfs);
      } while (corruptFiles.length != 0 &&
               System.currentTimeMillis() - start < 120000);
      assertEquals(0, corruptFiles.length);

      dfs = getDFS(conf, dfs);
      assertTrue(TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
    } catch (Exception e) {
      LOG.info("Test testBlockFix Exception " + e +
          StringUtils.stringifyException(e));
      throw e;
    } finally {
      myTearDown();
    }
    LOG.info("Test testBlockFix completed.");
  }

  private static DistributedFileSystem getDFS(
      Configuration conf, FileSystem dfs) throws IOException {
    Configuration clientConf = new Configuration(conf);
    clientConf.set("fs.hdfs.impl",
        "org.apache.hadoop.hdfs.DistributedFileSystem");
    clientConf.setBoolean("fs.hdfs.impl.disable.cache", true);
    URI dfsUri = dfs.getUri();
    FileSystem.closeAll();
    return (DistributedFileSystem) FileSystem.get(dfsUri, clientConf);
  }

  private void mySetup(int stripeLength, int timeBeforeHar) throws Exception {
    new File(TEST_DIR).mkdirs(); // Make sure data directory exists
    conf = new Configuration();

    conf.set("raid.config.file", CONFIG_FILE);
    conf.setBoolean("raid.config.reload", true);
    conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);

    // scan all policies once every 5 seconds
    conf.setLong("raid.policy.rescan.interval", 5000);

    // do not use map-reduce cluster for Raiding
    conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
    conf.set("raid.server.address", "localhost:" + MiniDFSCluster.getFreePort());

    conf.setBoolean("dfs.permissions", false);

    dfsCluster = new MiniDFSCluster(conf, NUM_DATANODES, true, null);
    dfsCluster.waitActive();
    fileSys = dfsCluster.getFileSystem();
    namenode = fileSys.getUri().toString();

    FileSystem.setDefaultUri(conf, namenode);
    hftp = "hftp://localhost.localdomain:" + dfsCluster.getNameNodePort();

    FileSystem.setDefaultUri(conf, namenode);

    FileWriter fileWriter = new FileWriter(CONFIG_FILE);
    fileWriter.write("<?xml version=\"1.0\"?>\n");
    String str =
        "<configuration> " +
          "<policy name = \"" + RAID_POLICY_NAME + "\"> " +
            "<srcPath prefix=\"" + RAID_SRC_PATH + "\"/> " +
            "<codecId>xor</codecId> " +
            "<destPath> /raid</destPath> " +
            "<property> " +
              "<name>targetReplication</name> " +
              "<value>1</value> " +
              "<description>after RAIDing, decrease the replication factor of a file to this value."
+ "</description> " + "</property> " + "<property> " + "<name>metaReplication</name> " + "<value>1</value> " + "<description> replication factor of parity file" + "</description> " + "</property> " + "<property> " + "<name>modTimePeriod</name> " + "<value>0</value> " + "<description> time (milliseconds) after a file is modified to make it " + "a candidate for RAIDing " + "</description> " + "</property> "; if (timeBeforeHar >= 0) { str += "<property> " + "<name>time_before_har</name> " + "<value>" + timeBeforeHar + "</value> " + "<description> amount of time waited before har'ing parity files" + "</description> " + "</property> "; } str += "</policy>" + "</configuration>"; fileWriter.write(str); fileWriter.close(); Utils.loadTestCodecs(conf, stripeLength, 1, 3, "/raid", "/raidrs"); } private void myTearDown() throws Exception { if (cnode != null) { cnode.stop(); cnode.join(); } if (dfsCluster != null) { dfsCluster.shutdown(); } } private LocatedBlocks getBlockLocations(Path file, long length) throws IOException { DistributedFileSystem dfs = (DistributedFileSystem) fileSys; return dfs.getClient().namenode.getBlockLocations(file.toString(), 0, length); } private long getCRC(FileSystem fs, Path p) throws IOException { CRC32 crc = new CRC32(); FSDataInputStream stm = fs.open(p); for (int b = 0; b > 0; b = stm.read()) { crc.update(b); } stm.close(); return crc.getValue(); } }