/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.raid; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; import java.util.zip.CRC32; import junit.framework.TestCase; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.TestRaidDfs; import org.apache.hadoop.mapred.Counters; import org.apache.hadoop.mapred.MiniMRCluster; import org.apache.hadoop.raid.protocol.PolicyInfo; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.hdfs.protocol.DataTransferProtocol; import org.apache.hadoop.raid.DistRaid.Counter; import org.apache.hadoop.raid.LogUtils.LOGRESULTS; import org.apache.hadoop.raid.LogUtils.LOGTYPES; import org.apache.hadoop.raid.PlacementMonitor.BlockInfo; import org.apache.hadoop.raid.Utils.Builder; /** * Test the generation of parity blocks for files with different block * sizes. Also test that a data block can be regenerated from a raid stripe * using the parity block */ public class TestRaidNode extends TestCase { final static String TEST_DIR = new File(System.getProperty("test.build.data", "build/contrib/raid/test/data")).getAbsolutePath(); final static String CONFIG_FILE = new File(TEST_DIR, "test-raid.xml").getAbsolutePath(); final static long RELOAD_INTERVAL = 1000; final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidNode"); final static Random rand = new Random(); Configuration conf; String namenode = null; String hftp = null; MiniDFSCluster dfs = null; MiniMRCluster mr = null; FileSystem fileSys = null; String jobTrackerName = null; static public void loadTestCodecs(Configuration conf, int xorStripeLength, int rsStripeLength, int xorParityLength, int rsParityLength) throws IOException { Utils.loadTestCodecs(conf, new Builder[] { // priority 100 Utils.getXORBuilder().setStripeLength(xorStripeLength).setParityLength( xorParityLength), Utils.getXORBuilder().dirRaid(true).setParityDir( "/dir-raid").setStripeLength(xorStripeLength).setParityLength( xorParityLength).setCodeId("dir-xor").setPriority(400), // priority 300 Utils.getRSBuilder().setStripeLength(rsStripeLength).setParityLength( rsParityLength), Utils.getRSBuilder().dirRaid(true).setParityDir( "/dir-raidrs").setStripeLength(rsStripeLength).setParityLength( rsParityLength).setCodeId("dir-rs").setPriority(600) }); } /** * create mapreduce and dfs clusters */ private void createClusters(boolean local, boolean rackAware) throws Exception { if (System.getProperty("hadoop.log.dir") == null) { String base = new File(".").getAbsolutePath(); System.setProperty("hadoop.log.dir", new Path(base).toString() + "/logs"); } new File(TEST_DIR).mkdirs(); // Make sure data directory exists conf = new Configuration(); conf.set("raid.config.file", CONFIG_FILE); conf.setBoolean("raid.config.reload", true); conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL); conf.setLong(JobMonitor.JOBMONITOR_INTERVAL_KEY, 20000L); conf.setLong(RaidNode.TRIGGER_MONITOR_SLEEP_TIME_KEY, 3000L); conf.setLong(RaidNode.MINIMUM_RAIDABLE_FILESIZE_KEY, 1L); conf.setLong(RaidNode.RAID_MOD_TIME_PERIOD_KEY, 0); // Report block deletion quickly conf.setLong("dfs.blockreport.intervalMsec", 8000L); conf.setBoolean(StatisticsCollector.STATS_COLLECTOR_SUBMIT_JOBS_CONFIG, false); conf.set("mapred.raid.http.address", "localhost:0"); //disable purge monitor conf.setLong(PurgeMonitor.PURGE_MONITOR_SLEEP_TIME_KEY, 3600000L); // scan every policy every 5 seconds conf.setLong("raid.policy.rescan.interval", 5 * 1000L); // the RaidNode does the raiding inline (instead of submitting to map/reduce) if (local) { conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode"); } else { conf.set("raid.classname", "org.apache.hadoop.raid.DistRaidNode"); } // use local block fixer conf.set("raid.blockfix.classname", "org.apache.hadoop.raid.LocalBlockIntegrityMonitor"); conf.set("dfs.block.replicator.classname", "org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyRaid"); conf.set("raid.server.address", "localhost:" + MiniDFSCluster.getFreePort()); loadTestCodecs(conf, 3, 10, 1, 5); // create a dfs and map-reduce cluster final int taskTrackers = 4; final int jobTrackerPort = 60050; if (rackAware) { // Because BlockPlacementPolicyRaid only allows one replica in each rack, // spread 6 nodes into 6 racks to make sure chooseTarget function could pick // more than one node. String[] racks = {"/rack1", "/rack2", "/rack3", "/rack4", "/rack5", "/rack6"}; dfs = new MiniDFSCluster(conf, 6, true, racks); } else { dfs = new MiniDFSCluster(conf, 3, true, null); } dfs.waitActive(); fileSys = dfs.getFileSystem(); TestDirectoryRaidDfs.setupStripeStore(conf, fileSys); namenode = fileSys.getUri().toString(); mr = new MiniMRCluster(taskTrackers, namenode, 3); jobTrackerName = "localhost:" + mr.getJobTrackerPort(); hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort(); FileSystem.setDefaultUri(conf, namenode); conf.set("mapred.job.tracker", jobTrackerName); } /** * stop clusters created earlier */ private void stopClusters() throws Exception { if (mr != null) { mr.shutdown(); } if (dfs != null) { dfs.shutdown(); } } public void testFileListDirRaidPolicy() throws Exception { LOG.info("Test testFileListDirRaidPolicy started."); short targetReplication = 2; short metaReplication = 2; short srcReplication = 3; createClusters(false, false); conf.setInt(ConfigManager.DIRRAID_BLOCK_LIMIT_KEY, 14); // avoid raidnode start raiding files conf.setLong(RaidNode.TRIGGER_MONITOR_SLEEP_TIME_KEY, 3600000); ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE); cb.addAbstractPolicy("abstractPolicy2", targetReplication, metaReplication, "dir-xor"); cb.addFileListPolicy("policy3", "/user/rvadali/raiddirlist.txt", "abstractPolicy2"); cb.persist(); RaidNode cnode = null; Path dirListPath = new Path("/user/rvadali/raiddirlist.txt"); try { createTestFiles(fileSys, "/user/rvadali/dir-raidtest/1/", "/dir-raid/user/rvadali/dir-raidtest/1", 2, 8, srcReplication); createTestFiles(fileSys, "/user/rvadali/dir-raidtest/2/", "/dir-raid/user/rvadali/dir-raidtest/2", 2, 7, targetReplication); createTestFiles(fileSys, "/user/rvadali/dir-raidtest/3/", "/dir-raid/user/rvadali/dir-raidtest/3", 2, 3, targetReplication); createTestFiles(fileSys, "/user/rvadali/dir-raidtest/4/", "/dir-raid/user/rvadali/dir-raidtest/4", 2, 3, targetReplication); createTestFiles(fileSys, "/user/rvadali/dir-raidtest/5/", "/dir-raid/user/rvadali/dir-raidtest/5", 2, 3, targetReplication); LOG.info("Test testFileListDirRaidPolicy created test files"); FSDataOutputStream dirOut = fileSys.create(dirListPath); FileStatus[] dirs = fileSys.listStatus(new Path("/user/rvadali/dir-raidtest")); for (FileStatus dir: dirs) { dirOut.write(dir.getPath().toString().getBytes()); dirOut.write("\n".getBytes()); } dirOut.close(); cnode = RaidNode.createRaidNode(conf); PolicyInfo[] infos = cnode.getAllPolicies(); assertEquals("we should have only one policy", 1, infos.length); PolicyInfo info = infos[0]; cnode.triggerMonitor.putPolicyInfo(info); List<FileStatus> list1 = cnode.triggerMonitor.readFileList(info); assertEquals("Only one directory is selected", 1, list1.size()); assertEquals("/user/rvadali/dir-raidtest/1", list1.get(0).getPath().toUri().getPath()); List<FileStatus> list2 = cnode.triggerMonitor.readFileList(info); assertEquals("Only one directory is selected", 1, list2.size()); assertEquals("/user/rvadali/dir-raidtest/2", list2.get(0).getPath().toUri().getPath()); List<FileStatus> list3 = cnode.triggerMonitor.readFileList(info); assertEquals("Only 3 directories are selected", 3, list3.size()); assertEquals("/user/rvadali/dir-raidtest/3", list3.get(0).getPath().toUri().getPath()); assertEquals("/user/rvadali/dir-raidtest/4", list3.get(1).getPath().toUri().getPath()); assertEquals("/user/rvadali/dir-raidtest/5", list3.get(2).getPath().toUri().getPath()); List<FileStatus> list4 = cnode.triggerMonitor.readFileList(info); assertEquals("None is selected", 0, list4.size()); LOG.info("Test testFileListDirRaidPolicy successful."); } catch (Exception e) { LOG.info("testFileListDirRaidPolicy Exception ", e); throw e; } finally { if (cnode != null) { cnode.stop(); cnode.join(); } stopClusters(); } LOG.info("Test testFileListDirRaidPolicy completed."); } /** * Test to run a filter */ public void testPathFilter() throws Exception { LOG.info("Test testPathFilter started."); long blockSizes [] = {1024L}; int stripeLengths [] = {5, 6, 10, 11, 12}; int targetReplication = 1; int metaReplication = 1; int numBlock = 11; int iter = 0; createClusters(true, false); try { for (long blockSize : blockSizes) { for (int stripeLength : stripeLengths) { this.loadTestCodecs(conf, stripeLength, stripeLength, 1, 3); doTestPathFilter(iter, targetReplication, metaReplication, stripeLength, blockSize, numBlock); iter++; } } doCheckPolicy(); } finally { stopClusters(); } LOG.info("Test testPathFilter completed."); } private void simulateErrors(RaidShell shell, Path file1, long crc, long blockSize, long numBlock, long stripeLength) throws IOException { if (numBlock >= 1) { LOG.info("doTestPathFilter Check error at beginning of file."); simulateError(shell, fileSys, file1, crc, 0); } // check for error at the beginning of second block if (numBlock >= 2) { LOG.info("doTestPathFilter Check error at beginning of second block."); simulateError(shell, fileSys, file1, crc, blockSize + 1); } // check for error at the middle of third block if (numBlock >= 3) { LOG.info("doTestPathFilter Check error at middle of third block."); simulateError(shell, fileSys, file1, crc, 2 * blockSize + 10); } // check for error at the middle of second stripe if (numBlock >= stripeLength + 1) { LOG.info("doTestPathFilter Check error at middle of second stripe."); simulateError(shell, fileSys, file1, crc, stripeLength * blockSize + 100); } } /** * Test to run a filter */ private void doTestPathFilter(int iter, long targetReplication, long metaReplication, long stripeLength, long blockSize, int numBlock) throws Exception { LOG.info("doTestPathFilter started---------------------------:" + " iter " + iter + " blockSize=" + blockSize + " stripeLength=" + stripeLength); ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE); cb.addPolicy("policy1", "/user/dhruba/raidtest", targetReplication, metaReplication); cb.addPolicy("policy2", "/user/dhruba/dir-raidtest", targetReplication, metaReplication, "dir-xor"); cb.persist(); RaidShell shell = null; Path dir = new Path("/user/dhruba/raidtest/"); Path file1 = new Path(dir + "/file" + iter); Path dir1 = new Path("/user/dhruba/dir-raidtest/1"); Path file2 = new Path(dir1 + "/file2"); Path file3 = new Path(dir1 + "/file3"); RaidNode cnode = null; try { Path destPath = new Path("/raid/user/dhruba/raidtest"); Path destPath1 = new Path("/dir-raid/user/dhruba/dir-raidtest"); fileSys.delete(dir, true); fileSys.delete(destPath, true); fileSys.delete(dir1, true); fileSys.delete(destPath1, true); long crc1 = createOldFile(fileSys, file1, 1, numBlock, blockSize); long crc2 = createOldFile(fileSys, file2, 1, numBlock, blockSize); long crc3 = createOldFile(fileSys, file3, 1, numBlock, blockSize); LOG.info("doTestPathFilter created test files for iteration " + iter); // create an instance of the RaidNode Configuration localConf = new Configuration(conf); cnode = RaidNode.createRaidNode(null, localConf); TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath); TestRaidDfs.waitForDirRaided(LOG, fileSys, dir1, destPath1); LOG.info("doTestPathFilter all files found in Raid."); // check for error at beginning of file shell = new RaidShell(conf); shell.initializeRpc(conf, cnode.getListenerAddress()); this.simulateErrors(shell, file1, crc1, blockSize, numBlock, stripeLength); this.simulateErrors(shell, file2, crc2, blockSize, numBlock, stripeLength); this.simulateErrors(shell, file3, crc3, blockSize, numBlock, stripeLength); } catch (Exception e) { LOG.info("doTestPathFilter Exception ", e); throw e; } finally { if (shell != null) shell.close(); if (cnode != null) { cnode.stop(); cnode.join(); } LOG.info("doTestPathFilter clean up" ); fileSys.delete(dir, true); fileSys.delete(new Path("/raid"), true); fileSys.delete(dir1, true); fileSys.delete(new Path("/dir-raid"), true); } LOG.info("doTestPathFilter completed:" + " blockSize=" + blockSize + " stripeLength=" + stripeLength); } // Check that raid occurs only on files that have a replication factor // greater than or equal to the specified value private void doCheckPolicy() throws Exception { LOG.info("doCheckPolicy started---------------------------:"); short srcReplication = 3; short targetReplication = 2; long metaReplication = 1; long stripeLength = 2; long blockSize = 1024; int numBlock = 3; ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE); cb.addPolicy("policy1", "/user/dhruba/policytest", targetReplication, metaReplication); cb.addPolicy("policy2", "/user/dhruba/dir-policytest", targetReplication, metaReplication, "dir-xor"); cb.persist(); Path dir = new Path("/user/dhruba/policytest/"); Path dir1 = new Path("/user/dhruba/dir-policytest/1"); Path file1 = new Path(dir + "/file1"); Path file2 = new Path(dir1 + "/file2"); Path file3 = new Path(dir1 + "/file3"); Path file4 = new Path(dir1 + "/file4"); RaidNode cnode = null; try { Path destPath = new Path("/raid/user/dhruba/policytest"); Path destPath1 = new Path("/dir-raid/user/dhruba/dir-policytest"); fileSys.delete(dir, true); fileSys.delete(destPath, true); fileSys.delete(dir1, true); fileSys.delete(destPath1, true); // create an instance of the RaidNode Configuration localConf = new Configuration(conf); cnode = RaidNode.createRaidNode(null, localConf); // this file should be picked up RaidNode createOldFile(fileSys, file1, 3, numBlock, blockSize); createOldFile(fileSys, file2, 3, numBlock, blockSize); createOldFile(fileSys, file3, 3, numBlock, blockSize); TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath, targetReplication); TestRaidDfs.waitForDirRaided(LOG, fileSys, dir1, destPath1, targetReplication); LOG.info("doCheckPolicy all files found in Raid the first time."); LOG.info("doCheckPolicy: recreating source file"); long firstmodetime1 = fileSys.getFileStatus(file1).getModificationTime(); createOldFile(fileSys, file1, 3, numBlock, blockSize); assertTrue(fileSys.getFileStatus(file1).getModificationTime() > firstmodetime1); LOG.info("Change the modification time of directory"); long firstmodetime2 = fileSys.getFileStatus(dir1).getModificationTime(); createOldFile(fileSys, file4, 3, numBlock, blockSize); assertTrue(fileSys.getFileStatus(dir1).getModificationTime() > firstmodetime2); TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath, targetReplication); TestRaidDfs.waitForDirRaided(LOG, fileSys, dir1, destPath1, targetReplication); LOG.info("doCheckPolicy: file got re-raided as expected."); } catch (Exception e) { LOG.info("doCheckPolicy Exception ", e); throw e; } finally { if (cnode != null) { cnode.stop(); cnode.join(); } LOG.info("doTestPathFilter clean up"); fileSys.delete(dir, true); fileSys.delete(new Path("/raid"), true); fileSys.delete(dir1, true); fileSys.delete(new Path("/dir-raid"), true); } LOG.info("doCheckPolicy completed:"); } static public void createTestFiles(FileSystem fileSys, String path, String destpath, int nfile, int nblock) throws IOException { createTestFiles(fileSys, path, destpath, nfile, nblock, (short)1); } static void createTestFiles(FileSystem fileSys, String path, String destpath, int nfile, int nblock, short repl) throws IOException { long blockSize = 1024L; Path dir = new Path(path); Path destPath = new Path(destpath); fileSys.delete(dir, true); fileSys.delete(destPath, true); for(int i = 0 ; i < nfile; i++){ Path file = new Path(dir, "file" + i); createOldFile(fileSys, file, repl, nblock, blockSize); } } private void checkTestFiles(String srcDir, String parityDir, int stripeLength, short targetReplication, short metaReplication, PlacementMonitor pm, Codec codec, int nfiles) throws IOException, InterruptedException { for(int i = 0 ; i < nfiles; i++){ Path srcPath = new Path(srcDir, "file" + i); Path parityPath = null; if (codec.isDirRaid) { parityPath = new Path(parityDir); TestRaidDfs.waitForDirRaided(LOG, fileSys, srcPath.getParent(), parityPath.getParent(), targetReplication); } else { parityPath = new Path(parityDir, "file" + i); TestRaidDfs.waitForFileRaided(LOG, fileSys, srcPath, parityPath.getParent(), targetReplication); } TestRaidDfs.waitForReplicasReduction(fileSys, parityPath, targetReplication); FileStatus srcFile = fileSys.getFileStatus(srcPath); FileStatus parityStat = fileSys.getFileStatus(parityPath); assertEquals(srcFile.getReplication(), targetReplication); assertEquals(parityStat.getReplication(), metaReplication); List<BlockInfo> parityBlocks = pm.getBlockInfos(fileSys, parityStat); int parityLength = codec.parityLength; if (parityLength == 1) { continue; } if (codec.isDirRaid && i > 0) { // One directory has one parity, just need to check once continue; } long numBlocks; if (codec.isDirRaid) { List<FileStatus> lfs = RaidNode.listDirectoryRaidFileStatus(conf, fileSys, new Path(srcDir)); numBlocks = DirectoryStripeReader.getBlockNum(lfs); } else { numBlocks = RaidNode.numBlocks(srcFile); } int numStripes = (int)RaidNode.numStripes(numBlocks, stripeLength); Map<String, Integer> nodeToNumBlocks = new HashMap<String, Integer>(); Set<String> nodesInThisStripe = new HashSet<String>(); for (int stripeIndex = 0; stripeIndex < numStripes; ++stripeIndex) { List<BlockInfo> stripeBlocks = new ArrayList<BlockInfo>(); // Adding parity blocks int stripeStart = parityLength * stripeIndex; int stripeEnd = Math.min( stripeStart + parityLength, parityBlocks.size()); if (stripeStart < stripeEnd) { stripeBlocks.addAll(parityBlocks.subList(stripeStart, stripeEnd)); } PlacementMonitor.countBlocksOnEachNode(stripeBlocks, nodeToNumBlocks, nodesInThisStripe); LOG.info("file: " + parityPath + " stripe: " + stripeIndex); int max = 0; for (String node: nodeToNumBlocks.keySet()) { int count = nodeToNumBlocks.get(node); LOG.info("node:" + node + " count:" + count); if (max < count) { max = count; } } assertTrue("pairty blocks in a stripe cannot live in the same node", max<parityLength); } } } /** * Test dist Raid */ public void testDistRaid() throws Exception { LOG.info("Test testDistRaid started."); short targetReplication = 2; short metaReplication = 2; short rstargetReplication = 1; short rsmetaReplication = 1; short xorstripeLength = 3; int rsstripeLength = 10; createClusters(false, true); ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE); cb.addPolicy("policy1", "/user/dhruba/raidtest", targetReplication, metaReplication); cb.addAbstractPolicy("abstractPolicy", targetReplication, metaReplication, "xor"); cb.addPolicy("policy2", "/user/dhruba/raidtest2", "abstractPolicy"); cb.addPolicy("policy3", "/user/dhruba/raidtest3", rstargetReplication, rsmetaReplication, "rs"); cb.addPolicy("policy4", "/user/dhruba/dir-raidtest/", targetReplication, metaReplication, "dir-xor"); cb.addPolicy("policy5", "/user/dhruba/dir-raidtestrs/", rstargetReplication, rsmetaReplication, "dir-rs"); cb.persist(); RaidNode cnode = null; try { createTestFiles(fileSys, "/user/dhruba/raidtest/", "/raid/user/dhruba/raidtest", 5, 7); createTestFiles(fileSys, "/user/dhruba/raidtest2/", "/raid/user/dhruba/raidtest2", 5, 7); createTestFiles(fileSys, "/user/dhruba/raidtest3/", "/raidrs/user/dhruba/raidtest3", 1, 10); createTestFiles(fileSys, "/user/dhruba/dir-raidtest/1/", "/dir-raid/user/dhruba/dir-raidtest/1", 5, 7); createTestFiles(fileSys, "/user/dhruba/dir-raidtestrs/2/", "/dir-raidrs/user/dhruba/dir-raidtestrs/2", 2, 8); LOG.info("Test testDistRaid created test files"); Configuration localConf = new Configuration(conf); //Avoid block mover to move blocks localConf.setInt(PlacementMonitor.BLOCK_MOVE_QUEUE_LENGTH_KEY, 0); localConf.setInt(PlacementMonitor.NUM_MOVING_THREADS_KEY, 1); // don't allow rescan, make sure only one job is submitted. localConf.setLong("raid.policy.rescan.interval", 3600 * 1000L); cnode = RaidNode.createRaidNode(null, localConf); // Verify the policies are parsed correctly for (PolicyInfo p: cnode.getAllPolicies()) { if (p.getName().equals("policy1")) { Path srcPath = new Path("/user/dhruba/raidtest"); assertTrue(p.getSrcPath().equals( srcPath.makeQualified(srcPath.getFileSystem(conf)))); } else if (p.getName().equals("policy2")) { Path srcPath = new Path("/user/dhruba/raidtest2"); assertTrue(p.getSrcPath().equals( srcPath.makeQualified(srcPath.getFileSystem(conf)))); } else if (p.getName().equals("policy3")){ Path srcPath = new Path("/user/dhruba/raidtest3"); assertTrue(p.getSrcPath().equals( srcPath.makeQualified(srcPath.getFileSystem(conf)))); } else if (p.getName().equals("policy4")) { Path srcPath = new Path("/user/dhruba/dir-raidtest/"); assertTrue(p.getSrcPath().equals( srcPath.makeQualified(srcPath.getFileSystem(conf)))); } else { assertEquals(p.getName(), "policy5"); Path srcPath = new Path("/user/dhruba/dir-raidtestrs/"); assertTrue(p.getSrcPath().equals( srcPath.makeQualified(srcPath.getFileSystem(conf)))); } if (p.getName().equals("policy3") || p.getName().equals("policy5")) { if (p.getName().equals("policy3")) { assertTrue(p.getCodecId().equals("rs")); } else { assertTrue(p.getCodecId().equals("dir-rs")); } assertEquals(rstargetReplication, Integer.parseInt(p.getProperty("targetReplication"))); assertEquals(rsmetaReplication, Integer.parseInt(p.getProperty("metaReplication"))); } else { if (p.getName().equals("policy4")) { assertTrue(p.getCodecId().equals("dir-xor")); } else { assertTrue(p.getCodecId().equals("xor")); } assertEquals(targetReplication, Integer.parseInt(p.getProperty("targetReplication"))); assertEquals(metaReplication, Integer.parseInt(p.getProperty("metaReplication"))); } } long start = System.currentTimeMillis(); final int MAX_WAITTIME = 120000; assertTrue("cnode is not DistRaidNode", cnode instanceof DistRaidNode); DistRaidNode dcnode = (DistRaidNode) cnode; checkTestFiles("/user/dhruba/raidtest/", "/raid/user/dhruba/raidtest", xorstripeLength, targetReplication, metaReplication, dcnode.placementMonitor, Codec.getCodec("xor"), 5); checkTestFiles("/user/dhruba/raidtest2/", "/raid/user/dhruba/raidtest2", xorstripeLength, targetReplication, metaReplication, dcnode.placementMonitor, Codec.getCodec("xor"), 5); checkTestFiles("/user/dhruba/raidtest3/", "/raidrs/user/dhruba/raidtest3", rsstripeLength, rstargetReplication, rsmetaReplication, dcnode.placementMonitor, Codec.getCodec("rs"), 1); checkTestFiles("/user/dhruba/dir-raidtest/1/", "/dir-raid/user/dhruba/dir-raidtest/1", xorstripeLength, targetReplication, metaReplication, dcnode.placementMonitor, Codec.getCodec("dir-xor"), 5); checkTestFiles("/user/dhruba/dir-raidtestrs/2/", "/dir-raidrs/user/dhruba/dir-raidtestrs/2", rsstripeLength, rstargetReplication, rsmetaReplication, dcnode.placementMonitor, Codec.getCodec("dir-rs"), 2); while (dcnode.jobMonitor.runningJobsCount() > 0 && System.currentTimeMillis() - start < MAX_WAITTIME) { LOG.info("Waiting for zero running jobs: " + dcnode.jobMonitor.runningJobsCount()); Thread.sleep(1000); } TestBlockFixer.verifyMetrics(fileSys, cnode, LOGTYPES.ENCODING, LOGRESULTS.SUCCESS, 5 + 5 + 1 + 1 + 1, true); LOG.info("Test testDistRaid successful."); } catch (Exception e) { LOG.info("testDistRaid Exception " + e + StringUtils.stringifyException(e)); throw e; } finally { if (cnode != null) { cnode.stop(); cnode.join(); } stopClusters(); } LOG.info("Test testDistRaid completed."); } // // simulate a corruption at specified offset and verify that eveyrthing is good // void simulateError(RaidShell shell, FileSystem fileSys, Path file1, long crc, long corruptOffset) throws IOException { // recover the file assuming that we encountered a corruption at offset 0 String[] args = new String[3]; args[0] = "-recover"; args[1] = file1.toString(); args[2] = Long.toString(corruptOffset); Path recover1 = shell.recover(args[0], args, 1)[0]; // compare that the recovered file is identical to the original one LOG.info("Comparing file " + file1 + " with recovered file " + recover1); validateFile(fileSys, file1, recover1, crc); fileSys.delete(recover1, false); } // // creates a file and populate it with random data. Returns its crc. // static long createOldFile(FileSystem fileSys, Path name, int repl, int numBlocks, long blocksize) throws IOException { CRC32 crc = new CRC32(); FSDataOutputStream stm = fileSys.create(name, true, fileSys.getConf().getInt("io.file.buffer.size", 4096), (short)repl, blocksize); // fill random data into file byte[] b = new byte[(int)blocksize]; for (int i = 0; i < numBlocks; i++) { if (i == (numBlocks-1)) { b = new byte[(int)blocksize/2]; } rand.nextBytes(b); stm.write(b); crc.update(b); } stm.close(); return crc.getValue(); } // // validates that file matches the crc. // private void validateFile(FileSystem fileSys, Path name1, Path name2, long crc) throws IOException { FileStatus stat1 = fileSys.getFileStatus(name1); FileStatus stat2 = fileSys.getFileStatus(name2); assertTrue(" Length of file " + name1 + " is " + stat1.getLen() + " is different from length of file " + name1 + " " + stat2.getLen(), stat1.getLen() == stat2.getLen()); CRC32 newcrc = new CRC32(); FSDataInputStream stm = fileSys.open(name2); final byte[] b = new byte[4192]; int num = 0; while (num >= 0) { num = stm.read(b); if (num < 0) { break; } newcrc.update(b, 0, num); } stm.close(); if (newcrc.getValue() != crc) { fail("CRC mismatch of files " + name1 + " with file " + name2); } } public void testSuspendTraversal() throws Exception { LOG.info("Test testSuspendTraversal started."); long targetReplication = 2; long metaReplication = 2; createClusters(false, false); ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE); cb.addPolicy("policy1", "/user/dhruba/raidtest", targetReplication, metaReplication); cb.persist(); RaidNode cnode = null; try { for(int i = 0; i < 6; i++){ Path file = new Path("/user/dhruba/raidtest/dir" + i + "/file" + i); createOldFile(fileSys, file, 1, 7, 1024L); } LOG.info("Test testSuspendTraversal created test files"); Configuration localConf = new Configuration(conf); localConf.setInt("raid.distraid.max.jobs", 3); localConf.setInt("raid.distraid.max.files", 2); localConf.setInt("raid.directorytraversal.threads", 1); localConf.setBoolean(StatisticsCollector.STATS_COLLECTOR_SUBMIT_JOBS_CONFIG, false); // don't allow rescan, make sure only one job is submitted. localConf.setLong("raid.policy.rescan.interval", 3600 * 1000L); // 6 test files: 3 jobs with 2 files each. final int numJobsExpected = 3; cnode = RaidNode.createRaidNode(null, localConf); long start = System.currentTimeMillis(); final int MAX_WAITTIME = 300000; assertTrue("cnode is not DistRaidNode", cnode instanceof DistRaidNode); DistRaidNode dcnode = (DistRaidNode) cnode; start = System.currentTimeMillis(); while (dcnode.jobMonitor.jobsSucceeded() < numJobsExpected && System.currentTimeMillis() - start < MAX_WAITTIME) { LOG.info("Waiting for num jobs succeeded " + dcnode.jobMonitor.jobsSucceeded() + " to reach " + numJobsExpected); Thread.sleep(3000); } // Wait for any running jobs to finish. start = System.currentTimeMillis(); while (dcnode.jobMonitor.runningJobsCount() > 0 && System.currentTimeMillis() - start < MAX_WAITTIME) { LOG.info("Waiting for zero running jobs: " + dcnode.jobMonitor.runningJobsCount()); Thread.sleep(1000); } assertEquals(numJobsExpected, dcnode.jobMonitor.jobsMonitored()); assertEquals(numJobsExpected, dcnode.jobMonitor.jobsSucceeded()); LOG.info("Test testSuspendTraversal successful."); TestBlockFixer.verifyMetrics(fileSys, cnode, LOGTYPES.ENCODING, LOGRESULTS.SUCCESS, 6L, true); } catch (Exception e) { LOG.info("testSuspendTraversal Exception " + e + StringUtils.stringifyException(e)); throw e; } finally { if (cnode != null) { cnode.stop(); cnode.join(); } stopClusters(); } LOG.info("Test testSuspendTraversal completed."); } public void testFileListPolicy() throws Exception { LOG.info("Test testFileListPolicy started."); short targetReplication = 2; short metaReplication = 2; long stripeLength = 3; short srcReplication = 3; createClusters(false, false); ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE); cb.addAbstractPolicy("abstractPolicy", targetReplication, metaReplication, "xor"); cb.addFileListPolicy("policy2", "/user/rvadali/raidfilelist.txt", "abstractPolicy"); cb.addAbstractPolicy("abstractPolicy2", targetReplication, metaReplication, "dir-xor"); cb.addFileListPolicy("policy3", "/user/rvadali/raiddirlist.txt", "abstractPolicy2"); cb.persist(); RaidNode cnode = null; Path fileListPath = new Path("/user/rvadali/raidfilelist.txt"); Path dirListPath = new Path("/user/rvadali/raiddirlist.txt"); try { createTestFiles(fileSys, "/user/rvadali/raidtest/", "/raid/user/rvadali/raidtest", 5, 7, srcReplication); createTestFiles(fileSys, "/user/rvadali/dir-raidtest/1/", "/dir-raid/user/rvadali/dir-raidtest/1", 2, 7, srcReplication); // althought the files reach the target replication, because // they don't have parities, we are still going to raid them. createTestFiles(fileSys, "/user/rvadali/dir-raidtest/2/", "/dir-raid/user/rvadali/dir-raidtest/2", 2, 7, targetReplication); createTestFiles(fileSys, "/user/dhruba/unraidable/", "/raid/user/dhruba/unradiable/", 2, 1, srcReplication); // generate parities files for the following directory, raidnode // won't raid them again Path srcDir3 = new Path("/user/rvadali/dir-raidtest/3/"); Path destPath3 = new Path("/dir-raid/user/rvadali/dir-raidtest/3"); createTestFiles(fileSys, srcDir3.toString(), destPath3.toString(), 2, 4, targetReplication); Codec dirCode = Codec.getCodec("dir-xor"); FileStatus srcStat3 = fileSys.getFileStatus(srcDir3); assertTrue(RaidNode.doRaid(conf, srcStat3, new Path(dirCode.parityDirectory), dirCode, new RaidNode.Statistics(), RaidUtils.NULL_PROGRESSABLE, false, targetReplication, metaReplication)); FileStatus parityStat3 = fileSys.getFileStatus(destPath3); assertEquals(parityStat3.getModificationTime(), srcStat3.getModificationTime()); assertEquals(parityStat3.getReplication(), metaReplication); LOG.info("Test testFileListPolicy created test files"); // Create list of files to raid. FSDataOutputStream out = fileSys.create(fileListPath); FileStatus[] files = fileSys.listStatus(new Path("/user/rvadali/raidtest")); for (FileStatus f: files) { out.write(f.getPath().toString().getBytes()); out.write("\n".getBytes()); } // write directory, we should filter it. out.write("/user/rvadali/raidtest/\n".getBytes()); // small file, we should filter it out.write("/user/dhruba/unraidable/file0\n".getBytes()); out.write("/user/dhruba/unraidable/file1\n".getBytes()); out.close(); FSDataOutputStream dirOut = fileSys.create(dirListPath); FileStatus[] dirs = fileSys.listStatus(new Path("/user/rvadali/dir-raidtest")); for (FileStatus dir: dirs) { dirOut.write(dir.getPath().toString().getBytes()); dirOut.write("\n".getBytes()); } // write file, we should filter it dirOut.write("/user/rvadali/raidtest/file0\n".getBytes()); // small directory, we should filter it out.write("/user/dhruba/unraidable\n".getBytes()); dirOut.close(); Configuration localConf = new Configuration(conf); // don't allow rescan, make sure only one job is submitted. localConf.setLong("raid.policy.rescan.interval", 3600 * 1000L); cnode = RaidNode.createRaidNode(localConf); final int MAX_WAITTIME = 120000; DistRaidNode dcnode = (DistRaidNode) cnode; long start = System.currentTimeMillis(); int numJobsExpected = 2; while (dcnode.jobMonitor.jobsSucceeded() < numJobsExpected && System.currentTimeMillis() - start < MAX_WAITTIME) { LOG.info("Waiting for num jobs succeeded " + dcnode.jobMonitor.jobsSucceeded() + " to reach " + numJobsExpected); Thread.sleep(1000); } assertEquals(numJobsExpected, dcnode.jobMonitor.jobsMonitored()); assertEquals(numJobsExpected, dcnode.jobMonitor.jobsSucceeded()); Path destPath = new Path("/raid/user/rvadali/raidtest"); for (FileStatus file : files) { TestRaidDfs.waitForFileRaided(LOG, fileSys, file.getPath(), destPath, targetReplication); } Path destPath1 = new Path("/dir-raid/user/rvadali/dir-raidtest"); for (FileStatus dir : dirs) { TestRaidDfs.waitForDirRaided(LOG, fileSys, dir.getPath(), destPath1, targetReplication); } assertTrue(!fileSys.exists(new Path("/raid/user/dhruba/unraidable/file0"))); assertTrue(!fileSys.exists(new Path("/raid/user/dhruba/unraidable/file1"))); assertTrue(!fileSys.exists(new Path("/dir-raid/user/dhruba/unraidable"))); TestBlockFixer.verifyMetrics(fileSys, cnode, LOGTYPES.ENCODING, LOGRESULTS.SUCCESS, 7L, true); Map<String, Counters> raidProgress = dcnode.jobMonitor.getRaidProgress(); long succeedFiles = 0; for (Counters ctrs: raidProgress.values()) { Counters.Counter ctr = ctrs.findCounter(Counter.FILES_SUCCEEDED); succeedFiles += ctr.getValue(); } // We have one raided directory, so it's total - 1; assertEquals(succeedFiles, files.length + dirs.length -1); LOG.info("Test testFileListPolicy successful."); } catch (Exception e) { LOG.info("testFileListPolicy Exception " + e + StringUtils.stringifyException(e)); throw e; } finally { if (cnode != null) { cnode.stop(); cnode.join(); } stopClusters(); } LOG.info("Test testFileListPolicy completed."); } }