/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;

import junit.framework.TestCase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.TestRaidDfs;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.junit.After;
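
/**
 * Tests {@link RaidNodeMetrics}: first by updating a few counters directly,
 * then by corrupting a block of a Reed-Solomon raided file on a
 * MiniDFSCluster, letting the distributed block fixer repair it, and
 * logging the remote-rack read-byte metrics recorded during the repair.
 */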
public class TestRaidNodeMetrics extends TestCase {
  final static String TEST_DIR = new File(
      System.getProperty("test.build.data", "build/contrib/raid/test/data"))
      .getAbsolutePath();
  final static String CONFIG_FILE =
      new File(TEST_DIR, "test-raid.xml").getAbsolutePath();
final static long RELOAD_INTERVAL = 1000;
final static int NUM_DATANODES = 6;
final static int STRIPE_BLOCKS = 5; // number of file blocks per stripe
  final static int PARITY_BLOCKS = 3; // number of parity blocks per stripe for RS
  final static int FILE_BLOCKS = 6; // number of blocks the test file consists of
final static short REPL = 1; // replication factor before raiding
  final static long BLOCK_SIZE = 8192L; // size of each block in bytes
final static String DIR_PATH = "/user/rashmikv/raidtest";
final static Path FILE_PATH0 =
new Path("/user/rashmikv/raidtest/file0.test");
final static String MONITOR_DIRS = "/";
  /*
   * To test RS, confirm that the codec is set to "rs" in setUp().
   */
final static Path RAID_PATH = new Path("/raidrs/user/rashmikv/raidtest");
final static String RAID_DIR = "/raidrs";
  final static String CODE_USED = "rs";
String[] racks = {"/rack1", "/rack2", "/rack3", "/rack4", "/rack5", "/rack6"};
String[] hosts= {"host1.rack1.com", "host2.rack2.com", "host3.rack3.com",
"host4.rack4.com", "host5.rack5.com", "host6.rack6.com"};
final int taskTrackers = 4;
Configuration conf = null;
Configuration raidConf = null;
Configuration clientConf = null;
MiniDFSCluster cluster = null;
DistributedFileSystem dfs = null;
RaidNode rnode = null;
MiniMRCluster mr = null;
String jobTrackerName = null;
String hftp = null;
Codec codec;
final static Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.TestRaidNodeMetrics");
private void setUp(boolean doHar) throws IOException, ClassNotFoundException {
LOG.info("Setting up");
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
conf = new Configuration();
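    // Register the test codecs: stripes of STRIPE_BLOCKS source blocks, with
    // XOR parity under /raid and RS parity (PARITY_BLOCKS blocks) under
    // /raidrs. The meaning of the arguments is assumed from their order.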
Utils.loadTestCodecs(conf, STRIPE_BLOCKS, 1, PARITY_BLOCKS, "/raid", "/raidrs");
codec = Codec.getCodec("rs");
conf.set("raid.config.file", CONFIG_FILE);
conf.setBoolean("raid.config.reload", true);
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
    // scan all policies once every 5 seconds
conf.setLong("raid.policy.rescan.interval", 5000);
// do not use map-reduce cluster for Raiding
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
conf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.DistBlockIntegrityMonitor");
conf.set("raid.server.address", "localhost:" + MiniDFSCluster.getFreePort());
conf.set("mapred.raid.http.address", "localhost:0");
conf.setInt("dfs.corruptfilesreturned.max", 500);
conf.setBoolean("dfs.permissions", false);
conf.set("raid.corruptfile.counter.dirs", MONITOR_DIRS);
conf.setInt("raid.corruptfilecount.interval", 1000);
    cluster = new MiniDFSCluster(conf, NUM_DATANODES, true, racks, hosts);
cluster.waitActive();
dfs = (DistributedFileSystem) cluster.getFileSystem();
String namenode = dfs.getUri().toString();
mr = new MiniMRCluster(taskTrackers, namenode, 6);
jobTrackerName = "localhost:" + mr.getJobTrackerPort();
hftp = "hftp://localhost.localdomain:" + cluster.getNameNodePort();
FileSystem.setDefaultUri(conf, namenode);
conf.set("mapred.job.tracker", jobTrackerName);
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
fileWriter.write("<?xml version=\"1.0\"?>\n");
    // Change the codec name (rs or xor) here:
String str =
"<configuration> " +
" <policy name = \"RaidTest1\"> " +
" <srcPath prefix=\"" + DIR_PATH + "\"/> " +
" <codecId>rs</codecId> " +
" <destPath> " + RAID_DIR + " </destPath> " +
" <property> " +
" <name>targetReplication</name> " +
" <value>1</value> " +
" <description>after RAIDing, decrease the replication " +
"factor of a file to this value.</description> " +
" </property> " +
" <property> " +
" <name>metaReplication</name> " +
" <value>1</value> " +
" <description> replication factor of parity file</description> " +
" </property> " +
" <property> " +
" <name>modTimePeriod</name> " +
" <value>2000</value> " +
" <description>time (milliseconds) after a file is modified " +
"to make it a candidate for RAIDing</description> " +
" </property> ";
str +=
" </policy>" +
"</configuration>";
fileWriter.write(str);
fileWriter.close();
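    // Create a FILE_BLOCKS-block test file and raid it synchronously.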
TestRaidDfs.createTestFile(dfs, FILE_PATH0, REPL, FILE_BLOCKS, BLOCK_SIZE);
//Path[] filePaths = { FILE_PATH0 };
RaidNode.doRaid(conf, dfs.getFileStatus(FILE_PATH0),
new Path(RAID_DIR), codec, new RaidNode.Statistics(), RaidUtils.NULL_PROGRESSABLE,
false, REPL, REPL);
    try {
      TestRaidDfs.waitForFileRaided(LOG, dfs, FILE_PATH0, RAID_PATH);
    } catch (FileNotFoundException ignore) {
      // The parity file may not be visible yet; the test proceeds anyway.
    } catch (InterruptedException ignore) {
      // Interrupted while waiting; the test proceeds anyway.
    }
//raidTestFiles(RAID_PATH, filePaths, doHar);
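    // Clients read through DistributedRaidFileSystem so that corrupt
    // blocks can be recovered from parity on the fly.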
clientConf = new Configuration(conf);
clientConf.set("fs.hdfs.impl",
"org.apache.hadoop.hdfs.DistributedRaidFileSystem");
clientConf.set("fs.raid.underlyingfs.impl",
"org.apache.hadoop.hdfs.DistributedFileSystem");
}
/**
* removes a file block in the specified stripe
*/
private void removeFileBlock(Path filePath, int stripe, int blockInStripe)
throws IOException {
LocatedBlocks fileBlocks = dfs.getClient().namenode.
getBlockLocations(filePath.toString(), 0, FILE_BLOCKS * BLOCK_SIZE);
if (fileBlocks.locatedBlockCount() != FILE_BLOCKS) {
throw new IOException("expected " + FILE_BLOCKS +
" file blocks but found " +
fileBlocks.locatedBlockCount());
}
if (blockInStripe >= STRIPE_BLOCKS) {
throw new IOException("blockInStripe is " + blockInStripe +
" but must be smaller than " + STRIPE_BLOCKS);
}
    LocatedBlock block = fileBlocks.get(stripe * STRIPE_BLOCKS + blockInStripe);
    removeAndReportBlock(dfs, filePath, block);
    LOG.info("removed file " + filePath + " block " +
             (stripe * STRIPE_BLOCKS + blockInStripe) + " in stripe " + stripe);
}
/**
* removes a specified block from MiniDFS storage and reports it as corrupt
*/
private void removeAndReportBlock(DistributedFileSystem blockDfs,
Path filePath,
LocatedBlock block)
throws IOException {
TestRaidDfs.corruptBlock(filePath, block.getBlock(),
NUM_DATANODES, true, cluster);
// report deleted block to the name node
LocatedBlock[] toReport = { block };
blockDfs.getClient().namenode.reportBadBlocks(toReport);
}
/**
* sleeps for up to 20s until the number of corrupt files
* in the file system is equal to the number specified
*/
private void waitUntilCorruptFileCount(DistributedFileSystem dfs,
int corruptFiles)
throws IOException {
    long waitStart = System.currentTimeMillis();
while (DFSUtil.getCorruptFiles(dfs).length != corruptFiles) {
try {
Thread.sleep(1000);
} catch (InterruptedException ignore) {
}
if (System.currentTimeMillis() > waitStart + 20000L) {
break;
}
}
    int corruptFilesFound = DFSUtil.getCorruptFiles(dfs).length;
if (corruptFilesFound != corruptFiles) {
throw new IOException("expected " + corruptFiles +
" corrupt files but got " +
corruptFilesFound);
}
}
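  /**
   * Stops the RaidNode, if one was started, and shuts down the DFS cluster.
   */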
@After
public void tearDown() throws Exception {
if (rnode != null) {
rnode.stop();
rnode.join();
rnode = null;
}
if (cluster != null) {
cluster.shutdown();
cluster = null;
}
dfs = null;
LOG.info("Test cluster shut down");
}
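  /**
   * Exercises the basic metric counters directly and pushes an update
   * through the metrics context.
   */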
public void testRaidNodeMetrics() {
LOG.info("testRaidNodeMetrics starting");
RaidNodeMetrics inst = RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID);
inst.filesFixed.inc();
inst.filesRaided.set(2);
inst.raidFailures.inc(3);
inst.doUpdates(inst.context);
LOG.info("testRaidNodeMetrics succeeded");
}
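  /**
   * Removes a block from the raided test file, waits for the distributed
   * block fixer to repair it, and logs the remote-rack read-byte metrics
   * recorded during the repair.
   */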
  public void testRaidNodeMetricsBytesTransferred() throws Exception {
LOG.info("testRaidNodeMetricsBytesTransferred starting");
setUp(false);
try {
waitUntilCorruptFileCount(dfs, 0);
removeFileBlock(FILE_PATH0, 0, 0);
waitUntilCorruptFileCount(dfs, 1);
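      // Run the distributed block fixer with short intervals so the
      // repair completes quickly in the test.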
Configuration localConf = new Configuration(conf);
localConf.setInt("raid.blockfix.interval", 1000);
localConf.setInt("raid.blockcheck.interval", 1000);
localConf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.DistBlockIntegrityMonitor");
localConf.setLong("raid.blockfix.filespertask", 2L);
localConf.setLong("raid.blockfix.maxpendingjobs", 2L);
rnode = RaidNode.createRaidNode(null, localConf);
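      // Poll for up to two minutes until the block fixer reports that at
      // least one file has been fixed.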
long start = System.currentTimeMillis();
while (rnode.blockIntegrityMonitor.getNumFilesFixed() < 1 &&
System.currentTimeMillis() - start < 120000) {
LOG.info("Test testBlockFix waiting for files to be fixed.");
Thread.sleep(1000);
}
assertTrue("file not fixed", 1 <= rnode.blockIntegrityMonitor.getNumFilesFixed());
LOG.info("Checking Raid Node Metrics") ;
RaidNodeMetrics inst = RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID);
LOG.info("Number of bytes transferred across rack for repair in the current interval is : "+inst.numFileFixReadBytesRemoteRack.getCurrentIntervalValue());
LOG.info("Number of bytes transferred across rack for repair in the previous interval is : "+inst.numFileFixReadBytesRemoteRack.getPreviousIntervalValue());
} catch (Exception e) {
LOG.info("Test TestSimulationBlockFixer Exception " + e, e);
throw e;
} finally {
tearDown();
}
}
}