/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.zip.CRC32;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlockWithMetaInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.LocatedBlocksWithMetaInfo;
import org.apache.hadoop.hdfs.protocol.VersionedLocatedBlocks;
import org.apache.hadoop.hdfs.protocol.FSConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicy;
import org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyFakeData;
import org.apache.hadoop.hdfs.server.namenode.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.raid.DistBlockIntegrityMonitor.Worker;
import org.apache.hadoop.raid.RaidNode;
import junit.framework.TestCase;
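/**
 * Tests for the RAID block copier: verifies that {@link DistBlockIntegrityMonitor}
 * re-replicates blocks whose replicas sit on decommissioning datanodes. Uses a
 * MiniDFSCluster, a MiniMRCluster and {@link BlockPlacementPolicyFakeData} to
 * control where blocks are placed.
 */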
public class TestBlockCopier extends TestCase {
final static Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.TestBlockCopier");
final static String TEST_DIR = new File(System.getProperty("test.build.data",
"build/contrib/raid/test/data")).getAbsolutePath();
final static String CONFIG_FILE = new File(TEST_DIR,
"test-raid.xml").getAbsolutePath();
final static long RELOAD_INTERVAL = 1000;
final static int STRIPE_LENGTH = 4;
final static int BLOCK_SIZE = 8192;
Configuration conf;
NameNode namenode = null;
MiniDFSCluster dfs = null;
String hftp = null;
MiniMRCluster mr = null;
DistributedFileSystem fileSys = null;
RaidNode raidnode = null;
String jobTrackerName = null;
Path hostsFile;
Path excludeFile;
ArrayList<String> decommissionedNodes = new ArrayList<String>();
Random rand = new Random();
static {
ParityFilePair.disableCacheUsedInTestOnly();
}
// Setup -- manually run before each test
public void setup(int numDataNodes, int timeBeforeHar)
throws IOException, ClassNotFoundException {
if (System.getProperty("hadoop.log.dir") == null) {
String base = new File(".").getAbsolutePath();
System.setProperty("hadoop.log.dir", new Path(base).toString() + "/logs");
}
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
conf = new Configuration();
conf.set("raid.config.file", CONFIG_FILE);
conf.setBoolean("raid.config.reload", true);
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
// scan all policies once every 5 seconds
conf.setLong("raid.policy.rescan.interval", 5000);
// do not use map-reduce cluster for Raiding
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
conf.set("raid.server.address", "localhost:0");
Utils.loadTestCodecs(conf, STRIPE_LENGTH, 1, 3, "/raid", "/raidrs");
conf.setBoolean("dfs.permissions", false);
// Prevent the namenode from replicating files
conf.setInt("dfs.replication.interval", Integer.MAX_VALUE);
conf.setClass("dfs.block.replicator.classname",
BlockPlacementPolicyFakeData.class, BlockPlacementPolicy.class);
// Set the class which is used for the copying operation
//conf.setClass("raid.blockcopier.class", c, BlockCopyHelper.class);
// Set up the mini-cluster
dfs = new MiniDFSCluster(conf, numDataNodes, true, null, false);
dfs.waitActive();
fileSys = (DistributedFileSystem) dfs.getFileSystem();
namenode = dfs.getNameNode();
String namenodeRoot = fileSys.getUri().toString();
FileSystem.setDefaultUri(conf, namenodeRoot);
mr = new MiniMRCluster(4, namenodeRoot, 3);
jobTrackerName = "localhost:" + mr.getJobTrackerPort();
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
//FileSystem.setDefaultUri(conf, namenodeRoot);
conf.set("mapred.job.tracker", jobTrackerName);
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
fileWriter.write("<?xml version=\"1.0\"?>\n");
String str = "<configuration> " +
"<policy name = \"RaidTest1\"> " +
"<srcPath prefix=\"/user/hadoop/raidtest\"/> " +
"<codecId>xor</codecId> " +
"<property> " +
"<name>targetReplication</name> " +
"<value>1</value> " +
"<description>after RAIDing, decrease the replication factor of a file to this value." +
"</description> " +
"</property> " +
"<property> " +
"<name>metaReplication</name> " +
"<value>1</value> " +
"<description> replication factor of parity file" +
"</description> " +
"</property> " +
"<property> " +
"<name>modTimePeriod</name> " +
"<value>2000</value> " +
"<description> time (milliseconds) after a file is modified to make it " +
"a candidate for RAIDing " +
"</description> " +
"</property> ";
if (timeBeforeHar >= 0) {
str +=
"<property> " +
"<name>time_before_har</name> " +
"<value>" + timeBeforeHar + "</value> " +
"<description> amount of time waited before har'ing parity files" +
"</description> " +
"</property> ";
}
str +=
"</policy>" +
"</configuration>";
fileWriter.write(str);
fileWriter.close();
// Set up raid node
if (raidnode == null) {
Configuration localConf = new Configuration(conf);
localConf.setInt("raid.blockfix.interval", 1000);
localConf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.DistBlockIntegrityMonitor");
localConf.setLong("raid.blockfix.filespertask", 2L);
try {
raidnode = RaidNode.createRaidNode(null, localConf);
} catch (ClassNotFoundException ex) {
ex.printStackTrace();
throw ex;
}
}
// Set up excludes file
excludeFile = new Path(TEST_DIR, "exclude");
conf.set("dfs.hosts.exclude", excludeFile.toUri().getPath());
conf.setInt("heartbeat.recheck.interval", 2000);
conf.setInt("dfs.heartbeat.interval", 1);
conf.setInt("dfs.replication.pending.timeout.sec", 4);
writeExcludesFileAndRefresh(null);
}
// Teardown -- manually run after each test
public void teardown() {
if (raidnode != null) {
raidnode.stop();
raidnode.join();
}
if (mr != null) { mr.shutdown(); }
if (dfs != null) { dfs.shutdown(); }
}
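/**
 * Creates several replication-2 files, decommissions datanodes one at a time and
 * computes which files have blocks with both replicas on decommissioned nodes.
 * The comparison against the monitor's view is currently disabled (see FIXME below);
 * after re-commissioning the nodes, the test checks that no files are reported lost.
 */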
public void testDataSource() throws IOException, ClassNotFoundException {
final int numDatanodes = 5;
final int blocksPerFile = STRIPE_LENGTH;
setup(numDatanodes, -1);
// Make several many-block files with repl=2. This should guarantee that at
// least some blocks end up with both replicas on decommissioned nodes.
Path filePath = new Path("/user/hadoop/testDataSource");
String[] fileNames = {"file0", "file1", "file2", "file3", "file4"};
long[][] crcs = new long[fileNames.length][];
FileStatus[] files = new FileStatus[fileNames.length];
createRandomFiles(filePath, fileNames, 2, blocksPerFile, crcs, files);
Worker bc =
((DistBlockIntegrityMonitor) raidnode.blockIntegrityMonitor).getDecommissioningMonitor();
for (FileStatus file : files) {
printFileLocations(file);
}
Set<String> downNodes = new HashSet<String>();
for (int i = 0; i < numDatanodes; i++) {
// Decommission a node and test the data source.
String downNode = decommissionOneNode();
downNodes.add(downNode);
// Compute which files have decommissioning blocks and how many
HashMap<String, Integer> decomFiles = new HashMap<String, Integer>();
for (FileStatus file : files) {
String path = file.getPath().toUri().getPath();
int decommissioningBlocks = 0;
BlockLocation[] locations =
fileSys.getFileBlockLocations(file, 0, file.getLen());
for (BlockLocation loc : locations) {
String[] names = loc.getNames();
if (downNodes.contains(names[0]) && downNodes.contains(names[1])) {
decommissioningBlocks++;
}
}
if (decommissioningBlocks > 0) {
decomFiles.put(path, decommissioningBlocks);
}
}
// Verify results
// FIXME: re-enable test when the underlying issue in fsck/namesystem is resolved
//assertEquals(decomFiles.keySet(), bf.getDecommissioningFiles().keySet());
}
// Un-decommission those nodes and test the data source again.
writeExcludesFileAndRefresh(null);
assertEquals(0, bc.getLostFiles().size());
// Done.
teardown();
}
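/**
 * Places one block of a file on a second datanode and the rest on the first,
 * raids the file with "rs" parity, then decommissions the second node and waits
 * for the block regenerator to copy the affected file. Verifies the copy counters
 * and that the file contents still match the original CRCs.
 */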
public void testNameNodeBehavior()
throws IOException, ClassNotFoundException, InterruptedException {
setup(2, -1);
final int fileLenBlocks = STRIPE_LENGTH;
final int repl = 1;
// Get set up with datanode references
DatanodeInfo[] nodeInfos = namenode.getDatanodeReport(DatanodeReportType.ALL);
DatanodeDescriptor[] nodes = new DatanodeDescriptor[nodeInfos.length];
for (int i = 0; i < nodes.length; i++) {
nodes[i] = namenode.namesystem.getDatanode(nodeInfos[i]);
LOG.info("nodes[" + i + "]=" + nodes[i].getName());
}
// Create file with one block on nodes[1] and the rest on nodes[0]
Path raidPath = new Path("/raidrs");
Path filePath = new Path("/user/hadoop/testNameNodeBehavior/file");
long[] crc = createRandomFileDispersed(filePath, fileLenBlocks,
nodes[0], nodes[1]);
FileStatus file = fileSys.getFileStatus(filePath);
// Raid the file; parity blocks go on nodes[0]
BlockPlacementPolicyFakeData.lastInstance.overridingDatanode = nodes[0];
RaidNode.doRaid(conf, file, raidPath, Codec.getCodec("rs"),
new RaidNode.Statistics(), RaidUtils.NULL_PROGRESSABLE,
false, repl, repl);
Thread.sleep(1000);
printFileLocations(file);
BlockPlacementPolicyFakeData.lastInstance.overridingDatanode = null;
// Now decommission the second node
ArrayList<String> decommissioned = new ArrayList<String>();
decommissioned.add(nodes[1].getName());
writeExcludesFileAndRefresh(decommissioned);
// Wait for the BlockRegenerator to do its thing
long now = System.currentTimeMillis();
BlockIntegrityMonitor bf = raidnode.blockIntegrityMonitor;
while ((bf.getNumFilesCopied() == 0) && (bf.getNumFileCopyFailures() == 0)
&& ((System.currentTimeMillis() - now) < 30000)) {
LOG.info("Waiting for the BlockRegenerator to finish... ");
Thread.sleep(1000);
}
// Validate result
printFileLocations(file);
assertEquals(0, bf.getNumFileCopyFailures());
assertEquals(1, bf.getNumFilesCopied());
// No corrupt block fixing should have happened
assertEquals("corrupt block fixer unexpectedly performed fixing",
0, bf.getNumFilesFixed());
assertEquals("corrupt block fixer unexpectedly attempted fixing",
0, bf.getNumFileFixFailures());
validateFileCopy(fileSys, filePath, file.getLen(), crc, false);
teardown();
}
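/**
 * Picks a random live datanode that is not already decommissioned or being
 * decommissioned, adds it to the exclude file, refreshes the namenode and
 * returns the node's name.
 */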
private String decommissionOneNode() throws IOException {
DFSClient client = ((DistributedFileSystem)fileSys).getClient();
DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
int index = 0;
boolean found = false;
while (!found) {
index = rand.nextInt(info.length);
if (!info[index].isDecommissioned() && !info[index].isDecommissionInProgress()) {
found = true;
}
}
String nodename = info[index].getName();
System.out.println("Decommissioning node: " + nodename);
// write nodename into the exclude file.
decommissionedNodes.add(nodename);
writeExcludesFileAndRefresh(decommissionedNodes);
return nodename;
}
private long[] createRandomFile(Path file, int repl, int numBlocks)
throws IOException {
long[] crcs = new long[numBlocks];
CRC32 crc = new CRC32();
Random rand = new Random();
FSDataOutputStream stm = fileSys.create(file, true,
fileSys.getConf().getInt("io.file.buffer.size", 4096),
(short)repl, BLOCK_SIZE);
// Write numBlocks - 1 whole blocks; the final partial block is written below.
byte[] b = new byte[(int)BLOCK_SIZE];
for (int i = 1; i < numBlocks; i++) {
rand.nextBytes(b);
stm.write(b);
crc.update(b);
crcs[i-1] = crc.getValue();
crc.reset();
}
// Write partial block.
b = new byte[(int)BLOCK_SIZE/2 - 1];
rand.nextBytes(b);
stm.write(b);
crc.update(b);
crcs[crcs.length-1] = crc.getValue();
stm.close();
return crcs;
}
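// Debugging helper: prints a byte buffer as a hexadecimal string.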
private static void printHexBufTmp(byte[] buf) {
System.out.print("0x");
for (int j = 0; j < buf.length; j++) {
System.out.print(String.format("%02X", buf[j]));
}
System.out.println();
}
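/**
 * Like {@link #createRandomFile}, but uses {@link BlockPlacementPolicyFakeData}
 * to place the first block on {@code altNode} and the remaining blocks on
 * {@code primaryNode}. All blocks are full-sized and replication is 1.
 * Returns the CRC32 of each block.
 */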
private long[] createRandomFileDispersed(Path file, int numBlocks,
DatanodeDescriptor primaryNode, DatanodeDescriptor altNode)
throws IOException, InterruptedException {
BlockPlacementPolicyFakeData bp = BlockPlacementPolicyFakeData.lastInstance;
DatanodeDescriptor tmp = bp.overridingDatanode;
final int repl = 1;
long[] crcs = new long[numBlocks];
CRC32 crc = new CRC32();
Random rand = new Random();
FSDataOutputStream stm = fileSys.create(file, true,
fileSys.getConf().getInt("io.file.buffer.size", 4096),
(short)repl, BLOCK_SIZE);
// Create the first block on the alt node
bp.overridingDatanode = altNode;
// fill random data into file
final byte[] b = new byte[(int)BLOCK_SIZE];
LOG.info("Writing first block (alt. host)");
rand.nextBytes(b);
stm.write(b);
crc.update(b);
crcs[0] = crc.getValue();
stm.flush();
Thread.sleep(1000); // What a hack. Le sigh.
// Now switch placement so the remaining blocks go to the primary node
bp.overridingDatanode = primaryNode;
// Write the rest of the blocks on primaryNode
for (int i = 1; i < numBlocks; i++) {
LOG.info("Writing block number " + i + " (primary host)");
rand.nextBytes(b);
stm.write(b);
crc.reset();
crc.update(b);
crcs[i] = crc.getValue();
}
stm.close();
Thread.sleep(1000);
// Reset this guy
bp.overridingDatanode = tmp;
return crcs;
}
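/**
 * Creates one random file per entry in {@code fileNames} under {@code folder},
 * filling {@code crcOut} with per-block CRCs and {@code fileOut} with the
 * corresponding FileStatus objects.
 */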
private void createRandomFiles(Path folder, String[] fileNames,
int repl, int numBlocks,
/*out*/ long[][] crcOut, FileStatus[] fileOut)
throws IOException {
int i = 0;
for (String name : fileNames) {
Path fPath = new Path(folder, name);
crcOut[i] = createRandomFile(fPath, repl, numBlocks);
fileOut[i++] = fileSys.getFileStatus(fPath);
}
}
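/**
 * Rewrites the excludes file with the given node names (an empty file when
 * {@code nodes} is null) and tells the namenode to refresh its node lists.
 */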
private void writeExcludesFileAndRefresh(ArrayList<String> nodes)
throws IOException {
FileSystem fs = FileSystem.getLocal(conf);
LOG.info(fs);
// delete if it already exists
if (fs.exists(excludeFile)) {
fs.delete(excludeFile, true);
}
FSDataOutputStream stm = fs.create(excludeFile);
if (nodes != null) {
for (String node : nodes) {
stm.writeBytes(node);
stm.writeBytes("\n");
}
}
stm.close();
namenode.namesystem.refreshNodes(conf);
}
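/**
 * Raids a file with "rs" parity, then marks each of its blocks as decommissioning
 * in turn via the static test hooks and runs the decommissioning monitor.
 * Verifies that every block gains an extra replica, that the copy counters are
 * correct and that the file contents still match the original CRCs.
 */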
public void testReconstruction () throws Exception {
final int numBlocks = STRIPE_LENGTH + 1;
final int repl = 1;
setup(10, -1);
DistBlockIntegrityMonitor br = new DistBlockRegeneratorFake(conf);
Worker bc = br.getDecommissioningMonitor();
// Generate file
Path raidPath = new Path("/raidrs");
Path filePath = new Path("/user/hadoop/testReconstruction/file");
long[] crcs = createRandomFile(filePath, repl, numBlocks);
FileStatus file = fileSys.getFileStatus(filePath);
RaidNode.doRaid(conf, file, raidPath, Codec.getCodec("rs"),
new RaidNode.Statistics(), RaidUtils.NULL_PROGRESSABLE,
false, repl, repl);
// Do some testing
printFileLocations(file);
// We're gonna "decommission" the file
TestBlockCopier.decommissioningFiles =
new String[] { filePath.toUri().toString() };
// "Decommission" each of the file's blocks in turn
List<LocatedBlock> fileBlocks =
dfs.getNameNode().getBlockLocations(filePath.toUri().toString(),
0L,
file.getLen()).getLocatedBlocks();
for (LocatedBlock b : fileBlocks) {
TestBlockCopier.decommissioningBlocks = new LocatedBlock[] { b };
bc.checkAndReconstructBlocks();
long start = System.currentTimeMillis();
while ((br.jobsRunning() > 0)
&& ((System.currentTimeMillis() - start) < 30000)) {
LOG.info("Waiting on block regen jobs to complete ("
+ br.jobsRunning() + " running).");
Thread.sleep(1000);
bc.checkJobs();
}
}
// Verify that each block now has an extra replica.
printFileLocations(file);
fileBlocks =
dfs.getNameNode().getBlockLocations(filePath.toUri().toString(),
0L,
file.getLen()).getLocatedBlocks();
for (LocatedBlock b : fileBlocks) {
assertEquals("block was improperly replicated",
repl+1, b.getLocations().length);
}
bc.updateStatus();
assertEquals("unexpected copy failures occurred",
0, br.getNumFileCopyFailures());
assertEquals("unexpected number of file copy operations",
numBlocks, br.getNumFilesCopied());
// No corrupt block fixing should have happened
assertEquals("corrupt block fixer unexpectedly performed fixing",
0, br.getNumFilesFixed());
assertEquals("corrupt block fixer unexpectedly attempted fixing",
0, br.getNumFileFixFailures());
// Verify file contents
validateFileCopy(fileSys, filePath, file.getLen(), crcs, true);
// Done
teardown();
}
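// Test hooks: the fake monitor and reconstructor below read these to decide
// which files and blocks to treat as decommissioning.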
public static LocatedBlock[] decommissioningBlocks;
public static String[] decommissioningFiles;
// Validates the file contents block-by-block against the expected CRCs. When
// twiceThrough is true, the file is read a second time via seekToNewSource(0),
// which assumes the current replication is 2 so a second replica exists.
static void validateFileCopy(FileSystem fs, Path path, long size,
long[] blockCrcs, boolean twiceThrough)
throws IOException {
final int timesThrough = (twiceThrough ? 2 : 1);
final int numBlocks = (int) Math.ceil((double)size / BLOCK_SIZE);
// Check all the blocks timesThrough times
FSDataInputStream in = fs.open(path);
CRC32 crc = new CRC32();
for (int i = 0; i < timesThrough; i++) {
for (int b = 0; b < numBlocks; b++) {
int chunkSize = (int) Math.min(BLOCK_SIZE, (size-(b*BLOCK_SIZE)));
byte[] buf = new byte[chunkSize];
in.readFully(buf); // read() may return fewer bytes than requested; readFully fills the chunk
crc.reset();
crc.update(buf);
assertEquals(("Block crc " + b + " did not match on iteration " + i),
blockCrcs[b], crc.getValue());
}
assertEquals("Did not read to end of file", size, in.getPos());
if (i < (timesThrough - 1)) {
in.seekToNewSource(0);
}
}
in.close();
}
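// Prints the replica locations of every block of the given file (debugging aid).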
private void printFileLocations(FileStatus file)
throws IOException {
System.out.println(file.getPath() + " block locations:");
BlockLocation[] locations = fileSys.getFileBlockLocations(file, 0,
file.getLen());
for (int idx = 0; idx < locations.length; idx++) {
String[] loc = locations[idx].getNames();
System.out.print("Block[" + idx + "] : ");
for (int j = 0; j < loc.length; j++) {
System.out.print(loc[j] + " ");
}
System.out.println();
}
}
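/**
 * A DistBlockIntegrityMonitor whose set of "lost" (decommissioning) files is taken
 * from {@link TestBlockCopier#decommissioningFiles} and whose reconstruction jobs
 * are configured to use {@link ReconstructorFakeData}, giving the tests full
 * control over which files and blocks get regenerated.
 */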
static class DistBlockRegeneratorFake extends DistBlockIntegrityMonitor {
public DistBlockRegeneratorFake(Configuration conf) {
super(conf);
}
@Override
public void configureJob(Job job,
Class<? extends BlockReconstructor> rClass) {
super.configureJob(job, rClass);
LocatedBlock[] lb = TestBlockCopier.decommissioningBlocks;
String[] hashes = new String[lb.length];
for (int i = 0; i < lb.length; i++) {
hashes[i] = Integer.toString(lb[i].getBlock().hashCode());
}
((JobConf)job.getConfiguration()).setClass(ReconstructionMapper.RECONSTRUCTOR_CLASS_TAG,
ReconstructorFakeData.class,
BlockReconstructor.class);
((JobConf)job.getConfiguration()).setStrings("hdfs.testblockcopier.blockhashes", hashes);
}
@Override
protected Map<String, Integer> getLostFiles(
Pattern pattern, String[] dfsckArgs) throws IOException {
Map<String, Integer> map = new HashMap<String, Integer>();
// Disable CorruptionMonitor
if (pattern.equals(DistBlockIntegrityMonitor.LIST_CORRUPT_FILE_PATTERN)) {
return map;
}
for (String file : TestBlockCopier.decommissioningFiles) {
map.put(file, 1);
}
return map;
}
}
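/**
 * A DecommissioningBlockReconstructor that only reports as lost the blocks whose
 * hash codes were passed through the job configuration by
 * {@link DistBlockRegeneratorFake#configureJob}.
 */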
static class ReconstructorFakeData extends BlockReconstructor.DecommissioningBlockReconstructor {
private final Map<Integer, Boolean> decomBlockHashes;
public ReconstructorFakeData(Configuration conf) throws IOException {
super(conf);
// Deserialize block hashes
String[] hashes =
conf.getStrings("hdfs.testblockcopier.blockhashes", new String[0]);
decomBlockHashes = new HashMap<Integer, Boolean>();
for (String hash : hashes) {
decomBlockHashes.put(Integer.parseInt(hash), true);
}
}
@Override
List<LocatedBlockWithMetaInfo> lostBlocksInFile(DistributedFileSystem fs,
String uriPath,
FileStatus stat)
throws IOException {
List<LocatedBlockWithMetaInfo> blocks =
new ArrayList<LocatedBlockWithMetaInfo>();
VersionedLocatedBlocks locatedBlocks;
int namespaceId = 0;
int methodsFingerprint = 0;
if (DFSClient.isMetaInfoSuppoted(fs.getClient().namenodeProtocolProxy)) {
LocatedBlocksWithMetaInfo lbksm = fs.getClient().namenode.
openAndFetchMetaInfo(uriPath, 0, stat.getLen());
namespaceId = lbksm.getNamespaceID();
locatedBlocks = lbksm;
methodsFingerprint = lbksm.getMethodFingerPrint();
} else {
locatedBlocks = fs.getClient().namenode.open(uriPath, 0, stat.getLen());
}
final int dataTransferVersion = locatedBlocks.getDataProtocolVersion();
List<LocatedBlock> lb = locatedBlocks.getLocatedBlocks();
for (LocatedBlock b : lb) {
if (decomBlockHashes.get(b.getBlock().hashCode()) != null) {
blocks.add(new LocatedBlockWithMetaInfo(b.getBlock(),
b.getLocations(), b.getStartOffset(),
dataTransferVersion, namespaceId, methodsFingerprint));
}
}
return blocks;
}
}
}