/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.File;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Random;
import java.util.TreeMap;
import javax.security.auth.login.LoginException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.raid.RaidHistogram.BlockFixStatus;
import org.apache.hadoop.raid.protocol.RaidProtocol;
import org.apache.hadoop.security.UnixUserGroupInformation;
import junit.framework.TestCase;
public class TestRaidHistogram extends TestCase {
/**
 * Creates the test case, passing the given name through to the
 * {@link TestCase} constructor.
 *
 * @param name name handed to the JUnit {@link TestCase} superclass
 */
public TestRaidHistogram(String name) {
super(name);
}
/** Logger used by the tests and by the fake block fixer threads. */
final static Log LOG = LogFactory.getLog(TestRaidHistogram.class);
/**
 * Absolute path of the test data directory, taken from the
 * "test.build.data" system property with a build-tree default.
 */
final static String TEST_DIR = new File(System.getProperty("test.build.data",
"build/contrib/raid/test/data")).getAbsolutePath();
/**
 * Absolute path of the raid policy file inside TEST_DIR; mySetup() registers
 * it as "raid.config.file" and writes the test policies to it.
 */
final static String CONFIG_FILE = new File(TEST_DIR,
"test-raid.xml").getAbsolutePath();
// Configuration created by mySetup() and shared with the RaidNode and RPC clients.
Configuration conf;
// URI string of the mini DFS file system; set in mySetup().
String namenode = null;
// Mini HDFS cluster started by mySetup() and shut down by myTearDown().
MiniDFSCluster dfsCluster = null;
// hftp URL built in mySetup(); not read anywhere else in this file.
String hftp = null;
// Mini map-reduce cluster started by mySetup() and shut down by myTearDown().
MiniMRCluster mr = null;
// File system handle of dfsCluster; set in mySetup().
FileSystem fileSys = null;
// RaidNode under test; created by each test method and stopped by myTearDown().
RaidNode cnode = null;
// "localhost:<port>" address of the mini job tracker; set in mySetup().
String jobTrackerName = null;
// Random source seeded in mySetup(); the seed is logged there.
Random rand;
// Number of fixer threads and percent steps used by testHistograms().
int nPercents = 10;
// Comma-separated directory list passed to the block integrity monitor in mySetup().
String monitorDirStr = "/a";
// monitorDirStr split on commas; the directories the fake fixers report paths under.
String[] monitorDirs = monitorDirStr.split(",");
// NOTE(review): not referenced anywhere else in this file.
public volatile boolean running = true;
/**
 * Builds the fixture shared by the tests: creates the configuration, seeds
 * the random generator, starts a mini DFS cluster and a mini map-reduce
 * cluster, and writes the raid policy file to {@link #CONFIG_FILE}.
 *
 * The RaidNode itself is not started here; each test creates it so that it
 * can adjust the configuration first.
 *
 * @throws Exception if a cluster cannot be started or the policy file
 *         cannot be written
 */
private void mySetup() throws Exception {
  if (System.getProperty("hadoop.log.dir") == null) {
    String base = new File(".").getAbsolutePath();
    System.setProperty("hadoop.log.dir", new Path(base).toString() + "/logs");
  }

  new File(TEST_DIR).mkdirs(); // Make sure data directory exists
  conf = new Configuration();

  // Log the seed so the random values of a failing run can be identified.
  long seed = (new Random()).nextLong();
  LOG.info("Random seed is " + seed);
  rand = new Random(seed);

  conf.set("raid.config.file", CONFIG_FILE);
  conf.setBoolean("raid.config.reload", true);
  // scan all policies once every 5 seconds
  conf.setLong("raid.policy.rescan.interval", 5000);

  // do not use map-reduce cluster for Raiding
  conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
  conf.set("raid.server.address", "localhost:" + MiniDFSCluster.getFreePort());
  conf.set("mapred.raid.http.address", "localhost:0");
  // Keep the initial parity replication at 1: only one datanode is started below.
  conf.setInt(RaidNode.RAID_PARITY_INITIAL_REPL_KEY, 1);
  Utils.loadTestCodecs(conf, 3, 1, 3, "/destraid", "/destraidrs");

  conf.setBoolean("dfs.permissions", false);
  conf.set(BlockIntegrityMonitor.RAIDNODE_CORRUPT_FILE_COUNTER_DIRECTORIES_KEY,
      monitorDirStr);
  // Initial monitor windows, in seconds: 3600 s (1 hour) and 120 s (2 minutes).
  conf.set(BlockIntegrityMonitor.MONITOR_SECONDS_KEY, "3600,120");

  dfsCluster = new MiniDFSCluster(conf, 1, true, null);
  dfsCluster.waitActive();
  fileSys = dfsCluster.getFileSystem();
  namenode = fileSys.getUri().toString();
  // Set once; conf is not modified by the MR cluster start-up that follows.
  FileSystem.setDefaultUri(conf, namenode);

  mr = new MiniMRCluster(4, namenode, 3);
  jobTrackerName = "localhost:" + mr.getJobTrackerPort();
  hftp = "hftp://localhost.localdomain:" + dfsCluster.getNameNodePort();
  conf.set("mapred.job.tracker", jobTrackerName);

  ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE);
  cb.addPolicy("RaidTest1", "/user/dhruba/raidtest",
      1, 1);
  cb.addPolicy("RaidTest2", "/user/dhruba/raidtestrs",
      1, 1, "rs");
  cb.persist();
}
/**
 * Stops the RaidNode and shuts down both mini clusters.
 *
 * Every component is released even when stopping an earlier one throws, so
 * a failing test does not leave clusters running behind it. The map-reduce
 * cluster is shut down before the DFS cluster it was started against.
 *
 * @throws Exception if stopping the RaidNode or a cluster fails
 */
private void myTearDown() throws Exception {
  try {
    if (cnode != null) {
      cnode.stop();
      cnode.join();
    }
  } finally {
    try {
      if (mr != null) {
        mr.shutdown();
      }
    } finally {
      if (dfsCluster != null) {
        dfsCluster.shutdown();
      }
    }
  }
}
public class FakeBlockFixerThread extends Thread {
public RaidProtocol rpcRaidnode;
public RaidProtocol raidnode;
public int start;
public int range;
public long round;
FakeBlockFixerThread(int newStart, int newRange, long newRound)
throws Exception {
UnixUserGroupInformation ugi;
try {
ugi = UnixUserGroupInformation.login(conf, true);
} catch (LoginException e) {
throw (IOException)(new IOException().initCause(e));
}
rpcRaidnode = RaidShell.createRPCRaidnode(RaidNode.getAddress(conf),
conf, ugi);
raidnode = RaidShell.createRaidnode(rpcRaidnode);
start = newStart;
range = newRange;
round = newRound;
}
public void run() {
try {
for (int i = 0 ;i < round; i++) {
int value = rand.nextInt(range);
// Make sure the minimum and maximum values are sent.
if (i == 0) {
value = start;
} else if (i == 1) {
value = start + range - 1;
} else {
value += start;
}
String path1;
String path2;
for (int j = 0 ;j <= monitorDirs.length; j++) {
if (j == monitorDirs.length) {
path1 = "/others/" + rand.nextInt();
path2 = "/others/failed" + start + "_" + i;
} else {
path1 = monitorDirs[j] + "/" + rand.nextInt();
path2 = monitorDirs[j] + "/" + start + "_" + i;
}
try {
raidnode.sendRecoveryTime(path1, value, null);
} catch (IOException ioe) {
LOG.error(ioe);
}
try {
raidnode.sendRecoveryTime(path2, Integer.MAX_VALUE, null);
} catch (IOException ioe) {
LOG.error(ioe);
}
}
}
} finally {
RPC.stopProxy(rpcRaidnode);
}
}
}
/**
 * Runs nPercents fake block fixer threads and waits for all of them.
 *
 * The first thread reports times starting at start, and every following
 * thread starts range higher than the one before it.
 *
 * @param nPercents number of threads to run
 * @param start smallest recovery time reported by the first thread
 * @param range width of the interval each thread reports from
 * @param rounds number of rounds each thread runs
 * @throws Exception if a thread cannot be created or the wait is interrupted
 */
public void sendRecoveryTimes(int nPercents, int start,
    int range, int rounds) throws Exception {
  FakeBlockFixerThread[] fixers = new FakeBlockFixerThread[nPercents];
  int nextStart = start;
  for (int idx = 0; idx < fixers.length; idx++) {
    FakeBlockFixerThread fixer =
        new FakeBlockFixerThread(nextStart, range, rounds);
    fixers[idx] = fixer;
    fixer.start();
    nextStart += range;
  }
  for (FakeBlockFixerThread fixer : fixers) {
    fixer.join();
  }
}
/**
 * Logs, for every window in the given status map, the window key, the
 * number of failed paths and the percent values joined with "/".
 *
 * @param status block fix status keyed by window
 */
public void printBlockFixStatus(TreeMap<Long, BlockFixStatus> status) {
  for (java.util.Map.Entry<Long, BlockFixStatus> entry : status.entrySet()) {
    LOG.info("Window: " + entry.getKey());
    BlockFixStatus windowStatus = entry.getValue();
    LOG.info("failedPaths: " + windowStatus.failedPaths);
    StringBuilder joined = new StringBuilder();
    for (Long percentValue : windowStatus.percentValues) {
      joined.append("/").append(percentValue);
    }
    LOG.info("percentValues: " + joined.toString());
  }
}
/**
 * Sends the same recovery times for several rounds in a row.
 *
 * Two windows are configured: a short one of gapTime milliseconds and a one
 * hour one. Right after each send both windows must report the sent data.
 * After sleeping past the short window, that window must be empty again
 * (no failed paths, all percent values -1).
 */
public void testRepeatSendingRecoveryTime() throws Exception {
  int rounds = 4;
  // Named differently from the nPercents field so the field is not shadowed.
  int numPercents = 2;
  int range = 1000000;
  // The expected percent values are the sent values scaled down by 1000.
  int dividedRange = range / 1000;
  float step = 1.0f / numPercents;
  long gapTime = 3000L;
  ArrayList<Long> windows = new ArrayList<Long>();
  windows.add(gapTime);
  windows.add(3600000L);
  int sendRound = 2;
  try {
    mySetup();
    Configuration localConf = new Configuration(conf);
    // The monitor key takes seconds; the windows list above is in milliseconds.
    localConf.set(BlockIntegrityMonitor.MONITOR_SECONDS_KEY,
        gapTime/1000 + ",3600");
    cnode = RaidNode.createRaidNode(null, localConf);
    ArrayList<Float> percents = new ArrayList<Float>();
    for (int i = 0; i <= numPercents; i++) {
      percents.add(step * i);
    }
    Collections.shuffle(percents);
    for (int r = 0; r < rounds; r++) {
      // submit some data
      long sTime = System.currentTimeMillis();
      sendRecoveryTimes(numPercents, 0, range, sendRound);
      LOG.info("Get blockFixStatus");
      String monitorDir = monitorDirs[0];
      // Presumably the last argument is the time the status is evaluated at;
      // it is chosen relative to the start of this round rather than "now".
      TreeMap<Long, BlockFixStatus> status =
          cnode.blockIntegrityMonitor.getBlockFixStatus(
              monitorDir, numPercents, percents, sTime + gapTime - 1000);
      printBlockFixStatus(status);
      assertTrue(status.containsKey(windows.get(0)));
      assertTrue(status.containsKey(windows.get(1)));
      // Both windows are expected to report exactly the same data here.
      for (Long window : windows) {
        BlockFixStatus bfs = status.get(window);
        // Verify failed recovered files for this window
        assertEquals("The number of failed recovery files should match",
            sendRound*numPercents, bfs.failedPaths);
        // Verify percent values for this window
        assertEquals(numPercents + 1, bfs.percentValues.length);
        assertEquals(0, bfs.percentValues[0]);
        for (int j = 1; j <= numPercents; j++) {
          assertEquals(dividedRange * j - 1, bfs.percentValues[j]);
        }
      }

      // Wait until everything sent in this round is older than the short window.
      Thread.sleep(gapTime + 1000);
      status = cnode.blockIntegrityMonitor.getBlockFixStatus(
          monitorDir, numPercents, percents, System.currentTimeMillis());
      printBlockFixStatus(status);
      assertTrue(status.containsKey(windows.get(0)));
      assertTrue(status.containsKey(windows.get(1)));
      BlockFixStatus expired = status.get(windows.get(0));
      // Verify failed recovered files for the first window
      assertEquals("The number of failed recovery files should be 0",
          0, expired.failedPaths);
      // Verify percent values for the first window, they should all be -1
      assertEquals(numPercents + 1, expired.percentValues.length);
      assertEquals(-1, expired.percentValues[0]);
      for (int j = 1; j <= numPercents; j++) {
        assertEquals(-1, expired.percentValues[j]);
      }
    }
  } finally {
    myTearDown();
  }
}
/**
 * Runs three stages. Each stage spawns nPercents threads.
 * Each thread iterates over $rounds rounds and, for each monitor dir, sends
 * a random number to the raidnode for both succeeded and failed files.
 * Two windows are then set: the first window covers stage 3 only, the
 * second window covers stage 2 and stage 3 only.
 * Calling getBlockFixStatus should filter out all stage 1 points.
 * The histogram counts for the second window should be double those of
 * the first window.
 */
public void testHistograms() throws Exception {
  int rounds = 10000;
  int range = 1000000;
  // The expected percent values are the sent values scaled down by 1000.
  int dividedRange = range / 1000;
  float step = 1.0f / nPercents;
  try {
    mySetup();
    cnode = RaidNode.createRaidNode(null, conf);
    ArrayList<Float> percents = new ArrayList<Float>();
    for (int i = 0; i <= nPercents; i++) {
      percents.add(step * i);
    }
    Collections.shuffle(percents);
    // Stage 1: submit some old data, in a value range above the later stages.
    sendRecoveryTimes(nPercents, range*(nPercents + 1), range, rounds);
    Thread.sleep(100);
    long ckpTime1 = System.currentTimeMillis();

    // Stage 2.
    sendRecoveryTimes(nPercents, 0, range, rounds);
    Thread.sleep(100);
    long ckpTime2 = System.currentTimeMillis();

    // Stage 3.
    sendRecoveryTimes(nPercents, 0, range, rounds);
    long endTime = System.currentTimeMillis();

    // Size the windows from the checkpoints so they end exactly at endTime.
    ArrayList<Long> newWindows = new ArrayList<Long>();
    newWindows.add(endTime - ckpTime2);
    newWindows.add(endTime - ckpTime1);
    HashMap<String, RaidHistogram> recoveryTimes =
        cnode.blockIntegrityMonitor.getRecoveryTimes();
    for (RaidHistogram histogram: recoveryTimes.values()) {
      histogram.setNewWindows(newWindows);
    }
    // One extra iteration past the monitored directories covers OTHERS.
    for (int i = 0; i <= monitorDirs.length; i++) {
      String monitorDir;
      if (i < monitorDirs.length) {
        monitorDir = monitorDirs[i];
      } else {
        monitorDir = BlockIntegrityMonitor.OTHERS;
      }
      // Nothing has been filtered yet: 3 stages, each with one succeeded and
      // one failed point per thread and round.
      assertEquals("All submitted points should be recorded before filtering",
          rounds*nPercents*3*2,
          cnode.blockIntegrityMonitor.getNumberOfPoints(monitorDir));
      TreeMap<Long, BlockFixStatus> status =
          cnode.blockIntegrityMonitor.getBlockFixStatus(
              monitorDir, nPercents, percents, endTime);
      assertTrue(status.containsKey(newWindows.get(0)));
      assertTrue(status.containsKey(newWindows.get(1)));
      // After the status query only the stage 2 and stage 3 points remain.
      assertEquals("Stale entries are not filtered", rounds*nPercents*2*2,
          cnode.blockIntegrityMonitor.getNumberOfPoints(monitorDir));

      BlockFixStatus bfs = status.get(newWindows.get(0));
      // Verify failed recovered files for the first window
      assertEquals("The number of failed recovery files should match",
          rounds*nPercents, bfs.failedPaths);
      // Verify histogram for the first window
      assertEquals(nPercents, bfs.counters.length);
      for (int j = 0; j < nPercents; j++) {
        assertEquals(rounds, bfs.counters[j]);
      }
      // Verify percent values for the first window
      assertEquals(nPercents + 1, bfs.percentValues.length);
      assertEquals(0, bfs.percentValues[0]);
      for (int j = 1; j <= nPercents; j++) {
        assertEquals(dividedRange * j - 1, bfs.percentValues[j]);
      }

      bfs = status.get(newWindows.get(1));
      // Verify failed recovered files for the second window
      assertEquals("The number of failed recovery files should match",
          rounds*nPercents, bfs.failedPaths);
      // Verify histogram for the second window
      assertEquals(nPercents, bfs.counters.length);
      for (int j = 0; j < nPercents; j++) {
        assertEquals(rounds*2, bfs.counters[j]);
      }
      // Verify percent values for the second window
      assertEquals(nPercents + 1, bfs.percentValues.length);
      assertEquals(0, bfs.percentValues[0]);
      for (int j = 1; j <= nPercents; j++) {
        assertEquals(dividedRange * j - 1, bfs.percentValues[j]);
      }
    }
  } finally {
    myTearDown();
  }
}
}