/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertTrue;
import static junit.framework.Assert.fail;
import java.io.IOException;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.JobTracker.SafeModeAction;
import org.apache.hadoop.mapred.tools.MRAdmin;
import org.apache.hadoop.security.UserGroupInformation;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * A test for JobTracker safemode. In safemode, no tasks are scheduled, and
 * no tasks are marked as failed (they are killed instead).
 *
 * <p>Each test runs against a fresh single-node {@link MiniDFSCluster} and
 * {@link MiniMRCluster} that are created in {@link #setUp()} and shut down
 * in {@link #tearDown()}. The JobTracker is started with its HDFS monitor
 * enabled so that it can react to the NameNode entering or leaving
 * safe-mode.
 */
public class TestJobTrackerQuiescence {
  final Path testDir =
      new Path(System.getProperty("test.build.data", "/tmp"), "jt-safemode");
  final Path inDir = new Path(testDir, "input");
  final Path shareDir = new Path(testDir, "share");
  final Path outputDir = new Path(testDir, "output");
  final int maxMapTasks = 1;

  /** Maximum number of polls while waiting for a JobTracker safe-mode flip. */
  private static final int SAFEMODE_POLL_TRIES = 20;
  /** Pause between two consecutive safe-mode polls, in milliseconds. */
  private static final long SAFEMODE_POLL_INTERVAL_MS = 1000L;
  /** Upper bound on how long to wait for the job to reach 50% map progress. */
  private static final long MAP_PROGRESS_TIMEOUT_MS = 5 * 60 * 1000L;

  private MiniDFSCluster dfs;
  private MiniMRCluster mr;
  private FileSystem fileSys;
  private JobTracker jt;

  private static final Log LOG =
      LogFactory.getLog(TestJobTrackerQuiescence.class);

  /**
   * Starts the mini HDFS and MapReduce clusters, writes the job input file
   * and captures the running {@link JobTracker}.
   *
   * @throws IOException if the input directory cannot be created or a
   *         cluster operation fails
   */
  @Before
  public void setUp() throws IOException {
    Configuration conf = new Configuration();
    conf.setBoolean("dfs.replication.considerLoad", false);
    dfs = new MiniDFSCluster(conf, 1, true, null, null);
    dfs.waitActive();
    fileSys = dfs.getFileSystem();

    // Start from a clean test directory.
    fileSys.delete(testDir, true);
    if (!fileSys.mkdirs(inDir)) {
      throw new IOException("Mkdirs failed to create " + inDir.toString());
    }

    // Write the input file.
    UtilsForTests.writeFile(dfs.getNameNode(), conf,
        new Path(inDir + "/file"), (short)1);

    dfs.startDataNodes(conf, 1, true, null, null, null, null);
    dfs.waitActive();

    String namenode = (dfs.getFileSystem()).getUri().getHost() + ":"
        + (dfs.getFileSystem()).getUri().getPort();

    JobConf jtConf = new JobConf();
    jtConf.setInt("mapred.tasktracker.map.tasks.maximum", maxMapTasks);
    jtConf.setInt("mapred.tasktracker.reduce.tasks.maximum", 1);
    // Enable the JobTracker's HDFS monitor, which testHDFSMonitor exercises.
    jtConf.setBoolean(JobTracker.JT_HDFS_MONITOR_ENABLE, true);
    jtConf.setInt(JobTracker.JT_HDFS_MONITOR_THREAD_INTERVAL, 1000);

    mr = new MiniMRCluster(1, namenode, 1, null, null, jtConf);
    mr.waitUntilIdle();
    mr.setInlineCleanupThreads();
    jt = mr.getJobTrackerRunner().getJobTracker();
  }

  /**
   * Shuts down both mini clusters. Shutdown is best-effort: a failure in one
   * cluster is logged and must not prevent the other from being stopped.
   */
  @After
  public void tearDown() {
    if (mr != null) {
      try {
        mr.shutdown();
      } catch (Exception e) {
        LOG.warn("Ignoring failure while shutting down MiniMRCluster", e);
      }
    }
    if (dfs != null) {
      try {
        dfs.shutdown();
      } catch (Exception e) {
        LOG.warn("Ignoring failure while shutting down MiniDFSCluster", e);
      }
    }
  }

  /**
   * Verifies that the JobTracker follows HDFS in and out of safe-mode, and
   * that this 'automatic' mode does not override a safe-mode that was set
   * explicitly through {@link JobTracker#setSafeMode}.
   */
  @Test
  public void testHDFSMonitor() throws Exception {
    /*
     * Try 'automatic' safe-mode
     */
    setHdfsSafeMode(true);
    waitForJobTrackerSafeMode(true);
    assertEquals("JT should have entered safe-mode along with HDFS",
        true, jt.isInSafeMode());

    setHdfsSafeMode(false);
    waitForJobTrackerSafeMode(false);
    assertEquals("JT should have left safe-mode along with HDFS",
        false, jt.isInSafeMode());

    /*
     * Now ensure 'automatic' mode doesn't interfere with 'admin set' safe-mode
     */
    setHdfsSafeMode(true);
    waitForJobTrackerSafeMode(true);
    assertEquals("JT should have entered safe-mode along with HDFS",
        true, jt.isInSafeMode());

    // Now, put JT in admin set safe-mode
    enterSafeMode();

    // Bring HDFS back from safe-mode. The wait below is expected to run for
    // its full duration because the admin-set safe-mode must stay in force.
    setHdfsSafeMode(false);
    waitForJobTrackerSafeMode(false);
    assertEquals("JT should *still* be in safe-mode",
        true, jt.isInSafeMode());
    assertEquals("JT should be in admin-set safe-mode",
        true, jt.isInAdminSafeMode());

    // Leave JT safe-mode
    leaveSafeMode();
    assertEquals("JT should have left admin-set safe-mode",
        false, jt.isInAdminSafeMode());

    // Bounce HDFS back in-out
    setHdfsSafeMode(true);
    Thread.sleep(5000);
    setHdfsSafeMode(false);
    waitForJobTrackerSafeMode(false);
    assertEquals("JT should not be in safe-mode after HDFS bounce",
        false, jt.isInSafeMode());
  }

  /**
   * Verifies that {@code mradmin -safemode wait} blocks while the JobTracker
   * is in safe-mode and returns once safe-mode is left.
   */
  @Test
  public void testMRAdminSafeModeWait() throws Exception {
    enterSafeMode();

    ExecutorService executor = Executors.newSingleThreadExecutor();
    try {
      Future<Void> future = executor.submit(new Callable<Void>() {
        @Override
        public Void call() throws Exception {
          MRAdmin mrAdmin = new MRAdmin(mr.createJobConf());
          mrAdmin.run(new String[] { "-safemode", "wait" });
          return null;
        }
      });

      try {
        future.get(1, TimeUnit.SECONDS);
        fail("JT should still be in safemode");
      } catch (TimeoutException e) {
        // expected
      }

      leaveSafeMode();

      try {
        future.get(10, TimeUnit.SECONDS);
      } catch (TimeoutException e) {
        fail("JT should no longer be in safemode");
      }
    } finally {
      // Always stop the worker thread, even when an assertion above fails,
      // so that it does not outlive the test and keep polling the cluster.
      executor.shutdownNow();
    }
  }

  /**
   * Verifies that a running job makes no further scheduling progress while
   * the JobTracker is in safe-mode and completes successfully afterwards.
   */
  @Test
  public void testJobsPauseInSafeMode() throws Exception {
    JobConf jobConf = mr.createJobConf();
    int numMaps = 10;
    int numReds = 1;
    String mapSignalFile = UtilsForTests.getMapSignalFile(shareDir);
    String redSignalFile = UtilsForTests.getReduceSignalFile(shareDir);
    jobConf.set("user.name", UserGroupInformation.getCurrentUser().getUserName());

    // Configure the job
    JobConf job = configureJob(jobConf, numMaps, numReds,
        mapSignalFile, redSignalFile);

    fileSys.delete(shareDir, true);

    // Submit the job
    JobClient jobClient = new JobClient(job);
    RunningJob rJob = jobClient.submitJob(job);
    JobID id = rJob.getID();

    // wait for the job to be inited
    mr.initializeJob(id);

    // Make sure that the master job is 50% completed. The wait is bounded so
    // that a stuck job fails the test instead of hanging it.
    long deadline = System.currentTimeMillis() + MAP_PROGRESS_TIMEOUT_MS;
    while (UtilsForTests.getJobStatus(jobClient, id).mapProgress() < 0.5f) {
      if (System.currentTimeMillis() > deadline) {
        fail("Timed out waiting for job " + id
            + " to reach 50% map progress");
      }
      UtilsForTests.waitFor(10);
    }
    assertEquals(numMaps / 2, getCompletedMapCount(rJob));

    enterSafeMode();

    // Signal all the maps to complete
    UtilsForTests.signalTasks(dfs, fileSys, true, mapSignalFile, redSignalFile);

    // Signal the reducers to complete
    UtilsForTests.signalTasks(dfs, fileSys, false, mapSignalFile,
        redSignalFile);

    // only assigned maps complete in safemode since no more maps may be
    // assigned
    Thread.sleep(10000);
    assertEquals(numMaps / 2 + maxMapTasks, getCompletedMapCount(rJob));

    leaveSafeMode();

    // job completes after leaving safemode
    UtilsForTests.waitTillDone(jobClient);
    assertTrue(rJob.isSuccessful());
  }

  /**
   * Counts the map-task entries among the task completion events that the
   * job returns when queried from event index 0.
   *
   * @param rJob the job to query
   * @return the number of returned completion events that belong to map tasks
   * @throws IOException if the completion events cannot be fetched
   */
  private int getCompletedMapCount(RunningJob rJob) throws IOException {
    TaskCompletionEvent[] taskCompletionEvents = rJob.getTaskCompletionEvents(0);
    int mapCount = 0;
    for (TaskCompletionEvent tce : taskCompletionEvents) {
      if (tce.isMapTask()) {
        mapCount++;
      }
    }
    return mapCount;
  }

  /**
   * Configures {@code conf} as a signal-driven waiting job reading from
   * {@link #inDir} and writing to {@link #outputDir}.
   *
   * @return the same {@code conf} instance, for call chaining
   */
  private JobConf configureJob(JobConf conf, int maps, int reduces,
      String mapSignal, String redSignal) throws IOException {
    UtilsForTests.configureWaitingJobConf(conf, inDir, outputDir, maps,
        reduces, "test-jt-safemode", mapSignal, redSignal);
    return conf;
  }

  /** Puts the JobTracker into safe-mode via {@link JobTracker#setSafeMode}. */
  private void enterSafeMode() throws IOException {
    jt.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  }

  /** Takes the JobTracker out of safe-mode via {@link JobTracker#setSafeMode}. */
  private void leaveSafeMode() throws IOException {
    jt.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
  }

  /**
   * Asks the NameNode to enter or leave HDFS safe-mode.
   *
   * @param enter {@code true} to enter safe-mode, {@code false} to leave it
   */
  private void setHdfsSafeMode(boolean enter) throws IOException {
    // Fully qualified: the simple name SafeModeAction is already imported
    // from JobTracker.
    dfs.getNameNode().setSafeMode(enter
        ? org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_ENTER
        : org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_LEAVE);
  }

  /**
   * Polls the JobTracker until its safe-mode state equals {@code expected}
   * or {@link #SAFEMODE_POLL_TRIES} polls have elapsed. This method never
   * fails by itself; callers assert on the resulting state.
   *
   * @param expected the safe-mode state to wait for
   */
  private void waitForJobTrackerSafeMode(boolean expected)
      throws InterruptedException {
    int triesLeft = SAFEMODE_POLL_TRIES;
    while (jt.isInSafeMode() != expected && triesLeft > 0) {
      Thread.sleep(SAFEMODE_POLL_INTERVAL_MS);
      --triesLeft;
    }
  }
}