/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.List;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;
import org.apache.hadoop.mapreduce.test.system.JTProtocol;
import org.apache.hadoop.mapreduce.test.system.JobInfo;
import org.apache.hadoop.mapreduce.test.system.TaskInfo;
import org.apache.hadoop.mapreduce.test.system.MRCluster;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.UtilsForTests;
import org.apache.hadoop.mapred.JobClient.NetworkedJob;
import org.apache.hadoop.examples.SleepJob;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.Path;
import testjar.GenerateTaskChildProcess;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.AfterClass;
import org.junit.Test;
/**
* Verify the Task Tracker Info functionality.
*/
public class TestTaskTrackerInfoSuccessfulFailedJobs {
private static MRCluster cluster = null;
private static JobClient client = null;
static final Log LOG = LogFactory.
getLog(TestTaskTrackerInfoSuccessfulFailedJobs.class);
private static Configuration conf = null;
private static JTProtocol remoteJTClient = null;
StatisticsCollectionHandler statisticsCollectionHandler = null;
int taskTrackerHeartBeatInterval = 0;
//No per-instance initialization is needed; the cluster handles are
//created once in the static setUp(). The throws clause is kept for
//the test-framework instantiation path.
public TestTaskTrackerInfoSuccessfulFailedJobs() throws Exception {
}
@BeforeClass
public static void setUp() throws Exception {
  //Bring up the test cluster first; the configuration and the
  //remote JobTracker proxy below are only valid once it is running.
  cluster = MRCluster.createCluster(new Configuration());
  cluster.setUp();
  conf = new Configuration(cluster.getConf());
  //Keep delegation tokens alive after job completion so follow-up
  //RPCs from the test do not fail.
  conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
  //Proxy used by all tests to query JobTracker-side statistics.
  remoteJTClient = cluster.getJTClient().getProxy();
}
@AfterClass
public static void tearDown() throws Exception {
  //Shut the shared cluster down once after all tests have run.
  cluster.tearDown();
}
/**
 * Verifies the task tracker summary statistics for a successful job:
 * total tasks and succeeded tasks are snapshotted for the
 * "since start", "last hour" and "last day" windows before and after
 * a sleep job, and each delta must equal the job's task count.
 */
@Test
public void testTaskTrackerInfoAll() throws Exception {
  String jobTrackerUserName = remoteJTClient.getDaemonUser();
  LOG.info("jobTrackerUserName is :" + jobTrackerUserName);
  //Bounds every polling loop below so an unforeseen condition
  //cannot hang the test forever.
  int count = 0;
  SleepJob job = new SleepJob();
  job.setConf(conf);
  int totalMapTasks = 5;
  int totalReduceTasks = 1;
  conf = job.setupJobConf(totalMapTasks, totalReduceTasks,
      100, 100, 100, 100);
  JobConf jconf = new JobConf(conf);
  //In this test setup the "last hour" and "last day" windows are
  //shrunk to 60 and 120 second roll-over rates. Wait until the
  //"last day" total-task counter has rolled back to zero so the
  //testcase starts from a clean baseline.
  while (remoteJTClient.getInfoFromAllClients("last_day", "total_tasks")
      != 0) {
    count++;
    UtilsForTests.waitFor(1000);
    //Fail rather than loop forever under unforeseen circumstances.
    if (count > 180) {
      Assert.fail("Since this value has not reached 0" +
          " in more than 180 seconds. Failing at this point");
    }
  }
  //Snapshot all six counters before the job so the assertions below
  //can verify the exact deltas produced by this job alone.
  statisticsCollectionHandler = remoteJTClient.
      getInfoFromAllClientsForAllTaskType();
  int totalTasksSinceStartBeforeJob = statisticsCollectionHandler.
      getSinceStartTotalTasks();
  int succeededTasksSinceStartBeforeJob = statisticsCollectionHandler.
      getSinceStartSucceededTasks();
  int totalTasksLastHourBeforeJob = statisticsCollectionHandler.
      getLastHourTotalTasks();
  int succeededTasksLastHourBeforeJob = statisticsCollectionHandler.
      getLastHourSucceededTasks();
  int totalTasksLastDayBeforeJob = statisticsCollectionHandler.
      getLastDayTotalTasks();
  int succeededTasksLastDayBeforeJob = statisticsCollectionHandler.
      getLastDaySucceededTasks();
  //Submitting the job
  RunningJob rJob = cluster.getJTClient().getClient().submitJob(jconf);
  JobInfo jInfo = remoteJTClient.getJobInfo(rJob.getID());
  LOG.info("jInfo is :" + jInfo);
  Assert.assertNotNull("jobInfo is null", jInfo);
  count = 0;
  LOG.info("Waiting till the job is completed...");
  while (jInfo != null && !jInfo.getStatus().isJobComplete()) {
    UtilsForTests.waitFor(1000);
    count++;
    jInfo = remoteJTClient.getJobInfo(rJob.getID());
    //Fail rather than loop forever; the testcase would anyway
    //fail later.
    if (count > 40) {
      Assert.fail("job has not reached completed state for more" +
          " than 40 seconds. Failing at this point");
    }
  }
  //Wait 6 times the task tracker heart beat interval to make sure
  //all completed tasks are reflected in their corresponding
  //tasktracker statistics; this accounts for network slowness and
  //job tracker processing time after receiving tasktracker updates.
  taskTrackerHeartBeatInterval = remoteJTClient.
      getTaskTrackerHeartbeatInterval();
  UtilsForTests.waitFor(taskTrackerHeartBeatInterval * 6);
  statisticsCollectionHandler =
      remoteJTClient.getInfoFromAllClientsForAllTaskType();
  int totalTasksSinceStartAfterJob = statisticsCollectionHandler.
      getSinceStartTotalTasks();
  int succeededTasksSinceStartAfterJob = statisticsCollectionHandler.
      getSinceStartSucceededTasks();
  int totalTasksLastHourAfterJob = statisticsCollectionHandler.
      getLastHourTotalTasks();
  int succeededTasksLastHourAfterJob = statisticsCollectionHandler.
      getLastHourSucceededTasks();
  int totalTasksLastDayAfterJob = statisticsCollectionHandler.
      getLastDayTotalTasks();
  int succeededTasksLastDayAfterJob = statisticsCollectionHandler.
      getLastDaySucceededTasks();
  //Expected tasks for the job: maps + reduces + setup and cleanup.
  int totalTasksForJob = (totalMapTasks + totalReduceTasks + 2);
  Assert.assertEquals("The number of total tasks, since start" +
      " dont match",
      (totalTasksSinceStartBeforeJob + totalTasksForJob),
      totalTasksSinceStartAfterJob);
  Assert.assertEquals("The number of succeeded tasks, " +
      "since start dont match",
      (succeededTasksSinceStartBeforeJob + totalTasksForJob),
      succeededTasksSinceStartAfterJob);
  Assert.assertEquals("The number of total tasks, last hour" +
      " dont match",
      (totalTasksLastHourBeforeJob + totalTasksForJob),
      totalTasksLastHourAfterJob);
  Assert.assertEquals("The number of succeeded tasks, " +
      "last hour dont match",
      (succeededTasksLastHourBeforeJob + totalTasksForJob),
      succeededTasksLastHourAfterJob);
  Assert.assertEquals("The number of total tasks, last day" +
      " dont match",
      (totalTasksLastDayBeforeJob + totalTasksForJob),
      totalTasksLastDayAfterJob);
  Assert.assertEquals("The number of succeeded tasks, " +
      "last day dont match",
      (succeededTasksLastDayBeforeJob + totalTasksForJob),
      succeededTasksLastDayAfterJob);
}
/**
 * Verifies the task tracker summary statistics when one task attempt
 * is killed: the total-task counters for "since start", "last hour"
 * and "last day" must include the killed attempt (job tasks + 1)
 * while the succeeded counters must include only the job's tasks,
 * because the killed attempt is re-run successfully elsewhere.
 */
@Test
public void testTaskTrackerInfoKilled() throws Exception {
  TaskInfo taskInfo = null;
  String jobTrackerUserName = remoteJTClient.getDaemonUser();
  LOG.info("jobTrackerUserName is :" + jobTrackerUserName);
  //Bounds every polling loop below so an unforeseen condition
  //cannot hang the test forever.
  int count = 0;
  SleepJob job = new SleepJob();
  job.setConf(conf);
  int totalMapTasks = 5;
  int totalReduceTasks = 1;
  conf = job.setupJobConf(totalMapTasks, totalReduceTasks,
      100, 100, 100, 100);
  JobConf jconf = new JobConf(conf);
  //In this test setup the "last hour" and "last day" windows are
  //shrunk to 60 and 120 second roll-over rates. Wait until the
  //"last day" total-task counter has rolled back to zero so the
  //testcase starts from a clean baseline.
  while (remoteJTClient.getInfoFromAllClients("last_day", "total_tasks")
      != 0) {
    count++;
    UtilsForTests.waitFor(1000);
    //Fail rather than loop forever under unforeseen circumstances.
    if (count > 140) {
      Assert.fail("Since this value has not reached 0" +
          " in more than 140 seconds. Failing at this point");
    }
  }
  //Snapshot all six counters before the job so the assertions below
  //can verify the exact deltas produced by this job alone.
  statisticsCollectionHandler = remoteJTClient.
      getInfoFromAllClientsForAllTaskType();
  int totalTasksSinceStartBeforeJob = statisticsCollectionHandler.
      getSinceStartTotalTasks();
  int succeededTasksSinceStartBeforeJob = statisticsCollectionHandler.
      getSinceStartSucceededTasks();
  int totalTasksLastHourBeforeJob = statisticsCollectionHandler.
      getLastHourTotalTasks();
  int succeededTasksLastHourBeforeJob = statisticsCollectionHandler.
      getLastHourSucceededTasks();
  int totalTasksLastDayBeforeJob = statisticsCollectionHandler.
      getLastDayTotalTasks();
  int succeededTasksLastDayBeforeJob = statisticsCollectionHandler.
      getLastDaySucceededTasks();
  //Submitting the job
  RunningJob rJob = cluster.getJTClient().getClient().
      submitJob(jconf);
  JobInfo jInfo = remoteJTClient.getJobInfo(rJob.getID());
  LOG.info("jInfo is :" + jInfo);
  //Wait (up to a minute) for the job to enter the RUNNING state.
  count = 0;
  while (count < 60) {
    if (jInfo.getStatus().getRunState() == JobStatus.RUNNING) {
      break;
    } else {
      UtilsForTests.waitFor(1000);
      jInfo = remoteJTClient.getJobInfo(rJob.getID());
    }
    count++;
  }
  Assert.assertTrue("Job has not been started for 1 min.",
      count != 60);
  Assert.assertNotNull("jobInfo is null", jInfo);
  //Pick the first real (non setup/cleanup) task of the job; it is
  //the one whose attempt will be killed below.
  TaskInfo[] taskInfos = remoteJTClient.getTaskInfo(rJob.getID());
  for (TaskInfo taskinfo : taskInfos) {
    if (!taskinfo.isSetupOrCleanup()) {
      taskInfo = taskinfo;
      break;
    }
  }
  Assert.assertNotNull("No non-setup/cleanup task found", taskInfo);
  //Wait (up to a minute) for that task's first attempt to start.
  count = 0;
  taskInfo = remoteJTClient.getTaskInfo(taskInfo.getTaskID());
  while (count < 60) {
    if (taskInfo.getTaskStatus().length > 0) {
      if (taskInfo.getTaskStatus()[0].getRunState()
          == TaskStatus.State.RUNNING) {
        break;
      }
    }
    UtilsForTests.waitFor(1000);
    taskInfo = remoteJTClient.getTaskInfo(taskInfo.getTaskID());
    count++;
  }
  Assert.assertTrue("Task has not been started for 1 min.",
      count != 60);
  //Kill attempt 0 of the chosen task through the client API.
  NetworkedJob networkJob = (cluster.getJTClient().getClient()).new
      NetworkedJob(jInfo.getStatus());
  TaskID tID = TaskID.downgrade(taskInfo.getTaskID());
  TaskAttemptID taskAttID = new TaskAttemptID(tID, 0);
  networkJob.killTask(taskAttID, false);
  count = 0;
  LOG.info("Waiting till the job is completed...");
  while (!jInfo.getStatus().isJobComplete()) {
    UtilsForTests.waitFor(1000);
    count++;
    jInfo = remoteJTClient.getJobInfo(rJob.getID());
    //Fail rather than loop forever; the testcase would anyway
    //fail later.
    if (count > 40) {
      Assert.fail("job has not reached completed state for more" +
          " than 40 seconds. Failing at this point");
    }
  }
  //Wait 6 times the task tracker heart beat interval to make sure
  //all completed tasks are reflected in their corresponding
  //tasktracker statistics; this accounts for network slowness and
  //job tracker processing time after receiving tasktracker updates.
  taskTrackerHeartBeatInterval = remoteJTClient.
      getTaskTrackerHeartbeatInterval();
  UtilsForTests.waitFor(taskTrackerHeartBeatInterval * 6);
  statisticsCollectionHandler =
      remoteJTClient.getInfoFromAllClientsForAllTaskType();
  int totalTasksSinceStartAfterJob = statisticsCollectionHandler.
      getSinceStartTotalTasks();
  int succeededTasksSinceStartAfterJob = statisticsCollectionHandler.
      getSinceStartSucceededTasks();
  int totalTasksLastHourAfterJob = statisticsCollectionHandler.
      getLastHourTotalTasks();
  int succeededTasksLastHourAfterJob = statisticsCollectionHandler.
      getLastHourSucceededTasks();
  int totalTasksLastDayAfterJob = statisticsCollectionHandler.
      getLastDayTotalTasks();
  int succeededTasksLastDayAfterJob = statisticsCollectionHandler.
      getLastDaySucceededTasks();
  //Total tasks expected is setup, cleanup + totalMapTasks
  //+ totalReduceTasks
  int totalTasksForJob = (totalMapTasks + totalReduceTasks + 2);
  //The total tasks will equal totalTasksSinceStartBeforeJob
  //+ totalTasksForJob + 1 for the killed attempt. The killed task
  //is re-attempted by the job tracker, re-run on another
  //tasktracker and completes successfully, which is captured in
  //totalTasksForJob.
  Assert.assertEquals("The number of total tasks, since start" +
      " dont match",
      (totalTasksSinceStartBeforeJob + totalTasksForJob + 1),
      totalTasksSinceStartAfterJob);
  Assert.assertEquals("The number of succeeded tasks, " +
      "since start dont match",
      (succeededTasksSinceStartBeforeJob + totalTasksForJob),
      succeededTasksSinceStartAfterJob);
  Assert.assertEquals("The number of total tasks, last hour" +
      " dont match",
      (totalTasksLastHourBeforeJob + totalTasksForJob + 1),
      totalTasksLastHourAfterJob);
  Assert.assertEquals("The number of succeeded tasks, " +
      "last hour dont match",
      (succeededTasksLastHourBeforeJob + totalTasksForJob),
      succeededTasksLastHourAfterJob);
  Assert.assertEquals("The number of total tasks, last day" +
      " dont match",
      (totalTasksLastDayBeforeJob + totalTasksForJob + 1),
      totalTasksLastDayAfterJob);
  Assert.assertEquals("The number of succeeded tasks, " +
      "last day dont match",
      (succeededTasksLastDayBeforeJob + totalTasksForJob),
      succeededTasksLastDayAfterJob);
}
/**
 * Verifies the task tracker summary statistics when a map task fails
 * every attempt: the total-task counters for "since start",
 * "last hour" and "last day" must grow by at least the failed
 * attempts, while the succeeded counters must not decrease.
 */
@Test
public void testTaskTrackerInfoTaskFailure() throws Exception {
  String jobTrackerUserName = remoteJTClient.getDaemonUser();
  LOG.info("jobTrackerUserName is :" + jobTrackerUserName);
  //Bounds every polling loop below so an unforeseen condition
  //cannot hang the test forever.
  int count = 0;
  Configuration conf = new Configuration(cluster.getConf());
  conf.setBoolean("mapreduce.map.output.compress", false);
  conf.set("mapred.map.output.compression.codec",
      "org.apache.hadoop.io.compress.DefaultCodec");
  JobConf jconf = new JobConf(conf);
  //Build a single-map, zero-reduce job whose mapper always fails,
  //with only one allowed attempt.
  Path inputDir = new Path("input");
  Path outputDir = new Path("output");
  cleanup(inputDir, conf);
  cleanup(outputDir, conf);
  createInput(inputDir, conf);
  jconf.setJobName("Task Failed job");
  jconf.setJarByClass(UtilsForTests.class);
  jconf.setMapperClass(GenerateTaskChildProcess.FailedMapper.class);
  jconf.setNumMapTasks(1);
  jconf.setNumReduceTasks(0);
  jconf.setMaxMapAttempts(1);
  FileInputFormat.setInputPaths(jconf, inputDir);
  FileOutputFormat.setOutputPath(jconf, outputDir);
  //In this test setup the "last hour" and "last day" windows are
  //shrunk to 60 and 120 second roll-over rates. Wait until the
  //"last day" total-task counter has rolled back to zero so the
  //testcase starts from a clean baseline.
  while (remoteJTClient.getInfoFromAllClients("last_day", "total_tasks")
      != 0) {
    count++;
    UtilsForTests.waitFor(1000);
    //Fail rather than loop forever under unforeseen circumstances.
    if (count > 140) {
      Assert.fail("Since this value has not reached 0" +
          " in more than 140 seconds. Failing at this point");
    }
  }
  //Snapshot all six counters before the job so the assertions below
  //can verify the minimum deltas produced by this job.
  statisticsCollectionHandler = remoteJTClient.
      getInfoFromAllClientsForAllTaskType();
  int totalTasksSinceStartBeforeJob = statisticsCollectionHandler.
      getSinceStartTotalTasks();
  int succeededTasksSinceStartBeforeJob = statisticsCollectionHandler.
      getSinceStartSucceededTasks();
  int totalTasksLastHourBeforeJob = statisticsCollectionHandler.
      getLastHourTotalTasks();
  int succeededTasksLastHourBeforeJob = statisticsCollectionHandler.
      getLastHourSucceededTasks();
  int totalTasksLastDayBeforeJob = statisticsCollectionHandler.
      getLastDayTotalTasks();
  int succeededTasksLastDayBeforeJob = statisticsCollectionHandler.
      getLastDaySucceededTasks();
  RunningJob rJob = cluster.getJTClient().getClient().submitJob(jconf);
  JobID id = rJob.getID();
  JobInfo jInfo = remoteJTClient.getJobInfo(id);
  LOG.info("jInfo is :" + jInfo);
  //Wait (up to a minute) for the job to enter the RUNNING state.
  count = 0;
  while (count < 60) {
    if (jInfo.getStatus().getRunState() == JobStatus.RUNNING) {
      break;
    } else {
      UtilsForTests.waitFor(1000);
      jInfo = remoteJTClient.getJobInfo(rJob.getID());
    }
    count++;
  }
  Assert.assertTrue("Job has not been started for 1 min.",
      count != 60);
  Assert.assertNotNull("jobInfo is null", jInfo);
  count = 0;
  LOG.info("Waiting till the job is completed...");
  while (jInfo != null && (!jInfo.getStatus().isJobComplete())) {
    UtilsForTests.waitFor(1000);
    count++;
    jInfo = remoteJTClient.getJobInfo(id);
    //Fail rather than loop forever; the testcase would anyway
    //fail later.
    if (count > 40) {
      Assert.fail("job has not reached completed state for more" +
          " than 40 seconds. Failing at this point");
    }
  }
  //Wait 6 times the task tracker heart beat interval to make sure
  //all completed tasks are reflected in their corresponding
  //tasktracker statistics; this accounts for network slowness and
  //job tracker processing time after receiving tasktracker updates.
  taskTrackerHeartBeatInterval = remoteJTClient.
      getTaskTrackerHeartbeatInterval();
  UtilsForTests.waitFor(taskTrackerHeartBeatInterval * 6);
  statisticsCollectionHandler =
      remoteJTClient.getInfoFromAllClientsForAllTaskType();
  int totalTasksSinceStartAfterJob = statisticsCollectionHandler.
      getSinceStartTotalTasks();
  int succeededTasksSinceStartAfterJob = statisticsCollectionHandler.
      getSinceStartSucceededTasks();
  int totalTasksLastHourAfterJob = statisticsCollectionHandler.
      getLastHourTotalTasks();
  int succeededTasksLastHourAfterJob = statisticsCollectionHandler.
      getLastHourSucceededTasks();
  int totalTasksLastDayAfterJob = statisticsCollectionHandler.
      getLastDayTotalTasks();
  int succeededTasksLastDayAfterJob = statisticsCollectionHandler.
      getLastDaySucceededTasks();
  //1 map running 4 times before failure, plus sometimes two failures
  //which are not captured in the job summary but are caught in the
  //tasktracker summary. 0 reduces, setup and cleanup.
  int totalTasksForJob = 4;
  Assert.assertTrue("The number of total tasks, since start" +
      " dont match", (totalTasksSinceStartAfterJob >=
      totalTasksSinceStartBeforeJob + totalTasksForJob));
  Assert.assertTrue("The number of succeeded tasks, " +
      "since start dont match",
      (succeededTasksSinceStartAfterJob >=
      succeededTasksSinceStartBeforeJob));
  Assert.assertTrue("The number of total tasks, last hour" +
      " dont match", (totalTasksLastHourAfterJob >=
      totalTasksLastHourBeforeJob + totalTasksForJob));
  Assert.assertTrue("The number of succeeded tasks, " +
      "last hour dont match", (succeededTasksLastHourAfterJob >=
      succeededTasksLastHourBeforeJob));
  Assert.assertTrue("The number of total tasks, last day" +
      " dont match", totalTasksLastDayAfterJob >=
      totalTasksLastDayBeforeJob + totalTasksForJob);
  Assert.assertTrue("The number of succeeded tasks, " +
      "last day dont match", succeededTasksLastDayAfterJob >=
      succeededTasksLastDayBeforeJob);
}
//Creates the input directory in the dfs, opens its permissions so
//tasks running as any user can read it, and fills it with a large
//text file for the map tasks to consume.
private void createInput(Path inDir, Configuration conf) throws
    IOException {
  String input = "Hadoop is framework for data intensive distributed "
      + "applications.\n"
      + "Hadoop enables applications to work with thousands of nodes.";
  FileSystem fs = inDir.getFileSystem(conf);
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Failed to create the input directory:"
        + inDir.toString());
  }
  fs.setPermission(inDir, new FsPermission(FsAction.ALL,
      FsAction.ALL, FsAction.ALL));
  DataOutputStream file = fs.create(new Path(inDir, "data.txt"));
  try {
    //Repeat the sample text to produce a sizeable input file.
    for (int i = 0; i < 1000 * 3000; i++) {
      file.writeBytes(input);
    }
  } finally {
    //Close in finally so a failed write does not leak the stream.
    file.close();
  }
}
//Recursively removes the given directory from the dfs, if present.
private void cleanup(Path dir, Configuration conf) throws
    IOException {
  dir.getFileSystem(conf).delete(dir, true);
}
}