/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.examples.SleepJob;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.test.system.JTProtocol;
import org.apache.hadoop.mapreduce.test.system.JobInfo;
import org.apache.hadoop.mapreduce.test.system.MRCluster;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Verifies the location of retired job history files.
 */
public class TestJobHistoryLocation {
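  //Handles to the test cluster and its DFS, plus bookkeeping state
  //shared between the tests and the helper methods below.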
  private static MRCluster cluster = null;
  private static FileSystem dfs = null;
  private static JobClient jobClient = null;
  private static String jobHistoryDonePathString = null;
  private static int count = 0;
  private static int fileCount = 0;
  private static boolean jobFileFound = false;
  private static int retiredJobInterval = 0;
  private static Configuration conf = null;
  static final Log LOG =
      LogFactory.getLog(TestJobHistoryLocation.class);

  public TestJobHistoryLocation() throws Exception {
  }
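  /**
   * Starts the MR cluster and caches the JobClient and file system
   * handles used by the tests.
   */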
  @BeforeClass
  public static void setUp() throws Exception {
    cluster = MRCluster.createCluster(new Configuration());
    String[] expExcludeList = {"java.net.ConnectException",
        "java.io.IOException", "org.apache.hadoop.metrics2.MetricsException"};
    cluster.setExcludeExpList(expExcludeList);
    conf = new Configuration(cluster.getConf());
    cluster.setUp();
    jobClient = cluster.getJTClient().getClient();
    dfs = jobClient.getFs();
  }
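  /**
   * Shuts down the MR cluster.
   */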
  @AfterClass
  public static void tearDown() throws Exception {
    cluster.tearDown();
  }
  /**
   * Verifies that when successful / failed jobs are retired, the
   * retired jobs are placed as per
   * mapred.job.tracker.history.completed.location. Also verifies that
   * the retired jobs still land in that location when the done
   * directory already contains history files.
   */
  @Test
  public void testRetiredJobsHistoryLocation() throws Exception {
    JTProtocol remoteJTClient = cluster.getJTClient().getProxy();
    int testIterationLoop = 0;
    do {
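      //Two iterations: the first lets the job run to completion and
      //the second kills it, so that both successful and killed jobs
      //are verified.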
      testIterationLoop++;
      SleepJob job = new SleepJob();
      job.setConf(conf);
      conf = job.setupJobConf(5, 1, 100, 100, 100, 100);
      //Get the value of mapred.jobtracker.retirejob.check. If not
      //found then use 60000 milliseconds, which is the application default.
      retiredJobInterval =
          conf.getInt("mapred.jobtracker.retirejob.check", 60000);
      //Fail if retiredJobInterval is 0
      if (retiredJobInterval == 0) {
        Assert.fail("mapred.jobtracker.retirejob.check is 0");
      }
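      //Retain the job's delegation tokens after it completes.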
      conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens",
          false);
      jobFileFound = false;
      JobConf jconf = new JobConf(conf);
      jobHistoryDonePathString =
          jconf.get("mapred.job.tracker.history.completed.location");
      //Assert if jobHistoryDonePathString is null
      Assert.assertNotNull("mapred.job.tracker.history.completed.location " +
          "is null", jobHistoryDonePathString);
      LOG.info("jobHistoryDonePath location is :" + jobHistoryDonePathString);
      String jobHistoryPathString = jconf.get("hadoop.job.history.location");
      //Submitting the job
      RunningJob rJob = cluster.getJTClient().getClient().submitJob(jconf);
      JobID jobID = rJob.getID();
      JobInfo jInfo = remoteJTClient.getJobInfo(jobID);
      String jobIDString = jobID.toString();
      LOG.info("jobIDString is :" + jobIDString);
      //Assert if jobInfo is null
      Assert.assertNotNull("jobInfo is null", jInfo);
      waitTillRunState(jInfo, jobID, remoteJTClient);
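      //If a location for running-job history is configured, the
      //history file for this job should already be present there.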
      if (jobHistoryPathString != null) {
        FileStatus[] jobHistoryFileStatuses =
            dfs.listStatus(new Path(jobHistoryPathString));
        jobFileFound = false;
        for (FileStatus jobHistoryFileStatus : jobHistoryFileStatuses) {
          //Look for the job id as a substring of the history file path.
          if (jobHistoryFileStatus.getPath().toString().
              contains(jobIDString)) {
            jobFileFound = true;
            break;
          }
        }
        Assert.assertTrue("jobFileFound is false", jobFileFound);
      }
      //Killing this job will happen only in the second iteration,
      //after all the verification needed for this testcase is
      //completed.
      if (testIterationLoop == 2) {
        rJob.killJob();
      }
      //Making sure that the job is complete.
      count = 0;
      while (jInfo != null && !jInfo.getStatus().isJobComplete()) {
        UtilsForTests.waitFor(10000);
        count++;
        jInfo = remoteJTClient.getJobInfo(rJob.getID());
        //Fail if the job has not completed within 100 seconds; this
        //avoids an infinite loop.
        if (count > 10) {
          Assert.fail("job has not reached completed state for more than " +
              "100 seconds. Failing at this point");
        }
      }
      //After checking for job completion, wait for 4 times the
      //retiredJobInterval for the job to go to the retired state.
      UtilsForTests.waitFor(retiredJobInterval * 4);
      FileStatus[] jobHistoryDoneFileStatuses =
          dfs.listStatus(new Path(jobHistoryDonePathString));
      checkJobHistoryFileInformation(jobHistoryDoneFileStatuses, jobIDString);
      Assert.assertTrue("jobFileFound is false. Job History " +
          "File is not found in the done directory",
          jobFileFound);
      //Both the job history file and the conf file are expected.
      Assert.assertEquals("Both the job related files are not found",
          2, fileCount);
    } while (testIterationLoop < 2);
  }
  /**
   * Verifies that when multiple instances of successful / failed jobs
   * are retired, the retired jobs are placed as per
   * mapred.job.tracker.history.completed.location.
   */
  @Test
  public void testRetiredMultipleJobsHistoryLocation() throws Exception {
    Configuration conf = new Configuration(cluster.getConf());
    JTProtocol remoteJTClient = cluster.getJTClient().getProxy();
    RunningJob[] rJobCollection = new RunningJob[4];
    JobID[] rJobIDCollection = new JobID[4];
    String jobHistoryDonePathString = null;
    JobInfo jInfo = null;
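    //Submit four sleep jobs, recording their handles and ids.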
    for (int noOfJobs = 0; noOfJobs < 4; noOfJobs++) {
      SleepJob job = new SleepJob();
      job.setConf(conf);
      conf = job.setupJobConf(5, 1, 100, 100, 100, 100);
      conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens",
          false);
      JobConf jconf = new JobConf(conf);
      jobHistoryDonePathString =
          jconf.get("mapred.job.tracker.history.completed.location");
      //Assert if jobHistoryDonePathString is null
      Assert.assertNotNull("mapred.job.tracker.history.completed.location "
          + "is null", jobHistoryDonePathString);
      LOG.info("jobHistoryDonePath location is :" +
          jobHistoryDonePathString);
      //Submitting the job
      RunningJob rJob = cluster.getJTClient().getClient().submitJob(jconf);
      JobID jobID = rJob.getID();
      rJobCollection[noOfJobs] = rJob;
      rJobIDCollection[noOfJobs] = jobID;
      jInfo = remoteJTClient.getJobInfo(jobID);
      LOG.info("jobIDString is :" + jobID.toString());
      //Assert if jobInfo is null
      Assert.assertNotNull("jobInfo is null", jInfo);
    }
    //Wait for each of the jobs to start running, fetching each
    //job's own info.
    for (int noOfJobs = 0; noOfJobs < 4; noOfJobs++) {
      jInfo = remoteJTClient.getJobInfo(rJobIDCollection[noOfJobs]);
      waitTillRunState(jInfo, rJobIDCollection[noOfJobs], remoteJTClient);
    }
    //Killing two of the jobs
    rJobCollection[0].killJob();
    rJobCollection[3].killJob();
    //Making sure that the jobs are complete.
    for (int noOfJobs = 0; noOfJobs < 4; noOfJobs++) {
      count = 0;
      jInfo = remoteJTClient.getJobInfo(rJobIDCollection[noOfJobs]);
      while (jInfo != null && !jInfo.getStatus().isJobComplete()) {
        UtilsForTests.waitFor(10000);
        count++;
        jInfo = remoteJTClient.getJobInfo(rJobIDCollection[noOfJobs]);
        //Fail if the job has not completed within 200 seconds; this
        //avoids an infinite loop.
        if (count > 20) {
          Assert.fail("job has not reached completed state for more than " +
              "200 seconds. Failing at this point");
        }
      }
    }
    //After checking for job completion, wait for 4 times the
    //retiredJobInterval for the jobs to go to the retired state.
    //Read the interval here too, since test execution order is not
    //guaranteed.
    retiredJobInterval =
        conf.getInt("mapred.jobtracker.retirejob.check", 60000);
    UtilsForTests.waitFor(retiredJobInterval * 4);
    FileStatus[] jobHistoryDoneFileStatuses =
        dfs.listStatus(new Path(jobHistoryDonePathString));
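    //Each job, including the killed ones, should have left both of
    //its files in the done directory.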
    for (int noOfJobs = 0; noOfJobs < 4; noOfJobs++) {
      checkJobHistoryFileInformation(jobHistoryDoneFileStatuses,
          rJobIDCollection[noOfJobs].toString());
      Assert.assertTrue("jobFileFound is false. Job History " +
          "File is not found in the done directory",
          jobFileFound);
      Assert.assertEquals("Both the job related files are not found",
          2, fileCount);
    }
  }
  /**
   * Waits till the job identified by jobID starts running.
   */
  private void waitTillRunState(JobInfo jInfo, JobID jobID,
      JTProtocol remoteJTClient) throws Exception {
    int count = 0;
    while (jInfo != null && jInfo.getStatus().getRunState()
        != JobStatus.RUNNING) {
      UtilsForTests.waitFor(10000);
      count++;
      jInfo = remoteJTClient.getJobInfo(jobID);
      //Fail if the job has not started running within 100 seconds;
      //this avoids an infinite loop.
      if (count > 10) {
        Assert.fail("job has not reached running state for more than " +
            "100 seconds. Failing at this point");
      }
    }
  }
  /**
   * Checks for the job file information in the done directory.
   * Since the done directory is organized into sub directories, all
   * of them are searched recursively. Both the job history file and
   * the conf file are expected, so fileCount should reach 2.
   */
  private void checkJobHistoryFileInformation(FileStatus[]
      jobHistoryDoneFileStatuses, String jobIDString) throws Exception {
    fileCount = 0;
    jobFileFound = false;
    searchDoneDirectory(jobHistoryDoneFileStatuses, jobIDString);
  }

  //Recursively walks the done directory tree, counting the files
  //whose paths contain the given job id.
  private void searchDoneDirectory(FileStatus[] statuses,
      String jobIDString) throws Exception {
    for (FileStatus status : statuses) {
      if (fileCount == 2) {
        //Both the conf file and the job history file have been found.
        return;
      }
      if (status.isDir()) {
        searchDoneDirectory(dfs.listStatus(status.getPath()), jobIDString);
      } else if (status.getPath().toString().contains(jobIDString)) {
        jobFileFound = true;
        fileCount++;
      }
    }
  }
}