/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred; import java.util.Collection; import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.Log; import org.apache.hadoop.mapreduce.test.system.JTProtocol; import org.apache.hadoop.mapreduce.test.system.TTClient; import org.apache.hadoop.mapreduce.test.system.JobInfo; import org.apache.hadoop.mapreduce.test.system.TaskInfo; import org.apache.hadoop.mapreduce.test.system.MRCluster; import org.apache.hadoop.mapreduce.test.system.FinishTaskControlAction; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.examples.SleepJob; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.AfterClass; import org.junit.Test; /** * This tests when successful and failed jobs are retired, * their jobInProgress object are removed properly. */ public class TestRetiredJobs { private static MRCluster cluster = null; private static JobClient jobClient = null; private static int retiredJobInterval = 0; static final Log LOG = LogFactory.getLog(TestRetiredJobs.class); public TestRetiredJobs() throws Exception { } @BeforeClass public static void setUp() throws Exception { cluster = MRCluster.createCluster(new Configuration()); cluster.setUp(); jobClient = cluster.getJTClient().getClient(); } @AfterClass public static void tearDown() throws Exception { cluster.tearDown(); } @Test /** * This tests when successful and failed jobs are retired, * their jobInProgress object are removed properly. * This is verified by checking whether getJobInfo * method returns a JobInfo object when running, * and whether getJobInfo method returns null * after job is retired. * @param none * @return void */ public void testRetiredJobsSuccessful() throws Exception { Configuration conf = new Configuration(cluster.getConf()); conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false); JTProtocol remoteJTClient = cluster.getJTClient().getProxy(); int testLoopCount = 0; //First run a successful job and verify if JobInProgress //object is removed by checking the getJobInfo. In the //second iteration, verify if a killed job JobInProgress //is removed. do { testLoopCount++; SleepJob job = new SleepJob(); job.setConf(conf); conf = job.setupJobConf(5, 1, 100, 100, 100, 100); //Get the value of mapred.jobtracker.retirejob.check. If not //found then use 60000 milliseconds, which is the application default. retiredJobInterval = conf.getInt("mapred.jobtracker.retirejob.check", 60000); //Assert if retiredJobInterval is 0 if ( retiredJobInterval == 0 ) { Assert.fail("mapred.jobtracker.retirejob.check is 0"); } JobConf jconf = new JobConf(conf); //Controls the job till all verification is done FinishTaskControlAction.configureControlActionForJob(conf); //Submitting the job RunningJob rJob = cluster.getJTClient().getClient().submitJob(jconf); JobID jobID = rJob.getID(); JobInfo jInfo = remoteJTClient.getJobInfo(jobID); LOG.info("jInfo is :" + jInfo); boolean jobStarted = cluster.getJTClient().isJobStarted(jobID); Assert.assertTrue("Job has not started even after a minute", jobStarted ); LOG.info("job id is :" + jobID.toString()); TaskInfo[] taskInfos = cluster.getJTClient().getProxy() .getTaskInfo(jobID); // getJobInfo method should // return a JobInProgress object when running, JobInfo jobInfo = cluster.getJTClient().getProxy() .getJobInfo(jobID); Assert.assertNotNull("The Job information is not present ", jobInfo); //Allow the job to continue through MR control job. for (TaskInfo taskInfoRemaining : taskInfos) { FinishTaskControlAction action = new FinishTaskControlAction(TaskID .downgrade(taskInfoRemaining.getTaskID())); Collection<TTClient> tts = cluster.getTTClients(); for (TTClient cli : tts) { cli.getProxy().sendAction(action); } } //Killing this job will happen only in the second iteration. if (testLoopCount == 2) { //Killing the job because all the verification needed //for this testcase is completed. rJob.killJob(); } //Making sure that the job is complete. int count = 0; while (jInfo != null && !jInfo.getStatus().isJobComplete()) { UtilsForTests.waitFor(10000); count++; jInfo = remoteJTClient.getJobInfo(rJob.getID()); //If the count goes more than 100 seconds, then fail; This is to //avoid infinite loop if (count > 10) { Assert.fail("Since the job has not completed even after" + " 100 seconds, failing at this point"); } } //Waiting for a specific period of time for retire thread //to be called taking into consideration the network issues if (retiredJobInterval > 40000) { UtilsForTests.waitFor(retiredJobInterval * 2); } else { UtilsForTests.waitFor(retiredJobInterval * 4); } jobInfo = null; // getJobInfo method should return null // after job is retired. JobInProgress // object should not be present. jobInfo = cluster.getJTClient().getProxy() .getJobInfo(jobID); Assert.assertNull("Job information is still available " + "after retirement of job ", jobInfo); } while (testLoopCount < 2); } }