/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.util.Properties;
import org.apache.hadoop.mapred.ControlledMapReduceJob.ControlledMapReduceJobRunner;
/**
* End-to-end tests, based on MiniMRCluster, that verify that queue capacities
* are honored. They automate the queue-capacity scenarios: jobs are submitted
* to different queues simultaneously and the tests ensure that the configured
* capacities are honored.
*/
public class TestQueueCapacities extends ClusterWithCapacityScheduler {
/**
 * Exercises a single queue that is guaranteed the whole cluster.
 *
 * <p>
 *
 * A job with 16 maps and 16 reduces is submitted to a cluster offering 12 map
 * and 12 reduce slots. The queue is expected to keep every slot busy while
 * the surplus tasks wait. Tasks are then released in batches, and the running
 * and finished counts are checked after each batch, first for the maps and
 * then for the reduces.
 *
 * @throws Exception if the cluster cannot be started or a wait/assert step
 *           fails
 */
public void testSingleQueue()
    throws Exception {
  final boolean maps = true;
  final boolean reduces = false;
  final int slotsPerTracker = 3;
  final int clusterSlots = 12;
  final int tasksPerPhase = 16;

  Properties clusterConfig = new Properties();
  clusterConfig.setProperty("mapred.tasktracker.map.tasks.maximum",
      Integer.toString(slotsPerTracker));
  clusterConfig.setProperty("mapred.tasktracker.reduce.tasks.maximum",
      Integer.toString(slotsPerTracker));
  Properties schedulerConfig = new Properties();
  schedulerConfig.setProperty(
      "mapred.capacity-scheduler.queue.default.guaranteed-capacity", "100");
  // 4 trackers with 3 slots each: cluster capacity 12 maps, 12 reduces
  startCluster(4, clusterConfig, schedulerConfig);

  ControlledMapReduceJobRunner runner =
      ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(
          getJobConf(), tasksPerPhase, tasksPerPhase);
  runner.start();
  ControlledMapReduceJob job = runner.getJob();
  JobID jobId = runner.getJobID();
  JobInProgress jip = getJobTracker().getJob(jobId);

  // ---- Map phase ----
  ControlledMapReduceJob.waitTillNTasksStartRunning(jip, maps, clusterSlots);
  // Reaching steady state confirms that the remaining maps are not running
  // but waiting for slots to be freed.
  waitTillAllSlotsAreOccupied(maps);

  // Two identical rounds: release 2 maps, then expect the freed slots to be
  // refilled so that 12 maps are running again.
  final String[] mapRoundLogs =
      { "Trying to finish 2 maps", "Trying to finish 2 more maps" };
  int mapsDone = 0;
  for (int round = 0; round < mapRoundLogs.length; round++) {
    LOG.info(mapRoundLogs[round]);
    job.finishNTasks(maps, 2);
    mapsDone += 2;
    ControlledMapReduceJob.waitTillNTotalTasksFinish(jip, maps, mapsDone);
    assertTrue("Number of maps finished", jip.finishedMaps() == mapsDone);
    ControlledMapReduceJob.waitTillNTasksStartRunning(jip, maps,
        clusterSlots);
    waitTillAllSlotsAreOccupied(maps);
  }

  LOG.info("Trying to finish the last 12 maps");
  job.finishNTasks(maps, clusterSlots);
  ControlledMapReduceJob.waitTillNTotalTasksFinish(jip, maps, tasksPerPhase);
  assertTrue("Number of maps finished",
      jip.finishedMaps() == tasksPerPhase);
  ControlledMapReduceJob.waitTillNTasksStartRunning(jip, maps, 0);
  // NOTE(review): whatever haveAllTasksFinished reports is not checked here;
  // confirm whether an assertion was intended.
  ControlledMapReduceJob.haveAllTasksFinished(jip, maps);

  // ---- Reduce phase ----
  ControlledMapReduceJob.waitTillNTasksStartRunning(jip, reduces,
      clusterSlots);
  waitTillAllSlotsAreOccupied(reduces);

  LOG.info("Trying to finish 4 reduces");
  job.finishNTasks(reduces, 4);
  ControlledMapReduceJob.waitTillNTotalTasksFinish(jip, reduces, 4);
  assertTrue("Number of reduces finished", jip.finishedReduces() == 4);
  ControlledMapReduceJob.waitTillNTasksStartRunning(jip, reduces,
      clusterSlots);
  waitTillAllSlotsAreOccupied(reduces);

  LOG.info("Trying to finish the last 12 reduces");
  job.finishNTasks(reduces, clusterSlots);
  ControlledMapReduceJob.waitTillNTotalTasksFinish(jip, reduces,
      tasksPerPhase);
  assertTrue("Number of reduces finished",
      jip.finishedReduces() == tasksPerPhase);
  ControlledMapReduceJob.waitTillNTasksStartRunning(jip, reduces, 0);
  // NOTE(review): result not checked here either; see the map phase above.
  ControlledMapReduceJob.haveAllTasksFinished(jip, reduces);

  runner.join();
}
/**
 * Exercises a single queue that is shared by several jobs.
 *
 * <p>
 *
 * Starts a map-only cluster (12 map slots, no reduce slots) whose default
 * queue is guaranteed the full capacity, then runs the two multi-job
 * scenarios back to back on that same cluster.
 *
 * @throws Exception if cluster start-up or either scenario fails
 */
public void testSingleQueueMultipleJobs()
    throws Exception {
  final int mapSlotsPerTracker = 3;
  final int reduceSlotsPerTracker = 0;

  Properties clusterConfig = new Properties();
  clusterConfig.setProperty("mapred.tasktracker.map.tasks.maximum",
      Integer.toString(mapSlotsPerTracker));
  clusterConfig.setProperty("mapred.tasktracker.reduce.tasks.maximum",
      Integer.toString(reduceSlotsPerTracker));

  Properties schedulerConfig = new Properties();
  schedulerConfig.setProperty(
      "mapred.capacity-scheduler.queue.default.guaranteed-capacity", "100");

  // 4 trackers with 3 map slots each: cluster capacity 12 maps, 0 reduces
  startCluster(4, clusterConfig, schedulerConfig);

  singleQMultipleJobs1();
  singleQMultipleJobs2();
}
/**
 * Exercises scheduling across multiple queues.
 *
 * <p>
 *
 * Four queues are configured (default, Q2, Q3 and Q4) with guaranteed
 * capacities of 10, 20, 30 and 40 respectively. Every queue gets a minimum
 * user limit of 100 and a reclaim time limit of 300 (presumably seconds,
 * i.e. 5 minutes; confirm against the scheduler configuration). No
 * priority-related property is set here. The scenarios run one user per
 * queue.
 *
 * @throws Exception if cluster start-up or either scenario fails
 */
public void testMultipleQueues()
    throws Exception {
  String[] queues = new String[] { "default", "Q2", "Q3", "Q4" };

  Properties schedulerConfig = new Properties();
  StringBuilder queueNames = new StringBuilder();
  for (int i = 0; i < queues.length; i++) {
    String queue = queues[i];
    // Capacities grow in steps of 10: 10, 20, 30, 40.
    int guaranteedCapacity = 10 * (i + 1);
    schedulerConfig.put(
        CapacitySchedulerConf.toFullPropertyName(queue,
            "guaranteed-capacity"),
        String.valueOf(guaranteedCapacity)); // TODO: use strings
    schedulerConfig.put(
        CapacitySchedulerConf.toFullPropertyName(queue,
            "minimum-user-limit-percent"),
        String.valueOf(100));
    schedulerConfig.put(
        CapacitySchedulerConf.toFullPropertyName(queue,
            "reclaim-time-limit"),
        String.valueOf(300));

    // Accumulate the comma-separated queue list for the cluster config.
    if (i > 0) {
      queueNames.append(",");
    }
    queueNames.append(queue);
  }

  Properties clusterConfig = new Properties();
  clusterConfig.put("mapred.tasktracker.map.tasks.maximum",
      String.valueOf(2));
  clusterConfig.put("mapred.tasktracker.reduce.tasks.maximum",
      String.valueOf(2));
  clusterConfig.put("mapred.queue.names", queueNames.toString());

  // 5 trackers with 2 slots each: cluster capacity 10 maps, 10 reduces, and
  // 4 queues with capacities of 1, 2, 3 and 4 slots respectively.
  startCluster(5, clusterConfig, schedulerConfig);

  multipleQsWithOneQBeyondCapacity(queues);
  multipleQueuesWithinCapacities(queues);
}
/**
 * Scenario: a first job larger than the queue, followed by a second job.
 *
 * <p>
 *
 * The first job (16 maps) occupies all 12 map slots. The second job
 * (12 maps) is started while the cluster is full, so its tasks have to wait.
 * Only once the first job has no pending maps left do the slots it frees go
 * to the second job, which eventually runs on all 12 slots.
 *
 * @throws Exception if any wait/assert step fails
 */
private void singleQMultipleJobs1()
    throws Exception {
  final boolean maps = true;
  final int clusterSlots = 12;

  ControlledMapReduceJobRunner firstRunner =
      ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(
          getJobConf(), 16, 0);
  ControlledMapReduceJobRunner secondRunner =
      ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(
          getJobConf(), 12, 0);

  firstRunner.start();
  ControlledMapReduceJob firstJob = firstRunner.getJob();
  JobID firstId = firstRunner.getJobID();
  JobInProgress firstJip = getJobTracker().getJob(firstId);
  ControlledMapReduceJob.waitTillNTasksStartRunning(firstJip, maps,
      clusterSlots);
  // Steady state confirms that the remaining maps of the first job are
  // waiting for slots to be freed.
  waitTillAllSlotsAreOccupied(maps);

  // The second job is submitted while every slot is taken.
  secondRunner.start();
  JobID secondId = secondRunner.getJobID();
  ControlledMapReduceJob secondJob = secondRunner.getJob();
  JobInProgress secondJip = getJobTracker().getJob(secondId);

  // Two rounds in which the first job still has pending maps, so the slots
  // it frees are refilled by the first job itself.
  final String[] roundLogs =
      { "Trying to finish 2 map", "Trying to finish 2 more maps" };
  int firstJobDone = 0;
  for (int round = 0; round < roundLogs.length; round++) {
    LOG.info(roundLogs[round]);
    firstJob.finishNTasks(maps, 2);
    firstJobDone += 2;
    ControlledMapReduceJob.waitTillNTotalTasksFinish(firstJip, maps,
        firstJobDone);
    assertTrue("Number of maps finished",
        firstJip.finishedMaps() == firstJobDone);
    ControlledMapReduceJob.waitTillNTasksStartRunning(firstJip, maps,
        clusterSlots);
    waitTillAllSlotsAreOccupied(maps);
  }

  // Every map of the first job is now running or finished, so the next
  // freed slots should be picked up by the second job.
  LOG.info("Trying to finish 2 more maps");
  firstJob.finishNTasks(maps, 2);
  ControlledMapReduceJob.waitTillNTotalTasksFinish(firstJip, maps, 6);
  assertTrue("Number of maps finished", firstJip.finishedMaps() == 6);
  ControlledMapReduceJob.waitTillNTasksStartRunning(firstJip, maps, 10);
  ControlledMapReduceJob.waitTillNTasksStartRunning(secondJip, maps, 2);
  waitTillAllSlotsAreOccupied(maps);
  ControlledMapReduceJob.assertNumTasksRunning(firstJip, maps, 10);
  ControlledMapReduceJob.assertNumTasksRunning(secondJip, maps, 2);

  LOG.info("Trying to finish 10 more maps and hence job1");
  firstJob.finishNTasks(maps, 10);
  ControlledMapReduceJob.waitTillNTotalTasksFinish(firstJip, maps, 16);
  assertTrue("Number of maps finished", firstJip.finishedMaps() == 16);
  ControlledMapReduceJob.waitTillNTasksStartRunning(secondJip, maps,
      clusterSlots);
  firstJob.finishJob();
  waitTillAllSlotsAreOccupied(maps);
  ControlledMapReduceJob.assertNumTasksRunning(firstJip, maps, 0);
  ControlledMapReduceJob.assertNumTasksRunning(secondJip, maps,
      clusterSlots);

  // Let the second job run to completion as well.
  secondJob.finishJob();
  ControlledMapReduceJob.waitTillNTotalTasksFinish(secondJip, maps, 12);
  ControlledMapReduceJob.assertNumTasksRunning(secondJip, maps, 0);

  firstRunner.join();
  secondRunner.join();
}
/**
 * Scenario: a first job smaller than the queue, followed by a second job
 * larger than what is left over.
 *
 * <p>
 *
 * The first job (8 maps) takes the slots it needs. The second job (12 maps)
 * takes the 4 slots that remain while the rest of its maps wait. As maps of
 * the first job finish, the second job grows into the freed slots until it
 * occupies all 12.
 *
 * @throws Exception if any wait/assert step fails
 */
private void singleQMultipleJobs2()
    throws Exception {
  final boolean maps = true;
  final int firstJobMaps = 8;
  final int secondJobMaps = 12;

  ControlledMapReduceJobRunner firstRunner =
      ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(
          getJobConf(), firstJobMaps, 0);
  ControlledMapReduceJobRunner secondRunner =
      ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(
          getJobConf(), secondJobMaps, 0);

  firstRunner.start();
  ControlledMapReduceJob firstJob = firstRunner.getJob();
  JobID firstId = firstRunner.getJobID();
  JobInProgress firstJip = getJobTracker().getJob(firstId);
  ControlledMapReduceJob.waitTillNTasksStartRunning(firstJip, maps,
      firstJobMaps);
  ControlledMapReduceJob.assertNumTasksRunning(firstJip, maps,
      firstJobMaps);

  // Submit the second job; only 4 slots are left for it.
  secondRunner.start();
  JobID secondId = secondRunner.getJobID();
  ControlledMapReduceJob secondJob = secondRunner.getJob();
  JobInProgress secondJip = getJobTracker().getJob(secondId);
  ControlledMapReduceJob.waitTillNTasksStartRunning(secondJip, maps, 4);
  waitTillAllSlotsAreOccupied(maps);
  ControlledMapReduceJob.assertNumTasksRunning(firstJip, maps,
      firstJobMaps);
  // The remaining maps of the second job have to wait.
  ControlledMapReduceJob.assertNumTasksRunning(secondJip, maps, 4);

  // Free 2 slots of the first job: expect a 6/6 split afterwards.
  LOG.info("Trying to finish 2 maps of job1");
  firstJob.finishNTasks(maps, 2);
  ControlledMapReduceJob.waitTillNTotalTasksFinish(firstJip, maps, 2);
  assertTrue("Number of maps finished", firstJip.finishedMaps() == 2);
  ControlledMapReduceJob.waitTillNTasksStartRunning(firstJip, maps, 6);
  ControlledMapReduceJob.waitTillNTasksStartRunning(secondJip, maps, 6);
  waitTillAllSlotsAreOccupied(maps);
  ControlledMapReduceJob.assertNumTasksRunning(firstJip, maps, 6);
  ControlledMapReduceJob.assertNumTasksRunning(secondJip, maps, 6);

  // Finish the rest of the first job: the second job should own all slots.
  LOG.info("Trying to finish 6 more maps of job1");
  firstJob.finishNTasks(maps, 6);
  ControlledMapReduceJob.waitTillNTotalTasksFinish(firstJip, maps,
      firstJobMaps);
  assertTrue("Number of maps finished",
      firstJip.finishedMaps() == firstJobMaps);
  ControlledMapReduceJob.waitTillNTasksStartRunning(secondJip, maps,
      secondJobMaps);
  waitTillAllSlotsAreOccupied(maps);
  ControlledMapReduceJob.assertNumTasksRunning(firstJip, maps, 0);
  ControlledMapReduceJob.assertNumTasksRunning(secondJip, maps,
      secondJobMaps);

  // Let the second job run to completion as well.
  secondJob.finishJob();
  ControlledMapReduceJob.waitTillNTotalTasksFinish(secondJip, maps,
      secondJobMaps);
  ControlledMapReduceJob.assertNumTasksRunning(secondJip, maps, 0);

  firstRunner.join();
  secondRunner.join();
}
/**
 * Scenario: one queue running beyond its own capacity.
 *
 * <p>
 *
 * User U1 submits a 15-map job to the first queue. The job has more maps
 * than the 10 map slots this scenario expects to be occupied, so it should
 * keep all 10 busy (100% utilization) for as long as it has maps left to
 * schedule.
 *
 * @param queues names of the configured queues; only the first one is used
 * @throws Exception if any wait/assert step fails
 */
private void multipleQsWithOneQBeyondCapacity(String[] queues)
    throws Exception {
  final boolean maps = true;
  final int clusterSlots = 10;
  final int totalMaps = 15;

  JobConf jobConf = getJobConf();
  jobConf.setQueueName(queues[0]);
  jobConf.setUser("U1");

  ControlledMapReduceJobRunner runner =
      ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(jobConf,
          totalMaps, 0);
  runner.start();
  ControlledMapReduceJob job = runner.getJob();
  JobID jobId = runner.getJobID();
  JobInProgress jip = getJobTracker().getJob(jobId);

  ControlledMapReduceJob.waitTillNTasksStartRunning(jip, maps, clusterSlots);
  // Steady state confirms that the remaining maps are waiting for slots to
  // be freed.
  waitTillAllSlotsAreOccupied(maps);
  ControlledMapReduceJob.assertNumTasksRunning(jip, maps, clusterSlots);

  // Each round releases a batch of maps and expects the freed slots to be
  // refilled from the job's pending maps.
  final int[] batchSizes = { 3, 2 };
  final String[] roundLogs =
      { "Trying to finish 3 maps", "Trying to finish 2 more maps" };
  int mapsDone = 0;
  for (int round = 0; round < batchSizes.length; round++) {
    LOG.info(roundLogs[round]);
    job.finishNTasks(maps, batchSizes[round]);
    mapsDone += batchSizes[round];
    ControlledMapReduceJob.waitTillNTotalTasksFinish(jip, maps, mapsDone);
    assertTrue("Number of maps finished", jip.finishedMaps() == mapsDone);
    ControlledMapReduceJob.waitTillNTasksStartRunning(jip, maps,
        clusterSlots);
    waitTillAllSlotsAreOccupied(maps);
    ControlledMapReduceJob.assertNumTasksRunning(jip, maps, clusterSlots);
  }

  // Let the job run to completion.
  job.finishJob();
  ControlledMapReduceJob.waitTillNTotalTasksFinish(jip, maps, totalMaps);
  ControlledMapReduceJob.assertNumTasksRunning(jip, maps, 0);
  runner.join();
}
/**
 * Scenario: every queue filled exactly to its capacity.
 *
 * <p>
 *
 * One user per queue. Four jobs are submitted, one to each queue, where the
 * i-th job (counting from 1) has i maps and i reduces, so that together they
 * exactly fill the queues. No queue should go beyond its capacity and all
 * jobs should be running at the same time. This covers the code paths for job
 * initialization and for considering multiple queues when scheduling.
 *
 * @param queues names of the configured queues; the first four are used, in
 *          order of increasing capacity
 * @throws Exception if any wait/assert step fails
 */
private void multipleQueuesWithinCapacities(String[] queues)
    throws Exception {
  String[] users = new String[] { "U1", "U2", "U3", "U4" };
  final int numJobs = users.length;
  ControlledMapReduceJobRunner[] jobRunners =
      new ControlledMapReduceJobRunner[numJobs];
  ControlledMapReduceJob[] controlledJobs =
      new ControlledMapReduceJob[numJobs];
  JobInProgress[] jips = new JobInProgress[numJobs];

  // Submit the jobs one after the other; job i runs (i + 1) maps and
  // (i + 1) reduces in queue i on behalf of user i.
  for (int i = 0; i < numJobs; i++) {
    int numTasks = i + 1;
    // Each job gets its own conf carrying its queue and user, and that very
    // conf is handed to the runner. Configuring one conf and then passing a
    // separate getJobConf() result would leave the queue/user settings
    // unused by the submitted job.
    JobConf conf = getJobConf();
    conf.setQueueName(queues[i]);
    conf.setUser(users[i]);
    jobRunners[i] =
        ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(conf,
            numTasks, numTasks);
    jobRunners[i].start();
    controlledJobs[i] = jobRunners[i].getJob();
    JobID jobID = jobRunners[i].getJobID();
    jips[i] = getJobTracker().getJob(jobID);
    // Wait till this job is running all of its tasks before moving on.
    ControlledMapReduceJob.waitTillNTasksStartRunning(jips[i], true,
        numTasks);
    ControlledMapReduceJob.waitTillNTasksStartRunning(jips[i], false,
        numTasks);
  }

  // Ensure steady state behavior
  waitTillAllSlotsAreOccupied(true);
  waitTillAllSlotsAreOccupied(false);
  for (int i = 0; i < numJobs; i++) {
    int numTasks = i + 1;
    ControlledMapReduceJob.assertNumTasksRunning(jips[i], true, numTasks);
    ControlledMapReduceJob.assertNumTasksRunning(jips[i], false, numTasks);
  }

  // Finish the jobs and join them
  for (int i = 0; i < numJobs; i++) {
    int numTasks = i + 1;
    controlledJobs[i].finishJob();
    ControlledMapReduceJob
        .waitTillNTotalTasksFinish(jips[i], true, numTasks);
    ControlledMapReduceJob.assertNumTasksRunning(jips[i], true, 0);
    ControlledMapReduceJob.waitTillNTotalTasksFinish(jips[i], false,
        numTasks);
    ControlledMapReduceJob.assertNumTasksRunning(jips[i], false, 0);
    jobRunners[i].join();
  }
}
}