/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig; import org.apache.hadoop.mapreduce.server.jobtracker.TaskTracker; /** * A {@link TaskScheduler} that limits the maximum number of tasks * running for a job. The limit is set by means of the * {@link JTConfig#JT_RUNNINGTASKS_PER_JOB} property. */ class LimitTasksPerJobTaskScheduler extends JobQueueTaskScheduler { private static final Log LOG = LogFactory.getLog( "org.apache.hadoop.mapred.TaskLimitedJobQueueTaskScheduler"); private long maxTasksPerJob; public LimitTasksPerJobTaskScheduler() { super(); } @Override public synchronized void start() throws IOException { super.start(); QueueManager queueManager = taskTrackerManager.getQueueManager(); String queueName = queueManager.getJobQueueInfos()[0].getQueueName(); queueManager.setSchedulerInfo(queueName ,"Maximum Tasks Per Job :: " + String.valueOf(maxTasksPerJob)); } @Override public synchronized void setConf(Configuration conf) { super.setConf(conf); maxTasksPerJob = conf.getLong(JTConfig.JT_RUNNINGTASKS_PER_JOB, Long.MAX_VALUE); if (maxTasksPerJob <= 0) { String msg = JTConfig.JT_RUNNINGTASKS_PER_JOB + " is set to zero or a negative value. Aborting."; LOG.fatal(msg); throw new RuntimeException (msg); } } @Override public synchronized List<Task> assignTasks(TaskTracker taskTracker) throws IOException { TaskTrackerStatus taskTrackerStatus = taskTracker.getStatus(); final int numTaskTrackers = taskTrackerManager.getClusterStatus().getTaskTrackers(); Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue(); Task task; /* Stats about the current taskTracker */ final int mapTasksNumber = taskTrackerStatus.countMapTasks(); final int reduceTasksNumber = taskTrackerStatus.countReduceTasks(); final int maximumMapTasksNumber = taskTrackerStatus.getMaxMapSlots(); final int maximumReduceTasksNumber = taskTrackerStatus.getMaxReduceSlots(); /* * Statistics about the whole cluster. Most are approximate because of * concurrency */ final int[] maxMapAndReduceLoad = getMaxMapAndReduceLoad( maximumMapTasksNumber, maximumReduceTasksNumber); final int maximumMapLoad = maxMapAndReduceLoad[0]; final int maximumReduceLoad = maxMapAndReduceLoad[1]; final int beginAtStep; /* * When step == 0, this loop starts as many map tasks it can wrt * maxTasksPerJob * When step == 1, this loop starts as many reduce tasks it can wrt * maxTasksPerJob * When step == 2, this loop starts as many map tasks it can * When step == 3, this loop starts as many reduce tasks it can * * It may seem that we would improve this loop by queuing jobs we cannot * start in steps 0 and 1 because of maxTasksPerJob, and using that queue * in step 2 and 3. * A first thing to notice is that the time with the current algorithm is * logarithmic, because it is the sum of (p^k) for k from 1 to N, were * N is the number of jobs and p is the probability for a job to not exceed * limits The probability for the cache to be useful would be similar to * p^N, that is 1/(e^N), whereas its size and the time spent to manage it * would be in ln(N). * So it is not a good idea. */ if (maxTasksPerJob != Long.MAX_VALUE) { beginAtStep = 0; } else { beginAtStep = 2; } List<Task> assignedTasks = new ArrayList<Task>(); scheduleTasks: for (int step = beginAtStep; step <= 3; ++step) { /* If we reached the maximum load for this step, go to the next */ if ((step == 0 || step == 2) && mapTasksNumber >= maximumMapLoad || (step == 1 || step == 3) && reduceTasksNumber >= maximumReduceLoad) { continue; } /* For each job, start its tasks */ synchronized (jobQueue) { for (JobInProgress job : jobQueue) { /* Ignore non running jobs */ if (job.getStatus().getRunState() != JobStatus.RUNNING) { continue; } /* Check that we're not exceeding the global limits */ if ((step == 0 || step == 1) && (job.runningMaps() + job.runningReduces() >= maxTasksPerJob)) { continue; } if (step == 0 || step == 2) { task = job.obtainNewMapTask(taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); } else { task = job.obtainNewReduceTask(taskTrackerStatus, numTaskTrackers, taskTrackerManager.getNumberOfUniqueHosts()); } if (task != null) { assignedTasks.add(task); break scheduleTasks; } } } } return assignedTasks; } /** * Determine the maximum number of maps or reduces that we are willing to run * on a taskTracker which accept a maximum of localMaxMapLoad maps and * localMaxReduceLoad reduces * @param localMaxMapLoad The local maximum number of map tasks for a host * @param localMaxReduceLoad The local maximum number of reduce tasks for a * host * @return An array of the two maximums: map then reduce. */ protected synchronized int[] getMaxMapAndReduceLoad(int localMaxMapLoad, int localMaxReduceLoad) { // Approximate because of concurrency final int numTaskTrackers = taskTrackerManager.getClusterStatus().getTaskTrackers(); /* Hold the result */ int maxMapLoad = 0; int maxReduceLoad = 0; int neededMaps = 0; int neededReduces = 0; Collection<JobInProgress> jobQueue = jobQueueJobInProgressListener.getJobQueue(); synchronized (jobQueue) { for (JobInProgress job : jobQueue) { if (job.getStatus().getRunState() == JobStatus.RUNNING) { neededMaps += job.desiredMaps() - job.finishedMaps(); neededReduces += job.desiredReduces() - job.finishedReduces(); } } } if (numTaskTrackers > 0) { maxMapLoad = Math.min(localMaxMapLoad, (int) Math .ceil((double) neededMaps / numTaskTrackers)); maxReduceLoad = Math.min(localMaxReduceLoad, (int) Math .ceil((double) neededReduces / numTaskTrackers)); } return new int[] { maxMapLoad, maxReduceLoad }; } }