/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.JobQueueJobInProgressListener.JobSchedulingInfo;
import org.apache.hadoop.util.StringUtils;
/**
* This class asynchronously initializes jobs submitted to the
* Map/Reduce cluster running with the {@link CapacityTaskScheduler}.
*
* <p>
* The class consists of a main poller thread, and a set of worker
* threads that together initialize the jobs. The poller thread periodically
* looks at jobs submitted to the scheduler, and selects a set of them
* to be initialized. It passes these to the worker threads for initializing.
* Each worker thread is configured to look at jobs submitted to a fixed
* set of queues. It initializes jobs in a round robin manner - selecting
* the first job in order from each queue ready to be initialized.
* </p>
*
* <p>
* An initialized job occupies memory resources on the Job Tracker. Hence,
* the poller limits the number of jobs initialized at any given time to
* a configured limit. The limit is specified per user per queue.
* </p>
*
* <p>
* However, since a job needs to be initialized before the scheduler can
* select tasks from it to run, the poller tries to keep a backlog of jobs
* initialized so the scheduler does not need to wait and let empty slots
* go to waste. The core logic of the poller is to pick up the right jobs,
* which have a good potential to be run next by the scheduler. To do this,
* it picks up jobs submitted across users and across queues to account
* both for guaranteed capacities and user limits. It also always initializes
* high priority jobs, whenever they need to be initialized, even if this
* means going over the limit for initialized jobs.
* </p>
*/
public class JobInitializationPoller extends Thread {
// Logger used by the poller and by its worker threads.
private static final Log LOG = LogFactory
.getLog(JobInitializationPoller.class.getName());
// Provides the queue names and the per-queue objects from which waiting
// jobs are read.
private JobQueuesManager jobQueueManager;
// Value passed to Thread.sleep() (i.e. milliseconds) between passes, both by
// the poller and by the worker threads; read from the configuration in init().
private long sleepInterval;
// Number of worker threads to create: the smaller of the configured maximum
// and the number of queues handed to init().
private int poolSize;
/**
* A worker thread that initializes jobs in one or more queues assigned to
* it.
*
* Jobs are initialized in a round robin fashion one from each queue at a
* time.
*/
class JobInitializationThread extends Thread {
  /** Job currently being passed to {@code ttm.initJob}; null when idle. */
  private JobInProgress initializingJob;

  /** Loop flag; cleared by {@link #terminate()} to stop the worker. */
  private volatile boolean startIniting;

  /** Number of jobs handed to this worker that are still waiting in a queue. */
  private final AtomicInteger currentJobCount = new AtomicInteger(0);

  /**
   * Jobs waiting to be initialized, grouped by queue name. Every per-queue
   * map is guarded by synchronizing on the map object itself.
   */
  private final Map<String, Map<JobSchedulingInfo, JobInProgress>> jobsPerQueue;

  public JobInitializationThread() {
    startIniting = true;
    jobsPerQueue =
        new ConcurrentHashMap<String, Map<JobSchedulingInfo, JobInProgress>>();
  }

  /**
   * Worker loop: initialize everything that is queued, sleep for the
   * configured interval, repeat until {@link #terminate()} is called.
   */
  @Override
  public void run() {
    while (startIniting) {
      initializeJobs();
      if (!startIniting) {
        break;
      }
      try {
        Thread.sleep(sleepInterval);
      } catch (InterruptedException ie) {
        // The poller interrupts workers on shutdown; the loop condition
        // re-checks startIniting. The interrupt flag is deliberately not
        // re-asserted: with the flag set, every later sleep() would return
        // immediately and this loop would spin.
        LOG.debug("Job initialization thread interrupted while sleeping");
      }
    }
  }

  /**
   * The key method that initializes jobs from queues, taking one job from
   * each queue in turn until no queued job is left or the worker is
   * terminated.
   *
   * This method is package-private to allow test cases to call it
   * synchronously in a controlled manner.
   */
  void initializeJobs() {
    // while there are more jobs to initialize...
    while (startIniting && currentJobCount.get() > 0) {
      for (String queue : jobsPerQueue.keySet()) {
        // Check before dequeuing so that a job is never removed from its
        // queue and then dropped because the worker was terminated.
        if (!startIniting) {
          return;
        }
        JobInProgress job = getFirstJobInQueue(queue);
        if (job == null) {
          continue;
        }
        LOG.info("Initializing job : " + job.getJobID() + " in Queue "
            + job.getProfile().getQueueName() + " For user : "
            + job.getProfile().getUser());
        setInitializingJob(job);
        try {
          ttm.initJob(job);
        } finally {
          // Never leave a stale "currently initializing" job behind, even
          // if initJob throws.
          setInitializingJob(null);
        }
      }
    }
  }

  /**
   * Removes and returns the first job of the given queue.
   *
   * @param queue
   *          queue name; must have been registered through
   *          {@link #addQueue(String)}
   * @return the first job in the queue's ordering, or null if the queue
   *         currently holds no job.
   */
  private JobInProgress getFirstJobInQueue(String queue) {
    Map<JobSchedulingInfo, JobInProgress> jobsList = jobsPerQueue.get(queue);
    synchronized (jobsList) {
      if (jobsList.isEmpty()) {
        return null;
      }
      Iterator<JobInProgress> jobIterator = jobsList.values().iterator();
      JobInProgress job = jobIterator.next();
      jobIterator.remove();
      currentJobCount.getAndDecrement();
      return job;
    }
  }

  /*
   * Test method to check if the thread is currently initialising the job
   */
  synchronized JobInProgress getInitializingJob() {
    return this.initializingJob;
  }

  synchronized void setInitializingJob(JobInProgress job) {
    this.initializingJob = job;
  }

  /** Asks the worker loop to stop; the caller interrupts a pending sleep. */
  void terminate() {
    startIniting = false;
  }

  /**
   * Queues a job for initialization by this worker.
   *
   * @param queue
   *          name of the queue the job belongs to
   * @param job
   *          job to initialize; it is ignored (with an error log) when the
   *          queue is not handled by this worker
   */
  void addJobsToQueue(String queue, JobInProgress job) {
    Map<JobSchedulingInfo, JobInProgress> jobs = jobsPerQueue.get(queue);
    if (jobs == null) {
      LOG.error("Invalid queue passed to the thread : " + queue
          + " For job :: " + job.getJobID());
      // Without this return the synchronized block below would throw a
      // NullPointerException.
      return;
    }
    synchronized (jobs) {
      JobSchedulingInfo schedInfo = new JobSchedulingInfo(job);
      // Count only genuinely new entries: if put() replaced an existing
      // key, the backlog did not grow, and an inflated counter would keep
      // initializeJobs() looping over empty queues forever.
      if (jobs.put(schedInfo, job) == null) {
        currentJobCount.getAndIncrement();
      }
    }
  }

  /**
   * Registers a queue with this worker, creating its (initially empty)
   * backlog ordered by the queue's own comparator.
   *
   * @param queueName
   *          name of the queue to serve
   */
  void addQueue(String queueName) {
    CapacitySchedulerQueue queue = jobQueueManager.getQueue(queueName);
    TreeMap<JobSchedulingInfo, JobInProgress> jobs =
        new TreeMap<JobSchedulingInfo, JobInProgress>(queue.getComparator());
    jobsPerQueue.put(queueName, jobs);
  }
}
/**
* Jobs which have been selected and passed to initialization threads,
* keyed by job ID. This is maintained so that the same job is not
* selected for initialization twice.
*/
private HashMap<JobID, JobInProgress> initializedJobs;
// Loop flag of the poller thread; cleared by terminate().
private volatile boolean running;
// Used by the worker threads to perform the actual initialization of a job.
private TaskTrackerManager ttm;
/**
* The map which provides information which thread should be used to
* initialize jobs for a given job queue. Several queue names may map to
* the same thread.
*/
private Map<String, JobInitializationThread> threadsToQueueMap;
/**
 * Creates the poller thread. No worker thread is created here; that
 * happens in {@code init}.
 *
 * @param mgr manager used to look up queues and their waiting jobs
 * @param rmConf not read by this constructor
 * @param queue not read by this constructor
 * @param ttm manager used by the worker threads to initialize jobs
 */
public JobInitializationPoller(JobQueuesManager mgr,
    CapacitySchedulerConf rmConf, Set<String> queue,
    TaskTrackerManager ttm) {
  setName("JobInitializationPollerThread");
  this.ttm = ttm;
  this.jobQueueManager = mgr;
  this.running = true;
  this.initializedJobs = new HashMap<JobID, JobInProgress>();
  this.threadsToQueueMap = Collections.synchronizedMap(
      new HashMap<String, JobInitializationThread>());
}
/**
 * Replaces the task tracker manager that the worker threads call to
 * initialize jobs.
 *
 * @param ttm the manager to use from now on
 */
void setTaskTrackerManager(TaskTrackerManager ttm) {
this.ttm = ttm;
}
/*
* method to read all configuration values required by the initialisation
* poller
*/
void init(int numQueues,
    CapacitySchedulerConf capacityConf) {
  this.sleepInterval = capacityConf.getSleepInterval();
  // Never plan for more workers than there are queues.
  int configuredWorkers = capacityConf.getMaxWorkerThreads();
  this.poolSize = Math.min(configuredWorkers, numQueues);
  assignThreadsToQueues();
  // One worker may serve several queues and therefore appear several times
  // in the map values; the isAlive() check keeps it from being started twice.
  for (JobInitializationThread worker : threadsToQueueMap.values()) {
    if (worker.isAlive()) {
      continue;
    }
    worker.setDaemon(true);
    worker.start();
  }
}
/**
 * Hands every queue that has no worker yet to one of the existing worker
 * threads, cycling through the workers that are known at the time of the
 * call. No new thread is created.
 *
 * NOTE(review): with no existing worker the modulo below divides by zero;
 * callers presumably invoke init() first - confirm.
 *
 * @param queues names of all queues that should be served after the call
 */
void reinit(Set<String> queues) {
  JobInitializationThread[] workers =
      threadsToQueueMap.values().toArray(new JobInitializationThread[0]);
  int next = 0;
  for (String candidate : queues) {
    if (threadsToQueueMap.containsKey(candidate)) {
      // Already served by some worker.
      continue;
    }
    JobInitializationThread worker = workers[next % workers.length];
    next++;
    worker.addQueue(candidate);
    threadsToQueueMap.put(candidate, worker);
  }
}
/**
* This is main thread of initialization poller, We essentially do
* following in the main threads:
*
* <ol>
* <li> Clean up the list of initialized jobs list which poller maintains
* </li>
* <li> Select jobs to initialize in the polling interval.</li>
* </ol>
*/
@Override
public void run() {
  while (running) {
    try {
      cleanUpInitializedJobsList();
      selectJobsToInitialize();
      // No isInterrupted() pre-check: that call does not clear the interrupt
      // flag, so an interrupt received outside of sleep() used to make this
      // loop skip the sleep on every pass and spin at full speed for as long
      // as 'running' stayed true. sleep() itself throws immediately, and
      // clears the flag, when an interrupt is pending.
      Thread.sleep(sleepInterval);
    } catch (InterruptedException e) {
      // The loop condition decides whether this was a shutdown request.
      LOG.error("Job Initialization poller interrupted"
          + StringUtils.stringifyException(e));
    }
  }
}
/**
* The key method which selects jobs to be initialized across
* queues and assigns those jobs to their appropriate init-worker threads.
* <br/>
* This method is overridden in the test case which is used to test the job
* initialization poller.
*
*/
void selectJobsToInitialize() {
  for (String queue : jobQueueManager.getAllQueues()) {
    // Resolve the worker first. Selecting jobs records them as initialized,
    // so selecting for a queue nobody serves would strand those jobs, and
    // dereferencing the missing worker would kill the poller thread with a
    // NullPointerException.
    JobInitializationThread t = threadsToQueueMap.get(queue);
    if (t == null) {
      LOG.warn("No initialization thread assigned to queue : " + queue
          + ", skipping job selection for it");
      continue;
    }
    ArrayList<JobInProgress> jobsToInitialize = getJobsToInitialize(queue);
    printJobs(jobsToInitialize);
    for (JobInProgress job : jobsToInitialize) {
      t.addJobsToQueue(queue, job);
    }
  }
}
/**
* Method used to print log statements about which jobs are being
* passed to init-threads.
*
* @param jobsToInitialize list of jobs which are passed to be
* init-threads.
*/
private void printJobs(ArrayList<JobInProgress> jobsToInitialize) {
  // One info line per job that is about to be handed to a worker.
  for (JobInProgress job : jobsToInitialize) {
    StringBuilder line = new StringBuilder();
    line.append("Passing to Initializer Job Id :").append(job.getJobID());
    line.append(" User: ").append(job.getProfile().getUser());
    line.append(" Queue : ").append(job.getProfile().getQueueName());
    LOG.info(line.toString());
  }
}
/**
* This method exists to be overridden by test cases that wish to
* create a test-friendly worker thread which can be controlled
* synchronously.
*
* @return Instance of worker init-threads.
*/
JobInitializationThread createJobInitializationThread() {
// Default factory: a plain worker thread that belongs to this poller
// instance (JobInitializationThread is a non-static inner class).
return new JobInitializationThread();
}
/**
* Method which is used by the poller to assign appropriate worker thread
* to a queue. The number of threads would be always less than or equal
* to number of queues in a system. If number of threads is configured to
* be more than number of queues then poller does not create threads more
* than number of queues.
*
*/
private void assignThreadsToQueues() {
  Collection<String> queueNames = jobQueueManager.getAllQueues();
  int countOfQueues = queueNames.size();
  if (countOfQueues == 0) {
    // Nothing to serve, so no worker is needed.
    return;
  }
  String[] queues = queueNames.toArray(new String[countOfQueues]);

  // Use between one worker and one worker per queue. A pool size of zero
  // used to divide by zero below, and a pool size larger than the number of
  // queues used to look up a worker for a queue that had none yet.
  int workerCount = Math.max(1, Math.min(poolSize, countOfQueues));
  JobInitializationThread[] workers = new JobInitializationThread[workerCount];

  // First give every worker an equally sized, contiguous batch of queues.
  int numberOfQueuesPerThread = countOfQueues / workerCount;
  int numberOfQueuesAssigned = 0;
  for (int i = 0; i < workerCount; i++) {
    workers[i] = createJobInitializationThread();
    for (int j = 0; j < numberOfQueuesPerThread; j++) {
      String queue = queues[numberOfQueuesAssigned++];
      workers[i].addQueue(queue);
      threadsToQueueMap.put(queue, workers[i]);
    }
  }

  // Then spread the leftover queues (fewer than workerCount of them) over
  // distinct workers, one each. Indexing the workers directly, instead of
  // looking them up through the first queues, avoids piling every leftover
  // queue onto the first worker when each worker owns more than one queue.
  for (int w = 0; numberOfQueuesAssigned < countOfQueues; w++) {
    String queue = queues[numberOfQueuesAssigned++];
    workers[w].addQueue(queue);
    threadsToQueueMap.put(queue, workers[w]);
  }
}
/**
*
* Method used to select jobs to be initialized for a given queue. <br/>
*
* We want to ensure that enough jobs have been initialized, so that when the
* Scheduler wants to consider a new job to run, it's ready. We clearly don't
* want to initialize too many jobs as each initialized job has a memory
* footprint, sometimes significant.
*
* Number of jobs to be initialized is restricted by two values: - Maximum
* number of users whose jobs we want to initialize, which is equal to
* the number of concurrent users the queue can support. - Maximum number
* of initialized jobs per user. The product of these two gives us the
* total number of initialized jobs.
*
* Note that this is a rough number, meant for decreasing extra memory
* footprint. It's OK if we go over it once in a while, if we have to.
*
* This can happen as follows. Suppose we have initialized 3 jobs for a
* user. Now, suppose the user submits a job who's priority is higher than
* that of the 3 jobs initialized. This job needs to be initialized, since it
* will run earlier than the 3 jobs. We'll now have 4 initialized jobs for the
* user. If memory becomes a problem, we should ideally un-initialize one of
* the 3 jobs, to keep the count of initialized jobs at 3, but that's
* something we don't do for now. This situation can also arise when a new
* user submits a high priority job, thus superceeding a user whose jobs have
* already been initialized. The latter user's initialized jobs are redundant,
* but we'll leave them initialized.
*
* @param queueName name of the queue to pick the jobs to initialize.
* @return list of jobs to be initialized in a queue. An empty list is
* returned if no jobs are found.
*/
ArrayList<JobInProgress> getJobsToInitialize(String queueName) {
  CapacitySchedulerQueue queue = jobQueueManager.getQueue(queueName);
  ArrayList<JobInProgress> selected = new ArrayList<JobInProgress>();
  // Users that were refused during this pass. Once a user is refused, all
  // of that user's later jobs are skipped too, so that the user's jobs are
  // never picked out of order.
  Set<String> blockedUsers = new HashSet<String>();

  // Walk the waiting jobs in the order the queue reports them.
  for (JobInProgress candidate : queue.getWaitingJobs()) {
    String owner = candidate.getProfile().getUser();

    // Already handed to a worker earlier: nothing to do.
    if (initializedJobs.containsKey(candidate.getJobID())) {
      continue;
    }

    // The queue refuses further initializations: stop the whole pass.
    if (!queue.initializeJobForQueue(candidate)) {
      break;
    }

    // The user was refused before, or is refused now: remember the user
    // and move on to the next job. The queue is only asked when the user
    // has not been refused yet.
    if (blockedUsers.contains(owner)
        || !queue.initializeJobForUser(candidate)) {
      blockedUsers.add(owner);
      continue;
    }

    // Double check that the job is still waiting (e.g. was not killed).
    if (candidate.getStatus().getRunState() != JobStatus.PREP) {
      continue;
    }

    // Ready to initialize: record it, select it and inform the queue.
    initializedJobs.put(candidate.getJobID(), candidate);
    selected.add(candidate);
    queue.addInitializingJob(candidate);
  }
  return selected;
}
/**
* Method which is used internally to clean up the initialized jobs
* data structure which the job initialization poller uses to check
* if a job is initialized or not.
*
* Algorithm for cleaning up task is as follows:
*
* <ul>
* <li> For jobs in <b>initalizedJobs</b> list </li>
* <ul>
* <li> If job is running</li>
* <ul>
* <li> If job is scheduled then remove the job from the waiting queue
* of the scheduler and <b>initializedJobs</b>.<br/>
* Whether a job is scheduled or not is decided by the following
* formula:<br/>
* if pending <i>task</i> &lt; desired <i>task</i> then scheduled else
* not scheduled.<br/>
* The formula returns <i>scheduled</i> once at least one task has run or
* failed; when there has been a failure that is not enough to mark the task
* as failed, the formula returns <i>not scheduled</i>.
* </li>
* </ul>
*
* <li> If job is complete, then remove the job from <b>initalizedJobs</b>.
* </li>
*
* </ul>
* </ul>
*
*/
void cleanUpInitializedJobsList() {
  for (Iterator<Entry<JobID, JobInProgress>> it =
      initializedJobs.entrySet().iterator(); it.hasNext();) {
    JobInProgress tracked = it.next().getValue();
    // A running job that already has tasks scheduled no longer needs to be
    // tracked here.
    boolean runningAndScheduled =
        tracked.getStatus().getRunState() == JobStatus.RUNNING
        && isScheduled(tracked);
    if (runningAndScheduled) {
      LOG.info("Removing scheduled jobs from waiting queue"
          + tracked.getJobID());
      it.remove();
    } else if (tracked.isComplete()) {
      // Neither does a job that has completed.
      LOG.info("Removing killed/completed job from initalized jobs " +
          "list : "+ tracked.getJobID());
      it.remove();
    }
  }
}
/**
* Convenience method to check if job has been scheduled or not.
*
* The method may return false in case of job which has failure but
* has not failed the tip.
* @param job
* @return
*/
private boolean isScheduled(JobInProgress job) {
  // Fewer pending than desired maps means that some map has been scheduled.
  if (job.pendingMaps() < job.desiredMaps()) {
    return true;
  }
  // Otherwise the same test on the reduce side decides.
  return job.pendingReduces() < job.desiredReduces();
}
/**
 * Stops the poller loop and asks every worker thread that is alive to
 * stop, interrupting it in case it is sleeping.
 */
void terminate() {
  running = false;
  // A worker serving several queues is visited once per queue; repeating
  // terminate()/interrupt() on it is harmless.
  for (JobInitializationThread worker : threadsToQueueMap.values()) {
    if (!worker.isAlive()) {
      continue;
    }
    worker.terminate();
    worker.interrupt();
  }
}
/*
* Test method used only for testing purposes.
*/
JobInProgress getInitializingJob(String queue) {
  // Null when the queue has no worker, or when its worker is idle.
  JobInitializationThread worker = threadsToQueueMap.get(queue);
  return (worker != null) ? worker.getInitializingJob() : null;
}
/**
 * Returns the IDs of the jobs currently tracked as passed to the
 * initialization threads.
 *
 * The result is the key-set view of the internal map, not a copy, so it
 * reflects later additions and removals.
 * NOTE(review): the backing map is a plain HashMap modified by the poller
 * thread; presumably this accessor is only used from tests - confirm
 * before calling it from another thread.
 *
 * @return live view of the tracked job IDs
 */
Set<JobID> getInitializedJobList() {
return initializedJobs.keySet();
}
}