/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import java.util.Random;
import java.util.Collections;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.corona.InetAddress;
/**
* Sends actions to Corona Task Trackers.
* There are several threads used for sending the actions so that a single
* dead task tracker does not block all actions. To preserve the order of
* operations on a single task (like sending KillTaskAction after
* LaunchTaskAction), the actions are queued to the same queue.
* We do not need to preserve the order of task-level
* actions and the KillJobAction, since once we send KillJobAction, the job
* tracker will shutdown anyway. So any actions sent after that will fail.
*/
public class CoronaTaskLauncher {
/** Logger. */
private static final Log LOG = LogFactory.getLog(CoronaTaskLauncher.class);
/** The workers that send actions to task trackers. */
private final ActionSender[] workers;
/** The pool of worker threads that send actions to task trackers. */
private final Thread[] workerThreads;
/** The per-tracker queues of pending ActionToSend, keyed by tracker host:port. */
private final WorkQueues allWorkQueues;
/** The Corona Job Tracker. */
private final CoronaJobTracker coronaJT;
/** The expiry logic. */
private final ExpireTasks expireTasks;
/**
 * Builds the launcher and immediately starts its daemon sender threads.
 * The number of threads is read from the configuration key
 * {@code mapred.corona.jobtracker.numtasklauncherthreads} (default 4).
 *
 * @param conf The configuration.
 * @param coronaJT The Corona Job Tracker.
 * @param expireTasks The expiry logic.
 */
CoronaTaskLauncher(
    Configuration conf,
    CoronaJobTracker coronaJT,
    ExpireTasks expireTasks) {
  this.coronaJT = coronaJT;
  this.expireTasks = expireTasks;
  this.allWorkQueues = new WorkQueues();

  final int poolSize =
      conf.getInt("mapred.corona.jobtracker.numtasklauncherthreads", 4);
  this.workers = new ActionSender[poolSize];
  this.workerThreads = new Thread[poolSize];

  for (int slot = 0; slot < poolSize; slot++) {
    ActionSender sender = new ActionSender(slot);
    Thread thread = new Thread(sender, "Task Launcher Thread #" + slot);
    thread.setDaemon(true);
    workers[slot] = sender;
    workerThreads[slot] = thread;
    // Start only after the worker is recorded in both arrays.
    thread.start();
  }
}
/**
 * Queues one KillJobAction per tracker so that every given tracker is told
 * to kill the job.
 *
 * @param jobId The job identifier.
 * @param allTrackers All trackers to send the kill to, mapped from tracker
 *        name to tracker address.
 */
@SuppressWarnings("deprecation")
public void killJob(JobID jobId, Map<String, InetAddress> allTrackers) {
  for (Map.Entry<String, InetAddress> tracker : allTrackers.entrySet()) {
    final String name = tracker.getKey();
    final InetAddress address = tracker.getValue();
    final String what = "KillJobAction " + jobId;
    allWorkQueues.enqueueAction(
        new ActionToSend(name, address, new KillJobAction(jobId), what));
    LOG.info("Queueing " + what + " to worker " +
        name + "(" + address.host + ":" + address.port + ")");
  }
}
/**
 * Queues every given kill-task action for delivery to a single tracker.
 * Each action is logged and then enqueued, in list order.
 *
 * @param trackerName The name of the tracker to send the kill actions to.
 * @param addr The address of the tracker to send the kill actions to.
 * @param killActions The kill actions to send.
 */
public void killTasks(
    String trackerName, InetAddress addr, List<KillTaskAction> killActions) {
  for (Iterator<KillTaskAction> it = killActions.iterator(); it.hasNext();) {
    final KillTaskAction kill = it.next();
    final String what = "KillTaskAction " + kill.getTaskID();
    LOG.info("Queueing " + what + " to worker " +
        trackerName + "(" + addr.host + ":" + addr.port + ")");
    ActionToSend pending = new ActionToSend(trackerName, addr, kill, what);
    allWorkQueues.enqueueAction(pending);
  }
}
/**
 * Enqueue a commit task action.
 * The action is logged and then queued for the given tracker.
 *
 * @param trackerName The name of the tracker to send the commit action to.
 * @param addr The address of the tracker to send the commit action to.
 * @param action The commit action to send.
 */
public void commitTask(
    String trackerName, InetAddress addr, CommitTaskAction action) {
  // The description travels with the action and is reused in later send and
  // failure logs, so it must name the real action type (it previously said
  // "KillTaskAction" for commit actions).
  String description = "CommitTaskAction " + action.getTaskID();
  LOG.info("Queueing " + description + " to worker " +
      trackerName + "(" + addr.host + ":" + addr.port + ")");
  allWorkQueues.enqueueAction(new ActionToSend(
      trackerName, addr, action, description));
}
/**
 * Drops a launch action that is still waiting in the work queues.
 * Delegates the search to the work queues.
 *
 * @param attempt The task attempt ID.
 * @return A boolean indicating if an enqueued action was removed.
 */
@SuppressWarnings("deprecation")
public boolean removeLaunchingTask(TaskAttemptID attempt) {
  final boolean removed = allWorkQueues.removeLaunchingTask(attempt);
  return removed;
}
/**
 * Wraps a task in a LaunchTaskAction carrying the current session
 * information and queues it for the given tracker.
 *
 * @param task The task to launch.
 * @param trackerName The name of the tracker to send the task to.
 * @param addr The address of the tracker to send the task to.
 */
public void launchTask(Task task, String trackerName, InetAddress addr) {
  CoronaSessionInfo sessionInfo = new CoronaSessionInfo(
      coronaJT.getSessionId(),
      coronaJT.getJobTrackerAddress(),
      coronaJT.getSecondaryTrackerAddress());
  LaunchTaskAction launch = new LaunchTaskAction(task, sessionInfo);
  String what = "LaunchTaskAction " + launch.getTask().getTaskID();
  // Build the queue entry before logging so its creation timestamp is taken
  // at the same point as before.
  ActionToSend pending = new ActionToSend(trackerName, addr, launch, what);

  StringBuilder message = new StringBuilder();
  message.append("Queueing ").append(what).append(" to worker ");
  message.append(trackerName);
  message.append("(").append(addr.host).append(":").append(addr.port);
  message.append(")");
  LOG.info(message.toString());

  allWorkQueues.enqueueAction(pending);
}
/**
* Represents an action to send to a task tracker.
*/
private class ActionToSend {
/** The host of the tracker. */
private final String trackerHost;
/** The name of the tracker. */
private final String trackerName;
/** The port of the tracker. */
private final int port;
/** The action to send. */
private final TaskTrackerAction ttAction;
/** Description for logging. */
private final String description;
/** Action creation time */
private final long ctime = System.currentTimeMillis();
/** key is used in the WorkQueues */
private String key;
/** Constructor
* @param trackerName The name of the tracker.
* @param addr The address of the tracker.
* @param action The action to send.
*/
private ActionToSend(String trackerName, InetAddress addr,
TaskTrackerAction action, String description) {
this.trackerName = trackerName;
this.trackerHost = addr.host;
this.port = addr.port;
this.ttAction = action;
this.description = description;
this.key = this.trackerHost + ":" + this.port;
}
}
private class TrackerQueue {
boolean beingProcessed = false;
List<ActionToSend> actionQueue = new ArrayList<ActionToSend>();;
}
private class WorkQueues {
private final Map<String, TrackerQueue> trackerQueueMap =
new HashMap<String, TrackerQueue>();
private Random randomGenerator = new Random();
/**
 * Searches every tracker queue for a not-yet-sent LaunchTaskAction of the
 * given attempt and removes the first match.
 *
 * @param attempt The task attempt ID.
 * @return A boolean indicating if a pending launch was removed.
 */
@SuppressWarnings("deprecation")
boolean removeLaunchingTask(TaskAttemptID attempt) {
  synchronized (trackerQueueMap) {
    for (TrackerQueue queue : trackerQueueMap.values()) {
      for (Iterator<ActionToSend> it = queue.actionQueue.iterator();
          it.hasNext();) {
        final TaskTrackerAction candidate = it.next().ttAction;
        if (!(candidate instanceof LaunchTaskAction)) {
          continue;
        }
        final LaunchTaskAction launch = (LaunchTaskAction) candidate;
        if (launch.getTask().getTaskID().equals(attempt)) {
          it.remove();
          return true;
        }
      }
    }
    return false;
  }
}
/**
 * Appends an action to the queue of its tracker, creating that queue on
 * first use, and wakes up one waiting worker.
 *
 * @param a The action.
 */
void enqueueAction(ActionToSend a) {
  synchronized (trackerQueueMap) {
    TrackerQueue queue = trackerQueueMap.get(a.key);
    if (queue == null) {
      // First action seen for this tracker key.
      queue = new TrackerQueue();
      trackerQueueMap.put(a.key, queue);
    }
    queue.actionQueue.add(a);
    trackerQueueMap.notify();
  }
}
/**
 * Hands the caller every pending action of one tracker whose queue is not
 * being processed by another worker.
 *
 * Blocks while no tracker queue exists. The scan then starts at a random
 * tracker so that no tracker is systematically favoured. When a queue is
 * chosen, its actions are moved into {@code actions}, the queue is emptied
 * and marked as being processed; the caller must later call
 * {@link #resetQueueFlag(String)} with the tracker key to release it.
 *
 * If no queue is eligible, this method waits for one notification and then
 * returns with {@code actions} unchanged, so callers must handle an empty
 * list and simply call again.
 *
 * @param id The id of the calling worker thread (currently unused).
 * @param actions Output list that receives the actions to send.
 * @throws InterruptedException If the thread is interrupted while waiting.
 */
void getQueue(int id, List<ActionToSend> actions) throws InterruptedException {
  synchronized (trackerQueueMap) {
    // Re-check the condition in a loop: Object.wait() may return without a
    // notify (spurious wakeup). With a plain "if" the map could still be
    // empty here and Random.nextInt(0) would throw and kill the worker.
    while (trackerQueueMap.isEmpty()) {
      trackerQueueMap.wait();
    }
    List<TrackerQueue> queues =
        new ArrayList<TrackerQueue>(trackerQueueMap.values());
    int numQueues = queues.size();
    // Start at a random position and visit every queue at most once.
    int index = randomGenerator.nextInt(numQueues);
    for (int checked = 0; checked < numQueues; checked++) {
      TrackerQueue queue = queues.get(index);
      if (!queue.beingProcessed && !queue.actionQueue.isEmpty()) {
        actions.addAll(queue.actionQueue);
        queue.actionQueue.clear();
        queue.beingProcessed = true;
        return;
      }
      index = (index + 1) % numQueues;
    }
    // Nothing eligible right now: sleep until an enqueue or a queue release
    // notifies us, then let the caller retry.
    trackerQueueMap.wait();
  }
}
/**
 * Marks the queue stored under the given key as no longer being processed
 * and wakes up one waiting worker. An unknown key only triggers the
 * wake-up.
 *
 * @param key The tracker key of the queue to release.
 */
void resetQueueFlag(String key) {
  synchronized (trackerQueueMap) {
    final TrackerQueue released = trackerQueueMap.get(key);
    if (released != null) {
      released.beingProcessed = false;
    }
    trackerQueueMap.notify();
  }
}
}
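/**
* A worker that sends actions to trackers. On each pass it takes every
* pending action of one tracker from the shared work queues and submits them
* to that tracker in a single call; a tracker's queue is handed to only one
* worker at a time.
*/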
private class ActionSender implements Runnable {
/** The worker identifier. */
private final int id;
private String lastKey = null;
/**
 * Creates a worker.
 * @param id The identifier of the worker; it appears in this worker's log
 *           messages and is passed to the work queues when fetching work.
 */
public ActionSender(int id) {
this.id = id;
}
/**
 * Worker loop: repeatedly sends one batch of actions and afterwards
 * releases the tracker queue that the batch was taken from, if any.
 * The loop never terminates on its own.
 */
@Override
public void run() {
  LOG.info("Starting TaskLauncher thread#" + id);
  for (;;) {
    // Cleared per pass; launchTasks() sets it once a batch has been taken.
    lastKey = null;
    try {
      launchTasks();
    } catch (InterruptedException e) {
      // Ignore, these are daemon threads.
      if (LOG.isDebugEnabled()) {
        LOG.debug("Got InterruptedException while launching a task", e);
      }
    } finally {
      final String processedKey = lastKey;
      if (processedKey != null) {
        allWorkQueues.resetQueueFlag(processedKey);
      }
    }
  }
}
/**
 * Takes one tracker's batch of pending actions from the work queues and
 * submits the whole batch to that tracker in a single call.
 *
 * If the tracker is reported faulty, nothing is sent and every action is
 * handed to the job tracker's launch-error handling. If sending fails with
 * an IOException, each action is logged and handed to the same error
 * handling. Actions that took longer than 500 msec from creation to this
 * point are logged together with timing details.
 *
 * @throws InterruptedException If interrupted while waiting for work.
 */
private void launchTasks() throws InterruptedException {
  final List<ActionToSend> batch = new ArrayList<ActionToSend>();
  allWorkQueues.getQueue(id, batch);
  if (batch.isEmpty()) {
    return;
  }

  final long startMillis = System.currentTimeMillis();
  // All actions in a batch come from one tracker queue, so the first entry
  // identifies the destination.
  final ActionToSend head = batch.get(0);
  final String trackerName = head.trackerName;
  final String host = head.trackerHost;
  final int port = head.port;
  // Remember the queue key so that run() can release the queue afterwards.
  lastKey = head.key;

  if (coronaJT.getTrackerStats().isFaulty(trackerName)) {
    for (ActionToSend skipped : batch) {
      LOG.warn("Not sending " + skipped.description + " to " +
          skipped.trackerHost + ":" + skipped.port +
          " since previous communication failed");
      coronaJT.processTaskLaunchError(skipped.ttAction);
    }
    return;
  }

  // Fill in the job tracker information.
  final CoronaSessionInfo sessionInfo = new CoronaSessionInfo(
      coronaJT.getSessionId(),
      coronaJT.getJobTrackerAddress(),
      coronaJT.getSecondaryTrackerAddress());
  for (ActionToSend pending : batch) {
    pending.ttAction.setExtensible(sessionInfo);
  }

  // Get the tracker address.
  final String trackerRpcAddress = host + ":" + port;
  final long setupTime = System.currentTimeMillis();
  long expireTaskTime = 0;
  long getClientTime = 0;
  long submitActionTime = 0;
  try {
    // Start the expiry timer of every launch just before connecting and
    // making the RPC. Errors after this point reuse the error handling for
    // expired launch tasks.
    for (ActionToSend pending : batch) {
      if (pending.ttAction instanceof LaunchTaskAction) {
        LaunchTaskAction launch = (LaunchTaskAction) pending.ttAction;
        expireTasks.addNewTask(launch.getTask().getTaskID());
      }
    }

    final TaskTrackerAction[] payload = new TaskTrackerAction[batch.size()];
    for (int i = 0; i < payload.length; i++) {
      final ActionToSend pending = batch.get(i);
      assert(pending.trackerHost.equals(host) && pending.port == port);
      payload[i] = pending.ttAction;
    }
    expireTaskTime = System.currentTimeMillis();

    CoronaTaskTrackerProtocol client =
        coronaJT.getTaskTrackerClient(host, port);
    getClientTime = System.currentTimeMillis();

    client.submitActions(payload);
    submitActionTime = System.currentTimeMillis();
  } catch (IOException e) {
    for (ActionToSend failed : batch) {
      LOG.error("Could not send " + failed.description +
          " to " + trackerRpcAddress, e);
      coronaJT.resetTaskTrackerClient(failed.trackerHost, failed.port);
      coronaJT.getTrackerStats().recordConnectionError(trackerName);
      coronaJT.processTaskLaunchError(failed.ttAction);
    }
  }

  // The timing part of the slow-action message is the same for the whole
  // batch, so it is assembled once.
  final String timings = " msec after its creation. Times spent:" +
      " setupTime = " + (setupTime - startMillis) +
      " expireTaskTime = " + (expireTaskTime - startMillis) +
      " getClientTime = " + (getClientTime - startMillis) +
      " submitActionTime = " + (submitActionTime - startMillis);
  for (ActionToSend done : batch) {
    // Time from the action's creation until now.
    final long timeToSend = System.currentTimeMillis() - done.ctime;
    if (timeToSend > 500) {
      LOG.info("Thread " + id + " processed " + done.description + " for " +
          done.trackerName + " " + done.port + " " +
          timeToSend + timings);
    }
  }
}
} // Worker
}