/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.mapred.JobClient.RawSplit; import org.apache.hadoop.mapred.SortedRanges.Range; import org.apache.hadoop.mapred.TaskStatus.Phase; import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.net.Node; import org.apache.hadoop.util.StringUtils; /************************************************************* * TaskInProgress maintains all the info needed for a * Task in the lifetime of its owning Job. A given Task * might be speculatively executed or reexecuted, so we * need a level of indirection above the running-id itself. * <br> * A given TaskInProgress contains multiple taskids, * 0 or more of which might be executing at any one time. * (That's what allows speculative execution.) A taskid * is now *never* recycled. 
A TIP allocates enough taskids
 * to account for all the speculation and failures it will
 * ever have to handle. Once those are up, the TIP is dead.
 * **************************************************************
 */
class TaskInProgress {
  static final int MAX_TASK_EXECS = 1;
  // overridden from the job conf in setMaxTaskAttempts()
  int maxTaskAttempts = 4;
  // minimum time a task must run before it may be speculated
  long speculativeLag;
  double maxProgressRateForSpeculation;
  private boolean speculativeForced = false;
  private boolean useProcessingRateForSpeculation = false;
  // attempt ids are offset by this per jobtracker restart, to keep them unique
  private static final int NUM_ATTEMPTS_PER_RESTART = 1000;

  public static final Log LOG = LogFactory.getLog(TaskInProgress.class);

  // Defines the TIP
  private String jobFile = null;
  private RawSplit rawSplit;       // non-null iff this TIP is a map task
  private int numMaps;
  private int partition;
  private TaskID id;
  private JobInProgressTraits job;
  private final int numSlotsRequired;

  // Status of the TIP
  private int successEventNumber = -1;
  private int numTaskFailures = 0;
  private int numKilledTasks = 0;
  private double progress = 0;
  private double progressRate;
  private Phase processingPhase;
  private ProcessingRates processingRates = new ProcessingRates(0, 0, 0, 0);
  private String state = "";
  private long startTime = 0;
  private long lastDispatchTime = 0; // most recent time task given to TT
  private long execStartTime = 0;
  private long execFinishTime = 0;
  private int completes = 0;
  private boolean failed = false;
  private boolean killed = false;
  private long maxSkipRecords = 0;
  private FailedRanges failedRanges = new FailedRanges();
  private volatile boolean skipping = false;
  private boolean jobCleanup = false;
  private boolean jobSetup = false;

  // The 'next' usable taskid of this tip
  int nextTaskId = 0;

  // The taskid that took this TIP to SUCCESS
  private TaskAttemptID successfulTaskId;

  // The first taskid of this tip
  private TaskAttemptID firstTaskId;

  // The taskid of speculative task
  private TaskAttemptID speculativeTaskId;

  // Map from task Id -> TaskTracker Id, contains tasks that are
  // currently runnings
  private TreeMap<TaskAttemptID, String> activeTasks =
    new TreeMap<TaskAttemptID, String>();
  // All attempt Ids of this TIP
  private TreeSet<TaskAttemptID> tasks = new TreeSet<TaskAttemptID>();
  private JobConf conf;
  private Map<TaskAttemptID,List<String>> taskDiagnosticData =
    new TreeMap<TaskAttemptID,List<String>>();
  /**
   * Map from taskId -> TaskStatus
   */
  TreeMap<TaskAttemptID,TaskStatus> taskStatuses =
    new TreeMap<TaskAttemptID,TaskStatus>();

  // Map from taskId -> TaskTracker Id,
  // contains cleanup attempts and where they ran, if any
  private TreeMap<TaskAttemptID, String> cleanupTasks =
    new TreeMap<TaskAttemptID, String>();

  private TreeSet<String> machinesWhereFailed = new TreeSet<String>();
  private TreeSet<TaskAttemptID> tasksReportedClosed =
    new TreeSet<TaskAttemptID>();

  //list of tasks to kill, <taskid> -> <shouldFail>
  private TreeMap<TaskAttemptID, Boolean> tasksToKill =
    new TreeMap<TaskAttemptID, Boolean>();

  //task to commit, <taskattemptid>
  private TaskAttemptID taskToCommit;

  private volatile Counters counters = new Counters();

  // taskid -> wall-clock time the attempt was dispatched to a tracker
  private HashMap<TaskAttemptID, Long> dispatchTimeMap =
    new HashMap<TaskAttemptID, Long>();

  // Whether to use the input record processing rate when speculating maps
  // based on the processing rate. Has no effect if speculating based on the
  // progress rate.
  public static final String USE_MAP_RECORDS_PROCESSING_RATE =
    "mapreduce.job.speculative.use.map.record.rate";

  /**
   * Private helper class to pass around / store processing rates more easily
   */
  private final static class ProcessingRates {
    private double mapRate = 0;
    private double copyRate = 0;
    private double sortRate = 0;
    private double reduceRate = 0;

    public ProcessingRates(double mapRate, double copyRate, double sortRate,
        double reduceRate) {
      this.mapRate = mapRate;
      this.copyRate = copyRate;
      this.sortRate = sortRate;
      this.reduceRate = reduceRate;
    }

    /** Copy constructor. */
    public ProcessingRates(ProcessingRates p) {
      this.mapRate = p.mapRate;
      this.copyRate = p.copyRate;
      this.sortRate = p.sortRate;
      this.reduceRate = p.reduceRate;
    }

    /**
     * Return the stored rate for the given phase.
     * @throws RuntimeException for phases other than MAP/SHUFFLE/SORT/REDUCE
     */
    public double getRate(Phase p) {
      if (p == Phase.MAP) {
        return this.mapRate;
      } else if (p == Phase.SHUFFLE) {
        return this.copyRate;
      } else if (p == Phase.SORT) {
        return this.sortRate;
      } else if (p == Phase.REDUCE) {
        return this.reduceRate;
      } else {
        throw new RuntimeException("Invalid phase " + p);
      }
    }
  }

  /**
   * Constructor for MapTask
   */
  public TaskInProgress(JobID jobid, String jobFile, RawSplit rawSplit,
      JobConf conf, JobInProgressTraits job, int partition,
      int numSlotsRequired) {
    this.jobFile = jobFile;
    this.rawSplit = rawSplit;
    this.job = job;
    this.conf = conf;
    this.partition = partition;
    this.maxSkipRecords = SkipBadRecords.getMapperMaxSkipRecords(conf);
    this.numSlotsRequired = numSlotsRequired;
    setMaxTaskAttempts();
    init(jobid);
  }

  /**
   * Constructor for ReduceTask
   */
  public TaskInProgress(JobID jobid, String jobFile, int numMaps, int partition,
      JobConf conf, JobInProgressTraits job, int numSlotsRequired) {
    this.jobFile = jobFile;
    this.numMaps = numMaps;
    this.partition = partition;
    this.job = job;
    this.conf = conf;
    this.maxSkipRecords = SkipBadRecords.getReducerMaxSkipGroups(conf);
    this.numSlotsRequired = numSlotsRequired;
    setMaxTaskAttempts();
    init(jobid);
  }

  /**
   * Set the max number of attempts before we declare a TIP as "failed"
   */
  private void setMaxTaskAttempts() {
    if (isMapTask()) {
      this.maxTaskAttempts = conf.getMaxMapAttempts();
    } else {
      this.maxTaskAttempts = conf.getMaxReduceAttempts();
    }
  }

  /**
   * Return the index of the tip within the job, so
   * "task_200707121733_1313_0002_m_012345" would return 12345;
   * @return int the tip index
   */
  public int idWithinJob() {
    return partition;
  }

  public boolean isJobCleanupTask() {
    return jobCleanup;
  }

  public void setJobCleanupTask() {
    jobCleanup = true;
  }

  public boolean isJobSetupTask() {
    return jobSetup;
  }

  public void setJobSetupTask() {
    jobSetup = true;
  }

  // true if any attempt of this TIP is waiting to commit its output
  public boolean isOnlyCommitPending() {
    for (TaskStatus t : taskStatuses.values()) {
      if (t.getRunState() == TaskStatus.State.COMMIT_PENDING) {
        return true;
      }
    }
    return false;
  }

  public boolean isCommitPending(TaskAttemptID taskId) {
    TaskStatus t = taskStatuses.get(taskId);
    if (t == null) {
      return false;
    }
    return t.getRunState() == TaskStatus.State.COMMIT_PENDING;
  }

  /**
   * @return true if using processing rate to determine whether the task should
   * be speculated
   */
  public boolean isUsingProcessingRateForSpeculation() {
    return useProcessingRateForSpeculation;
  }

  /**
   * Initialization common to Map and Reduce
   */
  void init(JobID jobId) {
    this.startTime = JobTracker.getClock().getTime();
    this.id = new TaskID(jobId, isMapTask(), partition);
    this.skipping = startSkipping();
    long speculativeDuration;
    if (isMapTask()) {
      this.speculativeLag = conf.getMapSpeculativeLag();
      speculativeDuration = conf.getMapSpeculativeDuration();
    } else {
      this.speculativeLag = conf.getReduceSpeculativeLag();
      speculativeDuration = conf.getReduceSpeculativeDuration();
    }
    // speculate only if 1/(1000 * progress_rate) > speculativeDuration
    // ie. speculate only if progress_rate < 1/(1000 * speculativeDuration)
    if (speculativeDuration > 0) {
      this.maxProgressRateForSpeculation = 1.0/(1000.0*speculativeDuration);
    } else {
      // disable this check for durations <= 0
      this.maxProgressRateForSpeculation = -1.0;
    }
    this.useProcessingRateForSpeculation =
      conf.getBoolean("mapreduce.job.speculative.using.processing.rate", false);
  }

  ////////////////////////////////////
  // Accessors, info, profiles, etc.
  ////////////////////////////////////

  /**
   * Return the dispatch time
   */
  public long getDispatchTime(TaskAttemptID taskid){
    Long l = dispatchTimeMap.get(taskid);
    if (l != null) {
      return l.longValue();
    }
    // 0 means "never dispatched"
    return 0;
  }

  public long getLastDispatchTime(){
    return this.lastDispatchTime;
  }

  /**
   * Set the dispatch time
   */
  public void setDispatchTime(TaskAttemptID taskid, long disTime){
    dispatchTimeMap.put(taskid, disTime);
    this.lastDispatchTime = disTime;
  }

  /**
   * Return the start time
   */
  public long getStartTime() {
    return startTime;
  }

  /**
   * Return the exec start time
   */
  public long getExecStartTime() {
    return execStartTime;
  }

  /**
   * Set the exec start time
   */
  public void setExecStartTime(long startTime) {
    execStartTime = startTime;
  }

  /**
   * Return the exec finish time
   */
  public long getExecFinishTime() {
    return execFinishTime;
  }

  /**
   * Set the exec finish time
   */
  public void setExecFinishTime(long finishTime) {
    execFinishTime = finishTime;
    JobHistory.Task.logUpdates(id, execFinishTime); // log the update
  }

  /**
   * Return the parent job
   */
  public JobInProgressTraits getJob() {
    return job;
  }

  /**
   * Return an ID for this task, not its component taskid-threads
   */
  public TaskID getTIPId() {
    return this.id;
  }

  /**
   * Whether this is a map task
   */
  public boolean isMapTask() {
    // only map TIPs are constructed with a RawSplit
    return rawSplit != null;
  }

  /**
   * Returns the type of the {@link TaskAttemptID} passed.
   * The type of an attempt is determined by the nature of the task and not its
   * id.
   * For example,
   * - Attempt 'attempt_123_01_m_01_0' might be a job-setup task even though it
   *   has a _m_ in its id. Hence the task type of this attempt is JOB_SETUP
   *   instead of MAP.
   * - Similarly reduce attempt 'attempt_123_01_r_01_0' might have failed and is
   *   now supposed to do the task-level cleanup. In such a case this attempt
   *   will be of type TASK_CLEANUP instead of REDUCE.
   */
  TaskType getAttemptType (TaskAttemptID id) {
    if (isCleanupAttempt(id)) {
      return TaskType.TASK_CLEANUP;
    } else if (isJobSetupTask()) {
      return TaskType.JOB_SETUP;
    } else if (isJobCleanupTask()) {
      return TaskType.JOB_CLEANUP;
    } else if (isMapTask()) {
      return TaskType.MAP;
    } else {
      return TaskType.REDUCE;
    }
  }

  /**
   * Is the Task associated with taskid is the first attempt of the tip?
   * @param taskId
   * @return Returns true if the Task is the first attempt of the tip
   */
  public boolean isFirstAttempt(TaskAttemptID taskId) {
    return firstTaskId == null ? false : firstTaskId.equals(taskId);
  }

  /**
   * Is the Task associated with taskid is the speculative attempt of the tip?
   * @param taskId
   * @return Returns true if the Task is the speculative attempt of the tip
   */
  public boolean isSpeculativeAttempt(TaskAttemptID taskId) {
    return speculativeTaskId == null ? false : speculativeTaskId.equals(taskId);
  }

  /**
   * Is this tip currently running any tasks?
   * @return true if any tasks are running
   */
  public boolean isRunning() {
    return !activeTasks.isEmpty();
  }

  /**
   * Is this attempt currently running ?
   * @param taskId task attempt id.
   * @return true if attempt taskId is running
   */
  boolean isAttemptRunning(TaskAttemptID taskId) {
    return activeTasks.containsKey(taskId);
  }

  TaskAttemptID getSuccessfulTaskid() {
    return successfulTaskId;
  }

  private void setSuccessfulTaskid(TaskAttemptID successfulTaskId) {
    this.successfulTaskId = successfulTaskId;
  }

  private void resetSuccessfulTaskid() {
    this.successfulTaskId = null;
  }

  /**
   * Is this tip complete?
   *
   * @return <code>true</code> if the tip is complete, else <code>false</code>
   */
  public synchronized boolean isComplete() {
    return (completes > 0);
  }

  /**
   * Is the given taskid the one that took this tip to completion?
   *
   * @param taskid taskid of attempt to check for completion
   * @return <code>true</code> if taskid is complete, else <code>false</code>
   */
  public boolean isComplete(TaskAttemptID taskid) {
    return ((completes > 0) && taskid.equals(getSuccessfulTaskid()));
  }

  /**
   * Is the tip a failure?
   *
   * @return <code>true</code> if tip has failed, else <code>false</code>
   */
  public boolean isFailed() {
    return failed;
  }

  /**
   * Number of times the TaskInProgress has failed.
   */
  public int numTaskFailures() {
    return numTaskFailures;
  }

  /**
   * Number of times the TaskInProgress has been killed by the framework.
   */
  public int numKilledTasks() {
    return numKilledTasks;
  }

  /**
   * Get the overall progress (from 0 to 1.0) for this TIP
   */
  public double getProgress() {
    return progress;
  }

  /**
   * Get the last known progress rate for this task
   */
  public double getProgressRate() {
    return progressRate;
  }

  /**
   * Get the processing rate for this task (e.g.
bytes/ms in reduce) */ public double getProcessingRate(TaskStatus.Phase phase) { // we don't have processing rate information for the starting and cleaning // up phase if (phase != TaskStatus.Phase.MAP && phase != TaskStatus.Phase.SHUFFLE && phase != TaskStatus.Phase.SORT && phase != TaskStatus.Phase.REDUCE) { return 0; } return processingRates.getRate(getProcessingPhase()); } /** * Get the phase of processing */ public Phase getProcessingPhase() { return processingPhase; } /** * Get the task's counters */ public Counters getCounters() { return counters; } /** * Returns whether a component task-thread should be * closed because the containing JobInProgress has completed * or the task is killed by the user */ public boolean shouldClose(TaskAttemptID taskid) { /** * If the task hasn't been closed yet, and it belongs to a completed * TaskInProgress close it. * * However, for completed map tasks we do not close the task which * actually was the one responsible for _completing_ the TaskInProgress. */ if (tasksReportedClosed.contains(taskid)) { if (tasksToKill.keySet().contains(taskid)) return true; else return false; } boolean close = false; TaskStatus ts = taskStatuses.get(taskid); if ((ts != null) && ((this.failed) || ((job.getStatus().getRunState() != JobStatus.RUNNING && (job.getStatus().getRunState() != JobStatus.PREP))))) { tasksReportedClosed.add(taskid); close = true; } else if ((completes > 0) && // isComplete() is synchronized! !(isMapTask() && !jobSetup && !jobCleanup && isComplete(taskid))) { tasksReportedClosed.add(taskid); close = true; } else if (isCommitPending(taskid) && !shouldCommit(taskid)) { tasksReportedClosed.add(taskid); close = true; } else { close = tasksToKill.keySet().contains(taskid); } return close; } /** * Commit this task attempt for the tip. 
   * @param taskid
   */
  public void doCommit(TaskAttemptID taskid) {
    taskToCommit = taskid;
  }

  /**
   * Returns whether the task attempt should be committed or not
   */
  public boolean shouldCommit(TaskAttemptID taskid) {
    return !isComplete() && isCommitPending(taskid) &&
           taskToCommit.equals(taskid);
  }

  /**
   * Creates a "status report" for this task. Includes the
   * task ID and overall status, plus reports for all the
   * component task-threads that have ever been started.
   */
  synchronized TaskReport generateSingleReport() {
    ArrayList<String> diagnostics = new ArrayList<String>();
    for (List<String> l : taskDiagnosticData.values()) {
      diagnostics.addAll(l);
    }
    TIPStatus currentStatus = null;
    if (isRunning() && !isComplete()) {
      currentStatus = TIPStatus.RUNNING;
    } else if (isComplete()) {
      currentStatus = TIPStatus.COMPLETE;
    } else if (wasKilled()) {
      currentStatus = TIPStatus.KILLED;
    } else if (isFailed()) {
      currentStatus = TIPStatus.FAILED;
    } else if (!(isComplete() || isRunning() || wasKilled())) {
      currentStatus = TIPStatus.PENDING;
    }
    TaskReport report = new TaskReport
      (getTIPId(), (float)progress, state,
       diagnostics.toArray(new String[diagnostics.size()]),
       currentStatus, execStartTime, execFinishTime, counters);
    if (currentStatus == TIPStatus.RUNNING) {
      report.setRunningTaskAttempts(activeTasks.keySet());
    } else if (currentStatus == TIPStatus.COMPLETE) {
      report.setSuccessfulAttempt(getSuccessfulTaskid());
    }
    return report;
  }

  /**
   * Get the diagnostic messages for a given task within this tip.
   *
   * @param taskId the id of the required task
   * @return the list of diagnostics for that task
   */
  synchronized List<String> getDiagnosticInfo(TaskAttemptID taskId) {
    return taskDiagnosticData.get(taskId);
  }

  ////////////////////////////////////////////////
  // Update methods, usually invoked by the owning
  // job.
  ////////////////////////////////////////////////

  /**
   * Save diagnostic information for a given task.
   *
   * @param taskId id of the task
   * @param diagInfo diagnostic information for the task
   */
  public void addDiagnosticInfo(TaskAttemptID taskId, String diagInfo) {
    List<String> diagHistory = taskDiagnosticData.get(taskId);
    if (diagHistory == null) {
      diagHistory = new ArrayList<String>();
      taskDiagnosticData.put(taskId, diagHistory);
    }
    diagHistory.add(diagInfo);
  }

  /**
   * A status message from a client has arrived.
   * It updates the status of a single component-thread-task,
   * which might result in an overall TaskInProgress status update.
   * @return has the task changed its state noticeably?
   */
  synchronized boolean updateStatus(TaskStatus status) {
    TaskAttemptID taskid = status.getTaskID();
    String taskTracker = status.getTaskTracker();
    String diagInfo = status.getDiagnosticInfo();
    TaskStatus oldStatus = taskStatuses.get(taskid);
    boolean changed = true;
    if (diagInfo != null && diagInfo.length() > 0) {
      long runTime = status.getRunTime();
      LOG.info("Error from " + taskid + " on " + taskTracker +
          " runTime(msec) " + runTime + ": " + diagInfo);
      addDiagnosticInfo(taskid, diagInfo);
    }

    if(skipping) {
      failedRanges.updateState(status);
    }

    if (oldStatus != null) {
      TaskStatus.State oldState = oldStatus.getRunState();
      TaskStatus.State newState = status.getRunState();

      // We should never receive a duplicate success/failure/killed
      // status update for the same taskid! This is a safety check,
      // and is addressed better at the TaskTracker to ensure this.
      // @see {@link TaskTracker.transmitHeartbeat()}
      if ((newState != TaskStatus.State.RUNNING &&
           newState != TaskStatus.State.COMMIT_PENDING &&
           newState != TaskStatus.State.FAILED_UNCLEAN &&
           newState != TaskStatus.State.KILLED_UNCLEAN &&
           newState != TaskStatus.State.UNASSIGNED) &&
          (oldState == newState)) {
        LOG.warn("Recieved duplicate status update of '" + newState +
                 "' for '" + taskid + "' of TIP '" + getTIPId() + "'" +
                 "oldTT=" + oldStatus.getTaskTracker() +
                 " while newTT=" + status.getTaskTracker());
        return false;
      }

      // The task is not allowed to move from completed back to running.
      // We have seen out of order status messages moving tasks from complete
      // to running. This is a spot fix, but it should be addressed more
      // globally.
      if ((newState == TaskStatus.State.RUNNING ||
           newState == TaskStatus.State.UNASSIGNED) &&
          (oldState == TaskStatus.State.FAILED ||
           oldState == TaskStatus.State.KILLED ||
           oldState == TaskStatus.State.FAILED_UNCLEAN ||
           oldState == TaskStatus.State.KILLED_UNCLEAN ||
           oldState == TaskStatus.State.SUCCEEDED ||
           oldState == TaskStatus.State.COMMIT_PENDING)) {
        return false;
      }

      //Do not accept any status once the task is marked FAILED/KILLED
      //This is to handle the case of the JobTracker timing out a task
      //due to launch delay, but the TT comes back with any state or
      //TT got expired
      if (oldState == TaskStatus.State.FAILED ||
          oldState == TaskStatus.State.KILLED) {
        tasksToKill.put(taskid, true);
        return false;
      }

      changed = oldState != newState;
    }
    // if task is a cleanup attempt, do not replace the complete status,
    // update only specific fields.
    // For example, startTime should not be updated,
    // but finishTime has to be updated.
    if (!isCleanupAttempt(taskid)) {
      taskStatuses.put(taskid, status);
      //we don't want to include setup tasks in the task execution stats
      if (!isJobSetupTask() && !isJobCleanupTask() &&
          ((isMapTask() && job.hasSpeculativeMaps()) ||
           (!isMapTask() && job.hasSpeculativeReduces()))) {
        processingPhase = status.getPhase();
        // updateProgressRate/updateProcessingRate are defined elsewhere in
        // this class (outside this chunk)
        updateProgressRate(JobTracker.getClock().getTime());
        if (useProcessingRateForSpeculation) {
          updateProcessingRate(JobTracker.getClock().getTime());
        }
      }
    } else {
      taskStatuses.get(taskid).statusUpdate(status.getRunState(),
          status.getProgress(), status.getStateString(), status.getPhase(),
          status.getFinishTime());
    }

    // Recompute progress
    recomputeProgress();
    return changed;
  }

  /**
   * Indicate that one of the taskids in this TaskInProgress
   * has failed.
   */
  public void incompleteSubTask(TaskAttemptID taskid,
                                JobStatus jobStatus) {
    //
    // Note the failure and its location
    //
    TaskStatus status = taskStatuses.get(taskid);
    String trackerName;
    String trackerHostName = null;
    TaskStatus.State taskState = TaskStatus.State.FAILED;
    if (status != null) {
      trackerName = status.getTaskTracker();
      trackerHostName =
        JobInProgressTraits.convertTrackerNameToHostName(trackerName);
      // Check if the user manually KILLED/FAILED this task-attempt...
      Boolean shouldFail = tasksToKill.remove(taskid);
      if (shouldFail != null) {
        if (status.getRunState() == TaskStatus.State.FAILED ||
            status.getRunState() == TaskStatus.State.KILLED) {
          taskState = (shouldFail) ? TaskStatus.State.FAILED :
                                     TaskStatus.State.KILLED;
        } else {
          taskState = (shouldFail) ? TaskStatus.State.FAILED_UNCLEAN :
                                     TaskStatus.State.KILLED_UNCLEAN;
        }
        status.setRunState(taskState);
        addDiagnosticInfo(taskid, "Task has been " + taskState + " by the user" );
      }

      taskState = status.getRunState();
      // any other state at this point is unexpected; force it to FAILED
      if (taskState != TaskStatus.State.FAILED &&
          taskState != TaskStatus.State.KILLED &&
          taskState != TaskStatus.State.FAILED_UNCLEAN &&
          taskState != TaskStatus.State.KILLED_UNCLEAN) {
        LOG.info("Task '" + taskid + "' running on '" + trackerName +
                 "' in state: '" + taskState + "' being failed!");
        status.setRunState(TaskStatus.State.FAILED);
        taskState = TaskStatus.State.FAILED;
      }

      // tasktracker went down and failed time was not reported.
      if (0 == status.getFinishTime()){
        status.setFinishTime(JobTracker.getClock().getTime());
      }
    }

    this.activeTasks.remove(taskid);

    // Since we do not fail completed reduces (whose outputs go to hdfs), we
    // should note this failure only for completed maps, only if this taskid;
    // completed this map. however if the job is done, there is no need to
    // manipulate completed maps
    if (this.isMapTask() && !jobSetup && !jobCleanup && isComplete(taskid) &&
        jobStatus.getRunState() != JobStatus.SUCCEEDED) {
      this.completes--;
      // Reset the successfulTaskId since we don't have a SUCCESSFUL task now
      resetSuccessfulTaskid();
    }

    // Note that there can be failures of tasks that are hosted on a machine
    // that has not yet registered with restarted jobtracker
    // recalculate the counts only if its a genuine failure
    if (tasks.contains(taskid)) {
      if (taskState == TaskStatus.State.FAILED) {
        numTaskFailures++;
        machinesWhereFailed.add(trackerHostName);
        if(maxSkipRecords>0) {
          //skipping feature enabled
          LOG.debug("TaskInProgress adding" + status.getNextRecordRange());
          failedRanges.add(status.getNextRecordRange());
          skipping = startSkipping();
        }
      } else if (taskState == TaskStatus.State.KILLED) {
        numKilledTasks++;
      }
    }

    if (numTaskFailures >= maxTaskAttempts) {
      LOG.info("TaskInProgress " + getTIPId() + " has failed " +
          numTaskFailures + " times.");
      kill();
    }
  }

  /**
   * Get whether to start skipping mode.
   */
  private boolean startSkipping() {
    if(maxSkipRecords>0 &&
        numTaskFailures>=SkipBadRecords.getAttemptsToStartSkipping(conf)) {
      return true;
    }
    return false;
  }

  /**
   * Finalize the <b>completed</b> task; note that this might not be the first
   * task-attempt of the {@link TaskInProgress} and hence might be declared
   * {@link TaskStatus.State.SUCCEEDED} or {@link TaskStatus.State.KILLED}
   *
   * @param taskId id of the completed task-attempt
   * @param finalTaskState final {@link TaskStatus.State} of the task-attempt
   */
  private void completedTask(TaskAttemptID taskId,
                             TaskStatus.State finalTaskState) {
    TaskStatus status = taskStatuses.get(taskId);
    status.setRunState(finalTaskState);
    activeTasks.remove(taskId);
  }

  /**
   * Indicate that one of the taskids in this already-completed
   * TaskInProgress has successfully completed; hence we mark this
   * taskid as {@link TaskStatus.State.KILLED}.
   */
  void alreadyCompletedTask(TaskAttemptID taskid) {
    // 'KILL' the task
    completedTask(taskid, TaskStatus.State.KILLED);

    // Note the reason for the task being 'KILLED'
    addDiagnosticInfo(taskid, "Already completed TIP");

    LOG.info("Already complete TIP " + getTIPId() +
             " has completed task " + taskid);
  }

  /**
   * Indicate that one of the taskids in this TaskInProgress
   * has successfully completed!
   */
  public void completed(TaskAttemptID taskid) {
    //
    // Record that this taskid is complete
    //
    completedTask(taskid, TaskStatus.State.SUCCEEDED);

    // Note the successful taskid
    setSuccessfulTaskid(taskid);

    //
    // Now that the TIP is complete, the other speculative
    // subtasks will be closed when the owning tasktracker
    // reports in and calls shouldClose() on this object.
    //
    this.completes++;
    this.execFinishTime = JobTracker.getClock().getTime();
    recomputeProgress();
  }

  /**
   * Get the split locations
   */
  public String[] getSplitLocations() {
    if (isMapTask() && !jobSetup && !jobCleanup) {
      return rawSplit.getLocations();
    }
    return new String[0];
  }

  /**
   * Get the Status of the tasks managed by this TIP
   */
  public TaskStatus[] getTaskStatuses() {
    return taskStatuses.values().toArray(new TaskStatus[taskStatuses.size()]);
  }

  /**
   * Get all the {@link TaskAttemptID}s in this {@link TaskInProgress}
   */
  TaskAttemptID[] getAllTaskAttemptIDs() {
    return tasks.toArray(new TaskAttemptID[tasks.size()]);
  }

  /**
   * Get the status of the specified task
   * @param taskid
   * @return
   */
  public TaskStatus getTaskStatus(TaskAttemptID taskid) {
    return taskStatuses.get(taskid);
  }

  /**
   * The TIP's been ordered kill()ed.
   */
  public void kill() {
    if (isComplete() || failed) {
      return;
    }
    this.failed = true;
    killed = true;
    this.execFinishTime = JobTracker.getClock().getTime();
    recomputeProgress();
  }

  /**
   * Was the task killed?
   * @return true if the task killed
   */
  public boolean wasKilled() {
    return killed;
  }

  /**
   * Kill the given task
   */
  boolean killTask(TaskAttemptID taskId, boolean shouldFail,
                   String diagnosticInfo) {
    TaskStatus st = taskStatuses.get(taskId);
    if(st != null && (st.getRunState() == TaskStatus.State.RUNNING
        || st.getRunState() == TaskStatus.State.COMMIT_PENDING
        || st.inTaskCleanupPhase()
        || st.getRunState() == TaskStatus.State.UNASSIGNED)
        && tasksToKill.put(taskId, shouldFail) == null ) {
      addDiagnosticInfo(taskId, diagnosticInfo);
      LOG.info(diagnosticInfo);
      return true;
    }
    return false;
  }

  /**
   * This method is called whenever there's a status change
   * for one of the TIP's sub-tasks. It recomputes the overall
   * progress for the TIP. We examine all sub-tasks and find
   * the one that's most advanced (and non-failed).
   */
  void recomputeProgress() {
    if (isComplete()) {
      this.progress = 1;
      // update the counters and the state
      TaskStatus completedStatus = taskStatuses.get(getSuccessfulTaskid());
      this.counters = completedStatus.getCounters();
      this.state = completedStatus.getStateString();
    } else if (failed) {
      this.progress = 0;
      // reset the counters and the state
      this.state = "";
      this.counters = new Counters();
    } else {
      double bestProgress = 0;
      String bestState = "";
      Counters bestCounters = new Counters();
      for (Iterator<TaskAttemptID> it = taskStatuses.keySet().iterator();
           it.hasNext();) {
        TaskAttemptID taskid = it.next();
        TaskStatus status = taskStatuses.get(taskid);
        if (status.getRunState() == TaskStatus.State.SUCCEEDED) {
          bestProgress = 1;
          bestState = status.getStateString();
          bestCounters = status.getCounters();
          break;
        } else if (status.getRunState() == TaskStatus.State.COMMIT_PENDING) {
          //for COMMIT_PENDING, we take the last state that we recorded
          //when the task was RUNNING
          bestProgress = this.progress;
          bestState = this.state;
          bestCounters = this.counters;
        } else if (status.getRunState() == TaskStatus.State.RUNNING) {
          if (status.getProgress() >= bestProgress) {
            bestProgress = status.getProgress();
            bestState = status.getStateString();
            if (status.getIncludeCounters()) {
              bestCounters = status.getCounters();
            } else {
              bestCounters = this.counters;
            }
          }
        }
      }
      this.progress = bestProgress;
      this.state = bestState;
      this.counters = bestCounters;
    }
  }

  /////////////////////////////////////////////////
  // "Action" methods that actually require the TIP
  // to do something.
  /////////////////////////////////////////////////

  /**
   * Return whether this TIP still needs to run
   */
  boolean isRunnable() {
    return !failed && (completes == 0);
  }

  /**
   * Can this task be speculated? This requires that it isn't done or almost
   * done and that it isn't already being speculatively executed.
   *
   * Added for use by queue scheduling algorithms.
   * @param currentTime
   */
  boolean canBeSpeculated(long currentTime) {
    if (skipping || !isRunnable() || !isRunning() ||
        completes != 0 || isOnlyCommitPending() ||
        activeTasks.size() > MAX_TASK_EXECS) {
      if (isMapTask() ?
          job.shouldLogCannotspeculativeMaps() :
          job.shouldLogCannotspeculativeReduces()) {
        LOG.info("Task "+ getTIPId() + " cannot be speculated because of " +
            "skipping = "+ skipping + " isRunnable() = "+ isRunnable() +
            " isRunning() = "+ isRunning() + " completes = " + completes +
            " isOnlyCommitPending() = "+ isOnlyCommitPending() +
            " activetask-size = "+ activeTasks.size() +
            " MAX_TASK_EXECS = " + MAX_TASK_EXECS);
      }
      return false;
    }
    // forced speculation overrides all the heuristics below
    if (isSpeculativeForced()) {
      return true;
    }
    // no speculation for first few seconds
    if (currentTime - lastDispatchTime < speculativeLag) {
      if (isMapTask() ?
          job.shouldLogCannotspeculativeMaps() :
          job.shouldLogCannotspeculativeReduces()) {
        LOG.info("Task "+ getTIPId() + " cannot be speculated because of " +
            "no speculation for first few seconds");
      }
      return false;
    }
    // if the task is making progress fast enough to complete within
    // the acceptable duration allowed for each task - do not speculate
    if ((maxProgressRateForSpeculation > 0) &&
        (progressRate > maxProgressRateForSpeculation)) {
      if (isMapTask() ?
          job.shouldLogCannotspeculativeMaps() :
          job.shouldLogCannotspeculativeReduces()) {
        LOG.info("Task "+ getTIPId() +" cannot be speculated because " +
            "the task progress rate is fast enough to complete." +
            " maxProgressRateForSpeculation = " +
            maxProgressRateForSpeculation +
            " and progressRate = " + progressRate);
      }
      return false;
    }
    if (isMapTask() ?
        job.shouldSpeculateAllRemainingMaps() :
        job.shouldSpeculateAllRemainingReduces()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Speculate " + getTIPId() +
            " because the job is almost finished");
      }
      return true;
    }
    if (useProcessingRateForSpeculation) {
      return canBeSpeculatedUsingProcessingRate(currentTime);
    } else {
      return canBeSpeculatedUsingProgressRate(currentTime);
    }
  }

  boolean canBeSpeculatedUsingProgressRate(long currentTime) {
    DataStatistics taskStats = job.getRunningTaskStatistics(isMapTask());
    if (LOG.isDebugEnabled()) {
      LOG.debug("activeTasks.size(): " + activeTasks.size() + " " +
          activeTasks.firstKey() + " task's progressrate: " +
          progressRate +
          " taskStats : " + taskStats);
    }
    // Find if task should be speculated based on standard deviation
    // the max difference allowed between the tasks's progress rate
    // and the mean progress rate of sibling tasks.
    double maxDiff = (taskStats.std() == 0 ? taskStats.mean()/3 :
      job.getSlowTaskThreshold() * taskStats.std());
    // if stddev > mean - we are stuck. cap the max difference at a
    // more meaningful number.
    maxDiff = Math.min(maxDiff, taskStats.mean() * job.getStddevMeanRatioMax());
    boolean canBeSpeculated = (taskStats.mean() - progressRate > maxDiff);
    if (canBeSpeculated) {
      LOG.info("Task " + getTIPId() + " can be speculated with progressRate = " +
          progressRate + " and taskStats = " + taskStats);
    }else{
      if (isMapTask() ?
          job.shouldLogCannotspeculativeMaps() :
          job.shouldLogCannotspeculativeReduces()) {
        LOG.info("Task "+ getTIPId() +
            " cannot be speculated with progressRate = " +
            progressRate + " and taskStats = " + taskStats +
            " and maxDiff = "+ maxDiff);
      }
    }
    return canBeSpeculated;
  }

  /**
   * For the map task, using the bytes processed/sec as the processing rate
   * For the reduce task, using different rate for different phase:
   * copy: using the bytes copied/sec as the processing rate
   * sort: using the accumulated progress rate as the processing rate
   * reduce: using the the bytes processed/sec as the processing rate
   * @param currentTime
   * @return
   */
  boolean canBeSpeculatedUsingProcessingRate(long currentTime) {
    TaskStatus.Phase p = getProcessingPhase();
    // check if the task is on one of following four phases
    if ((p != TaskStatus.Phase.MAP) &&
        (p != TaskStatus.Phase.SHUFFLE) &&
        (p != TaskStatus.Phase.SORT) &&
        (p != TaskStatus.Phase.REDUCE)) {
      return false;
    }
    DataStatistics taskStats = job.getRunningTaskStatistics(p);
    if (LOG.isDebugEnabled()) {
      LOG.debug("TaskID: " + this.id + "processing phase is " + p +
          " and processing rate for this phase is " +
          getProcessingRate(p));
    }
    // Find if task should be speculated based on standard deviation
    // the max difference allowed between the tasks's progress rate
    // and the mean progress rate of sibling tasks.
    double maxDiff = (taskStats.std() == 0 ? taskStats.mean()/3 :
      job.getSlowTaskThreshold() * taskStats.std());
    // if stddev > mean - we are stuck. cap the max difference at a
    // more meaningful number.
    maxDiff = Math.min(maxDiff, taskStats.mean() * job.getStddevMeanRatioMax());
    return (taskStats.mean() - processingRates.getRate(p) > maxDiff);
  }

  /**
   * Return a Task that can be sent to a TaskTracker for execution.
   */
  public Task getTaskToRun(String taskTracker) {
    // Create the 'taskid'; do not count the 'killed' tasks against the job!
TaskAttemptID taskid = null;
if (nextTaskId < (MAX_TASK_EXECS + maxTaskAttempts + numKilledTasks)) {
  // Make sure that the attempts are unique across jobtracker restarts:
  // each restart shifts attempt ids by NUM_ATTEMPTS_PER_RESTART.
  int attemptId = job.getNumRestarts() * NUM_ATTEMPTS_PER_RESTART + nextTaskId;
  taskid = new TaskAttemptID( id, attemptId);
  ++nextTaskId;
} else {
  // Attempt budget exhausted - this TIP may not launch any more attempts.
  LOG.warn("Exceeded limit of " + (MAX_TASK_EXECS + maxTaskAttempts) +
      " (plus " + numKilledTasks + " killed)" +
      " attempts for the tip '" + getTIPId() + "'");
  return null;
}
//keep track of the last time we started an attempt at this TIP
//used to calculate the progress rate of this TIP
setDispatchTime(taskid, JobTracker.getClock().getTime());
if (0 == execStartTime){
  // assume task starts running now
  execStartTime = JobTracker.getClock().getTime();
}
return addRunningTask(taskid, taskTracker);
}

/**
 * Adds a regular (non-task-cleanup) running attempt to this tip.
 */
public Task addRunningTask(TaskAttemptID taskid, String taskTracker) {
  return addRunningTask(taskid, taskTracker, false);
}

/**
 * Adds a previously running task to this tip. This is used in case of
 * jobtracker restarts.
 */
public Task addRunningTask(TaskAttemptID taskid,
                           String taskTracker,
                           boolean taskCleanup) {
  // 1 slot is enough for taskCleanup task
  int numSlotsNeeded = taskCleanup ?
      1 : numSlotsRequired;
  // create the task
  Task t = null;
  if (isMapTask()) {
    LOG.debug("attempt " + numTaskFailures + " sending skippedRecords " +
        failedRanges.getIndicesCount());
    String splitClass = null;
    BytesWritable split;
    // Job setup/cleanup "maps" carry no real input split.
    if (!jobSetup && !jobCleanup) {
      splitClass = rawSplit.getClassName();
      split = rawSplit.getBytes();
    } else {
      split = new BytesWritable();
    }
    t = new MapTask(jobFile, taskid, partition, splitClass, split,
        numSlotsNeeded, job.getUser());
  } else {
    t = new ReduceTask(jobFile, taskid, partition, numMaps,
        numSlotsNeeded, job.getUser());
  }
  if (jobCleanup) {
    t.setJobCleanupTask();
  }
  if (jobSetup) {
    t.setJobSetupTask();
  }
  if (taskCleanup) {
    t.setTaskCleanupTask();
    // A task-cleanup attempt inherits the run state of the attempt it
    // cleans up after.
    t.setState(taskStatuses.get(taskid).getRunState());
    cleanupTasks.put(taskid, taskTracker);
  }
  t.setConf(conf);
  LOG.debug("Launching task with skipRanges:"+failedRanges.getSkipRanges());
  t.setSkipRanges(failedRanges.getSkipRanges());
  t.setSkipping(skipping);
  if(failedRanges.isTestAttempt()) {
    // A test attempt only probes a sub-range; do not record skip records.
    t.setWriteSkipRecs(false);
  }
  // If another attempt is already active, this new attempt is the
  // speculative one.
  if (activeTasks.size() >= 1) {
    speculativeTaskId = taskid;
  } else {
    speculativeTaskId = null;
  }
  activeTasks.put(taskid, taskTracker);
  tasks.add(taskid);
  // Ask JobTracker to note that the task exists
  // jobtracker.createTaskEntry(taskid, taskTracker, this);
  // check and set the first attempt
  if (firstTaskId == null) {
    firstTaskId = taskid;
  }
  return t;
}
boolean
isRunningTask(TaskAttemptID taskid) { TaskStatus status = taskStatuses.get(taskid); return status != null && status.getRunState() == TaskStatus.State.RUNNING; } boolean isCleanupAttempt(TaskAttemptID taskid) { return cleanupTasks.containsKey(taskid); } String machineWhereCleanupRan(TaskAttemptID taskid) { return cleanupTasks.get(taskid); } String machineWhereTaskRan(TaskAttemptID taskid) { return taskStatuses.get(taskid).getTaskTracker(); } boolean wasKilled(TaskAttemptID taskid) { return tasksToKill.containsKey(taskid); } /** * Has this task already failed on this machine? * @param trackerHost The task tracker hostname * @return Has it failed? */ public boolean hasFailedOnMachine(String trackerHost) { return machinesWhereFailed.contains(trackerHost); } /** * Was this task ever scheduled to run on this machine? * @param trackerHost The task tracker hostname * @param trackerName The tracker name * @return Was task scheduled on the tracker? */ public boolean hasRunOnMachine(String trackerHost, String trackerName) { return this.activeTasks.values().contains(trackerName) || hasFailedOnMachine(trackerHost); } /** * Get the number of machines where this task has failed. * @return the size of the failed machine set */ public int getNumberOfFailedMachines() { return machinesWhereFailed.size(); } /** * Get the id of this map or reduce task. * @return The index of this tip in the maps/reduces lists. */ public int getIdWithinJob() { return partition; } /** * Set the event number that was raised for this tip */ public void setSuccessEventNumber(int eventNumber) { successEventNumber = eventNumber; } /** * Get the event number that was raised for this tip */ public int getSuccessEventNumber() { return successEventNumber; } /** * Gets the Node list of input split locations sorted in rack order. 
*/ public String getSplitNodes() { if (!isMapTask() || jobSetup || jobCleanup) { return ""; } String[] nodes = rawSplit.getLocations(); if (nodes == null || nodes.length == 0) { return ""; } StringBuffer ret = new StringBuffer(nodes[0]); for(int i = 1; i < nodes.length;i++) { ret.append(","); ret.append(nodes[i]); } return ret.toString(); } public long getMapInputSize() { if(isMapTask() && !jobSetup && !jobCleanup) { return rawSplit.getDataLength(); } else { return 0; } } public void clearSplit() { rawSplit.clearBytes(); } /** * update progress rate for a task * * The assumption is that the JIP lock is held entering this routine. * So it's left unsynchronized. Currently the only places it's called * from are TIP.updateStatus and JIP.refreshCandidate* */ public void updateProgressRate(long currentTime) { double bestProgressRate = 0; for (TaskStatus ts : taskStatuses.values()){ if (ts.getRunState() == TaskStatus.State.RUNNING || ts.getRunState() == TaskStatus.State.SUCCEEDED || ts.getRunState() == TaskStatus.State.COMMIT_PENDING) { double tsProgressRate = ts.getProgress()/Math.max(1, currentTime - getDispatchTime(ts.getTaskID())); if (tsProgressRate > bestProgressRate){ bestProgressRate = tsProgressRate; } } } DataStatistics taskStats = job.getRunningTaskStatistics(isMapTask()); taskStats.updateStatistics(progressRate, bestProgressRate); progressRate = bestProgressRate; } /** * Update the processing rate for this task. (e.g. bytes/ms in reduce phase) * @param currentTime */ public void updateProcessingRate(long currentTime) { double bestMapRate = processingRates.getRate(Phase.MAP); double bestShuffleRate = processingRates.getRate(Phase.SHUFFLE); double bestSortRate = processingRates.getRate(Phase.SORT); double bestReduceRate = processingRates.getRate(Phase.REDUCE); // Find the best processing rates. There could be a running task and the // speculated task. 
(should be verified) for (TaskStatus ts : taskStatuses.values()){ // There could be failed/killed/etc tasks - filter those out as they could // have had a high processing rate that should no longer be considered. if (ts.getRunState() == TaskStatus.State.RUNNING || ts.getRunState() == TaskStatus.State.SUCCEEDED || ts.getRunState() == TaskStatus.State.COMMIT_PENDING) { double mapRate = 0; // Since we are not sure if map byte processing rate, or the map // record processing rate is better for speculation, offer an option if (conf.getBoolean(USE_MAP_RECORDS_PROCESSING_RATE, false)) { mapRate = ts.getMapRecordProcessingRate(currentTime); } else { mapRate = ts.getMapByteProcessingRate(currentTime); } double shuffleRate = ts.getCopyProcessingRate(currentTime); double sortRate = ts.getSortProcessingRate(currentTime); double reduceRate = ts.getReduceProcessingRate(currentTime); if (mapRate > bestMapRate) { bestMapRate = mapRate; } if (shuffleRate > bestShuffleRate) { bestShuffleRate = shuffleRate; } if (sortRate > bestSortRate) { bestSortRate = sortRate; } if (reduceRate > bestReduceRate) { bestReduceRate = reduceRate; } } } ProcessingRates updatedRates = new ProcessingRates(bestMapRate, bestShuffleRate, bestSortRate, bestReduceRate); // Update the statistics for the job updateJobStats(Phase.MAP, processingRates, updatedRates); updateJobStats(Phase.SHUFFLE, processingRates, updatedRates); updateJobStats(Phase.SORT, processingRates, updatedRates); updateJobStats(Phase.REDUCE, processingRates, updatedRates); processingRates = updatedRates; } /** * Helper function that updates the processing rates stats for this job. Only * updates the rate in the corresponding phase. 
 * @param phase the phase whose rate is being replaced
 * @param oldRates rates previously recorded for this TIP
 * @param newRates rates just computed for this TIP
 */
private void updateJobStats(Phase phase, ProcessingRates oldRates,
    ProcessingRates newRates) {
  DataStatistics stats = job.getRunningTaskStatistics(phase);
  // Replace the old per-phase rate with the new one in the running stats.
  stats.updateStatistics(oldRates.getRate(phase), newRates.getRate(phase));
}

/**
 * Convert a progress rate to the total duration projected by
 * that progress rate
 */
private static long progressRateToTotalDuration(double rate) {
  // A zero rate projects a task that never finishes.
  if (rate == 0) return Long.MAX_VALUE;
  return (long)(1.0/rate);
}

/**
 * This class keeps the records to be skipped during further executions
 * based on failed records from all the previous attempts.
 * It also narrows down the skip records if it is more than the
 * acceptable value by dividing the failed range into half. In this case one
 * half is executed in the next attempt (test attempt).
 * In the test attempt, only the test range gets executed, others get skipped.
 * Based on the success/failure of the test attempt, the range is divided
 * further.
 */
private class FailedRanges {
  // Ranges confirmed bad; skipped by every subsequent attempt.
  private SortedRanges skipRanges = new SortedRanges();
  // Non-null while a bisection "test attempt" is in flight.
  private Divide divide;

  synchronized SortedRanges getSkipRanges() {
    if(divide!=null) {
      return divide.skipRange;
    }
    return skipRanges;
  }

  synchronized boolean isTestAttempt() {
    return divide!=null;
  }

  synchronized long getIndicesCount() {
    if(isTestAttempt()) {
      return divide.skipRange.getIndicesCount();
    }
    return skipRanges.getIndicesCount();
  }

  synchronized void updateState(TaskStatus status){
    if (isTestAttempt() &&
        (status.getRunState() == TaskStatus.State.SUCCEEDED)) {
      divide.testPassed = true;
      //since it was the test attempt we need to set it to failed
      //as it worked only on the test range
      status.setRunState(TaskStatus.State.FAILED);
    }
  }

  synchronized void add(Range failedRange) {
    LOG.warn("FailedRange:"+ failedRange);
    if(divide!=null) {
      // A test attempt just finished; its outcome decides which half of
      // the previously divided range is actually bad.
      LOG.warn("FailedRange:"+ failedRange +" test:"+divide.test +
          " pass:"+divide.testPassed);
      if(divide.testPassed) {
        //test range passed
        //other range would be bad. test it
        failedRange = divide.other;
      } else {
        //test range failed
        //other range would be good.
        failedRange = divide.test;
      }
      //reset
      divide = null;
    }
    if(maxSkipRecords==0 || failedRange.getLength()<=maxSkipRecords) {
      skipRanges.add(failedRange);
    } else {
      //start dividing the range to narrow down the skipped
      //records until maxSkipRecords are met OR all attempts
      //get exhausted
      divide = new Divide(failedRange);
    }
  }

  /**
   * One bisection step: splits a failed range into a "test" half that the
   * next attempt executes and an "other" half that is skipped along with
   * everything already known bad.
   */
  class Divide {
    // Skip set active while the test attempt runs (everything but 'test').
    private final SortedRanges skipRange;
    // Half of the failed range that the test attempt executes.
    private final Range test;
    // Remaining half; judged by the test attempt's outcome.
    private final Range other;
    private boolean testPassed;

    Divide(Range range){
      long half = range.getLength()/2;
      test = new Range(range.getStartIndex(), half);
      other = new Range(test.getEndIndex(), range.getLength()-half);
      //construct the skip range from the skipRanges
      skipRange = new SortedRanges();
      for(Range r : skipRanges.getRanges()) {
        skipRange.add(r);
      }
      // Skip everything before and after the test range.
      skipRange.add(new Range(0,test.getStartIndex()));
      skipRange.add(new Range(test.getEndIndex(),
          (Long.MAX_VALUE-test.getEndIndex())));
    }
  }
}

/** Live view of attempt id -> tracker for currently running attempts. */
TreeMap<TaskAttemptID, String> getActiveTasks() {
  return activeTasks;
}

/** Defensive copy of the active-attempt map. */
TreeMap<TaskAttemptID, String> getActiveTasksCopy() {
  return new TreeMap<TaskAttemptID, String>(activeTasks);
}

/** Number of scheduler slots a regular attempt of this TIP requires. */
int getNumSlotsRequired() {
  return numSlotsRequired;
}

/**
 * Force speculative execution if speculation is allowed in JobInProgress
 */
public void setSpeculativeForced(boolean speculativeForced) {
  this.speculativeForced = speculativeForced;
}

/**
 * Is forced speculative execution enabled?
 */
public boolean isSpeculativeForced() {
  return speculativeForced;
}
}