/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapreduce.v2.app.recover;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.mapreduce.v2.api.records.AMInfo;
import org.apache.hadoop.mapreduce.v2.api.records.Phase;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskState;
import org.apache.hadoop.mapreduce.v2.app.ControlledClock;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerLaunchedEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent;
import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator;
import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent;
import org.apache.hadoop.mapreduce.v2.app.taskclean.TaskCleaner;
import org.apache.hadoop.mapreduce.v2.app.taskclean.TaskCleanupEvent;
import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils;
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
import org.apache.hadoop.yarn.Clock;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.service.CompositeService;
import org.apache.hadoop.yarn.service.Service;
import org.apache.hadoop.yarn.util.BuilderUtils;
import org.apache.hadoop.yarn.util.ConverterUtils;

/*
 * Recovers the completed tasks from the previous life of the Application
 * Master. The completed tasks are deciphered from the history file of the
 * previous life. The recovery service intercepts and replays the events for
 * the completed tasks. While recovery is in progress, the scheduling of new
 * tasks is delayed by buffering the task schedule events.
 * The recovery service controls the clock while recovery is in progress.
 */

//TODO:
//task cleanup for all non completed tasks
public class RecoveryService extends CompositeService implements Recovery {

  private static final Log LOG = LogFactory.getLog(RecoveryService.class);

  private final ApplicationAttemptId applicationAttemptId;
  private final OutputCommitter committer;
  private final Dispatcher dispatcher;
  private final ControlledClock clock;

  private JobInfo jobInfo = null;
  private final Map<TaskId, TaskInfo> completedTasks =
      new HashMap<TaskId, TaskInfo>();

  private final List<TaskEvent> pendingTaskScheduleEvents =
      new ArrayList<TaskEvent>();

  private volatile boolean recoveryMode = false;

  public RecoveryService(ApplicationAttemptId applicationAttemptId,
      Clock clock, OutputCommitter committer) {
    super("RecoveringDispatcher");
    this.applicationAttemptId = applicationAttemptId;
    this.committer = committer;
    this.dispatcher = createRecoveryDispatcher();
    this.clock = new ControlledClock(clock);
    addService((Service) dispatcher);
  }

  @Override
  public void init(Configuration conf) {
    super.init(conf);
    // parse the history file
    try {
      parse();
    } catch (Exception e) {
      LOG.warn(e);
      LOG.warn("Could not parse the old history file. Aborting recovery. "
          + "Starting afresh.", e);
    }
    if (completedTasks.size() > 0) {
      recoveryMode = true;
      LOG.info("SETTING THE RECOVERY MODE TO TRUE. NO OF COMPLETED TASKS "
          + "TO RECOVER " + completedTasks.size());
      LOG.info("Job launch time " + jobInfo.getLaunchTime());
      clock.setTime(jobInfo.getLaunchTime());
    }
  }
  @Override
  public Dispatcher getDispatcher() {
    return dispatcher;
  }

  @Override
  public Clock getClock() {
    return clock;
  }

  @Override
  public Map<TaskId, TaskInfo> getCompletedTasks() {
    return completedTasks;
  }

  @Override
  public List<AMInfo> getAMInfos() {
    if (jobInfo == null || jobInfo.getAMInfos() == null) {
      return new LinkedList<AMInfo>();
    }
    List<AMInfo> amInfos = new LinkedList<AMInfo>();
    for (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.AMInfo jhAmInfo : jobInfo
        .getAMInfos()) {
      AMInfo amInfo = MRBuilderUtils.newAMInfo(jhAmInfo.getAppAttemptId(),
          jhAmInfo.getStartTime(), jhAmInfo.getContainerId(),
          jhAmInfo.getNodeManagerHost(), jhAmInfo.getNodeManagerPort(),
          jhAmInfo.getNodeManagerHttpPort());
      amInfos.add(amInfo);
    }
    return amInfos;
  }

  private void parse() throws IOException {
    // TODO: parse history file based on startCount
    String jobName =
        TypeConverter.fromYarn(applicationAttemptId.getApplicationId()).toString();
    String jobhistoryDir =
        JobHistoryUtils.getConfiguredHistoryStagingDirPrefix(getConfig());
    FSDataInputStream in = null;
    Path historyFile = null;
    Path histDirPath = FileContext.getFileContext(getConfig()).makeQualified(
        new Path(jobhistoryDir));
    FileContext fc = FileContext.getFileContext(histDirPath.toUri(),
        getConfig());
    // read the previous history file
    historyFile = fc.makeQualified(JobHistoryUtils.getStagingJobHistoryFile(
        histDirPath, jobName, (applicationAttemptId.getAttemptId() - 1)));
    LOG.info("History file is at " + historyFile);
    in = fc.open(historyFile);
    JobHistoryParser parser = new JobHistoryParser(in);
    jobInfo = parser.parse();
    Exception parseException = parser.getParseException();
    if (parseException != null) {
      LOG.info("Got an error parsing job-history file " + historyFile
          + ", ignoring incomplete events.", parseException);
    }
    Map<org.apache.hadoop.mapreduce.TaskID, TaskInfo> taskInfos = jobInfo
        .getAllTasks();
    for (TaskInfo taskInfo : taskInfos.values()) {
      if (TaskState.SUCCEEDED.toString().equals(taskInfo.getTaskStatus())) {
        completedTasks
            .put(TypeConverter.toYarn(taskInfo.getTaskId()), taskInfo);
        LOG.info("Read from history task "
            + TypeConverter.toYarn(taskInfo.getTaskId()));
      }
    }
    LOG.info("Read completed tasks from history " + completedTasks.size());
  }

  protected Dispatcher createRecoveryDispatcher() {
    return new RecoveryDispatcher();
  }

  @SuppressWarnings("rawtypes")
  class RecoveryDispatcher extends AsyncDispatcher {
    private final EventHandler actualHandler;
    private final EventHandler handler;

    RecoveryDispatcher() {
      super();
      actualHandler = super.getEventHandler();
      handler = new InterceptingEventHandler(actualHandler);
    }

    @Override
    @SuppressWarnings("unchecked")
    public void dispatch(Event event) {
      if (recoveryMode) {
        if (event.getType() == TaskAttemptEventType.TA_CONTAINER_LAUNCHED) {
          TaskAttemptInfo attInfo = getTaskAttemptInfo(((TaskAttemptEvent) event)
              .getTaskAttemptID());
          LOG.info("Recovered Attempt start time " + attInfo.getStartTime());
          // replay the event with the original timestamp by moving the
          // controlled clock to the recovered attempt's start time
          clock.setTime(attInfo.getStartTime());
        } else if (event.getType() == TaskAttemptEventType.TA_DONE
            || event.getType() == TaskAttemptEventType.TA_FAILMSG
            || event.getType() == TaskAttemptEventType.TA_KILL) {
          TaskAttemptInfo attInfo = getTaskAttemptInfo(((TaskAttemptEvent) event)
              .getTaskAttemptID());
          LOG.info("Recovered Attempt finish time " + attInfo.getFinishTime());
          clock.setTime(attInfo.getFinishTime());
        } else if (event.getType() == TaskEventType.T_ATTEMPT_FAILED
            || event.getType() == TaskEventType.T_ATTEMPT_KILLED
            || event.getType() == TaskEventType.T_ATTEMPT_SUCCEEDED) {
          TaskTAttemptEvent tEvent = (TaskTAttemptEvent) event;
          LOG.info("Recovered Task attempt " + tEvent.getTaskAttemptID());
          TaskInfo taskInfo = completedTasks.get(tEvent.getTaskAttemptID()
              .getTaskId());
          taskInfo.getAllTaskAttempts().remove(
              TypeConverter.fromYarn(tEvent.getTaskAttemptID()));
          // remove the task info from completed tasks if all attempts are
          // recovered
          if (taskInfo.getAllTaskAttempts().size() == 0) {
            completedTasks.remove(tEvent.getTaskAttemptID().getTaskId());
            // checkForRecoveryComplete
            LOG.info("CompletedTasks() " + completedTasks.size());
            if (completedTasks.size() == 0) {
              recoveryMode = false;
              clock.reset();
              LOG.info("Setting the recovery mode to false. "
                  + "Recovery is complete!");

              // send all pending tasks schedule events
              for (TaskEvent tEv : pendingTaskScheduleEvents) {
                actualHandler.handle(tEv);
              }
            }
          }
        }
      }
      realDispatch(event);
    }

    public void realDispatch(Event event) {
      super.dispatch(event);
    }

    @Override
    public EventHandler getEventHandler() {
      return handler;
    }
  }

  private TaskAttemptInfo getTaskAttemptInfo(TaskAttemptId id) {
    TaskInfo taskInfo = completedTasks.get(id.getTaskId());
    return taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn(id));
  }

  @SuppressWarnings({"rawtypes", "unchecked"})
  private class InterceptingEventHandler implements EventHandler {
    EventHandler actualHandler;

    InterceptingEventHandler(EventHandler actualHandler) {
      this.actualHandler = actualHandler;
    }

    @Override
    public void handle(Event event) {
      if (!recoveryMode) {
        // delegate to the dispatcher one
        actualHandler.handle(event);
        return;
      } else if (event.getType() == TaskEventType.T_SCHEDULE) {
        TaskEvent taskEvent = (TaskEvent) event;
        // delay the scheduling of new tasks till previous ones are recovered
        if (completedTasks.get(taskEvent.getTaskID()) == null) {
          LOG.debug("Adding to pending task events " + taskEvent.getTaskID());
          pendingTaskScheduleEvents.add(taskEvent);
          return;
        }
      } else if (event.getType() == ContainerAllocator.EventType.CONTAINER_REQ) {
        TaskAttemptId aId = ((ContainerAllocatorEvent) event).getAttemptID();
        TaskAttemptInfo attInfo = getTaskAttemptInfo(aId);
        LOG.debug("CONTAINER_REQ " + aId);
        sendAssignedEvent(aId, attInfo);
        return;
      } else if (event.getType() == TaskCleaner.EventType.TASK_CLEAN) {
        TaskAttemptId aId = ((TaskCleanupEvent) event).getAttemptID();
        LOG.debug("TASK_CLEAN");
        actualHandler.handle(new TaskAttemptEvent(aId,
            TaskAttemptEventType.TA_CLEANUP_DONE));
        return;
      } else if (event.getType() == ContainerLauncher.EventType.CONTAINER_REMOTE_LAUNCH) {
        TaskAttemptId aId = ((ContainerRemoteLaunchEvent) event)
            .getTaskAttemptID();
        TaskAttemptInfo attInfo = getTaskAttemptInfo(aId);
        actualHandler.handle(new TaskAttemptContainerLaunchedEvent(aId,
            attInfo.getShufflePort()));
        // send the status update event
        sendStatusUpdateEvent(aId, attInfo);

        TaskAttemptState state =
            TaskAttemptState.valueOf(attInfo.getTaskStatus());
        switch (state) {
        case SUCCEEDED:
          // recover the task output
          TaskAttemptContext taskContext = new TaskAttemptContextImpl(
              getConfig(), attInfo.getAttemptId());
          try {
            TaskType type =
                taskContext.getTaskAttemptID().getTaskID().getTaskType();
            int numReducers =
                taskContext.getConfiguration().getInt(MRJobConfig.NUM_REDUCES, 1);
            if (type == TaskType.REDUCE
                || (type == TaskType.MAP && numReducers <= 0)) {
              committer.recoverTask(taskContext);
              LOG.info("Recovered output from task attempt "
                  + attInfo.getAttemptId());
            } else {
              LOG.info("Will not try to recover output for "
                  + taskContext.getTaskAttemptID());
            }
          } catch (IOException e) {
            LOG.error("Caught an exception while trying to recover task "
                + aId, e);
            actualHandler.handle(new JobDiagnosticsUpdateEvent(
                aId.getTaskId().getJobId(),
                "Error in recovering task output " + e.getMessage()));
            actualHandler.handle(new JobEvent(aId.getTaskId().getJobId(),
                JobEventType.INTERNAL_ERROR));
          }

          // send the done event
          LOG.info("Sending done event to recovered attempt " + aId);
          actualHandler.handle(new TaskAttemptEvent(aId,
              TaskAttemptEventType.TA_DONE));
          break;
        case KILLED:
          LOG.info("Sending kill event to recovered attempt " + aId);
          actualHandler.handle(new TaskAttemptEvent(aId,
              TaskAttemptEventType.TA_KILL));
          break;
        default:
          LOG.info("Sending fail event to recovered attempt " + aId);
          actualHandler.handle(new TaskAttemptEvent(aId,
              TaskAttemptEventType.TA_FAILMSG));
          break;
        }
        return;
      } else if (event.getType() == ContainerLauncher.EventType.CONTAINER_REMOTE_CLEANUP) {
        TaskAttemptId aId = ((ContainerLauncherEvent) event)
            .getTaskAttemptID();
        actualHandler.handle(new TaskAttemptEvent(aId,
            TaskAttemptEventType.TA_CONTAINER_CLEANED));
        return;
      }

      // delegate to the actual handler
      actualHandler.handle(event);
    }

    private void sendStatusUpdateEvent(TaskAttemptId yarnAttemptID,
        TaskAttemptInfo attemptInfo) {
      LOG.info("Sending status update event to " + yarnAttemptID);
      TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus();
      taskAttemptStatus.id = yarnAttemptID;
      taskAttemptStatus.progress = 1.0f;
      taskAttemptStatus.stateString = attemptInfo.getTaskStatus();
      // taskAttemptStatus.outputSize = attemptInfo.getOutputSize();
      taskAttemptStatus.phase = Phase.CLEANUP;
      org.apache.hadoop.mapreduce.Counters cntrs = attemptInfo.getCounters();
      if (cntrs == null) {
        taskAttemptStatus.counters = null;
      } else {
        taskAttemptStatus.counters = cntrs;
      }
      actualHandler.handle(new TaskAttemptStatusUpdateEvent(
          taskAttemptStatus.id, taskAttemptStatus));
    }

    private void sendAssignedEvent(TaskAttemptId yarnAttemptID,
        TaskAttemptInfo attemptInfo) {
      LOG.info("Sending assigned event to " + yarnAttemptID);
      ContainerId cId = attemptInfo.getContainerId();
      NodeId nodeId = ConverterUtils.toNodeId(attemptInfo.getHostname() + ":"
          + attemptInfo.getPort());
      // Resource/Priority/ApplicationACLs are only needed while launching the
      // container on an NM, these are already completed tasks, so setting them
      // to null
      Container container = BuilderUtils.newContainer(cId, nodeId,
          attemptInfo.getTrackerName() + ":" + attemptInfo.getHttpPort(),
          null, null, null);
      actualHandler.handle(new TaskAttemptContainerAssignedEvent(yarnAttemptID,
          container, null));
    }
  }
}
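
// Usage sketch (illustrative only, not part of this class): on a restarted
// application attempt, the MR ApplicationMaster is expected to build the
// recovery service and adopt its dispatcher and controlled clock before
// creating the rest of its services. The addIfService helper and the variable
// names below are assumptions for illustration, not code from this file.
//
//   Recovery recoveryService =
//       new RecoveryService(appAttemptId, systemClock, committer);
//   addIfService(recoveryService);
//   Dispatcher dispatcher = recoveryService.getDispatcher();
//   Clock clock = recoveryService.getClock();
//   List<AMInfo> previousAMs = recoveryService.getAMInfos();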