LocalContainerLauncher.java example

Explorer
HDP-2.2-Patched-master
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.mapred;

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSError;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobCounter;
import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.job.Job;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerLaunchedEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;

/**
 * Runs the container task locally in a thread.
 * Since all (sub)tasks share the same local directory, they must be executed
 * sequentially in order to avoid creating/deleting the same files/dirs.
 */
public class LocalContainerLauncher extends AbstractService implements
    ContainerLauncher {

  private static final File curDir = new File(".");
  private static final Log LOG = LogFactory.getLog(LocalContainerLauncher.class);

  private FileContext curFC = null;
  private final HashSet<File> localizedFiles;
  private final AppContext context;
  private final TaskUmbilicalProtocol umbilical;
  private Thread eventHandlingThread;
  private BlockingQueue<ContainerLauncherEvent> eventQueue =
      new LinkedBlockingQueue<ContainerLauncherEvent>();

  public LocalContainerLauncher(AppContext context,
                                TaskUmbilicalProtocol umbilical) {
    super(LocalContainerLauncher.class.getName());
    this.context = context;
    this.umbilical = umbilical;
        // umbilical:  MRAppMaster creates (taskAttemptListener), passes to us
        // (TODO/FIXME:  pointless to use RPC to talk to self; should create
        // LocalTaskAttemptListener or similar:  implement umbilical protocol
        // but skip RPC stuff)

    try {
      curFC = FileContext.getFileContext(curDir.toURI());
    } catch (UnsupportedFileSystemException ufse) {
      LOG.error("Local filesystem " + curDir.toURI().toString()
                + " is unsupported?? (should never happen)");
    }

    // Save list of files/dirs that are supposed to be present so can delete
    // any extras created by one task before starting subsequent task.  Note
    // that there's no protection against deleted or renamed localization;
    // users who do that get what they deserve (and will have to disable
    // uberization in order to run correctly).
    File[] curLocalFiles = curDir.listFiles();
    localizedFiles = new HashSet<File>(curLocalFiles.length);
    for (int j = 0; j < curLocalFiles.length; ++j) {
      localizedFiles.add(curLocalFiles[j]);
    }

    // Relocalization note/future FIXME (per chrisdo, 20110315):  At moment,
    // full localization info is in AppSubmissionContext passed from client to
    // RM and then to NM for AM-container launch:  no difference between AM-
    // localization and MapTask- or ReduceTask-localization, so can assume all
    // OK.  Longer-term, will need to override uber-AM container-localization
    // request ("needed resources") with union of regular-AM-resources + task-
    // resources (and, if maps and reduces ever differ, then union of all three
    // types), OR will need localizer service/API that uber-AM can request
    // after running (e.g., "localizeForTask()" or "localizeForMapTask()").
  }

  public void serviceStart() throws Exception {
    eventHandlingThread = new Thread(new SubtaskRunner(), "uber-SubtaskRunner");
    eventHandlingThread.start();
    super.serviceStart();
  }

  public void serviceStop() throws Exception {
    if (eventHandlingThread != null) {
      eventHandlingThread.interrupt();
    }
    super.serviceStop();
  }

  @Override
  public void handle(ContainerLauncherEvent event) {
    try {
      eventQueue.put(event);
    } catch (InterruptedException e) {
      throw new YarnRuntimeException(e);  // FIXME? YarnRuntimeException is "for runtime exceptions only"
    }
  }


  /*
   * Uber-AM lifecycle/ordering ("normal" case):
   *
   * - [somebody] sends TA_ASSIGNED
   *   - handled by ContainerAssignedTransition (TaskAttemptImpl.java)
   *     - creates "remoteTask" for us == real Task
   *     - sends CONTAINER_REMOTE_LAUNCH
   *     - TA: UNASSIGNED -> ASSIGNED
   * - CONTAINER_REMOTE_LAUNCH handled by LocalContainerLauncher (us)
   *   - sucks "remoteTask" out of TaskAttemptImpl via getRemoteTask()
   *   - sends TA_CONTAINER_LAUNCHED
   *     [[ elsewhere...
   *       - TA_CONTAINER_LAUNCHED handled by LaunchedContainerTransition
   *         - registers "remoteTask" with TaskAttemptListener (== umbilical)
   *         - NUKES "remoteTask"
   *         - sends T_ATTEMPT_LAUNCHED (Task: SCHEDULED -> RUNNING)
   *         - TA: ASSIGNED -> RUNNING
   *     ]]
   *   - runs Task (runSubMap() or runSubReduce())
   *     - TA can safely send TA_UPDATE since in RUNNING state
   */
  private class SubtaskRunner implements Runnable {

    private boolean doneWithMaps = false;
    private int finishedSubMaps = 0;

    SubtaskRunner() {
    }

    @SuppressWarnings("unchecked")
    @Override
    public void run() {
      ContainerLauncherEvent event = null;

      // _must_ either run subtasks sequentially or accept expense of new JVMs
      // (i.e., fork()), else will get weird failures when maps try to create/
      // write same dirname or filename:  no chdir() in Java
      while (!Thread.currentThread().isInterrupted()) {
        try {
          event = eventQueue.take();
        } catch (InterruptedException e) {  // mostly via T_KILL? JOB_KILL?
          LOG.error("Returning, interrupted : " + e);
          return;
        }

        LOG.info("Processing the event " + event.toString());

        if (event.getType() == EventType.CONTAINER_REMOTE_LAUNCH) {

          ContainerRemoteLaunchEvent launchEv =
              (ContainerRemoteLaunchEvent)event;
          TaskAttemptId attemptID = launchEv.getTaskAttemptID(); 

          Job job = context.getAllJobs().get(attemptID.getTaskId().getJobId());
          int numMapTasks = job.getTotalMaps();
          int numReduceTasks = job.getTotalReduces();

          // YARN (tracking) Task:
          org.apache.hadoop.mapreduce.v2.app.job.Task ytask =
              job.getTask(attemptID.getTaskId());
          // classic mapred Task:
          org.apache.hadoop.mapred.Task remoteTask = launchEv.getRemoteTask();

          // after "launching," send launched event to task attempt to move
          // state from ASSIGNED to RUNNING (also nukes "remoteTask", so must
          // do getRemoteTask() call first)
          
          //There is no port number because we are not really talking to a task
          // tracker.  The shuffle is just done through local files.  So the
          // port number is set to -1 in this case.
          context.getEventHandler().handle(
              new TaskAttemptContainerLaunchedEvent(attemptID, -1));

          if (numMapTasks == 0) {
            doneWithMaps = true;
          }

          try {
            if (remoteTask.isMapOrReduce()) {
              JobCounterUpdateEvent jce = new JobCounterUpdateEvent(attemptID.getTaskId().getJobId());
              jce.addCounterUpdate(JobCounter.TOTAL_LAUNCHED_UBERTASKS, 1);
              if (remoteTask.isMapTask()) {
                jce.addCounterUpdate(JobCounter.NUM_UBER_SUBMAPS, 1);
              } else {
                jce.addCounterUpdate(JobCounter.NUM_UBER_SUBREDUCES, 1);
              }
              context.getEventHandler().handle(jce);
            }
            runSubtask(remoteTask, ytask.getType(), attemptID, numMapTasks,
                       (numReduceTasks > 0));
            
          } catch (RuntimeException re) {
            JobCounterUpdateEvent jce = new JobCounterUpdateEvent(attemptID.getTaskId().getJobId());
            jce.addCounterUpdate(JobCounter.NUM_FAILED_UBERTASKS, 1);
            context.getEventHandler().handle(jce);
            // this is our signal that the subtask failed in some way, so
            // simulate a failed JVM/container and send a container-completed
            // event to task attempt (i.e., move state machine from RUNNING
            // to FAIL_CONTAINER_CLEANUP [and ultimately to FAILED])
            context.getEventHandler().handle(new TaskAttemptEvent(attemptID,
                TaskAttemptEventType.TA_CONTAINER_COMPLETED));
          } catch (IOException ioe) {
            // if umbilical itself barfs (in error-handler of runSubMap()),
            // we're pretty much hosed, so do what YarnChild main() does
            // (i.e., exit clumsily--but can never happen, so no worries!)
            LOG.fatal("oopsie...  this can never happen: "
                + StringUtils.stringifyException(ioe));
            System.exit(-1);
          }

        } else if (event.getType() == EventType.CONTAINER_REMOTE_CLEANUP) {

          // no container to kill, so just send "cleaned" event to task attempt
          // to move us from SUCCESS_CONTAINER_CLEANUP to SUCCEEDED state
          // (or {FAIL|KILL}_CONTAINER_CLEANUP to {FAIL|KILL}_TASK_CLEANUP)
          context.getEventHandler().handle(
              new TaskAttemptEvent(event.getTaskAttemptID(),
                  TaskAttemptEventType.TA_CONTAINER_CLEANED));

        } else {
          LOG.warn("Ignoring unexpected event " + event.toString());
        }

      }
    }

    @SuppressWarnings("deprecation")
    private void runSubtask(org.apache.hadoop.mapred.Task task,
                            final TaskType taskType,
                            TaskAttemptId attemptID,
                            final int numMapTasks,
                            boolean renameOutputs)
    throws RuntimeException, IOException {
      org.apache.hadoop.mapred.TaskAttemptID classicAttemptID =
          TypeConverter.fromYarn(attemptID);

      try {
        JobConf conf = new JobConf(getConfig());
        conf.set(JobContext.TASK_ID, task.getTaskID().toString());
        conf.set(JobContext.TASK_ATTEMPT_ID, classicAttemptID.toString());
        conf.setBoolean(JobContext.TASK_ISMAP, (taskType == TaskType.MAP));
        conf.setInt(JobContext.TASK_PARTITION, task.getPartition());
        conf.set(JobContext.ID, task.getJobID().toString());

        // Use the AM's local dir env to generate the intermediate step 
        // output files
        String[] localSysDirs = StringUtils.getTrimmedStrings(
            System.getenv(Environment.LOCAL_DIRS.name()));
        conf.setStrings(MRConfig.LOCAL_DIR, localSysDirs);
        LOG.info(MRConfig.LOCAL_DIR + " for uber task: "
            + conf.get(MRConfig.LOCAL_DIR));

        // mark this as an uberized subtask so it can set task counter
        // (longer-term/FIXME:  could redefine as job counter and send
        // "JobCounterEvent" to JobImpl on [successful] completion of subtask;
        // will need new Job state-machine transition and JobImpl jobCounters
        // map to handle)
        conf.setBoolean("mapreduce.task.uberized", true);

        // META-FIXME: do we want the extra sanity-checking (doneWithMaps,
        // etc.), or just assume/hope the state machine(s) and uber-AM work
        // as expected?
        if (taskType == TaskType.MAP) {
          if (doneWithMaps) {
            LOG.error("CONTAINER_REMOTE_LAUNCH contains a map task ("
                      + attemptID + "), but should be finished with maps");
            throw new RuntimeException();
          }

          MapTask map = (MapTask)task;
          map.setConf(conf);

          map.run(conf, umbilical);

          if (renameOutputs) {
            renameMapOutputForReduce(conf, attemptID, map.getMapOutputFile());
          }
          relocalize();

          if (++finishedSubMaps == numMapTasks) {
            doneWithMaps = true;
          }

        } else /* TaskType.REDUCE */ {

          if (!doneWithMaps) {
            // check if event-queue empty?  whole idea of counting maps vs. 
            // checking event queue is a tad wacky...but could enforce ordering
            // (assuming no "lost events") at LocalMRAppMaster [CURRENT BUG(?): 
            // doesn't send reduce event until maps all done]
            LOG.error("CONTAINER_REMOTE_LAUNCH contains a reduce task ("
                      + attemptID + "), but not yet finished with maps");
            throw new RuntimeException();
          }

          // a.k.a. "mapreduce.jobtracker.address" in LocalJobRunner:
          // set framework name to local to make task local
          conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
          conf.set(MRConfig.MASTER_ADDRESS, "local");  // bypass shuffle

          ReduceTask reduce = (ReduceTask)task;
          reduce.setConf(conf);          

          reduce.run(conf, umbilical);
          //relocalize();  // needed only if more than one reducer supported (is MAPREDUCE-434 fixed yet?)
        }

      } catch (FSError e) {
        LOG.fatal("FSError from child", e);
        // umbilical:  MRAppMaster creates (taskAttemptListener), passes to us
        umbilical.fsError(classicAttemptID, e.getMessage());
        throw new RuntimeException();

      } catch (Exception exception) {
        LOG.warn("Exception running local (uberized) 'child' : "
            + StringUtils.stringifyException(exception));
        try {
          if (task != null) {
            // do cleanup for the task
            task.taskCleanup(umbilical);
          }
        } catch (Exception e) {
          LOG.info("Exception cleaning up: "
              + StringUtils.stringifyException(e));
        }
        // Report back any failures, for diagnostic purposes
        umbilical.reportDiagnosticInfo(classicAttemptID, 
            StringUtils.stringifyException(exception));
        throw new RuntimeException();

      } catch (Throwable throwable) {
        LOG.fatal("Error running local (uberized) 'child' : "
            + StringUtils.stringifyException(throwable));
        Throwable tCause = throwable.getCause();
        String cause = (tCause == null)
            ? throwable.getMessage()
                : StringUtils.stringifyException(tCause);
            umbilical.fatalError(classicAttemptID, cause);
        throw new RuntimeException();
      }
    }

    /**
     * Within the _local_ filesystem (not HDFS), all activity takes place within
     * a single subdir (${local.dir}/usercache/$user/appcache/$appId/$contId/),
     * and all sub-MapTasks create the same filename ("file.out").  Rename that
     * to something unique (e.g., "map_0.out") to avoid collisions.
     *
     * Longer-term, we'll modify [something] to use TaskAttemptID-based
     * filenames instead of "file.out". (All of this is entirely internal,
     * so there are no particular compatibility issues.)
     */
    @SuppressWarnings("deprecation")
    private void renameMapOutputForReduce(JobConf conf, TaskAttemptId mapId,
                                          MapOutputFile subMapOutputFile)
    throws IOException {
      FileSystem localFs = FileSystem.getLocal(conf);
      // move map output to reduce input
      Path mapOut = subMapOutputFile.getOutputFile();
      FileStatus mStatus = localFs.getFileStatus(mapOut);      
      Path reduceIn = subMapOutputFile.getInputFileForWrite(
          TypeConverter.fromYarn(mapId).getTaskID(), mStatus.getLen());
      if (LOG.isDebugEnabled()) {
        LOG.debug("Renaming map output file for task attempt "
            + mapId.toString() + " from original location " + mapOut.toString()
            + " to destination " + reduceIn.toString());
      }
      if (!localFs.mkdirs(reduceIn.getParent())) {
        throw new IOException("Mkdirs failed to create "
            + reduceIn.getParent().toString());
      }
      if (!localFs.rename(mapOut, reduceIn))
        throw new IOException("Couldn't rename " + mapOut);
    }

    /**
     * Also within the local filesystem, we need to restore the initial state
     * of the directory as much as possible.  Compare current contents against
     * the saved original state and nuke everything that doesn't belong, with
     * the exception of the renamed map outputs.
     *
     * Any jobs that go out of their way to rename or delete things from the
     * local directory are considered broken and deserve what they get...
     */
    private void relocalize() {
      File[] curLocalFiles = curDir.listFiles();
      for (int j = 0; j < curLocalFiles.length; ++j) {
        if (!localizedFiles.contains(curLocalFiles[j])) {
          // found one that wasn't there before:  delete it
          boolean deleted = false;
          try {
            if (curFC != null) {
              // this is recursive, unlike File delete():
              deleted = curFC.delete(new Path(curLocalFiles[j].getName()),true);
            }
          } catch (IOException e) {
            deleted = false;
          }
          if (!deleted) {
            LOG.warn("Unable to delete unexpected local file/dir "
                + curLocalFiles[j].getName() + ": insufficient permissions?");
          }
        }
      }
    }

  } // end SubtaskRunner

}