/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.DataInputStream;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex;
import org.apache.hadoop.security.UserGroupInformation;

/**
 * IsolationRunner is intended to facilitate debugging by re-running a specific
 * task, given left-over task files for a (typically failed) past job.
 * Currently, it is limited to re-running map tasks.
 *
 * Users may coerce MapReduce to keep task files around by setting
 * keep.failed.task.files.  See mapred_tutorial.xml for more documentation.
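 *
 * A sketch of a typical invocation, mirroring the usage string printed below;
 * the job.xml is the one left behind in the failed task attempt's local
 * working directory:
 *
 * <pre>
 * hadoop org.apache.hadoop.mapred.IsolationRunner &lt;path&gt;/job.xml [user-name]
 * </pre>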
 */
public class IsolationRunner {
  private static final Log LOG = 
      LogFactory.getLog(IsolationRunner.class.getName());

  /**
   * A do-nothing umbilical: every report from the re-run task is logged
   * instead of being sent back to a TaskTracker.
   */
  private static class FakeUmbilical implements TaskUmbilicalProtocol {

    public long getProtocolVersion(String protocol, long clientVersion) {
      return TaskUmbilicalProtocol.versionID;
    }

    public void done(TaskAttemptID taskid, JvmContext jvmContext)
        throws IOException {
      LOG.info("Task " + taskid + " reporting done.");
    }

    public void fsError(TaskAttemptID taskId, String message,
        JvmContext jvmContext) throws IOException {
      LOG.info("Task " + taskId + " reporting file system error: " + message);
    }

    public void shuffleError(TaskAttemptID taskId, String message,
        JvmContext jvmContext) throws IOException {
      LOG.info("Task " + taskId + " reporting shuffle error: " + message);
    }

    public void fatalError(TaskAttemptID taskId, String msg,
        JvmContext jvmContext) throws IOException {
      LOG.info("Task " + taskId + " reporting fatal error: " + msg);
    }

    public JvmTask getTask(JvmContext context) throws IOException {
      return null;
    }

    public boolean ping(TaskAttemptID taskid, JvmContext jvmContext)
        throws IOException {
      return true;
    }

    public void commitPending(TaskAttemptID taskId, TaskStatus taskStatus,
        JvmContext jvmContext) throws IOException, InterruptedException {
      statusUpdate(taskId, taskStatus, jvmContext);
    }

    public boolean canCommit(TaskAttemptID taskid, JvmContext jvmContext)
        throws IOException {
      return true;
    }

    public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus,
        JvmContext context) throws IOException, InterruptedException {
      StringBuffer buf = new StringBuffer("Task ");
      buf.append(taskId);
      buf.append(" making progress to ");
      buf.append(taskStatus.getProgress());
      String state = taskStatus.getStateString();
      if (state != null) {
        buf.append(" and state of ");
        buf.append(state);
      }
      LOG.info(buf.toString());
      // ignore phase
      // ignore counters
      return true;
    }

    public void reportDiagnosticInfo(TaskAttemptID taskid, String trace,
        JvmContext jvmContext) throws IOException {
      LOG.info("Task " + taskid + " has problem " + trace);
    }

    public MapTaskCompletionEventsUpdate getMapCompletionEvents(JobID jobId,
        int fromEventId, int maxLocs, TaskAttemptID id, JvmContext jvmContext)
        throws IOException {
      return new MapTaskCompletionEventsUpdate(TaskCompletionEvent.EMPTY_ARRAY,
                                               false);
    }

    public void reportNextRecordRange(TaskAttemptID taskid,
        SortedRanges.Range range, JvmContext jvmContext) throws IOException {
      LOG.info("Task " + taskid + " reported next record range " + range);
    }

    @Override
    public void updatePrivateDistributedCacheSizes(
        org.apache.hadoop.mapreduce.JobID jobId, long[] sizes) {
      // NOTHING
    }
  }

  private ClassLoader makeClassLoader(JobConf conf,
                                      File workDir) throws IOException {
    List<String> classPaths = new ArrayList<String>();
    // Add the job jar's class files (includes lib/* and classes/*)
    String jar = conf.getJar();
    if (jar != null) {
      TaskRunner.appendJobJarClasspaths(conf.getJar(), classPaths);
    }
    // Add the workdir, too.
    classPaths.add(workDir.toString());
    // Note: TaskRunner.run() does more, including DistributedCache files.

    // Convert to URLs
    URL[] urls = new URL[classPaths.size()];
    for (int i = 0; i < classPaths.size(); ++i) {
      urls[i] = new File(classPaths.get(i)).toURI().toURL();
    }
    return new URLClassLoader(urls);
  }
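  // IsolationRunner can only find the files it needs if the original job kept
  // them around. A sketch of how a job would opt in before submission (the
  // JobConf setter below is equivalent to setting keep.failed.task.files=true
  // in the job's configuration; "myJobConf" is a hypothetical name):
  //
  //   JobConf myJobConf = new JobConf();
  //   myJobConf.setKeepFailedTaskFiles(true);
  //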
  /**
   * Re-run the map task described by the given job.xml, reporting progress
   * to a {@link FakeUmbilical}.
   *
   * @return true if the task ran to completion
   */
  boolean run(String[] args)
      throws ClassNotFoundException, IOException, InterruptedException {
    if (args.length < 1) {
      System.out.println("Usage: IsolationRunner <path>/job.xml " +
          "<optional-user-name>");
      return false;
    }
    File jobFilename = new File(args[0]);
    if (!jobFilename.exists() || !jobFilename.isFile()) {
      System.out.println(jobFilename + " is not a valid job file.");
      return false;
    }
    String user;
    if (args.length > 1) {
      user = args[1];
    } else {
      user = UserGroupInformation.getCurrentUser().getShortUserName();
    }
    JobConf conf = new JobConf(new Path(jobFilename.toString()));
    conf.setUser(user);
    TaskAttemptID taskId = TaskAttemptID.forName(conf.get("mapred.task.id"));
    if (taskId == null) {
      System.out.println("mapred.task.id not found in configuration;" +
          " job.xml is not a task config");
      return false;
    }
    boolean isMap = conf.getBoolean("mapred.task.is.map", true);
    if (!isMap) {
      System.out.println("Only map tasks are supported.");
      return false;
    }
    int partition = conf.getInt("mapred.task.partition", 0);

    // setup the local and user working directories
    FileSystem local = FileSystem.getLocal(conf);
    LocalDirAllocator lDirAlloc = new LocalDirAllocator("mapred.local.dir");
    Path workDirName;
    boolean workDirExists = lDirAlloc.ifExists(MRConstants.WORKDIR, conf);
    if (workDirExists) {
      workDirName = TaskRunner.formWorkDir(lDirAlloc, conf);
    } else {
      workDirName = lDirAlloc.getLocalPathForWrite(MRConstants.WORKDIR, conf);
    }

    local.setWorkingDirectory(new Path(workDirName.toString()));
    FileSystem.get(conf).setWorkingDirectory(conf.getWorkingDirectory());

    // set up a classloader with the right classpath
    ClassLoader classLoader =
        makeClassLoader(conf, new File(workDirName.toString()));
    Thread.currentThread().setContextClassLoader(classLoader);
    conf.setClassLoader(classLoader);

    // split.dta file is used only by IsolationRunner. The file can now be in
    // any of the configured local disks, so use LocalDirAllocator to find out
    // where it is.
    Path localMetaSplit =
        new LocalDirAllocator("mapred.local.dir").getLocalPathToRead(
            TaskTracker.getLocalSplitFile(conf.getUser(), taskId.getJobID()
                .toString(), taskId.toString()), conf);
    DataInputStream splitFile = FileSystem.getLocal(conf).open(localMetaSplit);
    TaskSplitIndex splitIndex = new TaskSplitIndex();
    splitIndex.readFields(splitFile);
    splitFile.close();

    // Re-create the map task and run it against the recovered split metadata.
    Task task =
        new MapTask(jobFilename.toString(), taskId, partition, splitIndex, 1);
    task.setConf(conf);
    task.run(conf, new FakeUmbilical());
    return true;
  }

  /**
   * Run a single task.
   *
   * @param args the first argument is the path to the task's job.xml; the
   *             optional second argument is the user to run the task as
   */
  public static void main(String[] args)
      throws ClassNotFoundException, IOException, InterruptedException {
    if (!new IsolationRunner().run(args)) {
      System.exit(1);
    }
  }
}