/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred; import java.io.IOException; import java.net.URL; import java.net.URLDecoder; import java.util.Enumeration; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.filecache.DistributedCache; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.*; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.mapred.lib.IdentityMapper; import org.apache.hadoop.mapred.lib.IdentityReducer; import org.apache.hadoop.mapred.lib.HashPartitioner; import org.apache.hadoop.mapred.lib.KeyFieldBasedComparator; import org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ResourceCalculatorPlugin; import org.apache.hadoop.util.Tool; /** * A map/reduce job configuration. * * <p><code>JobConf</code> is the primary interface for a user to describe a * map-reduce job to the Hadoop framework for execution. 
The framework tries to
 * faithfully execute the job as-is described by <code>JobConf</code>, however:
 * <ol>
 *   <li>
 *   Some configuration parameters might have been marked as
 *   <a href="{@docRoot}/org/apache/hadoop/conf/Configuration.html#FinalParams">
 *   final</a> by administrators and hence cannot be altered.
 *   </li>
 *   <li>
 *   While some job parameters are straight-forward to set
 *   (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly
 *   with the rest of the framework and/or job-configuration and are relatively
 *   more complex for the user to control finely
 *   (e.g. {@link #setNumMapTasks(int)}).
 *   </li>
 * </ol></p>
 *
 * <p><code>JobConf</code> typically specifies the {@link Mapper}, combiner
 * (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and
 * {@link OutputFormat} implementations to be used etc.
 *
 * <p>Optionally <code>JobConf</code> is used to specify other advanced facets
 * of the job such as <code>Comparator</code>s to be used, files to be put in
 * the {@link DistributedCache}, whether or not intermediate and/or job outputs
 * are to be compressed (and how), debuggability via user-provided scripts
 * ( {@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}),
 * for doing post-processing on task logs, task's stdout, stderr, syslog.
* and etc.</p>
 *
 * <p>Here is an example on how to configure a job via <code>JobConf</code>:</p>
 * <p><blockquote><pre>
 *     // Create a new JobConf
 *     JobConf job = new JobConf(new Configuration(), MyJob.class);
 *
 *     // Specify various job-specific parameters
 *     job.setJobName("myjob");
 *
 *     FileInputFormat.setInputPaths(job, new Path("in"));
 *     FileOutputFormat.setOutputPath(job, new Path("out"));
 *
 *     job.setMapperClass(MyJob.MyMapper.class);
 *     job.setCombinerClass(MyJob.MyReducer.class);
 *     job.setReducerClass(MyJob.MyReducer.class);
 *
 *     job.setInputFormat(SequenceFileInputFormat.class);
 *     job.setOutputFormat(SequenceFileOutputFormat.class);
 * </pre></blockquote></p>
 *
 * @see JobClient
 * @see ClusterStatus
 * @see Tool
 * @see DistributedCache
 * @deprecated Use {@link Configuration} instead
 */
@Deprecated
public class JobConf extends Configuration {

  private static final Log LOG = LogFactory.getLog(JobConf.class);

  // Internal flag: when set, the job may be declared finished once the
  // reduces complete (used by the framework, not a public API).
  private static final String MAPRED_JOB_FINISH_WHEN_REDUCES_DONE =
    "mapred.job.finish.when.reduces.done";

  // Stack the map-reduce resource files on top of the core defaults so that
  // every JobConf (and any Configuration created afterwards) sees them.
  static{
    Configuration.addDefaultResource("mapred-default.xml");
    Configuration.addDefaultResource("mapred-site.xml");
  }

  /**
   * @deprecated Use {@link #MAPRED_JOB_MAP_MEMORY_MB_PROPERTY} and
   * {@link #MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY}
   */
  @Deprecated
  public static final String MAPRED_TASK_MAXVMEM_PROPERTY =
    "mapred.task.maxvmem";

  /**
   * @deprecated
   */
  @Deprecated
  public static final String UPPER_LIMIT_ON_TASK_VMEM_PROPERTY =
    "mapred.task.limit.maxvmem";

  /**
   * @deprecated
   */
  @Deprecated
  public static final String MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY =
    "mapred.task.default.maxvmem";

  /**
   * @deprecated
   */
  @Deprecated
  public static final String MAPRED_TASK_MAXPMEM_PROPERTY =
    "mapred.task.maxpmem";

  // Key for the per-job cap on task failures tolerated from a single tracker.
  public static final String MAPRED_MAX_TRACKER_FAILURES_PROPERTY =
    "mapred.max.tracker.failures";

  /**
   * A value which if set for memory related configuration options,
   * indicates that the options are turned off.
   */
  public static final long DISABLED_MEMORY_LIMIT =
    ResourceCalculatorPlugin.UNAVAILABLE;

  /**
   * Name of the queue to which jobs will be submitted, if no queue
   * name is mentioned.
   */
  public static final String DEFAULT_QUEUE_NAME = "default";

  // Key under which the source of the submitted query is recorded
  // (Hive integration).
  public static final String JOB_SOURCE_CONF = "hive.query.source";

  static final String MAPRED_JOB_MAP_MEMORY_MB_PROPERTY =
    "mapred.job.map.memory.mb";

  static final String MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY =
    "mapred.job.reduce.memory.mb";

  /**
   * Configuration key to set additional java options for the child map and
   * reduce tasks. Users should not set these options, these are supposed to be
   * set on the server side by the administrator. These are appended to the
   * options specified by mapred.child.java.opts. These options should be set
   * to ensure that certain JVM options are set even if users specify
   * mapred.child.java.opts incorrectly.
   */
  public static final String MAPRED_ADMIN_TASK_JAVA_OPTS =
    "mapred.admin.child.java.opts";

  /**
   * Configuration key to set the java command line options for the child
   * map and reduce tasks.
   *
   * Java opts for the task tracker child processes.
   * The following symbol, if present, will be interpolated: @taskid@.
   * It is replaced by current TaskID. Any other occurrences of '@' will go
   * unchanged.
   * For example, to enable verbose gc logging to a file named for the taskid in
   * /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
   *         -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
   *
   * The configuration variable {@link #MAPRED_TASK_ULIMIT} can be used to
   * control the maximum virtual memory of the child processes.
   *
   * The configuration variable {@link #MAPRED_TASK_ENV} can be used to pass
   * other environment variables to the child processes.
*
   * @deprecated Use {@link #MAPRED_MAP_TASK_JAVA_OPTS} or
   *                 {@link #MAPRED_REDUCE_TASK_JAVA_OPTS}
   */
  @Deprecated
  public static final String MAPRED_TASK_JAVA_OPTS = "mapred.child.java.opts";

  /**
   * Configuration key to set the java command line options for the map tasks.
   *
   * Java opts for the task tracker child map processes.
   * The following symbol, if present, will be interpolated: @taskid@.
   * It is replaced by current TaskID. Any other occurrences of '@' will go
   * unchanged.
   * For example, to enable verbose gc logging to a file named for the taskid in
   * /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
   *         -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
   *
   * The configuration variable {@link #MAPRED_MAP_TASK_ULIMIT} can be used to
   * control the maximum virtual memory of the map processes.
   *
   * The configuration variable {@link #MAPRED_MAP_TASK_ENV} can be used to pass
   * other environment variables to the map processes.
   */
  public static final String MAPRED_MAP_TASK_JAVA_OPTS =
    "mapred.map.child.java.opts";

  /**
   * Configuration key to set the java command line options for the reduce tasks.
   *
   * Java opts for the task tracker child reduce processes.
   * The following symbol, if present, will be interpolated: @taskid@.
   * It is replaced by current TaskID. Any other occurrences of '@' will go
   * unchanged.
   * For example, to enable verbose gc logging to a file named for the taskid in
   * /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
   *         -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
   *
   * The configuration variable {@link #MAPRED_REDUCE_TASK_ULIMIT} can be used
   * to control the maximum virtual memory of the reduce processes.
   *
   * The configuration variable {@link #MAPRED_REDUCE_TASK_ENV} can be used to
   * pass process environment variables to the reduce processes.
   */
  public static final String MAPRED_REDUCE_TASK_JAVA_OPTS =
    "mapred.reduce.child.java.opts";

  /**
   * Configuration key to set the java command line options for the job setup
   * tasks.
   *
   * Java opts for the task tracker child job setup processes.
   * The following symbol, if present, will be interpolated: @taskid@.
   * It is replaced by current TaskID. Any other occurrences of '@' will go
   * unchanged.
   * For example, to enable verbose gc logging to a file named for the taskid in
   * /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
   *         -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
   */
  public static final String MAPRED_JOB_SETUP_TASK_JAVA_OPTS =
    "mapred.jobsetup.child.java.opts";

  /**
   * Configuration key to set the java command line options for the job
   * cleanup tasks.
   *
   * Java opts for the task tracker child job cleanup processes.
   * The following symbol, if present, will be interpolated: @taskid@.
   * It is replaced by current TaskID. Any other occurrences of '@' will go
   * unchanged.
   * For example, to enable verbose gc logging to a file named for the taskid in
   * /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
   *         -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
   */
  public static final String MAPRED_JOB_CLEANUP_TASK_JAVA_OPTS =
    "mapred.jobcleanup.child.java.opts";

  /**
   * Configuration key to set the java command line options for the task
   * cleanup tasks.
   *
   * Java opts for the task tracker child task cleanup processes.
   * The following symbol, if present, will be interpolated: @taskid@.
   * It is replaced by current TaskID. Any other occurrences of '@' will go
   * unchanged.
   * For example, to enable verbose gc logging to a file named for the taskid in
   * /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
   *         -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
   */
  public static final String MAPRED_TASK_CLEANUP_TASK_JAVA_OPTS =
    "mapred.taskcleanup.child.java.opts";

  // JVM options applied when no task-specific java opts are configured.
  public static final String DEFAULT_MAPRED_TASK_JAVA_OPTS = "-Xmx200m";

  /**
   * Configuration key to set the maximum virtual memory available to the child
   * map and reduce tasks (in kilo-bytes).
*
   * Note: This must be greater than or equal to the -Xmx passed to the JavaVM
   * via {@link #MAPRED_TASK_JAVA_OPTS}, else the VM might not start.
   *
   * @deprecated Use {@link #MAPRED_MAP_TASK_ULIMIT} or
   *                 {@link #MAPRED_REDUCE_TASK_ULIMIT}
   */
  @Deprecated
  public static final String MAPRED_TASK_ULIMIT = "mapred.child.ulimit";

  /**
   * Configuration key to set the maximum virtual memory available to the
   * map tasks (in kilo-bytes).
   *
   * Note: This must be greater than or equal to the -Xmx passed to the JavaVM
   * via {@link #MAPRED_MAP_TASK_JAVA_OPTS}, else the VM might not start.
   */
  public static final String MAPRED_MAP_TASK_ULIMIT = "mapred.map.child.ulimit";

  /**
   * Configuration key to set the maximum virtual memory available to the
   * reduce tasks (in kilo-bytes).
   *
   * Note: This must be greater than or equal to the -Xmx passed to the JavaVM
   * via {@link #MAPRED_REDUCE_TASK_JAVA_OPTS}, else the VM might not start.
   */
  public static final String MAPRED_REDUCE_TASK_ULIMIT =
    "mapred.reduce.child.ulimit";

  /**
   * Configuration key to set the maximum memory for a task (in mega-bytes).
   * Jobs requesting more will be killed.
   */
  public static final String MAX_TASK_MEMORY_MB =
    "mapred.child.java.max.memory.mb";

  public static final int MAX_TASK_MEMORY_MB_DEFAULT = 4096;

  /**
   * Configuration key to set the environment of the child map/reduce tasks.
   *
   * The format of the value is <code>k1=v1,k2=v2</code>. Further it can
   * reference existing environment variables via <code>$key</code>.
   *
   * Example:
   * <ul>
   *   <li> A=foo - This will set the env variable A to foo. </li>
   *   <li> B=$X:c This is inherit tasktracker's X env variable. </li>
   * </ul>
   *
   * @deprecated Use {@link #MAPRED_MAP_TASK_ENV} or
   *                 {@link #MAPRED_REDUCE_TASK_ENV}
   */
  @Deprecated
  public static final String MAPRED_TASK_ENV = "mapred.child.env";

  /**
   * Configuration key to set the environment of the map tasks.
   * (Note: this key configures the child environment, not memory limits.)
   *
   * The format of the value is <code>k1=v1,k2=v2</code>. Further it can
   * reference existing environment variables via <code>$key</code>.
   *
   * Example:
   * <ul>
   *   <li> A=foo - This will set the env variable A to foo. </li>
   *   <li> B=$X:c This is inherit tasktracker's X env variable. </li>
   * </ul>
   */
  public static final String MAPRED_MAP_TASK_ENV = "mapred.map.child.env";

  /**
   * Configuration key to set the environment of the reduce tasks.
   * (Note: this key configures the child environment, not memory limits.)
   *
   * The format of the value is <code>k1=v1,k2=v2</code>. Further it can
   * reference existing environment variables via <code>$key</code>.
   *
   * Example:
   * <ul>
   *   <li> A=foo - This will set the env variable A to foo. </li>
   *   <li> B=$X:c This is inherit tasktracker's X env variable. </li>
   * </ul>
   */
  public static final String MAPRED_REDUCE_TASK_ENV = "mapred.reduce.child.env";

  /**
   * Construct a map/reduce job configuration.
   */
  public JobConf() {
    checkAndWarnDeprecation();
  }

  /**
   * Construct a map/reduce job configuration.
   *
   * @param exampleClass a class whose containing jar is used as the job's jar.
   */
  public JobConf(Class exampleClass) {
    setJarByClass(exampleClass);
    checkAndWarnDeprecation();
  }

  /**
   * Construct a map/reduce job configuration.
   *
   * @param conf a Configuration whose settings will be inherited.
   */
  public JobConf(Configuration conf) {
    super(conf);
    checkAndWarnDeprecation();
  }

  /** Construct a map/reduce job configuration.
   *
   * @param conf a Configuration whose settings will be inherited.
   * @param exampleClass a class whose containing jar is used as the job's jar.
   */
  public JobConf(Configuration conf, Class exampleClass) {
    this(conf);
    setJarByClass(exampleClass);
  }

  /** Construct a map/reduce configuration.
   *
   * @param config a Configuration-format XML job description file.
   */
  public JobConf(String config) {
    this(new Path(config));
  }

  /** Construct a map/reduce configuration.
   *
   * @param config a Configuration-format XML job description file.
*/ public JobConf(Path config) { super(); addResource(config); checkAndWarnDeprecation(); } /** Construct a map/reduce configuration. * * @param config a Configuration-format XML job description file. */ public JobConf(Path config, Path bigParamPath, FileSystem localFs, int threshold) { super(); this.bigParamPath = bigParamPath; this.localFs = localFs; this.bigParamThreshold = threshold; addResource(config); checkAndWarnDeprecation(); } /** A new map/reduce configuration where the behavior of reading from the * default resources can be turned off. * <p/> * If the parameter {@code loadDefaults} is false, the new instance * will not load resources from the default files. * * @param loadDefaults specifies whether to load from the default files */ public JobConf(boolean loadDefaults) { super(loadDefaults); checkAndWarnDeprecation(); } /** * Get the user jar for the map-reduce job. * * @return the user jar for the map-reduce job. */ public String getJar() { return get("mapred.jar"); } /** * Set the user jar for the map-reduce job. * * @param jar the user jar for the map-reduce job. */ public void setJar(String jar) { set("mapred.jar", jar); } /** * Set the job's jar file by finding an example class location. * * @param cls the example class. */ public void setJarByClass(Class cls) { String jar = findContainingJar(cls); if (jar != null) { setJar(jar); } } public String[] getLocalDirs() throws IOException { return getStrings("mapred.local.dir"); } public String getLogDir() { return get("mapred.tasktracker.log.dir"); } /** * Use MRAsyncDiskService.moveAndDeleteAllVolumes instead. 
* @see org.apache.hadoop.util.MRAsyncDiskService#cleanupAllVolumes() */ @Deprecated public void deleteLocalFiles() throws IOException { String[] localDirs = getLocalDirs(); for (int i = 0; i < localDirs.length; i++) { FileSystem.getLocal(this).delete(new Path(localDirs[i])); } } public void deleteLocalFiles(String subdir) throws IOException { String[] localDirs = getLocalDirs(); for (int i = 0; i < localDirs.length; i++) { FileSystem.getLocal(this).delete(new Path(localDirs[i], subdir)); } } /** * Constructs a local file name. Files are distributed among configured * local directories. */ public Path getLocalPath(String pathString) throws IOException { return getLocalPath("mapred.local.dir", pathString); } /** * Get the reported username for this job. * * @return the username */ public String getUser() { return get("user.name"); } /** * Set the reported username for this job. * * @param user the username for this job. */ public void setUser(String user) { set("user.name", user); } /** * Set whether the framework should keep the intermediate files for * failed tasks. * * @param keep <code>true</code> if framework should keep the intermediate files * for failed tasks, <code>false</code> otherwise. * */ public void setKeepFailedTaskFiles(boolean keep) { setBoolean("keep.failed.task.files", keep); } /** * Should the temporary files for failed tasks be kept? * * @return should the files be kept? */ public boolean getKeepFailedTaskFiles() { return getBoolean("keep.failed.task.files", false); } /** * Set a regular expression for task names that should be kept. * The regular expression ".*_m_000123_0" would keep the files * for the first instance of map 123 that ran. * * @param pattern the java.util.regex.Pattern to match against the * task names. */ public void setKeepTaskFilesPattern(String pattern) { set("keep.task.files.pattern", pattern); } /** * Get the regular expression that is matched against the task names * to see if we need to keep the files. 
* * @return the pattern as a string, if it was set, othewise null. */ public String getKeepTaskFilesPattern() { return get("keep.task.files.pattern"); } /** * Set the current working directory for the default file system. * * @param dir the new current working directory. */ public void setWorkingDirectory(Path dir) { if (!dir.isAbsolute()) { FileSystem.LogForCollect .info("set job working directory to non absolute path: " + dir + " working directory: " + getWorkingDirectory()); } dir = new Path(getWorkingDirectory(), dir); set("mapred.working.dir", dir.toString()); } /** * Get the current working directory for the default file system. * * @return the directory name. */ public Path getWorkingDirectory() { String name = get("mapred.working.dir"); if (name != null) { return new Path(name); } else { try { Path dir = FileSystem.get(this).getWorkingDirectory(); set("mapred.working.dir", dir.toString()); return dir; } catch (IOException e) { throw new RuntimeException(e); } } } /** * Sets the number of tasks that a spawned task JVM should run * before it exits * @param numTasks the number of tasks to execute; defaults to 1; * -1 signifies no limit */ public void setNumTasksToExecutePerJvm(int numTasks) { setInt("mapred.job.reuse.jvm.num.tasks", numTasks); } /** * Get the number of tasks that a spawned JVM should execute */ public int getNumTasksToExecutePerJvm() { return getInt("mapred.job.reuse.jvm.num.tasks", 1); } /** * Get the {@link InputFormat} implementation for the map-reduce job, * defaults to {@link TextInputFormat} if not specified explicity. * * @return the {@link InputFormat} implementation for the map-reduce job. */ public InputFormat getInputFormat() { return ReflectionUtils.newInstance(getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class), this); } /** * Set the {@link InputFormat} implementation for the map-reduce job. * * @param theClass the {@link InputFormat} implementation for the map-reduce * job. 
*/ public void setInputFormat(Class<? extends InputFormat> theClass) { setClass("mapred.input.format.class", theClass, InputFormat.class); } /** * Get the {@link OutputFormat} implementation for the map-reduce job, * defaults to {@link TextOutputFormat} if not specified explicity. * * @return the {@link OutputFormat} implementation for the map-reduce job. */ public OutputFormat getOutputFormat() { return ReflectionUtils.newInstance(getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class), this); } /** * Get the {@link OutputCommitter} implementation for the map-reduce job, * defaults to {@link FileOutputCommitter} if not specified explicitly. * * @return the {@link OutputCommitter} implementation for the map-reduce job. */ public OutputCommitter getOutputCommitter() { return (OutputCommitter)ReflectionUtils.newInstance( getClass("mapred.output.committer.class", FileOutputCommitter.class, OutputCommitter.class), this); } /** * Set the {@link OutputCommitter} implementation for the map-reduce job. * * @param theClass the {@link OutputCommitter} implementation for the map-reduce * job. */ public void setOutputCommitter(Class<? extends OutputCommitter> theClass) { setClass("mapred.output.committer.class", theClass, OutputCommitter.class); } /** * Set the {@link OutputFormat} implementation for the map-reduce job. * * @param theClass the {@link OutputFormat} implementation for the map-reduce * job. */ public void setOutputFormat(Class<? extends OutputFormat> theClass) { setClass("mapred.output.format.class", theClass, OutputFormat.class); } /** * Should the map outputs be compressed before transfer? * Uses the SequenceFile compression. * * @param compress should the map outputs be compressed? */ public void setCompressMapOutput(boolean compress) { setBoolean("mapred.compress.map.output", compress); } /** * Are the outputs of the maps be compressed? 
* * @return <code>true</code> if the outputs of the maps are to be compressed, * <code>false</code> otherwise. */ public boolean getCompressMapOutput() { return getBoolean("mapred.compress.map.output", false); } /** * Set the given class as the {@link CompressionCodec} for the map outputs. * * @param codecClass the {@link CompressionCodec} class that will compress * the map outputs. */ public void setMapOutputCompressorClass(Class<? extends CompressionCodec> codecClass) { setCompressMapOutput(true); setClass("mapred.map.output.compression.codec", codecClass, CompressionCodec.class); } /** * Get the {@link CompressionCodec} for compressing the map outputs. * * @param defaultValue the {@link CompressionCodec} to return if not set * @return the {@link CompressionCodec} class that should be used to compress the * map outputs. * @throws IllegalArgumentException if the class was specified, but not found */ public Class<? extends CompressionCodec> getMapOutputCompressorClass(Class<? extends CompressionCodec> defaultValue) { Class<? extends CompressionCodec> codecClass = defaultValue; String name = get("mapred.map.output.compression.codec"); if (name != null) { try { codecClass = getClassByName(name).asSubclass(CompressionCodec.class); } catch (ClassNotFoundException e) { throw new IllegalArgumentException("Compression codec " + name + " was not found.", e); } } return codecClass; } /** * Get the key class for the map output data. If it is not set, use the * (final) output key class. This allows the map output key class to be * different than the final output key class. * * @return the map output key class. */ public Class<?> getMapOutputKeyClass() { Class<?> retv = getClass("mapred.mapoutput.key.class", null, Object.class); if (retv == null) { retv = getOutputKeyClass(); } return retv; } /** * Set the key class for the map output data. This allows the user to * specify the map output key class to be different than the final output * value class. 
* * @param theClass the map output key class. */ public void setMapOutputKeyClass(Class<?> theClass) { setClass("mapred.mapoutput.key.class", theClass, Object.class); } /** * Get the value class for the map output data. If it is not set, use the * (final) output value class This allows the map output value class to be * different than the final output value class. * * @return the map output value class. */ public Class<?> getMapOutputValueClass() { Class<?> retv = getClass("mapred.mapoutput.value.class", null, Object.class); if (retv == null) { retv = getOutputValueClass(); } return retv; } /** * Set the value class for the map output data. This allows the user to * specify the map output value class to be different than the final output * value class. * * @param theClass the map output value class. */ public void setMapOutputValueClass(Class<?> theClass) { setClass("mapred.mapoutput.value.class", theClass, Object.class); } /** * Get the key class for the job output data. * * @return the key class for the job output data. */ public Class<?> getOutputKeyClass() { return getClass("mapred.output.key.class", LongWritable.class, Object.class); } /** * Set the key class for the job output data. * * @param theClass the key class for the job output data. */ public void setOutputKeyClass(Class<?> theClass) { setClass("mapred.output.key.class", theClass, Object.class); } /** * Get the {@link RawComparator} comparator used to compare keys. * * @return the {@link RawComparator} comparator used to compare keys. */ public RawComparator getOutputKeyComparator() { Class<? extends RawComparator> theClass = getClass("mapred.output.key.comparator.class", null, RawComparator.class); if (theClass != null) return ReflectionUtils.newInstance(theClass, this); return WritableComparator.get(getMapOutputKeyClass().asSubclass(WritableComparable.class)); } /** * Set the {@link RawComparator} comparator used to compare keys. 
* * @param theClass the {@link RawComparator} comparator used to * compare keys. * @see #setOutputValueGroupingComparator(Class) */ public void setOutputKeyComparatorClass(Class<? extends RawComparator> theClass) { setClass("mapred.output.key.comparator.class", theClass, RawComparator.class); } /** * Set the {@link KeyFieldBasedComparator} options used to compare keys. * * @param keySpec the key specification of the form -k pos1[,pos2], where, * pos is of the form f[.c][opts], where f is the number * of the key field to use, and c is the number of the first character from * the beginning of the field. Fields and character posns are numbered * starting with 1; a character position of zero in pos2 indicates the * field's last character. If '.c' is omitted from pos1, it defaults to 1 * (the beginning of the field); if omitted from pos2, it defaults to 0 * (the end of the field). opts are ordering options. The supported options * are: * -n, (Sort numerically) * -r, (Reverse the result of comparison) */ public void setKeyFieldComparatorOptions(String keySpec) { setOutputKeyComparatorClass(KeyFieldBasedComparator.class); set("mapred.text.key.comparator.options", keySpec); } /** * Get the {@link KeyFieldBasedComparator} options */ public String getKeyFieldComparatorOption() { return get("mapred.text.key.comparator.options"); } /** * Set the {@link KeyFieldBasedPartitioner} options used for * {@link Partitioner} * * @param keySpec the key specification of the form -k pos1[,pos2], where, * pos is of the form f[.c][opts], where f is the number * of the key field to use, and c is the number of the first character from * the beginning of the field. Fields and character posns are numbered * starting with 1; a character position of zero in pos2 indicates the * field's last character. If '.c' is omitted from pos1, it defaults to 1 * (the beginning of the field); if omitted from pos2, it defaults to 0 * (the end of the field). 
*/ public void setKeyFieldPartitionerOptions(String keySpec) { setPartitionerClass(KeyFieldBasedPartitioner.class); set("mapred.text.key.partitioner.options", keySpec); } /** * Get the {@link KeyFieldBasedPartitioner} options */ public String getKeyFieldPartitionerOption() { return get("mapred.text.key.partitioner.options"); } /** * Get the user defined {@link WritableComparable} comparator for * grouping keys of inputs to the reduce. * * @return comparator set by the user for grouping values. * @see #setOutputValueGroupingComparator(Class) for details. */ public RawComparator getOutputValueGroupingComparator() { Class<? extends RawComparator> theClass = getClass("mapred.output.value.groupfn.class", null, RawComparator.class); if (theClass == null) { return getOutputKeyComparator(); } return ReflectionUtils.newInstance(theClass, this); } /** * Set the user defined {@link RawComparator} comparator for * grouping keys in the input to the reduce. * * <p>This comparator should be provided if the equivalence rules for keys * for sorting the intermediates are different from those for grouping keys * before each call to * {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p> * * <p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed * in a single call to the reduce function if K1 and K2 compare as equal.</p> * * <p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control * how keys are sorted, this can be used in conjunction to simulate * <i>secondary sort on values</i>.</p> * * <p><i>Note</i>: This is not a guarantee of the reduce sort being * <i>stable</i> in any sense. (In any case, with the order of available * map-outputs to the reduce being non-deterministic, it wouldn't make * that much sense.)</p> * * @param theClass the comparator class to be used for grouping keys. * It should implement <code>RawComparator</code>. 
* @see #setOutputKeyComparatorClass(Class)
   */
  public void setOutputValueGroupingComparator(
      Class<? extends RawComparator> theClass) {
    setClass("mapred.output.value.groupfn.class",
             theClass, RawComparator.class);
  }

  /**
   * Should the framework use the new context-object code for running
   * the mapper?
   * @return true, if the new api should be used
   */
  public boolean getUseNewMapper() {
    return getBoolean("mapred.mapper.new-api", false);
  }

  /**
   * Set whether the framework should use the new api for the mapper.
   * This is the default for jobs submitted with the new Job api.
   * @param flag true, if the new api should be used
   */
  public void setUseNewMapper(boolean flag) {
    setBoolean("mapred.mapper.new-api", flag);
  }

  /**
   * Should the framework use the new context-object code for running
   * the reducer?
   * @return true, if the new api should be used
   */
  public boolean getUseNewReducer() {
    return getBoolean("mapred.reducer.new-api", false);
  }

  /**
   * Set whether the framework should use the new api for the reducer.
   * This is the default for jobs submitted with the new Job api.
   * @param flag true, if the new api should be used
   */
  public void setUseNewReducer(boolean flag) {
    setBoolean("mapred.reducer.new-api", flag);
  }

  /**
   * Get the value class for job outputs.
   *
   * @return the value class for job outputs.
   */
  public Class<?> getOutputValueClass() {
    return getClass("mapred.output.value.class", Text.class, Object.class);
  }

  /**
   * Set the value class for job outputs.
   *
   * @param theClass the value class for job outputs.
   */
  public void setOutputValueClass(Class<?> theClass) {
    setClass("mapred.output.value.class", theClass, Object.class);
  }

  /**
   * Get the {@link Mapper} class for the job.
   *
   * @return the {@link Mapper} class for the job.
   */
  public Class<? extends Mapper> getMapperClass() {
    return getClass("mapred.mapper.class", IdentityMapper.class, Mapper.class);
  }

  /**
   * Set the {@link Mapper} class for the job.
   *
   * @param theClass the {@link Mapper} class for the job.
   */
  public void setMapperClass(Class<? extends Mapper> theClass) {
    setClass("mapred.mapper.class", theClass, Mapper.class);
  }

  /**
   * Get the {@link MapRunnable} class for the job.
   *
   * @return the {@link MapRunnable} class for the job.
   */
  public Class<? extends MapRunnable> getMapRunnerClass() {
    return getClass("mapred.map.runner.class",
                    MapRunner.class, MapRunnable.class);
  }

  /**
   * Expert: Set the {@link MapRunnable} class for the job.
   *
   * Typically used to exert greater control on {@link Mapper}s.
   *
   * @param theClass the {@link MapRunnable} class for the job.
   */
  public void setMapRunnerClass(Class<? extends MapRunnable> theClass) {
    setClass("mapred.map.runner.class", theClass, MapRunnable.class);
  }

  /**
   * Get the {@link Partitioner} used to partition {@link Mapper}-outputs
   * to be sent to the {@link Reducer}s.
   *
   * @return the {@link Partitioner} used to partition map-outputs.
   */
  public Class<? extends Partitioner> getPartitionerClass() {
    return getClass("mapred.partitioner.class",
                    HashPartitioner.class, Partitioner.class);
  }

  /**
   * Set the {@link Partitioner} class used to partition
   * {@link Mapper}-outputs to be sent to the {@link Reducer}s.
   *
   * @param theClass the {@link Partitioner} used to partition map-outputs.
   */
  public void setPartitionerClass(Class<? extends Partitioner> theClass) {
    setClass("mapred.partitioner.class", theClass, Partitioner.class);
  }

  /**
   * Get the {@link Reducer} class for the job.
   *
   * @return the {@link Reducer} class for the job.
   */
  public Class<? extends Reducer> getReducerClass() {
    return getClass("mapred.reducer.class",
                    IdentityReducer.class, Reducer.class);
  }

  /**
   * Set the {@link Reducer} class for the job.
   *
   * @param theClass the {@link Reducer} class for the job.
   */
  public void setReducerClass(Class<? extends Reducer> theClass) {
    setClass("mapred.reducer.class", theClass, Reducer.class);
  }

  /**
   * Get the user-defined <i>combiner</i> class used to combine map-outputs
   * before being sent to the reducers.
   * before being sent to the reducers. Typically the combiner is the same
   * as the {@link Reducer} for the job i.e. {@link #getReducerClass()}.
   *
   * @return the user-defined combiner class used to combine map-outputs,
   *         or <code>null</code> if no combiner has been configured.
   */
  public Class<? extends Reducer> getCombinerClass() {
    return getClass("mapred.combiner.class", null, Reducer.class);
  }

  /**
   * Set the user-defined <i>combiner</i> class used to combine map-outputs
   * before being sent to the reducers.
   *
   * <p>The combiner is an application-specified aggregation operation, which
   * can help cut down the amount of data transferred between the
   * {@link Mapper} and the {@link Reducer}, leading to better performance.</p>
   *
   * <p>The framework may invoke the combiner 0, 1, or multiple times, in both
   * the mapper and reducer tasks. In general, the combiner is called as the
   * sort/merge result is written to disk. The combiner must:
   * <ul>
   *   <li> be side-effect free</li>
   *   <li> have the same input and output key types and the same input and
   *        output value types</li>
   * </ul></p>
   *
   * <p>Typically the combiner is same as the <code>Reducer</code> for the
   * job i.e. {@link #setReducerClass(Class)}.</p>
   *
   * @param theClass the user-defined combiner class used to combine
   *                 map-outputs.
   */
  public void setCombinerClass(Class<? extends Reducer> theClass) {
    setClass("mapred.combiner.class", theClass, Reducer.class);
  }

  /**
   * Should speculative execution be used for this job?
   * Defaults to <code>true</code>.
   *
   * @return <code>true</code> if speculative execution be used for this job,
   *         <code>false</code> otherwise.
   */
  public boolean getSpeculativeExecution() {
    // Speculation is considered "on" if either phase has it enabled.
    return (getMapSpeculativeExecution() || getReduceSpeculativeExecution());
  }

  /**
   * Turn speculative execution on or off for this job.
   *
   * @param speculativeExecution <code>true</code> if speculative execution
   *                             should be turned on, else <code>false</code>.
   */
  public void setSpeculativeExecution(boolean speculativeExecution) {
    // Applies the same flag to both the map and the reduce phase.
    setMapSpeculativeExecution(speculativeExecution);
    setReduceSpeculativeExecution(speculativeExecution);
  }

  /**
   * Should speculative execution be used for this job for map tasks?
   * Defaults to <code>true</code>.
   *
   * @return <code>true</code> if speculative execution be
   *         used for this job for map tasks,
   *         <code>false</code> otherwise.
   */
  public boolean getMapSpeculativeExecution() {
    return getBoolean("mapred.map.tasks.speculative.execution", true);
  }

  /**
   * Turn speculative execution on or off for this job for map tasks.
   *
   * @param speculativeExecution <code>true</code> if speculative execution
   *                             should be turned on for map tasks,
   *                             else <code>false</code>.
   */
  public void setMapSpeculativeExecution(boolean speculativeExecution) {
    setBoolean("mapred.map.tasks.speculative.execution", speculativeExecution);
  }

  /**
   * Should speculative execution be used for this job for reduce tasks?
   * Defaults to <code>true</code>.
   *
   * @return <code>true</code> if speculative execution be used
   *         for reduce tasks for this job,
   *         <code>false</code> otherwise.
   */
  public boolean getReduceSpeculativeExecution() {
    return getBoolean("mapred.reduce.tasks.speculative.execution", true);
  }

  /**
   * Turn speculative execution on or off for this job for reduce tasks.
   *
   * @param speculativeExecution <code>true</code> if speculative execution
   *                             should be turned on for reduce tasks,
   *                             else <code>false</code>.
   */
  public void setReduceSpeculativeExecution(boolean speculativeExecution) {
    setBoolean("mapred.reduce.tasks.speculative.execution",
               speculativeExecution);
  }

  /**
   * Get time to wait before invoking speculative execution for maps.
   * Defaults to 60 * 1000 (presumably milliseconds — confirm against the
   * scheduler that consumes <code>mapred.speculative.map.lag</code>).
   */
  public long getMapSpeculativeLag() {
    return getLong("mapred.speculative.map.lag", 60 * 1000);
  }

  /**
   * Set time to wait before invoking speculative execution for maps.
   *
   * @param mapSpeculativeLag New value for speculative lag.
   */
  public void setMapSpeculativeLag(long mapSpeculativeLag) {
    set("mapred.speculative.map.lag", "" + mapSpeculativeLag);
  }

  /**
   * Get time to wait before invoking speculative execution for reduces.
   * Defaults to 60 * 1000 (presumably milliseconds — confirm against the
   * scheduler that consumes <code>mapred.speculative.reduce.lag</code>).
   */
  public long getReduceSpeculativeLag() {
    return getLong("mapred.speculative.reduce.lag", 60 * 1000);
  }

  /**
   * Set time to wait before invoking speculative execution for reduces.
   *
   * @param reduceSpeculativeLag New value for speculative lag.
   */
  public void setReduceSpeculativeLag(long reduceSpeculativeLag) {
    set("mapred.speculative.reduce.lag", "" + reduceSpeculativeLag);
  }

  /**
   * Set minimum projected task duration in seconds
   * before invoking speculative execution on mappers.
   *
   * @param mapSpeculativeDuration New value for speculative duration
   */
  public void setMapSpeculativeDuration(long mapSpeculativeDuration) {
    set("mapred.speculative.map.duration", "" + mapSpeculativeDuration);
  }

  /**
   * Set minimum projected task duration in seconds
   * before invoking speculative execution on reducers.
   *
   * @param reduceSpeculativeDuration New value for speculative duration
   */
  public void setReduceSpeculativeDuration(long reduceSpeculativeDuration) {
    set("mapred.speculative.reduce.duration", "" + reduceSpeculativeDuration);
  }

  /**
   * Get minimum projected task duration in seconds
   * before invoking speculative execution on mappers.
   *
   * Disabled by default (0).
   */
  public long getMapSpeculativeDuration() {
    return getLong("mapred.speculative.map.duration", 0L);
  }

  /**
   * Get minimum projected task duration in seconds
   * before invoking speculative execution on reducers.
   *
   * Disabled by default (0).
   */
  public long getReduceSpeculativeDuration() {
    return getLong("mapred.speculative.reduce.duration", 0L);
  }

  /**
   * Get the configured number of <i>map</i> tasks for this job.
   * Defaults to <code>1</code>.
   *
   * @return the number of map tasks for this job.
   */
  public int getNumMapTasks() {
    return getInt("mapred.map.tasks", 1);
  }

  /**
   * Set the number of map tasks for this job.
   *
   * <p><i>Note</i>: This is only a <i>hint</i> to the framework. The actual
   * number of spawned map tasks depends on the number of {@link InputSplit}s
   * generated by the job's {@link InputFormat#getSplits(JobConf, int)}.
   *
   * A custom {@link InputFormat} is typically used to accurately control
   * the number of map tasks for the job.</p>
   *
   * <h4 id="NoOfMaps">How many maps?</h4>
   *
   * <p>The number of maps is usually driven by the total size of the inputs
   * i.e. total number of blocks of the input files.</p>
   *
   * <p>The right level of parallelism for maps seems to be around 10-100 maps
   * per-node, although it has been set up to 300 or so for very cpu-light map
   * tasks. Task setup takes awhile, so it is best if the maps take at least a
   * minute to execute.</p>
   *
   * <p>The default behavior of file-based {@link InputFormat}s is to split the
   * input into <i>logical</i> {@link InputSplit}s based on the total size, in
   * bytes, of input files. However, the {@link FileSystem} blocksize of the
   * input files is treated as an upper bound for input splits. A lower bound
   * on the split size can be set via
   * <a href="{@docRoot}/../mapred-default.html#mapred.min.split.size">
   * mapred.min.split.size</a>.</p>
   *
   * <p>Thus, if you expect 10TB of input data and have a blocksize of 128MB,
   * you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is
   * used to set it even higher.</p>
   *
   * @param n the number of map tasks for this job.
   * @see InputFormat#getSplits(JobConf, int)
   * @see FileInputFormat
   * @see FileSystem#getDefaultBlockSize()
   * @see FileStatus#getBlockSize()
   */
  public void setNumMapTasks(int n) {
    setInt("mapred.map.tasks", n);
  }

  /**
   * Get the configured number of reduce tasks for this job. Defaults to
   * <code>1</code>.
   *
   * @return the number of reduce tasks for this job.
   */
  public int getNumReduceTasks() {
    return getInt("mapred.reduce.tasks", 1);
  }

  /**
   * Set the requisite number of reduce tasks for this job.
   *
   * <h4 id="NoOfReduces">How many reduces?</h4>
   *
   * <p>The right number of reduces seems to be <code>0.95</code> or
   * <code>1.75</code> multiplied by (&lt;<i>no. of nodes</i>&gt; *
   * <a href="{@docRoot}/../mapred-default.html#mapred.tasktracker.reduce.tasks.maximum">
   * mapred.tasktracker.reduce.tasks.maximum</a>).
   * </p>
   *
   * <p>With <code>0.95</code> all of the reduces can launch immediately and
   * start transferring map outputs as the maps finish. With <code>1.75</code>
   * the faster nodes will finish their first round of reduces and launch a
   * second wave of reduces doing a much better job of load balancing.</p>
   *
   * <p>Increasing the number of reduces increases the framework overhead, but
   * increases load balancing and lowers the cost of failures.</p>
   *
   * <p>The scaling factors above are slightly less than whole numbers to
   * reserve a few reduce slots in the framework for speculative-tasks, failures
   * etc.</p>
   *
   * <h4 id="ReducerNone">Reducer NONE</h4>
   *
   * <p>It is legal to set the number of reduce-tasks to <code>zero</code>.</p>
   *
   * <p>In this case the output of the map-tasks directly go to distributed
   * file-system, to the path set by
   * {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the
   * framework doesn't sort the map-outputs before writing it out to HDFS.</p>
   *
   * @param n the number of reduce tasks for this job.
   */
  public void setNumReduceTasks(int n) {
    setInt("mapred.reduce.tasks", n);
  }

  /**
   * Specify whether job-setup and job-cleanup is needed for the job.
   *
   * @param needed If <code>true</code>, job-setup and job-cleanup will be
   *               considered from {@link OutputCommitter} else ignored.
   */
  public void setJobSetupCleanupNeeded(boolean needed) {
    setBoolean("mapred.committer.job.setup.cleanup.needed", needed);
  }

  /**
   * Get whether job-setup and job-cleanup is needed for the job.
   * Defaults to <code>true</code>.
   *
   * @return boolean
   */
  public boolean getJobSetupCleanupNeeded() {
    return getBoolean("mapred.committer.job.setup.cleanup.needed", true);
  }

  /**
   * Get whether job should finish when reduces are done. The unfinished
   * mappers will be killed.
   *
   * @return true If the job finish when reduces are done
   */
  public boolean getJobFinishWhenReducesDone() {
    return getBoolean(MAPRED_JOB_FINISH_WHEN_REDUCES_DONE, false);
  }

  /**
   * Specify whether task-cleanup is needed for the job.
   *
   * @param needed If <code>true</code>, task-cleanup will be considered
   *               from {@link OutputCommitter} else ignored.
   */
  public void setTaskCleanupNeeded(boolean needed) {
    setBoolean("mapred.committer.task.cleanup.needed", needed);
  }

  /**
   * Get whether task-cleanup is needed for the job.
   * The purpose of the task-cleanup task is to perform OutputCommitter.abort().
   * If there is no need to run this method, we can disable task-cleanup to
   * improve latency.
   *
   * @return boolean
   */
  public boolean getTaskCleanupNeeded() {
    return getBoolean("mapred.committer.task.cleanup.needed", true);
  }

  /**
   * Get the configured number of maximum attempts that will be made to run a
   * map task, as specified by the <code>mapred.map.max.attempts</code>
   * property. If this property is not already set, the default is 4 attempts.
   *
   * @return the max number of attempts per map task.
   */
  public int getMaxMapAttempts() {
    return getInt("mapred.map.max.attempts", 4);
  }

  /**
   * Expert: Set the number of maximum attempts that will be made to run a
   * map task.
   *
   * @param n the number of attempts per map task.
   */
  public void setMaxMapAttempts(int n) {
    setInt("mapred.map.max.attempts", n);
  }

  /**
   * Get the configured number of maximum attempts that will be made to run a
   * reduce task, as specified by the <code>mapred.reduce.max.attempts</code>
   * property. If this property is not already set, the default is 4 attempts.
   *
   * @return the max number of attempts per reduce task.
   */
  public int getMaxReduceAttempts() {
    return getInt("mapred.reduce.max.attempts", 4);
  }

  /**
   * Expert: Set the number of maximum attempts that will be made to run a
   * reduce task.
   *
   * @param n the number of attempts per reduce task.
   */
  public void setMaxReduceAttempts(int n) {
    setInt("mapred.reduce.max.attempts", n);
  }

  /**
   * Get the user-specified job name. This is only used to identify the
   * job to the user.
   *
   * @return the job's name, defaulting to "".
   */
  public String getJobName() {
    return get("mapred.job.name", "");
  }

  /**
   * Set the user-specified job name.
   *
   * @param name the job's new name.
   */
  public void setJobName(String name) {
    set("mapred.job.name", name);
  }

  /**
   * Get the user-specified session identifier. The default is the empty
   * string.
   *
   * The session identifier is used to tag metric data that is reported to some
   * performance metrics system via the org.apache.hadoop.metrics API. The
   * session identifier is intended, in particular, for use by Hadoop-On-Demand
   * (HOD) which allocates a virtual Hadoop cluster dynamically and transiently.
   * HOD will set the session identifier by modifying the mapred-site.xml file
   * before starting the cluster.
   *
   * When not running under HOD, this identifier is expected to remain set to
   * the empty string.
   *
   * @return the session identifier, defaulting to "".
   */
  public String getSessionId() {
    return get("session.id", "");
  }

  /**
   * Set the user-specified session identifier.
   *
   * @param sessionId the new session id.
   */
  public void setSessionId(String sessionId) {
    set("session.id", sessionId);
  }

  /**
   * Set the maximum no. of failures of a given job per tasktracker.
   * If the no. of task failures exceeds <code>noFailures</code>, the
   * tasktracker is <i>blacklisted</i> for this job.
   *
   * @param noFailures maximum no. of failures of a given job per tasktracker.
   */
  public void setMaxTaskFailuresPerTracker(int noFailures) {
    // NOTE(review): the setter writes the literal key
    // "mapred.max.tracker.failures" while the getter below reads
    // MAPRED_MAX_TRACKER_FAILURES_PROPERTY — confirm both refer to the
    // same configuration key.
    setInt("mapred.max.tracker.failures", noFailures);
  }

  /**
   * Expert: Get the maximum no. of failures of a given job per tasktracker.
   * If the no. of task failures exceeds this, the tasktracker is
   * <i>blacklisted</i> for this job. Defaults to 4.
   *
   * @return the maximum no. of failures of a given job per tasktracker.
   */
  public int getMaxTaskFailuresPerTracker() {
    return getInt(MAPRED_MAX_TRACKER_FAILURES_PROPERTY, 4);
  }

  /** The Child class used by TaskRunner */
  public static final String TASK_RUNNER_CHILD_CLASS_CONF =
    "mapred.taskrunner.child.class";

  /** Default Child class used by TaskRunner */
  public static final String TASK_RUNNER_CHILD_CLASS_DEFAULT =
    Child.class.getName();

  /**
   * Returns name of Child class used by TaskRunner.
   *
   * @return name of class
   */
  public String getTaskRunnerChildClassName() {
    return get(TASK_RUNNER_CHILD_CLASS_CONF, TASK_RUNNER_CHILD_CLASS_DEFAULT);
  }

  /**
   * Get the maximum percentage of map tasks that can fail without
   * the job being aborted.
   *
   * Each map task is executed a minimum of {@link #getMaxMapAttempts()}
   * attempts before being declared as <i>failed</i>.
   *
   * Defaults to <code>zero</code>, i.e. <i>any</i> failed map-task results in
   * the job being declared as {@link JobStatus#FAILED}.
   *
   * @return the maximum percentage of map tasks that can fail without
   *         the job being aborted.
   */
  public int getMaxMapTaskFailuresPercent() {
    return getInt("mapred.max.map.failures.percent", 0);
  }

  /**
   * Expert: Set the maximum percentage of map tasks that can fail without the
   * job being aborted.
   *
   * Each map task is executed a minimum of {@link #getMaxMapAttempts} attempts
   * before being declared as <i>failed</i>.
   *
   * @param percent the maximum percentage of map tasks that can fail without
   *                the job being aborted.
   */
  public void setMaxMapTaskFailuresPercent(int percent) {
    setInt("mapred.max.map.failures.percent", percent);
  }

  /**
   * Get the maximum percentage of reduce tasks that can fail without
   * the job being aborted.
   *
   * Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()}
   * attempts before being declared as <i>failed</i>.
   *
   * Defaults to <code>zero</code>, i.e. <i>any</i> failed reduce-task results
   * in the job being declared as {@link JobStatus#FAILED}.
   *
   * @return the maximum percentage of reduce tasks that can fail without
   *         the job being aborted.
   */
  public int getMaxReduceTaskFailuresPercent() {
    return getInt("mapred.max.reduce.failures.percent", 0);
  }

  /**
   * Set the maximum percentage of reduce tasks that can fail without the job
   * being aborted.
   *
   * Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()}
   * attempts before being declared as <i>failed</i>.
   *
   * @param percent the maximum percentage of reduce tasks that can fail without
   *                the job being aborted.
   */
  public void setMaxReduceTaskFailuresPercent(int percent) {
    setInt("mapred.max.reduce.failures.percent", percent);
  }

  /**
   * Set {@link JobPriority} for this job.
   *
   * @param prio the {@link JobPriority} for this job.
   */
  public void setJobPriority(JobPriority prio) {
    set("mapred.job.priority", prio.toString());
  }

  /**
   * Get the {@link JobPriority} for this job.
   * Defaults to {@link JobPriority#NORMAL} when
   * <code>mapred.job.priority</code> is unset.
   *
   * @return the {@link JobPriority} for this job.
   */
  public JobPriority getJobPriority() {
    String prio = get("mapred.job.priority");
    if (prio == null) {
      return JobPriority.NORMAL;
    }
    return JobPriority.valueOf(prio);
  }

  /**
   * Get whether a custom runner is enabled.
   *
   * @return true if a custom runner is enabled.
   */
  public boolean getCustomRunnerEnabled() {
    return getBoolean("mapred.taskrunner.custom.runner.enable", false);
  }

  /**
   * Set whether a custom runner is enabled.
   *
   * @param newValue if a custom runner is enabled.
   */
  public void setCustomRunnerEnabled(boolean newValue) {
    setBoolean("mapred.taskrunner.custom.runner.enable", newValue);
  }

  /**
   * Get the custom runner command.
   * Consulted when {@link #getCustomRunnerEnabled()} returns true.
   * This command will be used instead of ${java.home}/bin/java as the runner.
   *
   * @return The custom runner command.
   */
  public String getCustomRunnerCommand() {
    return get("mapred.taskrunner.custom.runner");
  }

  /**
   * Get the task ranges for which a custom runner is needed.
   * Consulted when {@link #getCustomRunnerEnabled()} returns true.
   * Defaults to the range "0-2".
   *
   * @param isMap whether to read the map or the reduce task ranges.
   * @return The ranges of tasks.
   */
  public IntegerRanges getCustomRunnerTaskRange(boolean isMap) {
    String param = isMap ? "mapred.taskrunner.custom.runner.maps"
                         : "mapred.taskrunner.custom.runner.reduces";
    return getRange(param, "0-2");
  }

  /**
   * Get whether the task profiling is enabled.
   *
   * @return true if some tasks will be profiled
   */
  public boolean getProfileEnabled() {
    return getBoolean("mapred.task.profile", false);
  }

  /**
   * Set whether the system should collect profiler information for some of
   * the tasks in this job? The information is stored in the user log
   * directory.
   *
   * @param newValue true means it should be gathered
   */
  public void setProfileEnabled(boolean newValue) {
    setBoolean("mapred.task.profile", newValue);
  }

  /**
   * Get the profiler configuration arguments.
   *
   * The default value for this property is
   * "-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s"
   *
   * @return the parameters to pass to the task child to configure profiling
   */
  public String getProfileParams() {
    return get("mapred.task.profile.params",
               "-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y," +
                 "verbose=n,file=%s");
  }

  /**
   * Set the profiler configuration arguments. If the string contains a '%s' it
   * will be replaced with the name of the profiling output file when the task
   * runs.
   *
   * This value is passed to the task child JVM on the command line.
   *
   * @param value the configuration string
   */
  public void setProfileParams(String value) {
    set("mapred.task.profile.params", value);
  }

  /**
   * Get the range of maps or reduces to profile. Defaults to "0-2".
   *
   * @param isMap is the task a map?
   * @return the task ranges
   */
  public IntegerRanges getProfileTaskRange(boolean isMap) {
    return getRange((isMap ? "mapred.task.profile.maps" :
                       "mapred.task.profile.reduces"), "0-2");
  }

  /**
   * Set the ranges of maps or reduces to profile. setProfileEnabled(true)
   * must also be called.
   *
   * @param isMap is the task a map?
   * @param newValue a set of integer ranges of the map ids
   */
  public void setProfileTaskRange(boolean isMap, String newValue) {
    // parse the value to make sure it is legal
    new Configuration.IntegerRanges(newValue);
    set((isMap ? "mapred.task.profile.maps" : "mapred.task.profile.reduces"),
        newValue);
  }

  /**
   * Set the debug script to run when the map tasks fail.
   *
   * <p>The debug script can aid debugging of failed map tasks. The script is
   * given task's stdout, stderr, syslog, jobconf files as arguments.</p>
   *
   * <p>The debug command, run on the node where the map failed, is:</p>
   * <p><pre><blockquote>
   * $script $stdout $stderr $syslog $jobconf.
   * </blockquote></pre></p>
   *
   * <p> The script file is distributed through {@link DistributedCache}
   * APIs. The script needs to be symlinked. </p>
   *
   * <p>Here is an example on how to submit a script
   * <p><blockquote><pre>
   * job.setMapDebugScript("./myscript");
   * DistributedCache.createSymlink(job);
   * DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
   * </pre></blockquote></p>
   *
   * @param mDbgScript the script name
   */
  public void setMapDebugScript(String mDbgScript) {
    set("mapred.map.task.debug.script", mDbgScript);
  }

  /**
   * Get the map task's debug script.
   *
   * @return the debug Script for the mapred job for failed map tasks.
   * @see #setMapDebugScript(String)
   */
  public String getMapDebugScript() {
    return get("mapred.map.task.debug.script");
  }

  /**
   * Set the debug script to run when the reduce tasks fail.
   *
   * <p>The debug script can aid debugging of failed reduce tasks. The script
   * is given task's stdout, stderr, syslog, jobconf files as arguments.</p>
   *
   * <p>The debug command, run on the node where the reduce failed, is:</p>
   * <p><pre><blockquote>
   * $script $stdout $stderr $syslog $jobconf.
   * </blockquote></pre></p>
   *
   * <p> The script file is distributed through {@link DistributedCache}
   * APIs. The script file needs to be symlinked </p>
   *
   * <p>Here is an example on how to submit a script
   * <p><blockquote><pre>
   * job.setReduceDebugScript("./myscript");
   * DistributedCache.createSymlink(job);
   * DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
   * </pre></blockquote></p>
   *
   * @param rDbgScript the script name
   */
  public void setReduceDebugScript(String rDbgScript) {
    set("mapred.reduce.task.debug.script", rDbgScript);
  }

  /**
   * Get the reduce task's debug Script
   *
   * @return the debug script for the mapred job for failed reduce tasks.
   * @see #setReduceDebugScript(String)
   */
  public String getReduceDebugScript() {
    return get("mapred.reduce.task.debug.script");
  }

  /**
   * Get the uri to be invoked in-order to send a notification after the job
   * has completed (success/failure).
   *
   * @return the job end notification uri, <code>null</code> if it hasn't
   *         been set.
   * @see #setJobEndNotificationURI(String)
   */
  public String getJobEndNotificationURI() {
    return get("job.end.notification.url");
  }

  /**
   * Set the uri to be invoked in-order to send a notification after the job
   * has completed (success/failure).
   *
   * <p>The uri can contain 2 special parameters: <tt>$jobId</tt> and
   * <tt>$jobStatus</tt>. Those, if present, are replaced by the job's
   * identifier and completion-status respectively.</p>
   *
   * <p>This is typically used by application-writers to implement chaining of
   * Map-Reduce jobs in an <i>asynchronous manner</i>.</p>
   *
   * @param uri the job end notification uri
   * @see JobStatus
   * @see <a href="{@docRoot}/org/apache/hadoop/mapred/JobClient.html#
   *       JobCompletionAndChaining">Job Completion and Chaining</a>
   */
  public void setJobEndNotificationURI(String uri) {
    set("job.end.notification.url", uri);
  }

  /**
   * Get job-specific shared directory for use as scratch space
   *
   * <p>
   * When a job starts, a shared directory is created at location
   * <code>
   * ${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ </code>.
   * This directory is exposed to the users through
   * <code>job.local.dir </code>.
   * So, the tasks can use this space
   * as scratch space and share files among them. </p>
   * This value is available as System property also.
   *
   * @return The localized job specific shared directory
   */
  public String getJobLocalDir() {
    return get("job.local.dir");
  }

  /**
   * Get memory required to run a map task of the job, in MB.
   *
   * If a value is specified in the configuration, it is returned.
   * Else, it returns {@link #DISABLED_MEMORY_LIMIT}.
   * <p/>
   * For backward compatibility, if the job configuration sets the
   * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different
   * from {@link #DISABLED_MEMORY_LIMIT}, that value will be used
   * after converting it from bytes to MB.
   *
   * @return memory required to run a map task of the job, in MB,
   *         or {@link #DISABLED_MEMORY_LIMIT} if unset.
   */
  public long getMemoryForMapTask() {
    // Deprecated mapred.task.maxvmem (converted to MB) takes precedence
    // over the new per-phase key.
    long value = getDeprecatedMemoryValue();
    if (value == DISABLED_MEMORY_LIMIT) {
      value = normalizeMemoryConfigValue(
                getLong(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY,
                        DISABLED_MEMORY_LIMIT));
    }
    return value;
  }

  /**
   * Set memory required to run a map task of the job, in MB.
   *
   * @param mem memory in MB.
   */
  public void setMemoryForMapTask(long mem) {
    setLong(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY, mem);
  }

  /**
   * Get memory required to run a reduce task of the job, in MB.
   *
   * If a value is specified in the configuration, it is returned.
   * Else, it returns {@link #DISABLED_MEMORY_LIMIT}.
   * <p/>
   * For backward compatibility, if the job configuration sets the
   * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different
   * from {@link #DISABLED_MEMORY_LIMIT}, that value will be used
   * after converting it from bytes to MB.
   *
   * @return memory required to run a reduce task of the job, in MB,
   *         or {@link #DISABLED_MEMORY_LIMIT} if unset.
   */
  public long getMemoryForReduceTask() {
    long value = getDeprecatedMemoryValue();
    if (value == DISABLED_MEMORY_LIMIT) {
      value = normalizeMemoryConfigValue(
                getLong(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY,
                        DISABLED_MEMORY_LIMIT));
    }
    return value;
  }

  // Return the value set to the key MAPRED_TASK_MAXVMEM_PROPERTY,
  // converted into MBs.
  // Returns DISABLED_MEMORY_LIMIT if unset, or set to a negative
  // value.
  private long getDeprecatedMemoryValue() {
    long oldValue = getLong(MAPRED_TASK_MAXVMEM_PROPERTY,
        DISABLED_MEMORY_LIMIT);
    oldValue = normalizeMemoryConfigValue(oldValue);
    if (oldValue != DISABLED_MEMORY_LIMIT) {
      // The deprecated key is in bytes; the new keys are in MB.
      oldValue /= (1024*1024);
    }
    return oldValue;
  }

  /**
   * Set memory required to run a reduce task of the job, in MB.
   *
   * @param mem memory in MB.
   */
  public void setMemoryForReduceTask(long mem) {
    setLong(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY, mem);
  }

  /**
   * Return the name of the queue to which this job is submitted.
   * Defaults to 'default'.
   *
   * @return name of the queue
   */
  public String getQueueName() {
    return get("mapred.job.queue.name", DEFAULT_QUEUE_NAME);
  }

  /**
   * Set the name of the queue to which this job should be submitted.
* * @param queueName Name of the queue */ public void setQueueName(String queueName) { set("mapred.job.queue.name", queueName); } /** * Get the source of the job. * Useful for getting the context that the job runs in. */ public String getJobSource() { return get(JOB_SOURCE_CONF); } /** * Normalize the negative values in configuration * * @param val * @return normalized value */ public static long normalizeMemoryConfigValue(long val) { if (val < 0) { val = DISABLED_MEMORY_LIMIT; } return val; } /** * Compute the number of slots required to run a single map task-attempt * of this job. * @param slotSizePerMap cluster-wide value of the amount of memory required * to run a map-task * @return the number of slots required to run a single map task-attempt * 1 if memory parameters are disabled. */ int computeNumSlotsPerMap(long slotSizePerMap) { if ((slotSizePerMap==DISABLED_MEMORY_LIMIT) || (getMemoryForMapTask()==DISABLED_MEMORY_LIMIT)) { return 1; } return (int)(Math.ceil((float)getMemoryForMapTask() / (float)slotSizePerMap)); } /** * Compute the number of slots required to run a single reduce task-attempt * of this job. * @param slotSizePerReduce cluster-wide value of the amount of memory * required to run a reduce-task * @return the number of slots required to run a single reduce task-attempt * 1 if memory parameters are disabled. */ int computeNumSlotsPerReduce(long slotSizePerReduce) { if ((slotSizePerReduce==DISABLED_MEMORY_LIMIT) || (getMemoryForReduceTask()==DISABLED_MEMORY_LIMIT)) { return 1; } return (int)(Math.ceil((float)getMemoryForReduceTask() / (float)slotSizePerReduce)); } /** * Find a jar that contains a class of the same name, if any. * It will return a jar file, even if that is not the first thing * on the class path that has a class with the same name. * * @param my_class the class to find. * @return a jar file that contains the class, or null. 
   * Note: any <code>IOException</code> raised while enumerating classpath
   * resources is rethrown as a {@link RuntimeException}; this method itself
   * throws no checked exception.
   */
  private static String findContainingJar(Class my_class) {
    ClassLoader loader = my_class.getClassLoader();
    // e.g. org.foo.Bar -> "org/foo/Bar.class"
    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";
    try {
      for(Enumeration itr = loader.getResources(class_file);
          itr.hasMoreElements();) {
        URL url = (URL) itr.nextElement();
        if ("jar".equals(url.getProtocol())) {
          String toReturn = url.getPath();
          if (toReturn.startsWith("file:")) {
            toReturn = toReturn.substring("file:".length());
          }
          toReturn = URLDecoder.decode(toReturn, "UTF-8");
          // Strip the "!/path/inside/jar" suffix of a jar: URL.
          return toReturn.replaceAll("!.*$", "");
        }
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    return null;
  }

  /**
   * Get the memory required to run a task of this job, in bytes. See
   * {@link #MAPRED_TASK_MAXVMEM_PROPERTY}
   * <p/>
   * This method is deprecated. Now, different memory limits can be
   * set for map and reduce tasks of a job, in MB.
   * <p/>
   * For backward compatibility, if the job configuration sets the
   * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different
   * from {@link #DISABLED_MEMORY_LIMIT}, that value is returned.
   * Otherwise, this method will return the larger of the values returned by
   * {@link #getMemoryForMapTask()} and {@link #getMemoryForReduceTask()}
   * after converting them into bytes.
   *
   * @return Memory required to run a task of this job, in bytes,
   *         or {@link #DISABLED_MEMORY_LIMIT}, if unset.
   * @see #setMaxVirtualMemoryForTask(long)
   * @deprecated Use {@link #getMemoryForMapTask()} and
   *             {@link #getMemoryForReduceTask()}
   */
  @Deprecated
  public long getMaxVirtualMemoryForTask() {
    LOG.warn(
      "getMaxVirtualMemoryForTask() is deprecated. " +
        "Instead use getMemoryForMapTask() and getMemoryForReduceTask()");

    long value = getLong(MAPRED_TASK_MAXVMEM_PROPERTY, DISABLED_MEMORY_LIMIT);
    value = normalizeMemoryConfigValue(value);
    if (value == DISABLED_MEMORY_LIMIT) {
      value = Math.max(getMemoryForMapTask(), getMemoryForReduceTask());
      value = normalizeMemoryConfigValue(value);
      if (value != DISABLED_MEMORY_LIMIT) {
        // Per-phase values are in MB; this API reports bytes.
        value *= 1024*1024;
      }
    }
    return value;
  }

  /**
   * Set the maximum amount of memory any task of this job can use. See
   * {@link #MAPRED_TASK_MAXVMEM_PROPERTY}
   * <p/>
   * mapred.task.maxvmem is split into
   * mapred.job.map.memory.mb and mapred.job.reduce.memory.mb;
   * each of the new keys is set to
   * mapred.task.maxvmem / (1024 * 1024),
   * as the new values are in MB.
   *
   * @param vmem Maximum amount of virtual memory in bytes any task of this job
   *             can use.
   * @see #getMaxVirtualMemoryForTask()
   * @deprecated
   *   Use {@link #setMemoryForMapTask(long mem)} and
   *   Use {@link #setMemoryForReduceTask(long mem)}
   */
  @Deprecated
  public void setMaxVirtualMemoryForTask(long vmem) {
    LOG.warn("setMaxVirtualMemoryForTask() is deprecated."+
      "Instead use setMemoryForMapTask() and setMemoryForReduceTask()");
    if(vmem != DISABLED_MEMORY_LIMIT && vmem < 0) {
      setMemoryForMapTask(DISABLED_MEMORY_LIMIT);
      setMemoryForReduceTask(DISABLED_MEMORY_LIMIT);
    }
    // NOTE(review): there is no return/else after the negative-vmem branch
    // above, so when vmem is negative the branch below immediately overwrites
    // the DISABLED values just set with vmem / (1024 * 1024) — confirm this
    // fall-through is intended.
    if(get(JobConf.MAPRED_TASK_MAXVMEM_PROPERTY) == null) {
      setMemoryForMapTask(vmem / (1024 * 1024)); //Changing bytes to mb
      setMemoryForReduceTask(vmem / (1024 * 1024));//Changing bytes to mb
    }else{
      this.setLong(JobConf.MAPRED_TASK_MAXVMEM_PROPERTY,vmem);
    }
  }

  /**
   * @deprecated this variable is deprecated and no longer in use.
   */
  @Deprecated
  public long getMaxPhysicalMemoryForTask() {
    LOG.warn("The API getMaxPhysicalMemoryForTask() is deprecated."
              + " Refer to the APIs getMemoryForMapTask() and"
              + " getMemoryForReduceTask() for details.");
    return -1;
  }

  /**
   * @deprecated this method is deprecated; the value set is ignored.
   */
  @Deprecated
  public void setMaxPhysicalMemoryForTask(long mem) {
    LOG.warn("The API setMaxPhysicalMemoryForTask() is deprecated."
        + " The value set is ignored. Refer to "
        + " setMemoryForMapTask() and setMemoryForReduceTask() for details.");
  }

  // Build the standard warning message for a deprecated configuration key.
  static String deprecatedString(String key) {
    return "The variable " + key + " is no longer used.";
  }

  // Log a warning if the job still sets the deprecated
  // mapred.task.maxvmem key, pointing at the per-phase replacements.
  private void checkAndWarnDeprecation() {
    if(get(JobConf.MAPRED_TASK_MAXVMEM_PROPERTY) != null) {
      LOG.warn(JobConf.deprecatedString(JobConf.MAPRED_TASK_MAXVMEM_PROPERTY)
                + " Instead use " + JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY
                + " and " + JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY);
    }
  }

  /**
   * Replace the jobtracker configuration with the configuration of
   * instance 0 or 1. This allows switching between two sets of
   * configurations via a command line option.
   *
   * @param conf The jobConf to be overwritten
   * @param instance 0 or 1 instance of the jobtracker
   */
  public static void overrideConfiguration(JobConf conf, int instance) {
    final String CONFIG_KEYS[] = new String[]{"mapred.job.tracker",
        "mapred.local.dir", "mapred.fairscheduler.server.address"};
    for (String configKey : CONFIG_KEYS) {
      // Per-instance keys are suffixed "-0" / "-1"; a missing key leaves the
      // base configuration untouched and only logs a warning.
      String value = conf.get(configKey + "-" + instance);
      if (value != null) {
        conf.set(configKey, value);
      } else {
        LOG.warn("Configuration " + configKey + "-" + instance +
            " not found.");
      }
    }
  }
}