/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.commons.lang.RandomStringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * This is a wordcount application that tests the count of records that get
 * spilled to disk. It generates simple text input files, then runs the
 * wordcount map/reduce application on (1) 3 input files (with 3 maps and
 * 1 reduce) and verifies the counters, and (2) 4 input files (with 4 maps
 * and 1 reduce) and verifies the counters. The wordcount application reads
 * the text input files, breaks each line into words and counts them. The
 * output is a locally sorted list of words and the count of how often they
 * occurred.
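 *
 * The counter tests force a 1 MB map-side sort buffer that spills at 50%
 * (see {@link #createConfiguration()} and {@link #createJob()}), so the
 * spilled-record counts asserted below are deterministic.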
 */
public class TestJobCounters {

  private void validateFileCounters(Counters counter, long fileBytesRead,
      long fileBytesWritten, long mapOutputBytes,
      long mapOutputMaterializedBytes) {
    assertTrue(counter.findCounter(FileInputFormatCounter.BYTES_READ)
        .getValue() != 0);
    assertEquals(fileBytesRead,
        counter.findCounter(FileInputFormatCounter.BYTES_READ).getValue());
    assertTrue(counter.findCounter(FileOutputFormatCounter.BYTES_WRITTEN)
        .getValue() != 0);
    if (mapOutputBytes >= 0) {
      assertTrue(
          counter.findCounter(TaskCounter.MAP_OUTPUT_BYTES).getValue() != 0);
    }
    if (mapOutputMaterializedBytes >= 0) {
      assertTrue(counter.findCounter(TaskCounter.MAP_OUTPUT_MATERIALIZED_BYTES)
          .getValue() != 0);
    }
  }

  @SuppressWarnings("deprecation")
  private void validateOldFileCounters(Counters counter, long fileBytesRead,
      long fileBytesWritten, long mapOutputBytes,
      long mapOutputMaterializedBytes) {
    assertEquals(fileBytesRead,
        counter.findCounter(FileInputFormat.Counter.BYTES_READ).getValue());
    assertEquals(fileBytesRead,
        counter.findCounter(
            org.apache.hadoop.mapreduce.lib.input.FileInputFormat.Counter.BYTES_READ)
            .getValue());
    assertEquals(fileBytesWritten,
        counter.findCounter(FileOutputFormat.Counter.BYTES_WRITTEN).getValue());
    assertEquals(fileBytesWritten,
        counter.findCounter(
            org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.Counter.BYTES_WRITTEN)
            .getValue());
    if (mapOutputBytes >= 0) {
      assertTrue(
          counter.findCounter(TaskCounter.MAP_OUTPUT_BYTES).getValue() != 0);
    }
    if (mapOutputMaterializedBytes >= 0) {
      assertTrue(counter.findCounter(TaskCounter.MAP_OUTPUT_MATERIALIZED_BYTES)
          .getValue() != 0);
    }
  }

  private void validateCounters(Counters counter, long spillRecCnt,
      long mapInputRecords, long mapOutputRecords) {
    // Check that the number of spilled records matches the expected count
    assertEquals(spillRecCnt,
        counter.findCounter(TaskCounter.SPILLED_RECORDS).getCounter());
    assertEquals(mapInputRecords,
        counter.findCounter(TaskCounter.MAP_INPUT_RECORDS).getCounter());
    assertEquals(mapOutputRecords,
        counter.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getCounter());
  }

  private void removeWordsFile(Path inpFile, Configuration conf)
      throws IOException {
    final FileSystem fs = inpFile.getFileSystem(conf);
    if (fs.exists(inpFile) && !fs.delete(inpFile, false)) {
      throw new IOException("Failed to delete " + inpFile);
    }
  }

  private static void createWordsFile(Path inpFile, Configuration conf)
      throws IOException {
    final FileSystem fs = inpFile.getFileSystem(conf);
    if (fs.exists(inpFile)) {
      return;
    }
    FSDataOutputStream out = fs.create(inpFile);
    try {
      // 1024*4 unique words --- repeated 5 times => 20480 words per file
      int REPLICAS = 5, NUMLINES = 1024, NUMWORDSPERLINE = 4;
      final String WORD = "zymurgy"; // 7 bytes + 4 id bytes
      final Formatter fmt = new Formatter(new StringBuilder());
      for (int i = 0; i < REPLICAS; i++) {
        for (int j = 1; j <= NUMLINES * NUMWORDSPERLINE; j += NUMWORDSPERLINE) {
          ((StringBuilder) fmt.out()).setLength(0);
          for (int k = 0; k < NUMWORDSPERLINE; ++k) {
            fmt.format("%s%04d ", WORD, j + k);
          }
          ((StringBuilder) fmt.out()).append("\n");
          out.writeBytes(fmt.toString());
        }
      }
    } finally {
      out.close();
    }
  }

  private static Path IN_DIR = null;
  private static Path OUT_DIR = null;
  private static Path testdir = null;
  private static Path[] inFiles = new Path[5];

  private static long getFileSize(Path path) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    long len = 0;
    len += fs.getFileStatus(path).getLen();
    Path crcPath = new Path(path.getParent(), "." + path.getName() + ".crc");
    if (fs.exists(crcPath)) {
      len += fs.getFileStatus(crcPath).getLen();
    }
    return len;
  }

  @BeforeClass
  public static void initPaths() throws IOException {
    final Configuration conf = new Configuration();
    final Path TEST_ROOT_DIR =
        new Path(System.getProperty("test.build.data", "/tmp"));
    testdir = new Path(TEST_ROOT_DIR, "spilledRecords.countertest");
    IN_DIR = new Path(testdir, "in");
    OUT_DIR = new Path(testdir, "out");

    FileSystem fs = FileSystem.getLocal(conf);
    testdir = new Path(TEST_ROOT_DIR, "spilledRecords.countertest");
    if (fs.exists(testdir) && !fs.delete(testdir, true)) {
      throw new IOException("Could not delete " + testdir);
    }
    if (!fs.mkdirs(IN_DIR)) {
      throw new IOException("Mkdirs failed to create " + IN_DIR);
    }

    for (int i = 0; i < inFiles.length; i++) {
      inFiles[i] = new Path(IN_DIR, "input5_2k_" + i);
    }

    // create 3 input files, each with 5*2k words
    createWordsFile(inFiles[0], conf);
    createWordsFile(inFiles[1], conf);
    createWordsFile(inFiles[2], conf);
  }

  @AfterClass
  public static void cleanup() throws IOException {
    // clean up the input and output files
    final Configuration conf = new Configuration();
    final FileSystem fs = testdir.getFileSystem(conf);
    if (fs.exists(testdir)) {
      fs.delete(testdir, true);
    }
  }

  public static JobConf createConfiguration() throws IOException {
    JobConf baseConf = new JobConf(TestJobCounters.class);
    baseConf.setOutputKeyClass(Text.class);
    baseConf.setOutputValueClass(IntWritable.class);
    baseConf.setMapperClass(WordCount.MapClass.class);
    baseConf.setCombinerClass(WordCount.Reduce.class);
    baseConf.setReducerClass(WordCount.Reduce.class);
    baseConf.setNumReduceTasks(1);
    baseConf.setInt(JobContext.IO_SORT_MB, 1);
    baseConf.set(JobContext.MAP_SORT_SPILL_PERCENT, "0.50");
    baseConf.setInt(JobContext.MAP_COMBINE_MIN_SPILLS, 3);
    return baseConf;
  }

  public static Job createJob() throws IOException {
    final Configuration conf = new Configuration();
    final Job baseJob = Job.getInstance(conf);
    baseJob.setOutputKeyClass(Text.class);
    baseJob.setOutputValueClass(IntWritable.class);
    baseJob.setMapperClass(NewMapTokenizer.class);
    baseJob.setCombinerClass(NewSummer.class);
    baseJob.setReducerClass(NewSummer.class);
    baseJob.setNumReduceTasks(1);
    baseJob.getConfiguration().setInt(JobContext.IO_SORT_MB, 1);
    baseJob.getConfiguration().set(JobContext.MAP_SORT_SPILL_PERCENT, "0.50");
    baseJob.getConfiguration().setInt(JobContext.MAP_COMBINE_MIN_SPILLS, 3);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMinInputSplitSize(
        baseJob, Long.MAX_VALUE);
    return baseJob;
  }

  @Test
  public void testOldCounterA() throws Exception {
    JobConf conf = createConfiguration();
    conf.setNumMapTasks(3);
    conf.setInt(JobContext.IO_SORT_FACTOR, 2);
    removeWordsFile(inFiles[3], conf);
    removeWordsFile(inFiles[4], conf);
    long inputSize = 0;
    inputSize += getFileSize(inFiles[0]);
    inputSize += getFileSize(inFiles[1]);
    inputSize += getFileSize(inFiles[2]);
    FileInputFormat.setInputPaths(conf, IN_DIR);
    FileOutputFormat.setOutputPath(conf, new Path(OUT_DIR, "outputO0"));

    RunningJob myJob = JobClient.runJob(conf);
    Counters c1 = myJob.getCounters();

    // Each record requires 16 bytes of metadata and 16 bytes per serialized
    // record (vint word len + word + IntWritable) = (1 + 11 + 4).
    // (2^20 buf * .5 spill pcnt) / 32 bytes/record = 2^14 recs per spill
    // Each file contains 5 replicas of 4096 words, so the first spill will
    // contain 4 (2^14 rec / 2^12 rec/replica) replicas, the second just one.
    // Each map spills twice, emitting 4096 records from the combiner per
    // spill.
    // The merge adds an additional 8192 records, as there are too few
    // spills to combine (2 < 3).
    // Each map therefore spills 2^14 records, so the maps spill 49152
    // records, combined.
    // The reduce spill count is composed of the read from one segment and
    // the intermediate merge of the other two. The intermediate merge
    // adds 8192 records per segment read; again, there are too few spills
    // to combine, so all 16384 are written to disk (total 32768 spilled
    // records for the intermediate merge). The merge into the reduce
    // includes only the unmerged segment, size 8192. Total spilled records
    // in the reduce is 32768 from the merge + 8192 unmerged segment
    // = 40960 records.
    // Total: map + reduce = 49152 + 40960 = 90112
    //
    // 3 files * 5120 (= 5 * 1024) records/file = 15360 input records
    // 4 records per input line = 61440 output records
    validateCounters(c1, 90112, 15360, 61440);
    validateFileCounters(c1, inputSize, 0, 0, 0);
    validateOldFileCounters(c1, inputSize, 61928, 0, 0);
  }

  @Test
  public void testOldCounterB() throws Exception {
    JobConf conf = createConfiguration();
    createWordsFile(inFiles[3], conf);
    removeWordsFile(inFiles[4], conf);
    long inputSize = 0;
    inputSize += getFileSize(inFiles[0]);
    inputSize += getFileSize(inFiles[1]);
    inputSize += getFileSize(inFiles[2]);
    inputSize += getFileSize(inFiles[3]);
    conf.setNumMapTasks(4);
    conf.setInt(JobContext.IO_SORT_FACTOR, 2);
    FileInputFormat.setInputPaths(conf, IN_DIR);
    FileOutputFormat.setOutputPath(conf, new Path(OUT_DIR, "outputO1"));

    RunningJob myJob = JobClient.runJob(conf);
    Counters c1 = myJob.getCounters();

    // As above, each map spills 2^14 records, so 4 maps spill 2^16 records.
    // In the reduce, there are two intermediate merges before the reduce:
    //   1st merge: read + write = 8192 * 4
    //   2nd merge: read + write = 8192 * 4
    //   final merge: 0
    // Total reduce: 65536
    // Total: map + reduce = 2^16 + 2^16 = 131072
    //
    // 4 files * 5120 (= 5 * 1024) records/file = 20480 input records
    // 4 records per input line = 81920 output records
    validateCounters(c1, 131072, 20480, 81920);
    validateFileCounters(c1, inputSize, 0, 0, 0);
  }

  @Test
  public void testOldCounterC() throws Exception {
    JobConf conf = createConfiguration();
    createWordsFile(inFiles[3], conf);
    createWordsFile(inFiles[4], conf);
    long inputSize = 0;
    inputSize += getFileSize(inFiles[0]);
    inputSize += getFileSize(inFiles[1]);
    inputSize += getFileSize(inFiles[2]);
    inputSize += getFileSize(inFiles[3]);
    inputSize += getFileSize(inFiles[4]);
    conf.setNumMapTasks(4);
    conf.setInt(JobContext.IO_SORT_FACTOR, 3);
    FileInputFormat.setInputPaths(conf, IN_DIR);
    FileOutputFormat.setOutputPath(conf, new Path(OUT_DIR, "outputO2"));

    RunningJob myJob = JobClient.runJob(conf);
    Counters c1 = myJob.getCounters();

    // As above, each map spills 2^14 records, so 5 maps spill 81920.
    //   1st merge: read + write = 6 * 8192
    //   final merge: unmerged = 2 * 8192
    // Total reduce: 65536
    // Total: map + reduce = 81920 + 65536 = 147456
    //
    // 5 files * 5120 (= 5 * 1024) records/file = 25600 input records
    // 4 records per input line = 102400 output records
    validateCounters(c1, 147456, 25600, 102400);
    validateFileCounters(c1, inputSize, 0, 0, 0);
  }

  @Test
  public void testOldCounterD() throws Exception {
    JobConf conf = createConfiguration();
    conf.setNumMapTasks(3);
    conf.setInt(JobContext.IO_SORT_FACTOR, 2);
    conf.setNumReduceTasks(0);
    removeWordsFile(inFiles[3], conf);
    removeWordsFile(inFiles[4], conf);
    long inputSize = 0;
    inputSize += getFileSize(inFiles[0]);
    inputSize += getFileSize(inFiles[1]);
    inputSize += getFileSize(inFiles[2]);
    FileInputFormat.setInputPaths(conf, IN_DIR);
    FileOutputFormat.setOutputPath(conf, new Path(OUT_DIR, "outputO3"));

    RunningJob myJob = JobClient.runJob(conf);
    Counters c1 = myJob.getCounters();

    // No reduces: output goes through the direct output collector, so
    // Spilled Records = 0.
    validateCounters(c1, 0, 15360, 61440);
    validateFileCounters(c1, inputSize, 0, -1, -1);
  }

  @Test
  public void testNewCounterA() throws Exception {
    final Job job = createJob();
    final Configuration conf = job.getConfiguration();
    conf.setInt(JobContext.IO_SORT_FACTOR, 2);
    removeWordsFile(inFiles[3], conf);
    removeWordsFile(inFiles[4], conf);
    long inputSize = 0;
    inputSize += getFileSize(inFiles[0]);
    inputSize += getFileSize(inFiles[1]);
    inputSize += getFileSize(inFiles[2]);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(
        job, IN_DIR);
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(
        job, new Path(OUT_DIR, "outputN0"));
    assertTrue(job.waitForCompletion(true));
    final Counters c1 = Counters.downgrade(job.getCounters());
    validateCounters(c1, 90112, 15360, 61440);
    validateFileCounters(c1, inputSize, 0, 0, 0);
  }

  @Test
  public void testNewCounterB() throws Exception {
    final Job job = createJob();
    final Configuration conf = job.getConfiguration();
    conf.setInt(JobContext.IO_SORT_FACTOR, 2);
    createWordsFile(inFiles[3], conf);
    removeWordsFile(inFiles[4], conf);
    long inputSize = 0;
    inputSize += getFileSize(inFiles[0]);
    inputSize += getFileSize(inFiles[1]);
    inputSize += getFileSize(inFiles[2]);
    inputSize += getFileSize(inFiles[3]);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(
        job, IN_DIR);
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(
        job, new Path(OUT_DIR, "outputN1"));
    assertTrue(job.waitForCompletion(true));
    final Counters c1 = Counters.downgrade(job.getCounters());
    validateCounters(c1, 131072, 20480, 81920);
    validateFileCounters(c1, inputSize, 0, 0, 0);
  }

  @Test
  public void testNewCounterC() throws Exception {
    final Job job = createJob();
    final Configuration conf = job.getConfiguration();
    conf.setInt(JobContext.IO_SORT_FACTOR, 3);
    createWordsFile(inFiles[3], conf);
    createWordsFile(inFiles[4], conf);
    long inputSize = 0;
    inputSize += getFileSize(inFiles[0]);
    inputSize += getFileSize(inFiles[1]);
    inputSize += getFileSize(inFiles[2]);
    inputSize += getFileSize(inFiles[3]);
    inputSize += getFileSize(inFiles[4]);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(
        job, IN_DIR);
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(
        job, new Path(OUT_DIR, "outputN2"));
    assertTrue(job.waitForCompletion(true));
    final Counters c1 = Counters.downgrade(job.getCounters());
    validateCounters(c1, 147456, 25600, 102400);
    validateFileCounters(c1, inputSize, 0, 0, 0);
  }

  @Test
  public void testNewCounterD() throws Exception {
    final Job job = createJob();
    final Configuration conf = job.getConfiguration();
    conf.setInt(JobContext.IO_SORT_FACTOR, 2);
    job.setNumReduceTasks(0);
    removeWordsFile(inFiles[3], conf);
    removeWordsFile(inFiles[4], conf);
    long inputSize = 0;
    inputSize += getFileSize(inFiles[0]);
    inputSize += getFileSize(inFiles[1]);
    inputSize += getFileSize(inFiles[2]);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(
        job, IN_DIR);
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(
        job, new Path(OUT_DIR, "outputN3"));
    assertTrue(job.waitForCompletion(true));
    final Counters c1 = Counters.downgrade(job.getCounters());
    validateCounters(c1, 0, 15360, 61440);
    validateFileCounters(c1, inputSize, 0, -1, -1);
  }

  @SuppressWarnings("deprecation")
  @Test
  public void testOldCounters() throws Exception {
    Counters c1 = new Counters();
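    // Populate the counters directly (no job run) so that both the new-style
    // and the deprecated file counters can be validated against known values.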
    c1.incrCounter(FileInputFormat.Counter.BYTES_READ, 100);
    c1.incrCounter(FileOutputFormat.Counter.BYTES_WRITTEN, 200);
    c1.incrCounter(TaskCounter.MAP_OUTPUT_BYTES, 100);
    c1.incrCounter(TaskCounter.MAP_OUTPUT_MATERIALIZED_BYTES, 100);
    validateFileCounters(c1, 100, 200, 100, 100);
    validateOldFileCounters(c1, 100, 200, 100, 100);
  }

  /**
   * Increases the JVM's heap usage to the specified target value.
   */
  static class MemoryLoader {
    private static final int DEFAULT_UNIT_LOAD_SIZE = 10 * 1024 * 1024; // 10mb

    // the target value to reach
    private long targetValue;
    // a list to hold the load objects
    private List<String> loadObjects = new ArrayList<String>();

    MemoryLoader(long targetValue) {
      this.targetValue = targetValue;
    }

    /**
     * Loads the memory to the target value.
     */
    void load() {
      while (Runtime.getRuntime().totalMemory() < targetValue) {
        System.out.println("Loading memory with " + DEFAULT_UNIT_LOAD_SIZE
            + " characters. Current usage : "
            + Runtime.getRuntime().totalMemory());
        // load some objects in the memory
        loadObjects.add(RandomStringUtils.random(DEFAULT_UNIT_LOAD_SIZE));

        // sleep for 100ms
        try {
          Thread.sleep(100);
        } catch (InterruptedException ie) {}
      }
    }
  }

  /**
   * A mapper that increases the JVM's heap usage to a target value configured
   * via {@link MemoryLoaderMapper#TARGET_VALUE} using a {@link MemoryLoader}.
   */
  @SuppressWarnings({"deprecation", "unchecked"})
  static class MemoryLoaderMapper extends MapReduceBase
      implements Mapper<WritableComparable, Writable,
                        WritableComparable, Writable> {
    static final String TARGET_VALUE = "map.memory-loader.target-value";

    private static MemoryLoader loader = null;

    public void map(WritableComparable key, Writable val,
        OutputCollector<WritableComparable, Writable> output,
        Reporter reporter)
        throws IOException {
      assertNotNull("Mapper not configured!", loader);

      // load the memory
      loader.load();

      // work as identity mapper
      output.collect(key, val);
    }

    public void configure(JobConf conf) {
      loader = new MemoryLoader(conf.getLong(TARGET_VALUE, -1));
    }
  }

  /**
   * A reducer that increases the JVM's heap usage to a target value configured
   * via {@link MemoryLoaderReducer#TARGET_VALUE} using a {@link MemoryLoader}.
   */
  @SuppressWarnings({"deprecation", "unchecked"})
  static class MemoryLoaderReducer extends MapReduceBase
      implements Reducer<WritableComparable, Writable,
                         WritableComparable, Writable> {
    static final String TARGET_VALUE = "reduce.memory-loader.target-value";

    private static MemoryLoader loader = null;

    public void reduce(WritableComparable key, Iterator<Writable> val,
        OutputCollector<WritableComparable, Writable> output,
        Reporter reporter)
        throws IOException {
      assertNotNull("Reducer not configured!", loader);

      // load the memory
      loader.load();

      // work as identity reducer
      output.collect(key, key);
    }

    public void configure(JobConf conf) {
      loader = new MemoryLoader(conf.getLong(TARGET_VALUE, -1));
    }
  }

  @SuppressWarnings("deprecation")
  private long getTaskCounterUsage(JobClient client, JobID id, int numReports,
      int taskId, TaskType type)
      throws Exception {
    TaskReport[] reports = null;
    if (TaskType.MAP.equals(type)) {
      reports = client.getMapTaskReports(id);
    } else if (TaskType.REDUCE.equals(type)) {
      reports = client.getReduceTaskReports(id);
    }

    assertNotNull("No reports found for task type '" + type.name()
        + "' in job " + id, reports);
    // make sure that the total number of reports matches the expected count
    assertEquals("Mismatch in task id", numReports, reports.length);

    Counters counters = reports[taskId].getCounters();

    return counters.getCounter(TaskCounter.COMMITTED_HEAP_BYTES);
  }

  // set up the heap options, the target value for the memory loader and the
  // output directory before running the job
  @SuppressWarnings("deprecation")
  private static RunningJob runHeapUsageTestJob(JobConf conf, Path testRootDir,
      String heapOptions, long targetMapValue, long targetReduceValue,
      FileSystem fs, JobClient client, Path inDir)
      throws IOException {
    // define a job
    JobConf jobConf = new JobConf(conf);

    // configure the jobs
    jobConf.setNumMapTasks(1);
    jobConf.setNumReduceTasks(1);
    jobConf.setMapperClass(MemoryLoaderMapper.class);
    jobConf.setReducerClass(MemoryLoaderReducer.class);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.set(JobConf.MAPRED_TASK_JAVA_OPTS, heapOptions);

    // set the targets
    jobConf.setLong(MemoryLoaderMapper.TARGET_VALUE, targetMapValue);
    jobConf.setLong(MemoryLoaderReducer.TARGET_VALUE, targetReduceValue);

    // set the input directory for the job
    FileInputFormat.setInputPaths(jobConf, inDir);

    // define the job output folder
    Path outDir = new Path(testRootDir, "out");
    fs.delete(outDir, true);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    // run the job
    RunningJob job = client.submitJob(jobConf);
    job.waitForCompletion();
    JobID jobID = job.getID();
    assertTrue("Job " + jobID + " failed!", job.isSuccessful());

    return job;
  }

  /**
   * Tests {@link TaskCounter}'s {@link TaskCounter#COMMITTED_HEAP_BYTES}.
   * The test runs a low-memory job, which consumes little heap memory, then
   * a high-memory job, which consumes more heap memory, and ensures that the
   * COMMITTED_HEAP_BYTES of the low-memory job is smaller than that of the
   * high-memory job.
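   * The high-memory job sets its map and reduce heap-usage targets 256 MB
   * above the usage observed for the corresponding low-memory job tasks.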
   * @throws IOException
   */
  @Test
  @SuppressWarnings("deprecation")
  public void testHeapUsageCounter() throws Exception {
    JobConf conf = new JobConf();
    // create a local filesystem handle
    FileSystem fileSystem = FileSystem.getLocal(conf);

    // define test root directories
    Path rootDir =
        new Path(System.getProperty("test.build.data", "/tmp"));
    Path testRootDir = new Path(rootDir, "testHeapUsageCounter");
    // cleanup the test root directory
    fileSystem.delete(testRootDir, true);
    // set the current working directory
    fileSystem.setWorkingDirectory(testRootDir);

    fileSystem.deleteOnExit(testRootDir);

    // create a mini cluster using the local file system
    MiniMRCluster mrCluster =
        new MiniMRCluster(1, fileSystem.getUri().toString(), 1);

    try {
      conf = mrCluster.createJobConf();
      JobClient jobClient = new JobClient(conf);

      // define job input
      Path inDir = new Path(testRootDir, "in");
      // create input data
      createWordsFile(inDir, conf);

      // configure and run a low memory job which will run without loading
      // the jvm's heap
      RunningJob lowMemJob =
          runHeapUsageTestJob(conf, testRootDir, "-Xms32m -Xmx1G",
              0, 0, fileSystem, jobClient, inDir);
      JobID lowMemJobID = lowMemJob.getID();
      long lowMemJobMapHeapUsage =
          getTaskCounterUsage(jobClient, lowMemJobID, 1, 0, TaskType.MAP);
      System.out.println("Job1 (low memory job) map task heap usage: "
          + lowMemJobMapHeapUsage);
      long lowMemJobReduceHeapUsage =
          getTaskCounterUsage(jobClient, lowMemJobID, 1, 0, TaskType.REDUCE);
      System.out.println("Job1 (low memory job) reduce task heap usage: "
          + lowMemJobReduceHeapUsage);

      // configure and run a high memory job which will load the jvm's heap
      RunningJob highMemJob =
          runHeapUsageTestJob(conf, testRootDir, "-Xms32m -Xmx1G",
              lowMemJobMapHeapUsage + 256*1024*1024,
              lowMemJobReduceHeapUsage + 256*1024*1024,
              fileSystem, jobClient, inDir);
      JobID highMemJobID = highMemJob.getID();

      long highMemJobMapHeapUsage =
          getTaskCounterUsage(jobClient, highMemJobID, 1, 0, TaskType.MAP);
      System.out.println("Job2 (high memory job) map task heap usage: "
          + highMemJobMapHeapUsage);
      long highMemJobReduceHeapUsage =
          getTaskCounterUsage(jobClient, highMemJobID, 1, 0, TaskType.REDUCE);
      System.out.println("Job2 (high memory job) reduce task heap usage: "
          + highMemJobReduceHeapUsage);

      assertTrue("Incorrect map heap usage reported by the map task",
          lowMemJobMapHeapUsage < highMemJobMapHeapUsage);

      assertTrue("Incorrect reduce heap usage reported by the reduce task",
          lowMemJobReduceHeapUsage < highMemJobReduceHeapUsage);
    } finally {
      // shutdown the mr cluster
      mrCluster.shutdown();

      try {
        fileSystem.delete(testRootDir, true);
      } catch (IOException ioe) {}
    }
  }

  public static class NewMapTokenizer
      extends org.apache.hadoop.mapreduce.Mapper<Object, Text, Text, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }

  public static class NewSummer
      extends org.apache.hadoop.mapreduce.Reducer<Text, IntWritable,
                                                  Text, IntWritable> {
    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values,
        Context context) throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }
}