/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import junit.framework.TestCase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.ipc.ProtocolSignature;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.jobhistory.JobSubmittedEvent;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.mapreduce.split.JobSplitWriter;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
import org.apache.hadoop.mapreduce.split.JobSplit.SplitMetaInfo;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Validates map phase progress.
 * The test case uses the new API.
 * We extend the Task.TaskReporter class and override setProgress()
 * to validate the map phase progress being set.
 * We extend MapTask and override its startReporter() method so that it
 * creates a TestTaskReporter instead of a TaskReporter, and then we call
 * mapTask.run().
 * Similar to LocalJobRunner, we set up splits and call mapTask.run()
 * directly. No job is run; only the map task is run.
 * As the reporter's setProgress() validates progress after
 * every record is read, we are done with the validation of map phase progress
 * once mapTask.run() is finished. Sort phase progress in the map task is not
 * validated here.
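 * Since the input file contains exactly three records, the progress expected
 * after each record is 1/3, 2/3 and 1.0 respectively.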
 */
public class TestMapProgress extends TestCase {
  public static final Log LOG = LogFactory.getLog(TestMapProgress.class);
  private static String TEST_ROOT_DIR;
  static {
    String root = new File(System.getProperty("test.build.data", "/tmp"))
        .getAbsolutePath();
    TEST_ROOT_DIR = new Path(root, "mapPhaseprogress").toString();
  }

  static class FakeUmbilical implements TaskUmbilicalProtocol {
    public long getProtocolVersion(String protocol, long clientVersion) {
      return TaskUmbilicalProtocol.versionID;
    }

    @Override
    public ProtocolSignature getProtocolSignature(String protocol,
        long clientVersion, int clientMethodsHash) throws IOException {
      return ProtocolSignature.getProtocolSignature(
          this, protocol, clientVersion, clientMethodsHash);
    }

    public void done(TaskAttemptID taskid) throws IOException {
      LOG.info("Task " + taskid + " reporting done.");
    }

    public void fsError(TaskAttemptID taskId, String message)
        throws IOException {
      LOG.info("Task " + taskId + " reporting file system error: " + message);
    }

    public void shuffleError(TaskAttemptID taskId, String message)
        throws IOException {
      LOG.info("Task " + taskId + " reporting shuffle error: " + message);
    }

    public void fatalError(TaskAttemptID taskId, String msg)
        throws IOException {
      LOG.info("Task " + taskId + " reporting fatal error: " + msg);
    }

    public JvmTask getTask(JvmContext context) throws IOException {
      return null;
    }

    public boolean ping(TaskAttemptID taskid) throws IOException {
      return true;
    }

    public void commitPending(TaskAttemptID taskId, TaskStatus taskStatus)
        throws IOException, InterruptedException {
      statusUpdate(taskId, taskStatus);
    }

    public boolean canCommit(TaskAttemptID taskid) throws IOException {
      return true;
    }

    public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
        throws IOException, InterruptedException {
      StringBuffer buf = new StringBuffer("Task ");
      buf.append(taskId);
      buf.append(" making progress to ");
      buf.append(taskStatus.getProgress());
      String state = taskStatus.getStateString();
      if (state != null) {
        buf.append(" and state of ");
        buf.append(state);
      }
      LOG.info(buf.toString());
      // ignore phase
      // ignore counters
      return true;
    }

    public void reportDiagnosticInfo(TaskAttemptID taskid, String trace)
        throws IOException {
      LOG.info("Task " + taskid + " has problem " + trace);
    }

    public MapTaskCompletionEventsUpdate getMapCompletionEvents(JobID jobId,
        int fromEventId, int maxLocs, TaskAttemptID id) throws IOException {
      return new MapTaskCompletionEventsUpdate(
          TaskCompletionEvent.EMPTY_ARRAY, false);
    }

    public void reportNextRecordRange(TaskAttemptID taskid,
        SortedRanges.Range range) throws IOException {
      LOG.info("Task " + taskid + " reportedNextRecordRange " + range);
    }
  }

  private FileSystem fs = null;
  private TestMapTask map = null;
  private JobID jobId = null;
  private FakeUmbilical fakeUmbilical = new FakeUmbilical();

  /**
   * Task Reporter that validates map phase progress after each record is
   * processed by map task
   */
  public class TestTaskReporter extends Task.TaskReporter {
    private int recordNum = 0; // number of records processed

    TestTaskReporter(Task task) {
      task.super(task.getProgress(), fakeUmbilical);
    }

    @Override
    public void setProgress(float progress) {
      super.setProgress(progress);
      float mapTaskProgress = map.getProgress().getProgress();
      LOG.info("Map task progress is " + mapTaskProgress);
      if (recordNum < 3) {
        // only 3 records are there; ignore validating progress after 3 times
        recordNum++;
      } else {
        return;
      }
      // validate map task progress when the map task is in map phase
      assertTrue("Map progress is not the expected value.",
          Math.abs(mapTaskProgress - ((float) recordNum / 3)) < 0.001);
    }
  }

  /**
   * Map Task that overrides run method and uses TestTaskReporter instead of
   * TaskReporter and uses FakeUmbilical.
   */
  class TestMapTask extends MapTask {
    public TestMapTask(String jobFile, TaskAttemptID taskId, int partition,
        TaskSplitIndex splitIndex, int numSlotsRequired) {
      super(jobFile, taskId, partition, splitIndex, numSlotsRequired);
    }

    /**
     * Create a TestTaskReporter and use it for validating map phase progress
     */
    @Override
    TaskReporter startReporter(final TaskUmbilicalProtocol umbilical) {
      // start thread that will handle communication with parent
      TaskReporter reporter = new TestTaskReporter(map);
      return reporter;
    }
  }

  // In the given dir, creates part-0 file with 3 records of same size
  private void createInputFile(Path rootDir) throws IOException {
    if (fs.exists(rootDir)) {
      fs.delete(rootDir, true);
    }

    String str = "The quick brown fox\n"
        + "The brown quick fox\n"
        + "The fox brown quick\n";
    DataOutputStream inpFile = fs.create(new Path(rootDir, "part-0"));
    inpFile.writeBytes(str);
    inpFile.close();
  }

  /**
   * Validates map phase progress after each record is processed by map task
   * using custom task reporter.
   */
  public void testMapProgress() throws Exception {
    JobConf job = new JobConf();
    fs = FileSystem.getLocal(job);
    Path rootDir = new Path(TEST_ROOT_DIR);
    createInputFile(rootDir);
    job.setNumReduceTasks(0);
    TaskAttemptID taskId = TaskAttemptID.forName(
        "attempt_200907082313_0424_m_000000_0");
    job.setClass("mapreduce.job.outputformat.class",
        NullOutputFormat.class, OutputFormat.class);
    job.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
        TEST_ROOT_DIR);
    jobId = taskId.getJobID();
    JobContext jContext = new JobContextImpl(job, jobId);
    InputFormat<?, ?> input =
        ReflectionUtils.newInstance(jContext.getInputFormatClass(), job);

    List<InputSplit> splits = input.getSplits(jContext);
    JobSplitWriter.createSplitFiles(new Path(TEST_ROOT_DIR), job,
        new Path(TEST_ROOT_DIR).getFileSystem(job), splits);
    TaskSplitMetaInfo[] splitMetaInfo =
        SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, job,
            new Path(TEST_ROOT_DIR));
    job.setUseNewMapper(true); // use new api
    for (int i = 0; i < splitMetaInfo.length; i++) { // rawSplits.length is 1
      map = new TestMapTask(
          job.get(JTConfig.JT_SYSTEM_DIR, "/tmp/hadoop/mapred/system")
              + jobId + "job.xml",
          taskId, i, splitMetaInfo[i].getSplitIndex(), 1);

      JobConf localConf = new JobConf(job);
      map.localizeConfiguration(localConf);
      map.setConf(localConf);
      map.run(localConf, fakeUmbilical);
    }
    // clean up
    fs.delete(rootDir, true);
  }
}