package org.apache.hadoop.mapred;

import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.UtilsForTests;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.net.Node;

import junit.framework.TestCase;

/**
 * Tests that {@link JobInProgress} maintains its running, pending and
 * completed task counts correctly for failing and waiting jobs.
 */
public class TestJobInProgress extends TestCase {
  static final Log LOG = LogFactory.getLog(TestJobInProgress.class);

  private MiniMRCluster mrCluster;
  private MiniDFSCluster dfsCluster;
  JobTracker jt;
  private static Path TEST_DIR =
      new Path(System.getProperty("test.build.data", "/tmp"), "jip-testing");
  private static int numSlaves = 4;

  public static class FailMapTaskJob extends MapReduceBase implements
      Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    public void map(LongWritable key, Text value,
        OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
      // reporter.incrCounter(TaskCounts.LaunchedTask, 1);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        throw new IllegalArgumentException("Interrupted MAP task");
      }
      throw new IllegalArgumentException("Failing MAP task");
    }
  }

  // Suppressing the warning as we just need to write a failing reduce task
  // job. We don't need to bother about the actual key/value pairs which are
  // passed.
@SuppressWarnings("unchecked") public static class FailReduceTaskJob extends MapReduceBase implements Reducer { @Override public void reduce(Object key, Iterator values, OutputCollector output, Reporter reporter) throws IOException { // reporter.incrCounter(TaskCounts.LaunchedTask, 1); try { Thread.sleep(1000); } catch (InterruptedException e) { throw new IllegalArgumentException("Failing Reduce task"); } throw new IllegalArgumentException("Failing Reduce task"); } } @Override protected void setUp() throws Exception { // TODO Auto-generated method stub super.setUp(); Configuration conf = new Configuration(); dfsCluster = new MiniDFSCluster(conf, numSlaves, true, null); mrCluster = new MiniMRCluster(numSlaves, dfsCluster.getFileSystem() .getUri().toString(), 1); jt = mrCluster.getJobTrackerRunner().getJobTracker(); } public void testPendingMapTaskCount() throws Exception { launchTask(FailMapTaskJob.class, IdentityReducer.class); checkTaskCounts(); } public void testPendingReduceTaskCount() throws Exception { launchTask(IdentityMapper.class, FailReduceTaskJob.class); checkTaskCounts(); } /** * Test if running tasks are correctly maintained for various types of jobs */ private void testRunningTaskCount(boolean speculation, boolean locality) throws Exception { LOG.info("Testing running jobs with speculation : " + speculation + ", locality : " + locality); // cleanup dfsCluster.getFileSystem().delete(TEST_DIR, true); final Path mapSignalFile = new Path(TEST_DIR, "map-signal"); final Path redSignalFile = new Path(TEST_DIR, "reduce-signal"); // configure a waiting job with 2 maps and 2 reducers JobConf job = configure(UtilsForTests.WaitingMapper.class, IdentityReducer.class, 1, 1, locality); job.set(UtilsForTests.getTaskSignalParameter(true), mapSignalFile.toString()); job.set(UtilsForTests.getTaskSignalParameter(false), redSignalFile.toString()); // Disable slow-start for reduces since this maps don't complete // in these test-cases... 
job.setFloat("mapred.reduce.slowstart.completed.maps", 0.0f); // test jobs with speculation job.setSpeculativeExecution(speculation); JobClient jc = new JobClient(job); RunningJob running = jc.submitJob(job); JobTracker jobtracker = mrCluster.getJobTrackerRunner().getJobTracker(); JobInProgress jip = jobtracker.getJob(running.getID()); LOG.info("Running job " + jip.getJobID()); // wait LOG.info("Waiting for job " + jip.getJobID() + " to be ready"); waitTillReady(jip, job); // check if the running structures are populated Set<TaskInProgress> uniqueTasks = new HashSet<TaskInProgress>(); for (Map.Entry<Node, Set<TaskInProgress>> s : jip.getRunningMapCache().entrySet()) { uniqueTasks.addAll(s.getValue()); } // add non local map tasks uniqueTasks.addAll(jip.getNonLocalRunningMaps()); assertEquals("Running map count doesnt match for jobs with speculation " + speculation + ", and locality " + locality, jip.runningMaps(), uniqueTasks.size()); assertEquals("Running reducer count doesnt match for jobs with speculation " + speculation + ", and locality " + locality, jip.runningReduces(), jip.getRunningReduces().size()); // signal the tasks LOG.info("Signaling the tasks"); UtilsForTests.signalTasks(dfsCluster, dfsCluster.getFileSystem(), mapSignalFile.toString(), redSignalFile.toString(), numSlaves); // wait for the job to complete LOG.info("Waiting for job " + jip.getJobID() + " to be complete"); UtilsForTests.waitTillDone(jc); // cleanup dfsCluster.getFileSystem().delete(TEST_DIR, true); } // wait for the job to start private void waitTillReady(JobInProgress jip, JobConf job) { // wait for all the maps to get scheduled while (jip.runningMaps() < job.getNumMapTasks()) { UtilsForTests.waitFor(10); } // wait for all the reducers to get scheduled while (jip.runningReduces() < job.getNumReduceTasks()) { UtilsForTests.waitFor(10); } } public void testRunningTaskCount() throws Exception { // test with spec = false and locality=true testRunningTaskCount(false, true); // test with spec = true and locality=true testRunningTaskCount(true, true); // test with spec = false and locality=false testRunningTaskCount(false, false); // test with spec = true and locality=false testRunningTaskCount(true, false); } @Override protected void tearDown() throws Exception { mrCluster.shutdown(); dfsCluster.shutdown(); super.tearDown(); } void launchTask(Class MapClass,Class ReduceClass) throws Exception{ JobConf job = configure(MapClass, ReduceClass, 5, 10, true); try { JobClient.runJob(job); } catch (IOException ioe) {} } @SuppressWarnings("unchecked") JobConf configure(Class MapClass,Class ReduceClass, int maps, int reducers, boolean locality) throws Exception { JobConf jobConf = mrCluster.createJobConf(); final Path inDir = new Path("./failjob/input"); final Path outDir = new Path("./failjob/output"); String input = "Test failing job.\n One more line"; FileSystem inFs = inDir.getFileSystem(jobConf); FileSystem outFs = outDir.getFileSystem(jobConf); outFs.delete(outDir, true); if (!inFs.mkdirs(inDir)) { throw new IOException("create directory failed" + inDir.toString()); } DataOutputStream file = inFs.create(new Path(inDir, "part-0")); file.writeBytes(input); file.close(); jobConf.setJobName("failmaptask"); if (locality) { jobConf.setInputFormat(TextInputFormat.class); } else { jobConf.setInputFormat(UtilsForTests.RandomInputFormat.class); } jobConf.setOutputKeyClass(Text.class); jobConf.setOutputValueClass(Text.class); jobConf.setMapperClass(MapClass); jobConf.setCombinerClass(ReduceClass); 
    jobConf.setReducerClass(ReduceClass);
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);
    jobConf.setNumMapTasks(maps);
    jobConf.setNumReduceTasks(reducers);
    return jobConf;
  }

  void checkTaskCounts() {
    JobStatus[] status = jt.getAllJobs();
    for (JobStatus js : status) {
      JobInProgress jip = jt.getJob(js.getJobID());
      Counters counter = jip.getJobCounters();
      long totalTaskCount =
          counter.getCounter(JobInProgress.Counter.TOTAL_LAUNCHED_MAPS)
              + counter.getCounter(JobInProgress.Counter.TOTAL_LAUNCHED_REDUCES);
      while (jip.getNumTaskCompletionEvents() < totalTaskCount) {
        assertTrue(jip.runningMaps() >= 0);
        assertTrue(jip.pendingMaps() >= 0);
        assertTrue(jip.runningReduces() >= 0);
        assertTrue(jip.pendingReduces() >= 0);
      }
    }
  }
}