/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.SortValidator.RecordStatsChecker.NonSplitableSequenceFileInputFormat;
import org.apache.hadoop.mapred.ThreadedMapBenchmark.RandomInputFormat;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.security.UserGroupInformation;

import junit.framework.TestCase;

import java.io.*;
import java.util.Iterator;

/**
 * TestJobTrackerRestart checks if the jobtracker can restart. The JobTracker
 * should be able to continue running the previously running jobs and also
 * recover previously submitted jobs.
 */
public class TestJobTrackerRestart extends TestCase {
  final static Object waitLock = new Object();

  final Path testDir = new Path("/jt-restart-testing");
  final Path inDir = new Path(testDir, "input");
  final Path shareDir = new Path(testDir, "share");
  final Path outputDir = new Path(testDir, "output");

  private static int numJobsSubmitted = 0;

  /**
   * Gets the job status from the jobtracker given the jobclient and the
   * job id.
   */
  static JobStatus getJobStatus(JobClient jc, JobID id) throws IOException {
    JobStatus[] statuses = jc.getAllJobs();
    for (JobStatus jobStatus : statuses) {
      if (jobStatus.getJobID().equals(id)) {
        return jobStatus;
      }
    }
    return null;
  }

  /**
   * Returns the job confs configured with the priorities and mappers as
   * passed.
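   * Each job writes to its own output directory derived from the given
   * output dir.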
   * @param conf the default conf
   * @param priorities priorities for the jobs
   * @param numMaps number of maps for the jobs
   * @param numReds number of reducers for the jobs
   * @param outputDir output dir
   * @param inDir input dir
   * @param mapSignalFile file name that acts as a signal for maps
   * @param reduceSignalFile file name that acts as a signal for reducers
   * @return an array of jobconfs configured as needed
   * @throws IOException
   */
  static JobConf[] getJobs(JobConf conf, JobPriority[] priorities,
                           int[] numMaps, int[] numReds,
                           Path outputDir, Path inDir,
                           String mapSignalFile, String reduceSignalFile)
  throws IOException {
    JobConf[] jobs = new JobConf[priorities.length];
    for (int i = 0; i < jobs.length; ++i) {
      jobs[i] = new JobConf(conf);
      Path newOutputDir = outputDir.suffix(String.valueOf(numJobsSubmitted++));
      configureWaitingJobConf(jobs[i], inDir, newOutputDir,
                              numMaps[i], numReds[i], "jt-restart-test-job",
                              mapSignalFile, reduceSignalFile);
      jobs[i].setJobPriority(priorities[i]);
    }
    return jobs;
  }

  /**
   * A utility that waits for the specified amount of time.
   */
  static void waitFor(long duration) {
    try {
      synchronized (waitLock) {
        waitLock.wait(duration);
      }
    } catch (InterruptedException ie) {}
  }

  /**
   * Wait for the jobtracker to be RUNNING.
   */
  static void waitForJobTracker(JobClient jobClient) {
    while (true) {
      try {
        ClusterStatus status = jobClient.getClusterStatus();
        while (status.getJobTrackerState() != JobTracker.State.RUNNING) {
          waitFor(100);
          status = jobClient.getClusterStatus();
        }
        break; // means that the jt is ready
      } catch (IOException ioe) {}
    }
  }

  /**
   * Signal the maps/reduces to complete.
   */
  static void signalTasks(MiniDFSCluster dfs, FileSystem fileSys,
                          boolean isMap, String mapSignalFile,
                          String reduceSignalFile)
  throws IOException {
    // Create the signal file that the waiting tasks are polling for
    TestRackAwareTaskPlacement.writeFile(dfs.getNameNode(), fileSys.getConf(),
                                         isMap
                                         ? new Path(mapSignalFile)
                                         : new Path(reduceSignalFile),
                                         (short)1);
  }

  /**
   * Waits until all the jobs at the jobtracker complete.
   */
  static void waitTillDone(JobClient jobClient) throws IOException {
    // Wait for the last job to complete
    while (true) {
      boolean shouldWait = false;
      for (JobStatus jobStatuses : jobClient.getAllJobs()) {
        if (jobStatuses.getRunState() == JobStatus.RUNNING) {
          shouldWait = true;
          break;
        }
      }
      if (shouldWait) {
        waitFor(1000);
      } else {
        break;
      }
    }
  }

  /**
   * Clean up the signals.
   */
  static void cleanUp(FileSystem fileSys, Path dir) throws IOException {
    // Delete the map signal file
    fileSys.delete(new Path(getMapSignalFile(dir)), false);
    // Delete the reduce signal file
    fileSys.delete(new Path(getReduceSignalFile(dir)), false);
  }

  /**
   * Tests multiple jobs on a jobtracker with restart-recovery turned on.
   * Preparation :
   *   - Configure 3 jobs as follows [format {prio, maps, reducers}]
   *       - job1 : {normal, 50, 1}
   *       - job2 : {low,    1,  1}
   *       - job3 : {high,   1,  1}
   *   - Configure the cluster to run 1 reducer
   *   - Lower the history file block size and buffer
   *
   * Submit these 3 jobs but make sure that job1's priority is changed and
   * job1 is RUNNING before submitting the other jobs.
   * The order in which the jobs will be executed is job1, job3 and job2.
   *
   * The above ordering makes sure that job1 runs before everyone else.
   * Wait for job1 to complete 50%. Note that all the jobs are configured to
   * use {@link HalfWaitingMapper} and {@link WaitingReducer}.
   * So job1 will eventually wait at 50%.
   *
   * Make a note of the following things
   *   - Job start times
   *
   * Restart the jobtracker
   *
   * Wait for job1 to finish all the maps and note the TaskCompletion events
   * at the tracker.
   *
   * Wait for all the jobs to finish
   *
   * Also make sure that the order in which the jobs were sorted before
   * restart remains the same. For this check the following
   *   job1.start-time < job2.start-time < job3.start-time and
   *   job1.finish-time < job3.finish-time < job2.finish-time
   * This ordering makes sure that the change of priority is logged and
   * recovered back.
   */
  public void testRecoveryWithMultipleJobs(MiniDFSCluster dfs,
                                           MiniMRCluster mr)
  throws IOException {
    FileSystem fileSys = dfs.getFileSystem();
    JobConf jobConf = mr.createJobConf();
    JobPriority[] priorities = {JobPriority.NORMAL, JobPriority.LOW,
                                JobPriority.HIGH};
    // Note that there is only 1 tracker
    int[] numMaps = {50, 1, 1};
    int[] numReds = {1, 1, 1};

    cleanUp(fileSys, shareDir);

    // Configure the jobs
    JobConf[] jobs = getJobs(jobConf, priorities, numMaps, numReds,
                             outputDir, inDir,
                             getMapSignalFile(shareDir),
                             getReduceSignalFile(shareDir));

    // Master job parameters
    int masterJob = 0;
    JobPriority masterJobNewPriority = JobPriority.HIGH;

    // Submit a master job
    JobClient jobClient = new JobClient(jobs[masterJob]);
    RunningJob job = jobClient.submitJob(jobs[masterJob]);
    JobID id = job.getID();

    // Wait for the job to be inited
    mr.initializeJob(id);

    // Change the master job's priority so that priority logging is tested
    mr.setJobPriority(id, masterJobNewPriority);

    // Submit the remaining jobs and initialize them
    for (int i = 1; i < jobs.length; ++i) {
      RunningJob rJob = (new JobClient(jobs[i])).submitJob(jobs[i]);
      mr.initializeJob(rJob.getID());
    }

    // Make sure that the master job is 50% completed
    while (getJobStatus(jobClient, id).mapProgress() < 0.5f) {
      waitFor(100);
    }

    // Note the data that needs to be tested upon restart
    long jobStartTime = getJobStatus(jobClient, id).getStartTime();

    // Kill the jobtracker
    mr.stopJobTracker();

    // Signal the maps to complete
    signalTasks(dfs, fileSys, true, getMapSignalFile(shareDir),
                getReduceSignalFile(shareDir));

    // Signal the reducers to complete
    signalTasks(dfs, fileSys, false, getMapSignalFile(shareDir),
                getReduceSignalFile(shareDir));

    // Enable recovery on restart
    mr.getJobTrackerConf().setBoolean("mapred.jobtracker.restart.recover",
                                      true);

    // Wait for a minute before restarting the jobtracker
    waitFor(60 * 1000);

    // Restart the jobtracker
    mr.startJobTracker();

    // Check if the jobs are still running

    // Wait for the JT to be ready
    waitForJobTracker(jobClient);

    // Check if the job was recovered
    assertEquals("Restart failed as previously submitted job was missing",
                 true, getJobStatus(jobClient, id) != null);

    // Check if the job's priority got changed
    assertEquals("Restart failed as job's priority did not match",
                 true, mr.getJobPriority(id).equals(masterJobNewPriority));

    waitTillDone(jobClient);

    // Check if the jobs completed in order .. the order is 1->3->2
    JobStatus[] newStatuses = jobClient.getAllJobs();

    // Check if the jobs are in the order of submission
    // This is important for the following checks
    boolean jobOrder = newStatuses[0].getJobID().getId() == 1
                       && newStatuses[1].getJobID().getId() == 2
                       && newStatuses[2].getJobID().getId() == 3;
    assertTrue("Job submission order changed", jobOrder);

    // Start times are in order and non zero
    boolean startTimeOrder = newStatuses[0].getStartTime() > 0
      && newStatuses[0].getStartTime() < newStatuses[1].getStartTime()
      && newStatuses[1].getStartTime() < newStatuses[2].getStartTime();
    assertTrue("Job start-times are out of order", startTimeOrder);

    // Finish times should follow the priority order : job1 -> job3 -> job2
    boolean finishTimeOrder =
      mr.getJobFinishTime(newStatuses[0].getJobID()) > 0
      && mr.getJobFinishTime(newStatuses[0].getJobID())
         < mr.getJobFinishTime(newStatuses[2].getJobID())
      && mr.getJobFinishTime(newStatuses[2].getJobID())
         < mr.getJobFinishTime(newStatuses[1].getJobID());
    assertTrue("Jobs finish-times are out of order", finishTimeOrder);

    // This should be used for testing job counters
    job.getCounters();

    // Check if the job was successful
    assertTrue("Previously submitted job was not successful",
               job.isSuccessful());

    // Check if the start time was recovered
    assertTrue("Previously submitted job's start time has changed",
               getJobStatus(jobClient, id).getStartTime() == jobStartTime);

    // Test history files
    testJobHistoryFiles(id, jobs[masterJob]);
  }

  /**
   * Tests the jobtracker with restart-recovery turned off.
   * Submit a job with normal priority, 2 maps and 0 reducers.
   *
   * Wait for the job to complete 50%
   *
   * Restart the jobtracker with recovery turned off
   *
   * Check if the job is missing
   */
  public void testRestartWithoutRecovery(MiniDFSCluster dfs,
                                         MiniMRCluster mr)
  throws IOException {
    // III. Test a job with waiting mapper and recovery turned off

    FileSystem fileSys = dfs.getFileSystem();
    cleanUp(fileSys, shareDir);

    JobConf newConf = getJobs(mr.createJobConf(),
                              new JobPriority[] {JobPriority.NORMAL},
                              new int[] {2}, new int[] {0},
                              outputDir, inDir,
                              getMapSignalFile(shareDir),
                              getReduceSignalFile(shareDir))[0];

    JobClient jobClient = new JobClient(newConf);
    RunningJob job = jobClient.submitJob(newConf);
    JobID id = job.getID();

    // Make sure that the job is 50% completed
    while (getJobStatus(jobClient, id).mapProgress() < 0.5f) {
      waitFor(100);
    }

    mr.stopJobTracker();

    // Turn off the recovery
    mr.getJobTrackerConf().setBoolean("mapred.jobtracker.restart.recover",
                                      false);

    // Wait for a minute before restarting the jobtracker
    waitFor(60 * 1000);

    mr.startJobTracker();

    // Signal the tasks
    signalTasks(dfs, fileSys, true, getMapSignalFile(shareDir),
                getReduceSignalFile(shareDir));

    // Wait for the JT to be ready
    waitForJobTracker(jobClient);

    waitTillDone(jobClient);

    // The submitted job should not exist
    assertTrue("Submitted job was detected with recovery disabled",
               getJobStatus(jobClient, id) == null);
  }

  /**
   * Tests a job on a jobtracker with restart-recovery turned on.
   * Preparation :
   *   - Configure a job with
   *       - num-maps : 50
   *       - num-reducers : 1
   *   - Configure the cluster to run 1 reducer
   *   - Lower the history file block size and buffer
   *
   * Wait for the job to complete 50%. Note that the job is configured to
   * use {@link HalfWaitingMapper} and {@link WaitingReducer}. So the job
   * will eventually wait at 50%.
   *
   * Make a note of the following things
   *   - Task completion events
   *   - Cluster status
   *   - Task Reports
   *   - Job start time
   *
   * Restart the jobtracker
   *
   * Wait for the job to finish all the maps and note the TaskCompletion
   * events at the tracker.
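   * The recovered events are later compared against the events noted before
   * the restart.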
   *
   * Wait for all the jobs to finish and note the following
   *   - New task completion events at the jobtracker
   *   - Task reports
   *   - Cluster status
   *
   * Check for the following
   *   - Task completion events for recovered tasks should match
   *   - Task completion events at the tasktracker and the restarted
   *     jobtracker should be the same
   *   - Cluster status should be fine.
   *   - Task Reports for recovered tasks should match
   *     Checks
   *       - start time
   *       - finish time
   *       - counters
   *       - http-location
   *       - task-id
   *   - Job start time should match
   *   - Check if the counters can be accessed
   *   - Check if the history files are (re)named properly
   */
  public void testTaskEventsAndReportsWithRecovery(MiniDFSCluster dfs,
                                                   MiniMRCluster mr)
  throws IOException {
    // II. Test a tasktracker with waiting mapper and recovery turned on.
    //     Ideally the tracker should SYNC with the new/restarted jobtracker

    FileSystem fileSys = dfs.getFileSystem();
    final int numMaps = 50;
    final int numReducers = 1;

    cleanUp(fileSys, shareDir);

    JobConf newConf = getJobs(mr.createJobConf(),
                              new JobPriority[] {JobPriority.NORMAL},
                              new int[] {numMaps}, new int[] {numReducers},
                              outputDir, inDir,
                              getMapSignalFile(shareDir),
                              getReduceSignalFile(shareDir))[0];

    JobClient jobClient = new JobClient(newConf);
    RunningJob job = jobClient.submitJob(newConf);
    JobID id = job.getID();

    mr.initializeJob(id);

    // Make sure that at least one reducer is spawned
    while (jobClient.getClusterStatus().getReduceTasks() == 0) {
      waitFor(100);
    }

    while (true) {
      // Since we are using a half waiting mapper, maps should be stuck at 50%
      TaskCompletionEvent[] trackerEvents =
        mr.getMapTaskCompletionEventsUpdates(0, id, numMaps)
          .getMapTaskCompletionEvents();
      if (trackerEvents.length < numMaps / 2) {
        waitFor(1000);
      } else {
        break;
      }
    }

    TaskCompletionEvent[] prevEvents =
      mr.getTaskCompletionEvents(id, 0, numMaps);
    TaskReport[] prevSetupReports = jobClient.getSetupTaskReports(id);
    TaskReport[] prevMapReports = jobClient.getMapTaskReports(id);
    ClusterStatus prevStatus = jobClient.getClusterStatus();

    mr.stopJobTracker();

    // Turn on the recovery
    mr.getJobTrackerConf().setBoolean("mapred.jobtracker.restart.recover",
                                      true);

    // Wait for a minute before restarting the jobtracker
    waitFor(60 * 1000);

    mr.startJobTracker();

    // Signal the map tasks
    signalTasks(dfs, fileSys, true, getMapSignalFile(shareDir),
                getReduceSignalFile(shareDir));

    // Wait for the JT to be ready
    waitForJobTracker(jobClient);

    int numToMatch = mr.getNumEventsRecovered() / 2;

    // Make sure that the maps are completed
    while (getJobStatus(jobClient, id).mapProgress() < 1.0f) {
      waitFor(100);
    }

    // Get the new jobtracker's events
    TaskCompletionEvent[] jtEvents =
      mr.getTaskCompletionEvents(id, 0, 2 * numMaps);

    // Test if all the events that were recovered match exactly
    testTaskCompletionEvents(prevEvents, jtEvents, false, numToMatch);

    // Wait until the tracker has caught up with the jobtracker's events
    TaskCompletionEvent[] trackerEvents;
    while (true) {
      trackerEvents =
        mr.getMapTaskCompletionEventsUpdates(0, id, 2 * numMaps)
          .getMapTaskCompletionEvents();
      if (trackerEvents.length < jtEvents.length) {
        waitFor(1000);
      } else {
        break;
      }
    }

    // Check the task reports
    // The reports should match exactly if the attempts are the same
    TaskReport[] afterMapReports = jobClient.getMapTaskReports(id);
    TaskReport[] afterSetupReports = jobClient.getSetupTaskReports(id);
    testTaskReports(prevMapReports, afterMapReports, numToMatch - 1);
    testTaskReports(prevSetupReports, afterSetupReports, 1);

    // Signal the reduce tasks
    signalTasks(dfs, fileSys, false, getMapSignalFile(shareDir),
                getReduceSignalFile(shareDir));

    waitTillDone(jobClient);
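
    // Once the job is done, the tracker and the restarted jobtracker should
    // agree on the full set of map task completion events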
    testTaskCompletionEvents(jtEvents, trackerEvents, true, 2 * numMaps);

    // Check that the cluster status is sane
    ClusterStatus status = jobClient.getClusterStatus();
    assertTrue("Cluster status is insane",
               checkClusterStatusOnCompletion(status, prevStatus));
  }

  /**
   * Checks if the history files are as expected.
   * @param id job id
   * @param conf job conf
   */
  private void testJobHistoryFiles(JobID id, JobConf conf)
  throws IOException {
    // Get the history files for users
    String logFileName = JobHistory.JobInfo.getJobHistoryFileName(conf, id);
    String tempLogFileName =
      JobHistory.JobInfo.getSecondaryJobHistoryFile(logFileName);

    // I. User files
    Path logFile =
      JobHistory.JobInfo.getJobHistoryLogLocationForUser(logFileName, conf);
    FileSystem fileSys = logFile.getFileSystem(conf);

    // Check if the history file exists
    assertTrue("User log file does not exist", fileSys.exists(logFile));

    // Check if the temporary file is deleted
    Path tempLogFile =
      JobHistory.JobInfo.getJobHistoryLogLocationForUser(tempLogFileName,
                                                         conf);
    assertFalse("User temporary log file exists",
                fileSys.exists(tempLogFile));

    // II. Framework files
    // Get the history file
    logFile = JobHistory.JobInfo.getJobHistoryLogLocation(logFileName);
    fileSys = logFile.getFileSystem(conf);

    // Check if the history file exists
    assertTrue("Log file does not exist", fileSys.exists(logFile));

    // Check if the temporary file is deleted
    tempLogFile = JobHistory.JobInfo.getJobHistoryLogLocation(tempLogFileName);
    assertFalse("Temporary log file exists", fileSys.exists(tempLogFile));
  }

  /**
   * Matches the specified number of task reports.
   * @param source the reports to be matched
   * @param target reports to match with
   * @param numToMatch number of reports to match
   */
  private void testTaskReports(TaskReport[] source, TaskReport[] target,
                               int numToMatch) {
    for (int i = 0; i < numToMatch; ++i) {
      // Check if the task reports were recovered correctly
      assertTrue("Task reports for same attempt has changed",
                 source[i].equals(target[i]));
    }
  }

  /**
   * Matches the task completion events.
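   * A full match checks that both lists have the same length and compares
   * every event; otherwise only the first numToMatch events with matching
   * attempt ids are compared.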
   * @param source the events to be matched
   * @param target events to match with
   * @param fullMatch whether to match the events completely or partially
   * @param numToMatch number of events to match in case a full match is not
   *                   desired
   */
  private void testTaskCompletionEvents(TaskCompletionEvent[] source,
                                        TaskCompletionEvent[] target,
                                        boolean fullMatch,
                                        int numToMatch) {
    // Check if the event list size matches
    // The lengths should match only in case of a full match
    if (fullMatch) {
      assertEquals("Map task completion events mismatch",
                   source.length, target.length);
      numToMatch = source.length;
    }
    // Check if the events match
    for (int i = 0; i < numToMatch; ++i) {
      if (source[i].getTaskAttemptId().equals(target[i].getTaskAttemptId())) {
        assertTrue("Map task completion events ordering mismatch",
                   source[i].equals(target[i]));
      }
    }
  }

  private boolean checkClusterStatusOnCompletion(ClusterStatus status,
                                                 ClusterStatus prevStatus) {
    return status.getJobTrackerState() == prevStatus.getJobTrackerState()
           && status.getMapTasks() == 0
           && status.getReduceTasks() == 0;
  }

  public void testJobTrackerRestart() throws IOException {
    String namenode = null;
    MiniDFSCluster dfs = null;
    MiniMRCluster mr = null;
    FileSystem fileSys = null;
    try {
      Configuration conf = new Configuration();
      conf.setBoolean("dfs.replication.considerLoad", false);
      dfs = new MiniDFSCluster(conf, 1, true, null, null);
      dfs.waitActive();
      fileSys = dfs.getFileSystem();

      // Clean up
      fileSys.delete(testDir, true);
      if (!fileSys.mkdirs(inDir)) {
        throw new IOException("Mkdirs failed to create " + inDir.toString());
      }

      // Write the input file
      TestRackAwareTaskPlacement.writeFile(dfs.getNameNode(), conf,
                                           new Path(inDir + "/file"),
                                           (short)1);

      dfs.startDataNodes(conf, 1, true, null, null, null, null);
      dfs.waitActive();

      namenode = (dfs.getFileSystem()).getUri().getHost() + ":"
                 + (dfs.getFileSystem()).getUri().getPort();

      // Make sure that jobhistory leads to a proper job restart,
      // so keep the block size and the buffer size small
      JobConf jtConf = new JobConf();
      jtConf.set("mapred.jobtracker.job.history.block.size", "1024");
      jtConf.set("mapred.jobtracker.job.history.buffer.size", "1024");
      jtConf.setInt("mapred.tasktracker.reduce.tasks.maximum", 1);
      jtConf.setLong("mapred.tasktracker.expiry.interval", 25 * 1000);
      jtConf.setBoolean("mapred.acls.enabled", true);

      // Get the user group info
      UserGroupInformation ugi = UserGroupInformation.getCurrentUGI();
      jtConf.set("mapred.queue.default.acl-submit-job", ugi.getUserName());

      mr = new MiniMRCluster(1, namenode, 1, null, null, jtConf);

      // Test multiple jobs on jobtracker with restart-recovery turned on
      testRecoveryWithMultipleJobs(dfs, mr);

      // Test the tasktracker SYNC
      testTaskEventsAndReportsWithRecovery(dfs, mr);

      // Test jobtracker with restart-recovery turned off
      testRestartWithoutRecovery(dfs, mr);
    } finally {
      if (mr != null) {
        try {
          mr.shutdown();
        } catch (Exception e) {}
      }
      if (dfs != null) {
        try {
          dfs.shutdown();
        } catch (Exception e) {}
      }
    }
  }

  static String getMapSignalFile(Path dir) {
    return dir.suffix("/jt-restart-map-signal").toString();
  }

  static String getReduceSignalFile(Path dir) {
    return dir.suffix("/jt-restart-reduce-signal").toString();
  }

  /**
   * HalfWaitingMapper is a Mapper that just waits for a file to be created
   * on the dfs. The file creation is a signal to the mappers and hence acts
   * as a waiting job. Only the latter half of the maps wait for the signal
   * while the rest complete immediately.
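   * The map id is parsed from the task attempt id, so only maps with an id
   * greater than half of mapred.map.tasks block until the signal file
   * appears.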
   */
  static class HalfWaitingMapper
  extends MapReduceBase
  implements Mapper<WritableComparable, Writable,
                    WritableComparable, Writable> {

    FileSystem fs = null;
    Path signal;
    int id = 0;
    int totalMaps = 0;

    /**
     * The waiting function. The map exits once it gets a signal. Here the
     * signal is the file existence.
     */
    public void map(WritableComparable key, Writable val,
                    OutputCollector<WritableComparable, Writable> output,
                    Reporter reporter)
    throws IOException {
      if (id > totalMaps / 2) {
        if (fs != null) {
          while (!fs.exists(signal)) {
            try {
              reporter.progress();
              synchronized (this) {
                this.wait(1000); // wait for 1 sec
              }
            } catch (InterruptedException ie) {
              System.out.println("Interrupted while the map was waiting for "
                                 + "the signal.");
              break;
            }
          }
        } else {
          throw new IOException("Could not get the DFS!!");
        }
      }
    }

    public void configure(JobConf conf) {
      try {
        String taskId = conf.get("mapred.task.id");
        id = Integer.parseInt(taskId.split("_")[4]);
        totalMaps = Integer.parseInt(conf.get("mapred.map.tasks"));
        fs = FileSystem.get(conf);
        signal = new Path(conf.get("test.mapred.map.waiting.target"));
      } catch (IOException ioe) {
        System.out.println("Got an exception while obtaining the filesystem");
      }
    }
  }

  /**
   * WaitingReducer just waits for a file to be created on the dfs. The
   * file creation is a signal to the reduce.
   */
  static class WaitingReducer
  extends MapReduceBase
  implements Reducer<WritableComparable, Writable,
                     WritableComparable, Writable> {

    FileSystem fs = null;
    Path signal;

    /**
     * The waiting function. The reduce exits once it gets a signal. Here the
     * signal is the file existence.
     */
    public void reduce(WritableComparable key, Iterator<Writable> val,
                       OutputCollector<WritableComparable, Writable> output,
                       Reporter reporter)
    throws IOException {
      if (fs != null) {
        while (!fs.exists(signal)) {
          try {
            reporter.progress();
            synchronized (this) {
              this.wait(1000); // wait for 1 sec
            }
          } catch (InterruptedException ie) {
            System.out.println("Interrupted while the reduce was waiting for "
                               + "the signal.");
            break;
          }
        }
      } else {
        throw new IOException("Could not get the DFS!!");
      }
    }

    public void configure(JobConf conf) {
      try {
        fs = FileSystem.get(conf);
        signal = new Path(conf.get("test.mapred.reduce.waiting.target"));
      } catch (IOException ioe) {
        System.out.println("Got an exception while obtaining the filesystem");
      }
    }
  }

  static void configureWaitingJobConf(JobConf jobConf, Path inDir,
                                      Path outputPath,
                                      int numMaps, int numRed,
                                      String jobName,
                                      String mapSignalFilename,
                                      String redSignalFilename)
  throws IOException {
    jobConf.setJobName(jobName);
    jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outputPath);
    jobConf.setMapperClass(HalfWaitingMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);
    jobConf.setOutputKeyClass(BytesWritable.class);
    jobConf.setOutputValueClass(BytesWritable.class);
    // Note that this overrides the input format set above
    jobConf.setInputFormat(RandomInputFormat.class);
    jobConf.setNumMapTasks(numMaps);
    jobConf.setNumReduceTasks(numRed);
    jobConf.setJar("build/test/testjar/testjob.jar");
    jobConf.set("test.mapred.map.waiting.target", mapSignalFilename);
    jobConf.set("test.mapred.reduce.waiting.target", redSignalFilename);
  }

  public static void main(String[] args) throws IOException {
    new TestJobTrackerRestart().testJobTrackerRestart();
  }
}