// TestReporter.java example
//
// Explorer
// HDP-2.2-Patched-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

import static org.junit.Assert.*;

/**
 * Tests task-side reporting:
 * <ul>
 *   <li>the old mapred API's {@link Reporter#getProgress()} for a map-only
 *       job and for a map-reduce job;</li>
 *   <li>truncation of an over-long task status string set through the new
 *       mapreduce API.</li>
 * </ul>
 */
public class TestReporter {
  private static final Path rootTempDir =
    new Path(System.getProperty("test.build.data", "/tmp"));
  private static final Path testRootTempDir = 
    new Path(rootTempDir, "TestReporter");
  
  // local file system used to create and remove the per-class scratch dir
  private static FileSystem fs = null;

  /**
   * Creates a fresh, empty scratch directory for this test class.
   */
  @BeforeClass
  public static void setup() throws Exception {
    fs = FileSystem.getLocal(new Configuration());
    fs.delete(testRootTempDir, true);
    fs.mkdirs(testRootTempDir);
  }

  /**
   * Removes the scratch directory created by {@link #setup()}.
   */
  @AfterClass
  public static void cleanup() throws Exception {
    // setup() may have failed before 'fs' was assigned; do not mask that
    // failure with a NullPointerException here.
    if (fs != null) {
      fs.delete(testRootTempDir, true);
    }
  }
  
  // an input with 4 lines
  private static final String INPUT = "Hi\nHi\nHi\nHi\n";
  private static final int INPUT_LINES = INPUT.split("\n").length;
  
  /**
   * A {@link Mapper} that checks the reported attempt progress on every call
   * to {@code map()} and once more in {@link #close()}.
   */
  @SuppressWarnings("deprecation")
  static class ProgressTesterMapper extends MapReduceBase 
  implements Mapper<LongWritable, Text, Text, Text> {
    // fraction of the attempt's progress that the map phase accounts for
    private float progressRange = 0;
    // number of records seen so far by map()
    private int numRecords = 0;
    // reporter captured in map() so that close() can query it
    private Reporter reporter = null;
    
    @Override
    public void configure(JobConf job) {
      super.configure(job);
      // set the progress range accordingly
      if (job.getNumReduceTasks() == 0) {
        progressRange = 1f;
      } else {
        progressRange = 0.667f;
      }
    }
    
    @Override
    public void map(LongWritable key, Text value, 
                    OutputCollector<Text, Text> output, Reporter reporter) 
    throws IOException {
      this.reporter = reporter;
      
      // calculate the actual map progress
      float mapProgress = ((float)++numRecords)/INPUT_LINES;
      // calculate the attempt progress based on the progress range
      float attemptProgress = progressRange * mapProgress;
      assertEquals("Invalid progress in map", 
                   attemptProgress, reporter.getProgress(), 0f);
      // the record count makes every emitted key distinct
      output.collect(new Text(value.toString() + numRecords), value);
    }
    
    @Override
    public void close() throws IOException {
      super.close();
      // without any map() call there is no reporter to query; fail with a
      // clear message instead of a NullPointerException
      assertNotNull("map() was never called, no Reporter available", 
                    reporter);
      assertEquals("Invalid progress in map cleanup", 
                   progressRange, reporter.getProgress(), 0f);
    }
  }

  /**
   * A new-API mapper that sets a very long status string and fails if the
   * status read back exceeds the configured progress status length limit.
   */
  static class StatusLimitMapper extends
      org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, Text> {

    // length of the status string to set; presumably larger than the
    // configured limit so that truncation is exercised -- verify against
    // MRConfig.PROGRESS_STATUS_LEN_LIMIT_DEFAULT
    private static final int LONG_STATUS_LENGTH = 1000;

    @Override
    public void map(LongWritable key, Text value, Context context)
        throws IOException {
      StringBuilder sb = new StringBuilder(LONG_STATUS_LENGTH);
      for (int i = 0; i < LONG_STATUS_LENGTH; i++) {
        sb.append('a');
      }
      context.setStatus(sb.toString());
      int progressStatusLength = context.getConfiguration().getInt(
          MRConfig.PROGRESS_STATUS_LEN_LIMIT_KEY,
          MRConfig.PROGRESS_STATUS_LEN_LIMIT_DEFAULT);

      if (context.getStatus().length() > progressStatusLength) {
        throw new IOException("Status is not truncated");
      }
    }
  }

  /**
   * Test {@link Reporter}'s progress for a map-only job.
   * This will make sure that only the map phase decides the attempt's progress.
   */
  @SuppressWarnings("deprecation")
  @Test
  public void testReporterProgressForMapOnlyJob() throws IOException {
    Path test = new Path(testRootTempDir, "testReporterProgressForMapOnlyJob");
    
    JobConf conf = new JobConf();
    conf.setMapperClass(ProgressTesterMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    // fail early
    conf.setMaxMapAttempts(1);
    conf.setMaxReduceAttempts(0);
    
    RunningJob job = 
      UtilsForTests.runJob(conf, new Path(test, "in"), new Path(test, "out"), 
                           1, 0, INPUT);
    job.waitForCompletion();
    
    assertTrue("Job failed", job.isSuccessful());
  }
  
  /**
   * A {@link Reducer} implementation that checks the progress on every call
   * to {@link Reducer#reduce(Object, Iterator, OutputCollector, Reporter)}.
   */
  @SuppressWarnings("deprecation")
  static class ProgressTestingReducer extends MapReduceBase 
  implements Reducer<Text, Text, Text, Text> {
    // reduce task has a fixed split of progress amongst copy, shuffle and 
    // reduce phases.
    private static final float REDUCE_PROGRESS_RANGE = 1.0f/3;
    private static final float SHUFFLE_PROGRESS_RANGE = 
      1 - REDUCE_PROGRESS_RANGE;

    // number of reduce() calls seen so far
    private int recordCount = 0;
    // reporter captured in reduce() so that close() can query it
    private Reporter reporter = null;
    
    @Override
    public void configure(JobConf job) {
      super.configure(job);
    }
    
    @Override
    public void reduce(Text key, Iterator<Text> values,
        OutputCollector<Text, Text> output, Reporter reporter)
    throws IOException {
      // expected progress: everything before the reduce phase, plus the
      // share of the reduce phase consumed after this call
      float reducePhaseProgress = ((float)++recordCount)/INPUT_LINES;
      float weightedReducePhaseProgress = 
              reducePhaseProgress * REDUCE_PROGRESS_RANGE;
      assertEquals("Invalid progress in reduce", 
                   SHUFFLE_PROGRESS_RANGE + weightedReducePhaseProgress, 
                   reporter.getProgress(), 0.02f);
      this.reporter = reporter;
    }
    
    @Override
    public void close() throws IOException {
      super.close();
      // without any reduce() call there is no reporter to query; fail with
      // a clear message instead of a NullPointerException
      assertNotNull("reduce() was never called, no Reporter available", 
                    reporter);
      assertEquals("Invalid progress in reduce cleanup", 
                   1.0f, reporter.getProgress(), 0f);
    }
  }
  
  /**
   * Test {@link Reporter}'s progress for map-reduce job.
   */
  @Test
  public void testReporterProgressForMRJob() throws IOException {
    Path test = new Path(testRootTempDir, "testReporterProgressForMRJob");
    
    JobConf conf = new JobConf();
    conf.setMapperClass(ProgressTesterMapper.class);
    conf.setReducerClass(ProgressTestingReducer.class);
    conf.setMapOutputKeyClass(Text.class);
    // fail early
    conf.setMaxMapAttempts(1);
    conf.setMaxReduceAttempts(1);

    RunningJob job = 
      UtilsForTests.runJob(conf, new Path(test, "in"), new Path(test, "out"), 
                           1, 1, INPUT);
    job.waitForCompletion();
    
    assertTrue("Job failed", job.isSuccessful());
  }

  /**
   * Runs a map-only job with {@link StatusLimitMapper} and expects it to
   * succeed, i.e. the status string the mapper reads back is never longer
   * than the configured limit.
   */
  @Test
  public void testStatusLimit() throws IOException, InterruptedException,
      ClassNotFoundException {
    Path test = new Path(testRootTempDir, "testStatusLimit");

    Configuration conf = new Configuration();
    Path inDir = new Path(test, "in");
    Path outDir = new Path(test, "out");
    // file system of the job's configuration; named so that it does not
    // shadow the class-level 'fs' field
    FileSystem jobFs = FileSystem.get(conf);
    if (jobFs.exists(inDir)) {
      jobFs.delete(inDir, true);
    }
    jobFs.mkdirs(inDir);
    DataOutputStream file = jobFs.create(new Path(inDir, "part-" + 0));
    try {
      file.writeBytes("testStatusLimit");
    } finally {
      // always release the stream, even if the write fails
      file.close();
    }

    if (jobFs.exists(outDir)) {
      jobFs.delete(outDir, true);
    }

    Job job = Job.getInstance(conf, "testStatusLimit");

    job.setMapperClass(StatusLimitMapper.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    job.waitForCompletion(true);

    assertTrue("Job failed", job.isSuccessful());
  }

}