/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.Random;

import junit.framework.TestCase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

/**********************************************************
 * MapredLoadTest generates a bunch of work that exercises
 * a Hadoop Map-Reduce system (and DFS, too).  It goes through
 * the following steps:
 *
 *   1) Take inputs 'range' and 'counts'.
 *   2) Generate 'counts' random integers between 0 and range-1.
 *   3) Create a file that lists each integer between 0 and range-1,
 *      and lists the number of times that integer was generated.
 *   4) Emit a (very large) file that contains all the integers
 *      in the order generated.
 *   5) After the file has been generated, read it back and count
 *      how many times each int was generated.
 *   6) Compare this big count-map against the original one.  If
 *      they match, then SUCCESS!  Otherwise, FAILURE!
 *
 * OK, that's how we can think about it.  What are the map-reduce
 * steps that get the job done?
 *
 *   1) In a non-mapred thread, take the inputs 'range' and 'counts'.
 *   2) In a non-mapred thread, generate the answer-key and write to disk.
 *   3) In a mapred job, divide the answer key into K jobs.
 *   4) A mapred 'generator' task consists of K map jobs.  Each reads
 *      an individual "sub-key", and generates integers according to
 *      it (though with a random ordering).
 *   5) The generator's reduce task agglomerates all of those files
 *      into a single one.
 *   6) A mapred 'reader' task consists of M map jobs.  The output
 *      file is cut into M pieces.  Each of the M jobs counts the
 *      individual ints in its chunk and creates a map of all seen ints.
 *   7) A mapred job integrates all the count files into a single one.
 *
 **********************************************************/
public class TestMapRed extends TestCase {
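  /*
   * A concrete walk-through with tiny (hypothetical) inputs: with
   * range=3 and counts=5, the answer key might come out as the
   * distribution {0=2, 1=1, 2=2}.  The generator job then emits the
   * multiset {0, 0, 1, 2, 2} in some random order, the checker job
   * counts each value back up, and the merge job collapses the
   * partial counts into a single file whose contents should match
   * the original answer key exactly.
   */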
  /**
   * Modified to make it a junit test.
   * The RandomGen Job does the actual work of creating
   * a huge file of assorted numbers.  It receives instructions
   * as to how many times each number should be counted.  Then
   * it emits those numbers in a crazy order.
   *
   * The map() function takes a key/val pair that describes
   * a value-to-be-emitted (the key) and how many times it
   * should be emitted (the value), aka "numtimes".  map() then
   * emits a series of intermediate key/val pairs.  It emits
   * 'numtimes' of these.  The key is a random number and the
   * value is the 'value-to-be-emitted'.
   *
   * The system collates and merges these pairs according to
   * the random number.  reduce() takes in a key/value
   * pair that consists of a crazy random number and a series
   * of values that should be emitted.  The random number key
   * is now dropped, and reduce() emits a pair for every intermediate value.
   * The emitted key is an intermediate value.  The emitted value
   * is null.  Thus, we've created a huge file
   * of numbers in random order, but where each number appears
   * as many times as we were instructed.
   */
  static class RandomGenMapper
    implements Mapper<IntWritable, IntWritable, IntWritable, IntWritable> {

    public void configure(JobConf job) {
    }

    public void map(IntWritable key, IntWritable val,
                    OutputCollector<IntWritable, IntWritable> out,
                    Reporter reporter) throws IOException {
      int randomVal = key.get();
      int randomCount = val.get();
      // Emit 'randomCount' copies of 'randomVal', each under a fresh
      // random key so the shuffle scrambles the output order.
      for (int i = 0; i < randomCount; i++) {
        out.collect(new IntWritable(Math.abs(r.nextInt())),
                    new IntWritable(randomVal));
      }
    }

    public void close() {
    }
  }

  /**
   * Drops the random keys and re-emits each buffered value once.
   */
  static class RandomGenReducer
    implements Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

    public void configure(JobConf job) {
    }

    public void reduce(IntWritable key, Iterator<IntWritable> it,
                       OutputCollector<IntWritable, IntWritable> out,
                       Reporter reporter) throws IOException {
      while (it.hasNext()) {
        out.collect(it.next(), null);
      }
    }

    public void close() {
    }
  }

  /**
   * The RandomCheck Job does a lot of our work.  It takes
   * in a num/string keyspace, and transforms it into a
   * key/count(int) keyspace.
   *
   * The map() function just emits a num/1 pair for every
   * num/string input pair.
   *
   * The reduce() function sums up all the 1s that were
   * emitted for a single key.  It then emits the key/total
   * pair.
   *
   * This is used to regenerate the random number "answer key".
   * Each key here is a random number, and the count is the
   * number of times the number was emitted.
   */
  static class RandomCheckMapper
    implements Mapper<WritableComparable, Text, IntWritable, IntWritable> {

    public void configure(JobConf job) {
    }

    public void map(WritableComparable key, Text val,
                    OutputCollector<IntWritable, IntWritable> out,
                    Reporter reporter) throws IOException {
      out.collect(new IntWritable(Integer.parseInt(val.toString().trim())),
                  new IntWritable(1));
    }

    public void close() {
    }
  }

  /**
   * Counts how many times each key was seen by summing its "1" values.
   */
  static class RandomCheckReducer
    implements Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

    public void configure(JobConf job) {
    }

    public void reduce(IntWritable key, Iterator<IntWritable> it,
                       OutputCollector<IntWritable, IntWritable> out,
                       Reporter reporter) throws IOException {
      int keyint = key.get();
      int count = 0;
      while (it.hasNext()) {
        it.next();
        count++;
      }
      out.collect(new IntWritable(keyint), new IntWritable(count));
    }

    public void close() {
    }
  }
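  /*
   * Sketch of the RandomCheck data flow on a toy input (the values are
   * illustrative): if "17" appears twice in the generated file, map()
   * emits (17, 1) twice, the framework groups them, and
   * reduce(17, [1, 1]) emits (17, 2) -- recovering the original count.
   */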
  /**
   * The Merge Job is a really simple one.  It takes in
   * an int/int key-value set, and emits the same set.
   * But it merges identical keys by adding their values.
   *
   * Thus, the map() function is just the identity function
   * and reduce() just sums.  Nothing to see here!
   */
  static class MergeMapper
    implements Mapper<IntWritable, IntWritable, IntWritable, IntWritable> {

    public void configure(JobConf job) {
    }

    public void map(IntWritable key, IntWritable val,
                    OutputCollector<IntWritable, IntWritable> out,
                    Reporter reporter) throws IOException {
      int keyint = key.get();
      int valint = val.get();
      out.collect(new IntWritable(keyint), new IntWritable(valint));
    }

    public void close() {
    }
  }

  static class MergeReducer
    implements Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

    public void configure(JobConf job) {
    }

    public void reduce(IntWritable key, Iterator<IntWritable> it,
                       OutputCollector<IntWritable, IntWritable> out,
                       Reporter reporter) throws IOException {
      int keyint = key.get();
      int total = 0;
      while (it.hasNext()) {
        total += it.next().get();
      }
      out.collect(new IntWritable(keyint), new IntWritable(total));
    }

    public void close() {
    }
  }

  private static int range = 10;
  private static int counts = 100;
  private static Random r = new Random();

  /**
  public TestMapRed(int range, int counts, Configuration conf)
    throws IOException {
    this.range = range;
    this.counts = counts;
    this.conf = conf;
  }
  **/

  public void testMapred() throws Exception {
    launch();
  }

  private static class MyMap
    implements Mapper<WritableComparable, Text, Text, Text> {

    public void configure(JobConf conf) {
    }

    public void map(WritableComparable key, Text value,
                    OutputCollector<Text, Text> output,
                    Reporter reporter) throws IOException {
      String str = value.toString().toLowerCase();
      output.collect(new Text(str), value);
    }

    public void close() throws IOException {
    }
  }

  private static class MyReduce extends IdentityReducer {
    private JobConf conf;
    private boolean compressInput;
    private TaskAttemptID taskId;
    private boolean first = true;

    @Override
    public void configure(JobConf conf) {
      this.conf = conf;
      compressInput = conf.getCompressMapOutput();
      taskId = TaskAttemptID.forName(conf.get("mapred.task.id"));
    }

    public void reduce(WritableComparable key, Iterator values,
                       OutputCollector output, Reporter reporter
                      ) throws IOException {
      if (first) {
        first = false;
        MapOutputFile mapOutputFile = new MapOutputFile(taskId.getJobID());
        mapOutputFile.setConf(conf);
        Path input = mapOutputFile.getInputFile(0, taskId);
        FileSystem fs = FileSystem.get(conf);
        assertTrue("reduce input exists " + input, fs.exists(input));
        SequenceFile.Reader rdr =
          new SequenceFile.Reader(fs, input, conf);
        assertEquals("is reduce input compressed " + input,
                     compressInput,
                     rdr.isCompressed());
        rdr.close();
      }
    }
  }

  private static class BadPartitioner
      implements Partitioner<LongWritable,Text> {
    boolean low;

    public void configure(JobConf conf) {
      low = conf.getBoolean("test.testmapred.badpartition", true);
    }

    public int getPartition(LongWritable k, Text v, int numPartitions) {
      return low ? -1 : numPartitions;
    }
  }
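  /*
   * For contrast, a minimal well-behaved partitioner (a sketch; not
   * used by any test here) must map every key into [0, numPartitions):
   *
   *   public int getPartition(LongWritable k, Text v, int numPartitions) {
   *     return (k.hashCode() & Integer.MAX_VALUE) % numPartitions;
   *   }
   *
   * BadPartitioner deliberately returns -1 or numPartitions instead,
   * one past each end of the legal range, so the framework has to
   * reject both jobs in testPartitioner below.
   */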
  public void testPartitioner() throws Exception {
    JobConf conf = new JobConf(TestMapRed.class);
    conf.setPartitionerClass(BadPartitioner.class);
    FileSystem fs = FileSystem.getLocal(conf);
    Path testdir = new Path(
        System.getProperty("test.build.data","/tmp")).makeQualified(fs);
    Path inFile = new Path(testdir, "blah/blah");
    DataOutputStream f = fs.create(inFile);
    f.writeBytes("blah blah blah\n");
    f.close();
    FileInputFormat.setInputPaths(conf, inFile);
    FileOutputFormat.setOutputPath(conf, new Path(testdir, "out"));
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    // partition too low
    conf.setBoolean("test.testmapred.badpartition", true);
    boolean pass = true;
    try {
      JobClient.runJob(conf);
    } catch (IOException e) {
      pass = false;
    }
    assertFalse("should fail for partition < 0", pass);

    // partition too high
    conf.setBoolean("test.testmapred.badpartition", false);
    pass = true;
    try {
      JobClient.runJob(conf);
    } catch (IOException e) {
      pass = false;
    }
    assertFalse("should fail for partition >= numPartitions", pass);
  }

  public static class NullMapper
      implements Mapper<NullWritable,Text,NullWritable,Text> {
    public void map(NullWritable key, Text val,
        OutputCollector<NullWritable,Text> output, Reporter reporter)
        throws IOException {
      output.collect(NullWritable.get(), val);
    }
    public void configure(JobConf conf) { }
    public void close() { }
  }

  public void testNullKeys() throws Exception {
    JobConf conf = new JobConf(TestMapRed.class);
    FileSystem fs = FileSystem.getLocal(conf);
    Path testdir = new Path(
        System.getProperty("test.build.data","/tmp")).makeQualified(fs);
    fs.delete(testdir, true);
    Path inFile = new Path(testdir, "nullin/blah");
    SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, inFile,
        NullWritable.class, Text.class, SequenceFile.CompressionType.NONE);
    Text t = new Text();
    t.set("AAAAAAAAAAAAAA"); w.append(NullWritable.get(), t);
    t.set("BBBBBBBBBBBBBB"); w.append(NullWritable.get(), t);
    t.set("CCCCCCCCCCCCCC"); w.append(NullWritable.get(), t);
    t.set("DDDDDDDDDDDDDD"); w.append(NullWritable.get(), t);
    t.set("EEEEEEEEEEEEEE"); w.append(NullWritable.get(), t);
    t.set("FFFFFFFFFFFFFF"); w.append(NullWritable.get(), t);
    t.set("GGGGGGGGGGGGGG"); w.append(NullWritable.get(), t);
    t.set("HHHHHHHHHHHHHH"); w.append(NullWritable.get(), t);
    w.close();
    FileInputFormat.setInputPaths(conf, inFile);
    FileOutputFormat.setOutputPath(conf, new Path(testdir, "nullout"));
    conf.setMapperClass(NullMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setNumReduceTasks(1);

    JobClient.runJob(conf);

    // All records share the single NullWritable key, so the test expects
    // the sort/merge to hand them back in their original order.
    SequenceFile.Reader r = new SequenceFile.Reader(fs,
        new Path(testdir, "nullout/part-00000"), conf);
    String m = "AAAAAAAAAAAAAA";
    for (int i = 1; r.next(NullWritable.get(), t); ++i) {
      assertTrue(t.toString() + " doesn't match " + m, m.equals(t.toString()));
      // advance the expected string: AAAA... -> BBBB... -> CCCC...
      m = m.replace((char)('A' + i - 1), (char)('A' + i));
    }
    r.close();
  }
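  /**
   * Runs a small pass-through job and verifies that the requested
   * compression settings actually take effect: MyReduce checks its own
   * input (the map output), and the reduce output is read back and
   * checked against redCompression below.
   */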
  private void checkCompression(boolean compressMapOutputs,
                                CompressionType redCompression,
                                boolean includeCombine
                               ) throws Exception {
    JobConf conf = new JobConf(TestMapRed.class);
    Path testdir = new Path("build/test/test.mapred.compress");
    Path inDir = new Path(testdir, "in");
    Path outDir = new Path(testdir, "out");
    FileSystem fs = FileSystem.get(conf);
    fs.delete(testdir, true);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.setMapperClass(MyMap.class);
    conf.setReducerClass(MyReduce.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    if (includeCombine) {
      conf.setCombinerClass(IdentityReducer.class);
    }
    conf.setCompressMapOutput(compressMapOutputs);
    SequenceFileOutputFormat.setOutputCompressionType(conf, redCompression);
    try {
      if (!fs.mkdirs(testdir)) {
        throw new IOException("Mkdirs failed to create " + testdir.toString());
      }
      if (!fs.mkdirs(inDir)) {
        throw new IOException("Mkdirs failed to create " + inDir.toString());
      }
      Path inFile = new Path(inDir, "part0");
      DataOutputStream f = fs.create(inFile);
      f.writeBytes("Owen was here\n");
      f.writeBytes("Hadoop is fun\n");
      f.writeBytes("Is this done, yet?\n");
      f.close();
      RunningJob rj = JobClient.runJob(conf);
      assertTrue("job was complete", rj.isComplete());
      assertTrue("job was successful", rj.isSuccessful());
      Path output = new Path(outDir, Task.getOutputName(0));
      assertTrue("reduce output exists " + output, fs.exists(output));
      SequenceFile.Reader rdr = new SequenceFile.Reader(fs, output, conf);
      assertEquals("is reduce output compressed " + output,
                   redCompression != CompressionType.NONE,
                   rdr.isCompressed());
      rdr.close();
    } finally {
      fs.delete(testdir, true);
    }
  }
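  /**
   * Exercises the full compression matrix: every SequenceFile
   * CompressionType, with and without a combiner, and with map-output
   * compression both off and on.
   */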
  public void testCompression() throws Exception {
    EnumSet<SequenceFile.CompressionType> seq =
      EnumSet.allOf(SequenceFile.CompressionType.class);
    for (CompressionType redCompression : seq) {
      for(int combine=0; combine < 2; ++combine) {
        checkCompression(false, redCompression, combine == 1);
        checkCompression(true, redCompression, combine == 1);
      }
    }
  }

  /**
   * Runs the full generate/check/merge pipeline described in the class
   * comment, then verifies the reconstructed answer key.
   */
  public static void launch() throws Exception {
    //
    // Generate distribution of ints.  This is the answer key.
    //
    JobConf conf = new JobConf(TestMapRed.class);
    int countsToGo = counts;
    int dist[] = new int[range];
    for (int i = 0; i < range; i++) {
      // Draw each bucket from a Gaussian centered on the average number
      // of counts still unassigned per remaining value; any positive
      // leftover is dumped into the last bucket below.
      double avgInts = (1.0 * countsToGo) / (range - i);
      dist[i] = (int) Math.max(0,
          Math.round(avgInts + (Math.sqrt(avgInts) * r.nextGaussian())));
      countsToGo -= dist[i];
    }
    if (countsToGo > 0) {
      dist[dist.length-1] += countsToGo;
    }

    //
    // Write the answer key to a file.
    //
    FileSystem fs = FileSystem.get(conf);
    Path testdir = new Path("mapred.loadtest");
    if (!fs.mkdirs(testdir)) {
      throw new IOException("Mkdirs failed to create " + testdir.toString());
    }

    Path randomIns = new Path(testdir, "genins");
    if (!fs.mkdirs(randomIns)) {
      throw new IOException("Mkdirs failed to create " + randomIns.toString());
    }

    Path answerkey = new Path(randomIns, "answer.key");
    SequenceFile.Writer out =
      SequenceFile.createWriter(fs, conf, answerkey, IntWritable.class,
                                IntWritable.class,
                                SequenceFile.CompressionType.NONE);
    try {
      for (int i = 0; i < range; i++) {
        out.append(new IntWritable(i), new IntWritable(dist[i]));
      }
    } finally {
      out.close();
    }
    //printFiles(randomIns, conf);

    //
    // Now we need to generate the random numbers according to
    // the above distribution.
    //
    // We create a lot of map tasks, each of which takes at least
    // one "line" of the distribution.  (That is, a certain number
    // X is to be generated Y number of times.)
    //
    // A map task emits Y key/val pairs.  The val is X.  The key
    // is a randomly-generated number.
    //
    // The reduce task gets its input sorted by key.  That is, sorted
    // in random order.  It then emits a single line of text for each
    // of the given values.  It does not emit the key.
    //
    // Because there's just one reduce task, we emit a single big
    // file of random numbers.
    //
    Path randomOuts = new Path(testdir, "genouts");
    fs.delete(randomOuts, true);

    JobConf genJob = new JobConf(conf, TestMapRed.class);
    FileInputFormat.setInputPaths(genJob, randomIns);
    genJob.setInputFormat(SequenceFileInputFormat.class);
    genJob.setMapperClass(RandomGenMapper.class);

    FileOutputFormat.setOutputPath(genJob, randomOuts);
    genJob.setOutputKeyClass(IntWritable.class);
    genJob.setOutputValueClass(IntWritable.class);
    genJob.setOutputFormat(TextOutputFormat.class);
    genJob.setReducerClass(RandomGenReducer.class);
    genJob.setNumReduceTasks(1);
    JobClient.runJob(genJob);
    //printFiles(randomOuts, conf);

    //
    // Next, we read the big file in and regenerate the
    // original map.  It's split into a number of parts.
    // (That number is 'intermediateReduces'.)
    //
    // We have many map tasks, each of which read at least one
    // of the output numbers.  For each number read in, the
    // map task emits a key/value pair where the key is the
    // number and the value is "1".
    //
    // Each reduce task receives its input sorted by the key
    // emitted above.  For each key, there will be a certain
    // number of "1" values.  The reduce task sums these values
    // to compute how many times the given key was emitted.
    //
    // The reduce task then emits a key/val pair where the key
    // is the number in question, and the value is the number of
    // times the key was emitted.  This is the same format as the
    // original answer key (except that numbers emitted zero times
    // will not appear in the regenerated key.)  The answer set
    // is split into a number of pieces.  A final MapReduce job
    // will merge them.
    //
    // There's not really a need to go to 10 reduces here
    // instead of 1.  But we want to test what happens when
    // you have multiple reduces at once.
    //
    int intermediateReduces = 10;
    Path intermediateOuts = new Path(testdir, "intermediateouts");
    fs.delete(intermediateOuts, true);
    JobConf checkJob = new JobConf(conf, TestMapRed.class);
    FileInputFormat.setInputPaths(checkJob, randomOuts);
    checkJob.setInputFormat(TextInputFormat.class);
    checkJob.setMapperClass(RandomCheckMapper.class);

    FileOutputFormat.setOutputPath(checkJob, intermediateOuts);
    checkJob.setOutputKeyClass(IntWritable.class);
    checkJob.setOutputValueClass(IntWritable.class);
    checkJob.setOutputFormat(MapFileOutputFormat.class);
    checkJob.setReducerClass(RandomCheckReducer.class);
    checkJob.setNumReduceTasks(intermediateReduces);
    JobClient.runJob(checkJob);
    //printFiles(intermediateOuts, conf);

    //
    // OK, now we take the output from the last job and
    // merge it down to a single file.  The map() and reduce()
    // functions don't really do anything except reemit tuples.
    // But by having a single reduce task here, we end up merging
    // all the files.
    //
    Path finalOuts = new Path(testdir, "finalouts");
    fs.delete(finalOuts, true);
    JobConf mergeJob = new JobConf(conf, TestMapRed.class);
    FileInputFormat.setInputPaths(mergeJob, intermediateOuts);
    mergeJob.setInputFormat(SequenceFileInputFormat.class);
    mergeJob.setMapperClass(MergeMapper.class);

    FileOutputFormat.setOutputPath(mergeJob, finalOuts);
    mergeJob.setOutputKeyClass(IntWritable.class);
    mergeJob.setOutputValueClass(IntWritable.class);
    mergeJob.setOutputFormat(SequenceFileOutputFormat.class);
    mergeJob.setReducerClass(MergeReducer.class);
    mergeJob.setNumReduceTasks(1);
    JobClient.runJob(mergeJob);
    //printFiles(finalOuts, conf);
    //
    // Finally, we compare the reconstructed answer key with the
    // original one.  Remember, we need to ignore zero-count items
    // in the original key.
    //
    boolean success = true;
    Path recomputedkey = new Path(finalOuts, "part-00000");
    SequenceFile.Reader in = new SequenceFile.Reader(fs, recomputedkey, conf);
    int totalseen = 0;
    try {
      IntWritable key = new IntWritable();
      IntWritable val = new IntWritable();
      for (int i = 0; i < range; i++) {
        if (dist[i] == 0) {
          continue;
        }
        if (!in.next(key, val)) {
          System.err.println("Cannot read entry " + i);
          success = false;
          break;
        } else {
          if (!((key.get() == i) && (val.get() == dist[i]))) {
            System.err.println("Mismatch!  Pos=" + key.get() + ", i=" + i +
                               ", val=" + val.get() +
                               ", dist[i]=" + dist[i]);
            success = false;
          }
          totalseen += val.get();
        }
      }
      if (success) {
        if (in.next(key, val)) {
          System.err.println("Unnecessary lines in recomputed key!");
          success = false;
        }
      }
    } finally {
      in.close();
    }
    int originalTotal = 0;
    for (int i = 0; i < dist.length; i++) {
      originalTotal += dist[i];
    }
    System.out.println("Original sum: " + originalTotal);
    System.out.println("Recomputed sum: " + totalseen);

    //
    // Write to "results" whether the test succeeded or not.
    //
    Path resultFile = new Path(testdir, "results");
    BufferedWriter bw = new BufferedWriter(
        new OutputStreamWriter(fs.create(resultFile)));
    try {
      bw.write("Success=" + success + "\n");
      System.out.println("Success=" + success);
    } finally {
      bw.close();
    }
    assertTrue("testMapRed failed", success);
    fs.delete(testdir, true);
  }

  private static void printTextFile(FileSystem fs, Path p) throws IOException {
    BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(p)));
    String line;
    while ((line = in.readLine()) != null) {
      System.out.println("  Row: " + line);
    }
    in.close();
  }

  private static void printSequenceFile(FileSystem fs, Path p,
                                        Configuration conf)
    throws IOException {
    SequenceFile.Reader r = new SequenceFile.Reader(fs, p, conf);
    Object key = null;
    Object value = null;
    while ((key = r.next(key)) != null) {
      value = r.getCurrentValue(value);
      System.out.println("  Row: " + key + ", " + value);
    }
    r.close();
  }

  private static boolean isSequenceFile(FileSystem fs,
                                        Path f) throws IOException {
    DataInputStream in = fs.open(f);
    try {
      // A SequenceFile starts with the three magic bytes 'S', 'E', 'Q'.
      byte[] seq = "SEQ".getBytes();
      for(int i=0; i < seq.length; ++i) {
        if (seq[i] != in.read()) {
          return false;
        }
      }
    } finally {
      in.close();
    }
    return true;
  }

  private static void printFiles(Path dir,
                                 Configuration conf) throws IOException {
    FileSystem fs = dir.getFileSystem(conf);
    for(FileStatus f: fs.listStatus(dir)) {
      System.out.println("Reading " + f.getPath() + ": ");
      if (f.isDir()) {
        System.out.println("  it is a map file.");
        printSequenceFile(fs, new Path(f.getPath(), "data"), conf);
      } else if (isSequenceFile(fs, f.getPath())) {
        System.out.println("  it is a sequence file.");
        printSequenceFile(fs, f.getPath(), conf);
      } else {
        System.out.println("  it is a text file.");
        printTextFile(fs, f.getPath());
      }
    }
  }
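  /*
   * Example standalone invocation (the jar name is hypothetical and
   * depends on the local build layout):
   *
   *   bin/hadoop jar hadoop-test.jar org.apache.hadoop.mapred.TestMapRed 100 10000
   *
   * Here <counts> (10000) is substantially larger than <range> (100),
   * as the usage message below recommends.
   */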
  /**
   * Launches all the tasks in order.
   */
  public static void main(String[] argv) throws Exception {
    if (argv.length < 2) {
      System.err.println("Usage: TestMapRed <range> <counts>");
      System.err.println();
      System.err.println("Note: a good test will have a <counts> value" +
                         " that is substantially larger than the <range>");
      return;
    }

    int i = 0;
    range = Integer.parseInt(argv[i++]);
    counts = Integer.parseInt(argv[i++]);
    launch();
  }

  public void testSmallInput(){
    runJob(100);
  }

  public void testBiggerInput(){
    runJob(1000);
  }

  public void runJob(int items) {
    try {
      JobConf conf = new JobConf(TestMapRed.class);
      Path testdir = new Path("build/test/test.mapred.spill");
      Path inDir = new Path(testdir, "in");
      Path outDir = new Path(testdir, "out");
      FileSystem fs = FileSystem.get(conf);
      fs.delete(testdir, true);
      conf.setInt("io.sort.mb", 1);
      conf.setInputFormat(SequenceFileInputFormat.class);
      FileInputFormat.setInputPaths(conf, inDir);
      FileOutputFormat.setOutputPath(conf, outDir);
      conf.setMapperClass(IdentityMapper.class);
      conf.setReducerClass(IdentityReducer.class);
      conf.setOutputKeyClass(Text.class);
      conf.setOutputValueClass(Text.class);
      conf.setOutputFormat(SequenceFileOutputFormat.class);
      if (!fs.mkdirs(testdir)) {
        throw new IOException("Mkdirs failed to create " + testdir.toString());
      }
      if (!fs.mkdirs(inDir)) {
        throw new IOException("Mkdirs failed to create " + inDir.toString());
      }
      Path inFile = new Path(inDir, "part0");
      SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, inFile,
          Text.class, Text.class);

      StringBuffer content = new StringBuffer();
      for (int i = 0; i < 1000; i++) {
        content.append(i).append(": This is one more line of content\n");
      }
      Text text = new Text(content.toString());
      for (int i = 0; i < items; i++) {
        writer.append(new Text("rec:" + i), text);
      }
      writer.close();

      JobClient.runJob(conf);
    } catch (Exception e) {
      fail("Threw exception:" + e);
    }
  }
}