/**
 *
 */
package org.apache.hadoop.mapred.lib;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner;

import junit.framework.TestCase;

/**
 * Tests that {@link CombineFileInputFormat} can generate splits when the
 * input paths span multiple file systems (plain HDFS and a Hadoop archive).
 */
public class TestHarWithCombineFileInputFormat extends TestCase {
  private Path hdfsInputPath;
  private Path archiveInputPath;
  private MiniDFSCluster dfscluster;
  private MiniMRCluster mapred;
  private FileSystem fs;
  private Path filea, fileb;
  private Path archiveDir;
  private JobConf conf;

  /**
   * Concrete CombineFileInputFormat; only getSplits() is exercised here,
   * so no real RecordReader is needed.
   */
  private class DummyInputFormat extends CombineFileInputFormat<Text, Text> {
    @Override
    public RecordReader<Text, Text> getRecordReader(InputSplit split,
        JobConf job, Reporter reporter) throws IOException {
      return null;
    }
  }

  protected void setUp() throws Exception {
    super.setUp();
    conf = new JobConf();
    dfscluster = new MiniDFSCluster(conf, 1, true, null);
    fs = dfscluster.getFileSystem();
    mapred = new MiniMRCluster(1, fs.getUri().toString(), 1);
    hdfsInputPath = new Path(fs.getHomeDirectory(), "test");
    archiveDir = new Path(fs.getHomeDirectory(), "test-archive");
    filea = new Path(hdfsInputPath, "a");
    fileb = new Path(hdfsInputPath, "b");

    // Create the following directory structure
    // ~/test/a
    // ~/test/b/
    // ~/test-archive/foo.har/a (in HAR)
    // ~/test-archive/foo.har/b (in HAR)
    fs.mkdirs(hdfsInputPath);
    FSDataOutputStream out = fs.create(filea);
    out.write("a".getBytes());
    out.close();
    out = fs.create(fileb);
    out.write("b".getBytes());
    out.close();

    // Archive ~/test into ~/test-archive/foo.har
    HadoopArchives har = new HadoopArchives(conf);
    String archiveName = "foo.har";
    String[] args = new String[5];
    args[0] = "-archiveName";
    args[1] = "foo.har";
    args[2] = "-p";
    args[3] = hdfsInputPath.toString();
    args[4] = archiveDir.toString();
    int ret = ToolRunner.run(har, args);
    assertTrue("Failed to create HAR", ret == 0);
    archiveInputPath = new Path("har://" + archiveDir.toUri().getPath(),
        archiveName);
  }

  protected void tearDown() throws Exception {
    try {
      if (mapred != null) {
        mapred.shutdown();
      }
      if (dfscluster != null) {
        dfscluster.shutdown();
      }
    } catch (Exception e) {
      System.err.println(e);
    }
    super.tearDown();
  }

  @SuppressWarnings("deprecation")
  public void testGetSplits() throws IOException {
    hdfsInputPath.getFileSystem(conf);
    DummyInputFormat inFormat = new DummyInputFormat();
    DummyInputFormat.setInputPaths(conf, hdfsInputPath, archiveInputPath);
    InputSplit[] splits = inFormat.getSplits(conf, 1);
    // The HDFS path and the HAR path together should collapse into a
    // single combined split.
    assertTrue("Number of splits is incorrect", splits.length == 1);
  }
}