/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred.lib;
import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.ToolRunner;
import junit.framework.TestCase;
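/**
 * Tests that {@link CombineFileInputFormat} can compute splits over input
 * that mixes plain HDFS files with files stored inside a Hadoop Archive
 * (HAR).
 */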
public class TestHarWithCombineFileInputFormat extends TestCase {
private Path hdfsInputPath;
private Path archiveInputPath;
private MiniDFSCluster dfscluster;
private MiniMRCluster mapred;
private FileSystem fs;
private Path filea, fileb;
private Path archiveDir;
private JobConf conf;
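// Minimal CombineFileInputFormat: only getSplits() is exercised by this
// test, so getRecordReader() never has to return a real reader.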
private static class DummyInputFormat extends CombineFileInputFormat<Text, Text> {
  @Override
  public RecordReader<Text, Text> getRecordReader(InputSplit split,
      JobConf job, Reporter reporter) throws IOException {
    return null;
  }
}
@Override
protected void setUp() throws Exception {
super.setUp();
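// Bring up a single-node HDFS mini-cluster and a single-tracker
// MapReduce mini-cluster for the test.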
conf = new JobConf();
dfscluster = new MiniDFSCluster(conf, 1, true, null);
fs = dfscluster.getFileSystem();
mapred = new MiniMRCluster(1, fs.getUri().toString(), 1);
hdfsInputPath = new Path(fs.getHomeDirectory(), "test");
archiveDir = new Path(fs.getHomeDirectory(), "test-archive");
filea = new Path(hdfsInputPath, "a");
fileb = new Path(hdfsInputPath, "b");
// Create the following layout:
//   ~/test/a
//   ~/test/b
//   ~/test-archive/foo.har/a (in HAR)
//   ~/test-archive/foo.har/b (in HAR)
fs.mkdirs(hdfsInputPath);
FSDataOutputStream out = fs.create(filea);
out.write("a".getBytes());
out.close();
out = fs.create(fileb);
out.write("b".getBytes());
out.close();
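// Archive ~/test into ~/test-archive/foo.har with the HadoopArchives tool,
// the programmatic equivalent of:
//   hadoop archive -archiveName foo.har -p ~/test ~/test-archive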
HadoopArchives har = new HadoopArchives(conf);
String archiveName = "foo.har";
String[] args = new String[5];
args[0] = "-archiveName";
args[1] = archiveName;
args[2] = "-p";
args[3] = hdfsInputPath.toString();
args[4] = archiveDir.toString();
int ret = ToolRunner.run(har, args);
assertTrue("Failed to create HAR", ret == 0);
archiveInputPath =
new Path("har://" + archiveDir.toUri().getPath(), archiveName);
}
@Override
protected void tearDown() throws Exception {
try {
if (mapred != null) {
mapred.shutdown();
}
if (dfscluster != null) {
dfscluster.shutdown();
}
} catch (Exception e) {
  // Best-effort shutdown: report the failure but do not fail the test.
  System.err.println(e);
}
super.tearDown();
}
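/**
 * Requests splits over both the plain HDFS directory and its HAR copy, and
 * verifies that CombineFileInputFormat packs everything into one split.
 */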
@SuppressWarnings("deprecation")
public void testGetSplits() throws IOException {
// Force initialization of the FileSystem for the input path; the returned
// instance itself is not used.
hdfsInputPath.getFileSystem(conf);
DummyInputFormat inFormat = new DummyInputFormat();
FileInputFormat.setInputPaths(conf, hdfsInputPath, archiveInputPath);
InputSplit[] splits = inFormat.getSplits(conf, 1);
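// The two HDFS files and their two archived copies are all small enough
// to be combined into a single split.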
assertTrue("Number of splits is incorrect", splits.length == 1);
}
}