package hip.ch8.localjobrunner; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.junit.Test; import java.io.DataOutputStream; import java.io.IOException; import java.util.List; import static junit.framework.Assert.assertEquals; import static junit.framework.Assert.assertTrue; public class IdentityTest { @Test public void run() throws Exception { Path inputPath = new Path("/tmp/mrtest/input"); Path outputPath = new Path("/tmp/mrtest/output"); Configuration conf = new Configuration(); conf.set("mapred.job.tracker", "local"); conf.set("fs.default.name", "file:///"); FileSystem fs = FileSystem.get(conf); if (fs.exists(outputPath)) { fs.delete(outputPath, true); } if (fs.exists(inputPath)) { fs.delete(inputPath, true); } fs.mkdirs(inputPath); String input = "foo\tbar"; DataOutputStream file = fs.create(new Path(inputPath, "part-" + 0)); file.writeBytes(input); file.close(); Job job = runJob(conf, inputPath, outputPath); assertTrue(job.isSuccessful()); List<String> lines = IOUtils.readLines(fs.open(new Path(outputPath, "part-r-00000"))); assertEquals(1, lines.size()); String[] parts = StringUtils.split(lines.get(0), "\t"); assertEquals("foo", parts[0]); assertEquals("bar", parts[1]); } public Job runJob(Configuration conf, Path inputPath, Path outputPath) throws ClassNotFoundException, IOException, InterruptedException { Job job = new Job(conf); job.setInputFormatClass(KeyValueTextInputFormat.class); job.setMapOutputKeyClass(Text.class); FileInputFormat.setInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); job.waitForCompletion(false); return job; } }