/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapreduce;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import junit.framework.TestCase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.examples.MultiFileWordCount;
import org.apache.hadoop.examples.WordCount;
import org.apache.hadoop.examples.WordCount.IntSumReducer;
import org.apache.hadoop.examples.WordCount.TokenizerMapper;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ToolRunner;

/**
 * A JUnit test that runs map-reduce jobs on a mini map-reduce cluster
 * backed by the local file system.
 */
public class TestMapReduceLocal extends TestCase {
  private static Path TEST_ROOT_DIR =
    new Path(System.getProperty("test.build.data", "/tmp"));
  private static Configuration conf = new Configuration();
  private static FileSystem localFs;
  static {
    try {
      localFs = FileSystem.getLocal(conf);
    } catch (IOException io) {
      throw new RuntimeException("problem getting local fs", io);
    }
  }

  /** Writes the given data to a file under the test root directory,
   *  replacing any existing file. */
  public static Path writeFile(String name, String data) throws IOException {
    Path file = new Path(TEST_ROOT_DIR + "/" + name);
    localFs.delete(file, false);
    DataOutputStream f = localFs.create(file);
    f.write(data.getBytes());
    f.close();
    return file;
  }

  /** Reads a file under the test root directory into a string,
   *  normalizing line endings to '\n'. */
  public static String readFile(String name) throws IOException {
    DataInputStream f = localFs.open(new Path(TEST_ROOT_DIR + "/" + name));
    BufferedReader b = new BufferedReader(new InputStreamReader(f));
    StringBuilder result = new StringBuilder();
    String line = b.readLine();
    while (line != null) {
      result.append(line);
      result.append('\n');
      line = b.readLine();
    }
    b.close();
    return result.toString();
  }

  public void testWithLocal() throws Exception {
    MiniMRCluster mr = null;
    try {
      mr = new MiniMRCluster(2, "file:///", 3);
      Configuration conf = mr.createJobConf();
      runWordCount(conf);
      runMultiFileWordCount(conf);
    } finally {
      if (mr != null) {
        mr.shutdown();
      }
    }
  }

  /**
   * A TextInputFormat whose record reader verifies that progress is
   * reported at least once and never decreases.
   */
  public static class TrackingTextInputFormat extends TextInputFormat {
    public static class MonoProgressRecordReader extends LineRecordReader {
      private float last = 0.0f;
      private boolean progressCalled = false;
      @Override
      public float getProgress() throws IOException {
        progressCalled = true;
        final float ret = super.getProgress();
        assertTrue("getProgress decreased", ret >= last);
        last = ret;
        return ret;
      }
      @Override
      public synchronized void close() throws IOException {
        assertTrue("getProgress never called", progressCalled);
        super.close();
      }
    }
    @Override
    public RecordReader<LongWritable, Text> createRecordReader(
        InputSplit split, TaskAttemptContext context) {
      return new MonoProgressRecordReader();
    }
  }

  /** Runs the WordCount example with a combiner and verifies the
   *  output and the job counters. */
  private void runWordCount(Configuration conf)
      throws IOException, InterruptedException, ClassNotFoundException {
    final String COUNTER_GROUP = "org.apache.hadoop.mapreduce.TaskCounter";
    localFs.delete(new Path(TEST_ROOT_DIR + "/in"), true);
    localFs.delete(new Path(TEST_ROOT_DIR + "/out"), true);
    writeFile("in/part1", "this is a test\nof word count test\ntest\n");
    writeFile("in/part2", "more test");
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(TrackingTextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
    FileOutputFormat.setOutputPath(job, new Path(TEST_ROOT_DIR + "/out"));
    assertTrue(job.waitForCompletion(false));
    String out = readFile("out/part-r-00000");
    System.out.println(out);
    assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n",
                 out);
    Counters ctrs = job.getCounters();
    System.out.println("Counters: " + ctrs);
    long mapIn = ctrs.findCounter(FileInputFormatCounter.BYTES_READ).getValue();
    assertTrue(mapIn != 0);
    long combineIn = ctrs.findCounter(COUNTER_GROUP,
                                      "COMBINE_INPUT_RECORDS").getValue();
    long combineOut = ctrs.findCounter(COUNTER_GROUP,
                                       "COMBINE_OUTPUT_RECORDS").getValue();
    long reduceIn = ctrs.findCounter(COUNTER_GROUP,
                                     "REDUCE_INPUT_RECORDS").getValue();
    long mapOut = ctrs.findCounter(COUNTER_GROUP,
                                   "MAP_OUTPUT_RECORDS").getValue();
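    // Read the remaining shuffle/reduce counters and check the expected
    // relationships between them.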
    long reduceOut = ctrs.findCounter(COUNTER_GROUP,
                                      "REDUCE_OUTPUT_RECORDS").getValue();
    long reduceGrps = ctrs.findCounter(COUNTER_GROUP,
                                       "REDUCE_INPUT_GROUPS").getValue();
    long mergedMapOutputs = ctrs.findCounter(COUNTER_GROUP,
                                             "MERGED_MAP_OUTPUTS").getValue();
    long shuffledMaps = ctrs.findCounter(COUNTER_GROUP,
                                         "SHUFFLED_MAPS").getValue();
    assertEquals("map out = combine in", mapOut, combineIn);
    assertEquals("combine out = reduce in", combineOut, reduceIn);
    assertTrue("combine in > combine out", combineIn > combineOut);
    assertEquals("reduce groups = reduce out", reduceGrps, reduceOut);
    // Two input files yield two map tasks, so the single reducer shuffles
    // and merges exactly two map outputs (expected value first).
    assertEquals("Mismatch in mergedMapOutputs", 2, mergedMapOutputs);
    assertEquals("Mismatch in shuffledMaps", 2, shuffledMaps);
    String group = "Random Group";
    CounterGroup ctrGrp = ctrs.getGroup(group);
    assertEquals(0, ctrGrp.size());
  }

  /** Runs the MultiFileWordCount example via ToolRunner and verifies
   *  its output. */
  public void runMultiFileWordCount(Configuration conf) throws Exception {
    localFs.delete(new Path(TEST_ROOT_DIR + "/in"), true);
    localFs.delete(new Path(TEST_ROOT_DIR + "/out"), true);
    writeFile("in/part1", "this is a test\nof " +
                          "multi file word count test\ntest\n");
    writeFile("in/part2", "more test");
    int ret = ToolRunner.run(conf, new MultiFileWordCount(),
                             new String[] {TEST_ROOT_DIR + "/in",
                                           TEST_ROOT_DIR + "/out"});
    assertTrue("MultiFileWordCount failed", ret == 0);
    String out = readFile("out/part-r-00000");
    System.out.println(out);
    assertEquals("a\t1\ncount\t1\nfile\t1\nis\t1\n" +
                 "more\t1\nmulti\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n",
                 out);
  }
}