/** * Copyright 2011-2017 Asakusa Framework Team. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.asakusafw.runtime.mapreduce.simple; import static org.hamcrest.Matchers.*; import static org.junit.Assert.*; import java.io.File; import java.io.FileFilter; import java.io.IOException; import java.io.PrintWriter; import java.text.MessageFormat; import java.util.Arrays; import java.util.Collections; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.Map; import java.util.Scanner; import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import com.asakusafw.runtime.windows.WindowsSupport; /** * Test for {@link SimpleJobRunner}. */ public class SimpleJobRunnerTest { /** * Windows platform support. */ @ClassRule public static final WindowsSupport WINDOWS_SUPPORT = new WindowsSupport(); /** * A temporary folder. */ @Rule public final TemporaryFolder folder = new TemporaryFolder(); /** * Test for map only job. * @throws Exception if failed */ @Test public void map_only() throws Exception { Job job = newJob(); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(SimpleMapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(Text.class); File inputDir = folder.newFolder(); File inputFile = new File(inputDir, "input.txt"); write(inputFile, "Hello, world!"); File outputDir = folder.newFolder(); outputDir.delete(); FileInputFormat.setInputPaths(job, new Path(inputFile.toURI())); FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI())); assertThat(new SimpleJobRunner().run(job), is(true)); assertThat(trimHead(read(outputDir)), is(set("Hello, world!"))); } /** * Test for map-reduce job. * @throws Exception if failed */ @Test public void map_reduce() throws Exception { Job job = newJob(); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(WordCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); job.setSortComparatorClass(Text.Comparator.class); job.setGroupingComparatorClass(Text.Comparator.class); job.setReducerClass(WordCountReducer.class); job.setNumReduceTasks(1); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); File inputDir = folder.newFolder(); File inputFile = new File(inputDir, "input.txt"); write(inputFile, new String[] { "a b c d", "a a b c", "c", }); File outputDir = folder.newFolder(); outputDir.delete(); FileInputFormat.setInputPaths(job, new Path(inputFile.toURI())); FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI())); assertThat(new SimpleJobRunner().run(job), is(true)); assertThat(toMap(read(outputDir)), is(map(new String[] { "a", "3", "b", "2", "c", "3", "d", "1", }))); } /** * Simple stress testing. * @throws Exception if failed */ @Test public void map_only_stress() throws Exception { int count = 50; map_only(); long t0 = System.currentTimeMillis(); for (int i = 0; i < count; i++) { map_only(); } long t1 = System.currentTimeMillis(); System.out.println(MessageFormat.format( "{0} map_only: {1}ms ({2}ms/attempt)", count, t1 - t0, (t1 - t0) / count)); } /** * Simple stress testing. * @throws Exception if failed */ @Test public void map_reduce_stress() throws Exception { int count = 50; map_reduce(); long t0 = System.currentTimeMillis(); for (int i = 0; i < count; i++) { map_reduce(); } long t1 = System.currentTimeMillis(); System.out.println(MessageFormat.format( "{0} map_reduce: {1}ms ({2}ms/attempt)", count, t1 - t0, (t1 - t0) / count)); } /** * Test for wrong job. * @throws Exception if failed */ @Test public void exception() throws Exception { Job job = newJob(); job.setJobName("w/ exception"); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(InvalidMapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(Text.class); File inputDir = folder.newFolder(); File inputFile = new File(inputDir, "input.txt"); write(inputFile, "testing"); File outputDir = folder.newFolder(); outputDir.delete(); FileInputFormat.setInputPaths(job, new Path(inputFile.toURI())); FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI())); assertThat(new SimpleJobRunner().run(job), is(false)); } private Job newJob() throws IOException { Job job = Job.getInstance(new Configuration()); job.getConfiguration().setInt(SimpleJobRunner.KEY_BUFFER_SIZE, 16 * 1024 * 1024); job.getConfiguration().set( SimpleJobRunner.KEY_TEMPORARY_LOCATION, new File(folder.getRoot(), "spill-out").getAbsolutePath()); return job; } private Set<String> set(String... values) { return new LinkedHashSet<>(Arrays.asList(values)); } private Map<String, String> map(String... keyValuePairs) { assert keyValuePairs.length % 2 == 0; Map<String, String> results = new LinkedHashMap<>(); for (int i = 0; i < keyValuePairs.length; i += 2) { results.put(keyValuePairs[i + 0], keyValuePairs[i + 1]); } return results; } private Set<String> trimHead(Set<String> values) { Set<String> results = new LinkedHashSet<>(); for (String string : values) { int index = string.indexOf('\t'); if (index >= 0) { results.add(string.substring(index + 1)); } else { results.add(string); } } return results; } private Map<String, String> toMap(Set<String> values) { Map<String, String> results = new LinkedHashMap<>(); for (String string : values) { int index = string.indexOf('\t'); assertThat(string, index, greaterThanOrEqualTo(0)); String key = string.substring(0, index); assertThat(results, not(hasKey(key))); results.put(key, string.substring(index + 1)); } return results; } private void write(File file, String... lines) throws IOException { try (PrintWriter writer = new PrintWriter(file, "UTF-8")) { for (String line : lines) { writer.println(line); } } } private Set<String> read(File file) throws IOException { return read(file, pathname -> { String name = pathname.getName(); if (name.startsWith(".") || name.equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) { return false; } return true; }); } private Set<String> read(File file, FileFilter filter) throws IOException { if (filter.accept(file) == false) { return Collections.emptySet(); } Set<String> results = new LinkedHashSet<>(); if (file.isDirectory()) { for (File f : file.listFiles()) { results.addAll(read(f, filter)); } } else { try (Scanner scanner = new Scanner(file, "UTF-8")) { while (scanner.hasNextLine()) { String line = scanner.nextLine().trim(); if (line.isEmpty() == false) { results.add(line); } } } } return results; } /** * through. */ public static final class SimpleMapper extends Mapper<LongWritable, Text, LongWritable, Text> { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { context.write(key, value); } } /** * tokenize. */ public static final class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> { LongWritable one = new LongWritable(1); Text out = new Text(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { for (String token : value.toString().split("\\s+")) { if (token.isEmpty()) { continue; } out.set(token); context.write(out, one); } } } /** * aggregate. */ public static final class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> { LongWritable count = new LongWritable(1); @Override protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { long total = 0; for (LongWritable value : values) { total += value.get(); } count.set(total); context.write(key, count); } } /** * raise I/O error. */ public static final class InvalidMapper extends Mapper<LongWritable, Text, LongWritable, Text> { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { throw new IOException(); } } }