/**
* Copyright 2011-2017 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.runtime.mapreduce.simple;
import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.MessageFormat;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import com.asakusafw.runtime.windows.WindowsSupport;
/**
* Test for {@link SimpleJobRunner}.
*/
public class SimpleJobRunnerTest {
/**
 * Windows platform support, registered as a JUnit class rule so that it is applied
 * once around the whole test class rather than around each test.
 * NOTE(review): what the rule actually sets up is defined by {@link WindowsSupport},
 * which is not visible from this file.
 */
@ClassRule
public static final WindowsSupport WINDOWS_SUPPORT = new WindowsSupport();
/**
 * A temporary folder.
 * Each test creates its input and output directories through this rule, and
 * {@link #newJob()} places the runner's temporary location ({@code spill-out})
 * directly under its root.
 */
@Rule
public final TemporaryFolder folder = new TemporaryFolder();
/**
 * Test for map only job.
 * <p>
 * Runs {@link SimpleMapper} with zero reduce tasks over a one-line text file and verifies
 * that the runner reports success and that the output, once everything up to the first
 * tab of each line is stripped, contains exactly that line.
 * </p>
 * @throws Exception if failed
 */
@Test
public void map_only() throws Exception {
    Job job = newJob();
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(SimpleMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, "Hello, world!");

    // newFolder() is only used to reserve a unique path: the directory itself is removed
    // again so that the job starts without an existing output directory. A failed delete
    // is reported here instead of surfacing later as an unrelated job failure.
    File outputDir = folder.newFolder();
    assertTrue("failed to delete output directory: " + outputDir, outputDir.delete());

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));

    assertThat(new SimpleJobRunner().run(job), is(true));
    assertThat(trimHead(read(outputDir)), is(set("Hello, world!")));
}
/**
 * Test for map-reduce job.
 * <p>
 * Runs a word count ({@link WordCountMapper} and {@link WordCountReducer}, one reduce task)
 * over three lines of text and verifies the number of occurrences reported for each word.
 * </p>
 * @throws Exception if failed
 */
@Test
public void map_reduce() throws Exception {
    Job job = newJob();
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setSortComparatorClass(Text.Comparator.class);
    job.setGroupingComparatorClass(Text.Comparator.class);
    job.setReducerClass(WordCountReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, new String[] {
            "a b c d",
            "a a b c",
            "c",
    });

    // newFolder() is only used to reserve a unique path: the directory itself is removed
    // again so that the job starts without an existing output directory. A failed delete
    // is reported here instead of surfacing later as an unrelated job failure.
    File outputDir = folder.newFolder();
    assertTrue("failed to delete output directory: " + outputDir, outputDir.delete());

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));

    assertThat(new SimpleJobRunner().run(job), is(true));
    // each output line is "<word> TAB <count>"
    assertThat(toMap(read(outputDir)), is(map(new String[] {
            "a", "3",
            "b", "2",
            "c", "3",
            "d", "1",
    })));
}
/**
 * Simple stress testing.
 * <p>
 * Runs {@link #map_only()} once without timing it, then 50 more times, and prints the
 * total and the per-attempt elapsed time to standard output. No timing threshold is
 * asserted; the test only fails if one of the runs fails.
 * </p>
 * @throws Exception if failed
 */
@Test
public void map_only_stress() throws Exception {
    int count = 50;
    // one untimed run first (presumably a warm-up), excluded from the measurement
    map_only();
    // System.nanoTime() is monotonic, unlike the wall clock, so the measured interval
    // cannot be distorted by system clock adjustments
    long begin = System.nanoTime();
    for (int i = 0; i < count; i++) {
        map_only();
    }
    long elapsedMillis = (System.nanoTime() - begin) / 1_000_000L;
    System.out.println(MessageFormat.format(
            "{0} map_only: {1}ms ({2}ms/attempt)",
            count,
            elapsedMillis,
            elapsedMillis / count));
}
/**
 * Simple stress testing.
 * <p>
 * Runs {@link #map_reduce()} once without timing it, then 50 more times, and prints the
 * total and the per-attempt elapsed time to standard output. No timing threshold is
 * asserted; the test only fails if one of the runs fails.
 * </p>
 * @throws Exception if failed
 */
@Test
public void map_reduce_stress() throws Exception {
    int count = 50;
    // one untimed run first (presumably a warm-up), excluded from the measurement
    map_reduce();
    // System.nanoTime() is monotonic, unlike the wall clock, so the measured interval
    // cannot be distorted by system clock adjustments
    long begin = System.nanoTime();
    for (int i = 0; i < count; i++) {
        map_reduce();
    }
    long elapsedMillis = (System.nanoTime() - begin) / 1_000_000L;
    System.out.println(MessageFormat.format(
            "{0} map_reduce: {1}ms ({2}ms/attempt)",
            count,
            elapsedMillis,
            elapsedMillis / count));
}
/**
 * Test for wrong job.
 * <p>
 * Runs a map only job whose mapper ({@link InvalidMapper}) always throws an
 * {@link IOException} and verifies that the runner reports failure by returning
 * {@code false}.
 * </p>
 * @throws Exception if failed
 */
@Test
public void exception() throws Exception {
    Job job = newJob();
    job.setJobName("w/ exception");
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(InvalidMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, "testing");

    // newFolder() is only used to reserve a unique path. The delete is verified so that
    // the expected failure below can be attributed to the mapper: a leftover output
    // directory could otherwise make the job fail for an unrelated reason.
    File outputDir = folder.newFolder();
    assertTrue("failed to delete output directory: " + outputDir, outputDir.delete());

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));

    assertThat(new SimpleJobRunner().run(job), is(false));
}
/**
 * Creates a new job on a fresh configuration, preset with the runner's buffer size
 * (16MiB) and a temporary location named {@code spill-out} under the temporary folder.
 * @return the created job
 * @throws IOException if the job could not be created
 */
private Job newJob() throws IOException {
    Job created = Job.getInstance(new Configuration());
    // always configure through the job's own configuration object
    Configuration conf = created.getConfiguration();
    conf.setInt(SimpleJobRunner.KEY_BUFFER_SIZE, 16 * 1024 * 1024);
    File spillOut = new File(folder.getRoot(), "spill-out");
    conf.set(SimpleJobRunner.KEY_TEMPORARY_LOCATION, spillOut.getAbsolutePath());
    return created;
}
/**
 * Collects the given values into a set which keeps their first-occurrence order.
 * @param values the elements
 * @return a new mutable set of the elements, without duplicates
 */
private Set<String> set(String... values) {
    Set<String> elements = new LinkedHashSet<>();
    Collections.addAll(elements, values);
    return elements;
}
/**
 * Builds an insertion-ordered map from a flat {@code key, value, key, value, ...} list.
 * If a key is repeated, the last value given for it wins.
 * @param keyValuePairs alternating keys and values; must have an even number of elements
 * @return a new mutable map of the pairs
 */
private Map<String, String> map(String... keyValuePairs) {
    assert keyValuePairs.length % 2 == 0;
    Map<String, String> pairs = new LinkedHashMap<>();
    int cursor = 0;
    while (cursor < keyValuePairs.length) {
        String key = keyValuePairs[cursor++];
        String value = keyValuePairs[cursor++];
        pairs.put(key, value);
    }
    return pairs;
}
/**
 * Strips the leading column from each line: everything up to and including the first
 * tab character is removed. Lines without a tab are kept as they are.
 * @param values the lines to process
 * @return a new set of the stripped lines, in iteration order of the input
 */
private Set<String> trimHead(Set<String> values) {
    Set<String> stripped = new LinkedHashSet<>();
    for (String line : values) {
        int tab = line.indexOf('\t');
        stripped.add(tab < 0 ? line : line.substring(tab + 1));
    }
    return stripped;
}
/**
 * Parses {@code key TAB value} lines into an insertion-ordered map, splitting each line
 * at its first tab character.
 * Fails the running test if a line has no tab or if a key occurs more than once.
 * @param values the lines to parse
 * @return a new map from each key to the rest of its line
 */
private Map<String, String> toMap(Set<String> values) {
    Map<String, String> entries = new LinkedHashMap<>();
    for (String line : values) {
        int tab = line.indexOf('\t');
        // the line itself is the failure message, to show what could not be parsed
        assertThat(line, tab, greaterThanOrEqualTo(0));
        String key = line.substring(0, tab);
        String value = line.substring(tab + 1);
        assertThat(entries, not(hasKey(key)));
        entries.put(key, value);
    }
    return entries;
}
/**
 * Writes the given lines into the file as UTF-8 text, each followed by the platform
 * line separator. An existing file is overwritten.
 * @param file the target file
 * @param lines the lines to write
 * @throws IOException if the file could not be opened, or if writing to it failed
 */
private void write(File file, String... lines) throws IOException {
    try (PrintWriter writer = new PrintWriter(file, "UTF-8")) {
        for (String line : lines) {
            writer.println(line);
        }
        // PrintWriter never throws on a failed write: it only records the error.
        // checkError() flushes the stream and reports whether any write has failed.
        if (writer.checkError()) {
            throw new IOException("failed to write: " + file);
        }
    }
}
/**
 * Collects the non-empty lines of the job output under the given file or directory.
 * Files and directories whose name starts with {@code "."}, as well as the committer's
 * success marker file, are skipped.
 * @param file the file or directory to read
 * @return the trimmed, non-empty lines found
 * @throws IOException if reading failed
 */
private Set<String> read(File file) throws IOException {
    FileFilter outputOnly = candidate -> {
        String name = candidate.getName();
        return !name.startsWith(".") && !name.equals(FileOutputCommitter.SUCCEEDED_FILE_NAME);
    };
    return read(file, outputOnly);
}
/**
 * Recursively collects the trimmed, non-empty lines of every accepted file under the
 * given file or directory, reading the files as UTF-8 text.
 * The filter is applied to the given file itself as well as to every descendant; a
 * rejected directory is not descended into. Duplicate lines are collapsed, and the order
 * of the result depends on the (unspecified) directory listing order.
 * @param file the file or directory to read
 * @param filter decides which files and directories are taken into account
 * @return the collected lines, or an empty set if the file is rejected by the filter
 * @throws IOException if a directory could not be listed or a file could not be read
 */
private Set<String> read(File file, FileFilter filter) throws IOException {
    if (!filter.accept(file)) {
        return Collections.emptySet();
    }
    Set<String> results = new LinkedHashSet<>();
    if (file.isDirectory()) {
        // listFiles() returns null rather than throwing when the listing fails
        File[] children = file.listFiles();
        if (children == null) {
            throw new IOException("failed to list files: " + file);
        }
        for (File child : children) {
            results.addAll(read(child, filter));
        }
    } else {
        try (Scanner scanner = new Scanner(file, "UTF-8")) {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine().trim();
                if (!line.isEmpty()) {
                    results.add(line);
                }
            }
            // Scanner swallows read errors and just stops; surface them explicitly
            IOException failure = scanner.ioException();
            if (failure != null) {
                throw failure;
            }
        }
    }
    return results;
}
/**
 * Pass-through mapper: writes every input key and value to the output as they are.
 */
public static final class SimpleMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

    @Override
    protected void map(
            LongWritable key,
            Text value,
            Context context) throws IOException, InterruptedException {
        context.write(key, value);
    }
}
/**
 * Tokenizing mapper: splits each input line on runs of whitespace and writes every
 * non-empty token together with a count of {@code 1}.
 */
public static final class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    /** The constant count written for every token. */
    LongWritable one = new LongWritable(1);

    /** The output key buffer, overwritten for each token. */
    Text out = new Text();

    @Override
    protected void map(
            LongWritable key,
            Text value,
            Context context) throws IOException, InterruptedException {
        String[] tokens = value.toString().split("\\s+");
        for (String token : tokens) {
            // a line starting with whitespace yields a leading empty token
            if (!token.isEmpty()) {
                out.set(token);
                context.write(out, one);
            }
        }
    }
}
/**
 * Aggregating reducer: sums up all values of a key and writes the key with that sum.
 */
public static final class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    /** The output value buffer, overwritten for each key. */
    LongWritable count = new LongWritable(1);

    @Override
    protected void reduce(
            Text key,
            Iterable<LongWritable> values,
            Context context) throws IOException, InterruptedException {
        long sum = 0L;
        for (LongWritable each : values) {
            sum = sum + each.get();
        }
        count.set(sum);
        context.write(key, count);
    }
}
/**
 * Always-failing mapper: throws an {@link IOException} for every input record and
 * never writes any output.
 */
public static final class InvalidMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

    @Override
    protected void map(
            LongWritable key,
            Text value,
            Context context) throws IOException, InterruptedException {
        throw new IOException();
    }
}
}