package com.github.elazarl.multireducers;

import com.github.elazarl.multireducers.example.*;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableMultiset;
import com.google.common.collect.Multiset;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import com.google.common.io.InputSupplier;
import com.google.common.io.Resources;
import org.apache.commons.io.filefilter.WildcardFileFilter;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.rules.TemporaryFolder;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Scanner;

import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.notNullValue;
import static org.hamcrest.Matchers.greaterThan;
import static org.junit.Assert.assertThat;

/**
 * Tests a full flow of an example MultiJob with the job local runner, plus
 * negative tests verifying that {@code MultiJob} rejects inconsistent
 * mapper/reducer/combiner type configurations with
 * {@link IllegalArgumentException} at setup time.
 */
public class MultiJobTest {

    @Rule
    public TemporaryFolder folder = new TemporaryFolder();
    @Rule
    public ExpectedException exception = ExpectedException.none();

    /**
     * A combiner whose key type disagrees with its job's mapper output key
     * type must be rejected when the second sub-job is added.
     */
    @Test
    public void testBadCombinerConfiguration() throws Exception {
        exception.expect(IllegalArgumentException.class);
        Job job = new Job();
        MultiJob.create().
                withMapper(SelectFirstField.class, Text.class, IntWritable.class).
                withReducer(CountFirstField.class, 1).
                withCombiner(CountFirstField.class).
                withOutputFormat(TextOutputFormat.class, Text.class, IntWritable.class).
                addTo(job);
        // CountFirstField combines Text keys, but this mapper emits IntWritable.
        MultiJob.create().
                withMapper(SelectSecondField.class, IntWritable.class, IntWritable.class).
                withReducer(CountSecondField.class, 1).
                withCombiner(CountFirstField.class).
                withOutputFormat(TextOutputFormat.class, Text.class, IntWritable.class).
                addTo(job);
    }

    /**
     * A reducer whose key type disagrees with its job's mapper output key
     * type must be rejected when the second sub-job is added.
     */
    @Test
    public void testBadReducerConfiguration() throws Exception {
        exception.expect(IllegalArgumentException.class);
        Job job = new Job();
        MultiJob.create().
                withMapper(SelectFirstField.class, Text.class, IntWritable.class).
                withReducer(CountFirstField.class, 1).
                withCombiner(CountFirstField.class).
                withOutputFormat(TextOutputFormat.class, Text.class, IntWritable.class).
                addTo(job);
        // CountFirstField reduces Text keys, but this mapper emits IntWritable.
        MultiJob.create().
                withMapper(SelectSecondField.class, IntWritable.class, IntWritable.class).
                withReducer(CountFirstField.class, 1).
                withCombiner(CountSecondField.class).
                withOutputFormat(TextOutputFormat.class, Text.class, IntWritable.class).
                addTo(job);
    }

    /** A no-op reducer whose value type (Text) differs from the mappers' IntWritable values. */
    static class MapperValueText extends Reducer<Object, Text, Object, Object> {
        @Override
        protected void reduce(Object key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
        }
    }

    /**
     * A reducer with a mismatched value type is accepted when verification is
     * explicitly skipped, but rejected by the default, verifying builder.
     */
    @Test
    public void testBadReducerValueConfiguration() throws Exception {
        Job job = new Job();
        // With verification skipped the bad reducer is accepted as-is.
        MultiJob.create().
                withMapper(SelectSecondField.class, IntWritable.class, IntWritable.class).
                withReducer(MapperValueText.class, 1).
                withCombiner(CountSecondField.class).
                withOutputFormat(TextOutputFormat.class, Text.class, IntWritable.class).
                skipJobVerificationCanCauseRuntimeErrorsIKnowWhatImDoing().
                addTo(job);
        exception.expect(IllegalArgumentException.class);
        MultiJob.create().
                withMapper(SelectFirstField.class, Text.class, IntWritable.class).
                withReducer(CountFirstField.class, 1).
                withCombiner(CountFirstField.class).
                withOutputFormat(TextOutputFormat.class, Text.class, IntWritable.class).
                addTo(job);
        // Without the skip, the same bad reducer must be rejected.
        MultiJob.create().
                withMapper(SelectSecondField.class, IntWritable.class, IntWritable.class).
                withReducer(MapperValueText.class, 1).
                withCombiner(CountSecondField.class).
                withOutputFormat(TextOutputFormat.class, Text.class, IntWritable.class).
                addTo(job);
    }

    /** Inherits CountFirstField's (Text-keyed) signature without redeclaring it. */
    static class InheritCountFirstField extends CountFirstField {
    }

    /**
     * Type verification must see through inheritance: a reducer that inherits
     * a mismatched key type is rejected just like its parent would be.
     */
    @Test
    public void testBadReducerKeyByInheriance() throws Exception {
        exception.expect(IllegalArgumentException.class);
        Job job = new Job();
        MultiJob.create().
                withMapper(SelectFirstField.class, Text.class, IntWritable.class).
                withReducer(CountFirstField.class, 1).
                withCombiner(CountFirstField.class).
                withOutputFormat(TextOutputFormat.class, Text.class, IntWritable.class).
                addTo(job);
        MultiJob.create().
                withMapper(SelectSecondField.class, IntWritable.class, IntWritable.class).
                withReducer(InheritCountFirstField.class, 1).
                withCombiner(CountSecondField.class).
                withOutputFormat(TextOutputFormat.class, Text.class, IntWritable.class).
                addTo(job);
    }

    /**
     * End-to-end run of the example MultiJob through the local runner,
     * asserting both sub-jobs' output directories contain the expected counts.
     */
    @Test
    public void testExampleJob() throws Exception {
        File input = createInputFile();
        File output = new File(folder.getRoot(), "output");
        int exitCode = ToolRunner.run(new ExampleRunner(),
                new String[]{"file://" + input.getAbsolutePath(), "file://" + output.getAbsolutePath()});
        assertThat(exitCode, is(0));
        File[] firstFieldFiles = new File(output, "first")
                .listFiles((FilenameFilter) new WildcardFileFilter("part-r-*"));
        File[] secondFieldFiles = new File(output, "second")
                .listFiles((FilenameFilter) new WildcardFileFilter("part-r-*"));
        // listFiles() returns null when the directory is missing; fail with a
        // clear assertion rather than an opaque NullPointerException.
        assertThat(firstFieldFiles, is(notNullValue()));
        assertThat(secondFieldFiles, is(notNullValue()));
        assertThat(firstFieldFiles.length, is(1));
        assertThat(firstFieldFiles[0].length(), greaterThan(0L));
        assertThat(secondFieldFiles.length, is(1));
        assertThat(secondFieldFiles[0].length(), greaterThan(0L));
        Multiset<String> countFirstField = toMap(Files.newInputStreamSupplier(firstFieldFiles[0]));
        Multiset<String> countSecondField = toMap(Files.newInputStreamSupplier(secondFieldFiles[0]));
        assertThat(ImmutableMultiset.copyOf(countFirstField), is(new ImmutableMultiset.Builder<String>()
                .addCopies("john", 2)
                .add("dough")
                .add("joe")
                .add("moe")
                .addCopies("prefix_john", 2)
                .add("prefix_dough")
                .add("prefix_joe")
                .add("prefix_moe").build()));
        assertThat(ImmutableMultiset.copyOf(countSecondField), is(new ImmutableMultiset.Builder<String>()
                .add("120")
                .addCopies("130", 2)
                .add("180")
                .add("190").build()));
    }

    /**
     * Parses a TextOutputFormat part file of {@code key<TAB>count} lines into
     * a multiset that holds each key with multiplicity equal to its count.
     *
     * @param supplier source of the part file's bytes; the stream is opened
     *                 here and always closed before returning
     * @throws IOException if the stream cannot be opened or closed
     */
    public static Multiset<String> toMap(InputSupplier<? extends InputStream> supplier) throws IOException {
        InputStream input = supplier.getInput();
        try {
            Scanner scanner = new Scanner(input);
            Multiset<String> m = HashMultiset.create();
            // hasNextLine() is the correct pairing for nextLine(); the original
            // hasNext() is token-oriented and can disagree with line reading.
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                if (line.isEmpty()) {
                    continue; // tolerate blank lines instead of failing on split
                }
                String[] parts = line.split("\t");
                m.add(parts[0], Integer.parseInt(parts[1]));
            }
            return m;
        } finally {
            input.close();
        }
    }

    /**
     * Copies the bundled {@code example_input.txt} resource into a fresh
     * temporary file and returns it.
     */
    private File createInputFile() throws IOException {
        File input = folder.newFile("input.txt");
        URL inputResource = getClass().getClassLoader().getResource("example_input.txt");
        // A plain `assert` is a no-op unless the JVM runs with -ea; use a real
        // assertion so a missing resource fails loudly instead of NPE-ing below.
        assertThat(inputResource, is(notNullValue()));
        ByteStreams.copy(Resources.newInputStreamSupplier(inputResource),
                Files.newOutputStreamSupplier(input));
        return input;
    }
}