/**
 * Copyright 2011-2017 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.runtime.stage.input;

import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.hamcrest.BaseMatcher;
import org.hamcrest.Description;
import org.hamcrest.Matcher;
import org.junit.Test;

import com.asakusafw.runtime.stage.input.StageInputSplit.Source;

/**
 * Test for {@link DefaultSplitCombiner}.
 */
public class DefaultSplitCombinerTest {

    /**
     * Simple testing: a single split with a single slot stays a single split.
     * @throws Exception if failed
     */
    @Test
    public void simple() throws Exception {
        DefaultSplitCombiner combiner = new DefaultSplitCombiner();
        List<StageInputSplit> combined = combine(combiner, 1, list(split(1, 1, "a")));
        assertThat(combined.size(), is(1));
        assertSan(combined);
    }

    /**
     * with single slot: two splits on different locations are merged into one
     * split that reports both locations.
     * @throws Exception if failed
     */
    @Test
    public void single_slot() throws Exception {
        DefaultSplitCombiner combiner = new DefaultSplitCombiner();
        List<StageInputSplit> combined = combine(combiner, 1, list(split(1, 1, "a"), split(2, 1, "b")));
        // check the size first so that an empty result fails as an assertion, not as an index error
        assertThat(combined.size(), is(1));
        assertThat(combined.get(0).getLocations(), is(locations("a", "b")));
        assertSan(combined);
    }

    /**
     * with over slot: more slots (10) than splits (2) keeps the two splits separate.
     * @throws Exception if failed
     */
    @Test
    public void over_slot() throws Exception {
        DefaultSplitCombiner combiner = new DefaultSplitCombiner();
        List<StageInputSplit> combined = combine(combiner, 10, list(split(1, 1, "a"), split(2, 1, "b")));
        assertThat(combined.size(), is(2));
        assertSan(combined);
    }

    /**
     * with tiny inputs: ten splits of length 1 with tiny limit 10 are expected
     * to collapse into one split.
     * @throws Exception if failed
     */
    @Test
    public void tiny() throws Exception {
        DefaultSplitCombiner combiner = new DefaultSplitCombiner();
        List<StageInputSplit> combined = combine(combiner, 10, 10L, list(
                split(0, 1, "a"),
                split(1, 1, "a"),
                split(2, 1, "a"),
                split(3, 1, "b"),
                split(4, 1, "b"),
                split(5, 1, "b"),
                split(6, 1, "b"),
                split(7, 1, "b"),
                split(8, 1, "b"),
                split(9, 1, "b")));
        assertThat(combined.size(), is(1));
        assertSan(combined);
    }

    /**
     * with non-tiny inputs: the same ten splits with tiny limit 9 are expected
     * to stay as ten splits (ten slots are available).
     * @throws Exception if failed
     */
    @Test
    public void tiny_over() throws Exception {
        DefaultSplitCombiner combiner = new DefaultSplitCombiner();
        List<StageInputSplit> combined = combine(combiner, 10, 9L, list(
                split(0, 1, "a"),
                split(1, 1, "a"),
                split(2, 1, "a"),
                split(3, 1, "b"),
                split(4, 1, "b"),
                split(5, 1, "b"),
                split(6, 1, "b"),
                split(7, 1, "b"),
                split(8, 1, "b"),
                split(9, 1, "b")));
        assertThat(combined.size(), is(10));
        assertSan(combined);
    }

    /**
     * with simple GA: splits sharing a location are expected to be grouped together.
     * @throws Exception if failed
     */
    @Test
    public void ga_simple() throws Exception {
        DefaultSplitCombiner combiner = new DefaultSplitCombiner();
        List<StageInputSplit> combined = combine(combiner, 2, list(
                split(1, 1, "a"),
                split(2, 1, "a"),
                split(3, 1, "b"),
                split(4, 1, "b")));
        assertThat(combined.size(), is(2));
        assertSan(combined);

        StageInputSplit tag1 = find(combined, 1);
        assertTags(tag1, 1, 2);

        StageInputSplit tag3 = find(combined, 3);
        assertTags(tag3, 3, 4);
    }

    /**
     * with GA and no location information: splits of length 1, 2, 3 and 6 are
     * expected to be grouped as {1, 2, 3} and {4}, i.e. 6 versus 6 in length.
     * @throws Exception if failed
     */
    @Test
    public void ga_nolocation() throws Exception {
        DefaultSplitCombiner combiner = new DefaultSplitCombiner();
        List<StageInputSplit> combined = combine(combiner, 2, list(
                split(1, 1, (String[]) null),
                split(2, 2, (String[]) null),
                split(3, 3, (String[]) null),
                split(4, 6, (String[]) null)));
        assertThat(combined.size(), is(2));
        assertSan(combined);

        StageInputSplit tag1 = find(combined, 1);
        assertTags(tag1, 1, 2, 3);

        StageInputSplit tag4 = find(combined, 4);
        assertTags(tag4, 4);
    }

    /**
     * with GA on equally sized splits: four splits of length 100 on distinct
     * locations are expected to be spread as two sources per combined split.
     * @throws Exception if failed
     */
    @Test
    public void ga_minimize() throws Exception {
        DefaultSplitCombiner combiner = new DefaultSplitCombiner();
        List<StageInputSplit> combined = combine(combiner, 2, list(
                split(1, 100, "a"),
                split(2, 100, "b"),
                split(3, 100, "c"),
                split(4, 100, "d")));
        assertThat(combined.size(), is(2));
        assertSan(combined);

        assertThat(combined.get(0).getSources().size(), is(2));
        assertThat(combined.get(1).getSources().size(), is(2));
    }

    /**
     * with GA on unevenly sized splits: the two large splits (tags 1 and 2) are
     * expected to end up in different combined splits.
     * @throws Exception if failed
     */
    @Test
    public void ga_locality() throws Exception {
        DefaultSplitCombiner combiner = new DefaultSplitCombiner();
        List<StageInputSplit> combined = combine(combiner, 2, list(
                split(1, 100, "a"),
                split(2, 100, "b"),
                split(3, 1, "c"),
                split(4, 1, "d")));
        assertThat(combined.size(), is(2));
        assertSan(combined);

        assertThat(find(combined, 1), is(not(find(combined, 2))));
    }

    /**
     * with GA on many inputs: 1000 splits of growing size and mixed locations
     * are combined with several slot counts; the number of results and the
     * length of each result are checked against loose bounds.
     * @throws Exception if failed
     */
    @Test
    public void ga_many() throws Exception {
        // location patterns are assigned to the splits in rotation
        String[][] locations = {
                { },
                { "a", "b" },
                { "b", "c" },
                { "a" },
                { "c" },
                { "b", "c" },
                { "d" },
                { "e", "f" },
                { "a", "g" },
        };
        List<StageInputSplit> splits = new ArrayList<>();
        long total = 0;
        for (int i = 0; i < 1000; i++) {
            long size = i * 10 + 100;
            splits.add(split(i, size, locations[i % locations.length]));
            total += size;
        }
        DefaultSplitCombiner combiner = new DefaultSplitCombiner();
        // i = 1, 3, ..., 11 => slots = 5, 15, ..., 55
        for (int i = 1; i < 12; i += 2) {
            int slots = i * 5;
            int prefSlots = i * 10 / 9;
            List<StageInputSplit> combined = combine(combiner, slots, splits);
            assertThat(combined.size(), is(greaterThan(prefSlots)));
            assertSan(combined);

            long prefMaxSize = total * 2 / prefSlots;
            for (StageInputSplit split : combined) {
                assertThat(split.getLength(), is(lessThan(prefMaxSize)));
            }
        }
    }

    /**
     * Combines the splits using the given number of slots per input,
     * 1000 generations, and a non-local penalty ratio of 10.
     * @param combiner the combiner under test
     * @param slots the number of slots per input
     * @param splits the splits to combine
     * @return the combined splits
     * @throws IOException if the combiner failed
     * @throws InterruptedException if interrupted while combining
     */
    private List<StageInputSplit> combine(
            DefaultSplitCombiner combiner,
            int slots,
            List<StageInputSplit> splits) throws IOException, InterruptedException {
        return combiner.combine(
                new DefaultSplitCombiner.Configuration()
                    .withSlotsPerInput(slots)
                    .withGenerations(1000)
                    .withNonLocalPenaltyRatio(10),
                splits);
    }

    /**
     * Combines the splits using the given number of slots per input and tiny limit.
     * @param combiner the combiner under test
     * @param slots the number of slots per input
     * @param limit the tiny limit
     * @param splits the splits to combine
     * @return the combined splits
     * @throws IOException if the combiner failed
     * @throws InterruptedException if interrupted while combining
     */
    private List<StageInputSplit> combine(
            DefaultSplitCombiner combiner,
            int slots,
            long limit,
            List<StageInputSplit> splits) throws IOException, InterruptedException {
        return combiner.combine(
                new DefaultSplitCombiner.Configuration()
                    .withSlotsPerInput(slots)
                    .withTinyLimit(limit),
                splits);
    }

    /**
     * Sanity check: asserts that no mock split tag appears more than once
     * across all sources of the given stage splits.
     * @param splits the combined splits to inspect
     */
    private void assertSan(List<StageInputSplit> splits) {
        Set<Integer> saw = new HashSet<>();
        for (StageInputSplit stage : splits) {
            for (Source source : stage.getSources()) {
                MockInputSplit split = (MockInputSplit) source.getSplit();
                assertThat(saw, not(hasItem(split.tag)));
                saw.add(split.tag);
            }
        }
    }

    /**
     * Returns the first stage split which has a source whose mock split carries the tag.
     * @param list the combined splits to search
     * @param tag the target tag
     * @return the stage split containing the tag
     * @throws AssertionError if no stage split contains the tag
     */
    private StageInputSplit find(List<StageInputSplit> list, int tag) {
        for (StageInputSplit stage : list) {
            for (Source source : stage.getSources()) {
                MockInputSplit mock = (MockInputSplit) source.getSplit();
                if (mock.tag == tag) {
                    return stage;
                }
            }
        }
        throw new AssertionError("no combined split contains tag: " + tag);
    }

    /**
     * Asserts that the set of tags in the stage split equals the expected set of tags.
     * @param split the stage split to inspect
     * @param tags the expected tags (order and duplicates are ignored)
     */
    private void assertTags(StageInputSplit split, int... tags) {
        Set<Integer> expected = new TreeSet<>();
        for (int tag : tags) {
            expected.add(tag);
        }
        Set<Integer> actual = new TreeSet<>();
        for (Source source : split.getSources()) {
            MockInputSplit mock = (MockInputSplit) source.getSplit();
            actual.add(mock.tag);
        }
        assertThat(actual, is(expected));
    }

    /**
     * Returns a matcher which accepts a location array containing exactly the
     * given locations, ignoring order and duplicates. A {@code null} array is
     * treated as an empty location set.
     * @param locations the expected locations
     * @return the matcher
     */
    private Matcher<String[]> locations(String... locations) {
        Set<String> set = new TreeSet<>();
        Collections.addAll(set, locations);
        return new BaseMatcher<String[]>() {
            @Override
            public boolean matches(Object arg) {
                // report a mismatch instead of raising ClassCastException on foreign types
                if (arg != null && !(arg instanceof String[])) {
                    return false;
                }
                String[] actualArray = (String[]) arg;
                Set<String> actual = new TreeSet<>();
                if (actualArray != null) {
                    Collections.addAll(actual, actualArray);
                }
                return set.equals(actual);
            }
            @Override
            public void describeTo(Description desc) {
                desc.appendValue(set);
            }
        };
    }

    /**
     * Wraps the splits into a list.
     * @param splits the elements
     * @return the list view of the elements
     */
    private List<StageInputSplit> list(StageInputSplit... splits) {
        return Arrays.asList(splits);
    }

    /**
     * Creates a stage split with a single mock source, using {@link A} as its mapper.
     * @param tag the tag of the mock split
     * @param length the length of the mock split
     * @param locations the locations of the mock split (may be {@code null})
     * @return the created stage split
     */
    private StageInputSplit split(int tag, long length, String... locations) {
        Class<? extends Mapper<?, ?, ?, ?>> mapper = A.class;
        return split(tag, mapper, length, locations);
    }

    /**
     * Creates a stage split with a single mock source, using {@link F} as its input format.
     * @param tag the tag of the mock split
     * @param mapper the mapper class of the stage split
     * @param length the length of the mock split
     * @param locations the locations of the mock split (may be {@code null})
     * @return the created stage split
     */
    private StageInputSplit split(int tag, Class<? extends Mapper<?, ?, ?, ?>> mapper, long length,
            String... locations) {
        InputSplit split = new MockInputSplit(tag, length, locations);
        return new StageInputSplit(mapper, Collections.singletonList(new StageInputSplit.Source(split, F.class)));
    }

    /**
     * Placeholder mapper type; only its class object is used.
     */
    private static final class A extends Mapper<Object, Object, Object, Object> {
        // nothing
    }

    /**
     * Placeholder input format type; only its class object is used, and both
     * methods return {@code null}.
     */
    private static final class F extends InputFormat<Object, Object> {
        @Override
        public List<InputSplit> getSplits(JobContext context) {
            return null;
        }
        @Override
        public RecordReader<Object, Object> createRecordReader(InputSplit split, TaskAttemptContext context) {
            return null;
        }
    }
}