package com.twitter.elephantbird.util;
import com.twitter.elephantbird.mapreduce.input.combine.CompositeInputSplit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.*;
public class TestSplitUtil {
private Configuration conf;
class DummyInputSplit extends InputSplit {
private final long length;
private final String[] locations;
public DummyInputSplit(long length, String[] locations) {
this.length = length;
this.locations = locations;
}
@Override
public long getLength() {
return length;
}
@Override
public String[] getLocations() {
return locations;
}
}
/**
 * Builds a fresh {@link Configuration} before each test and sets
 * {@code SplitUtil.COMBINE_SPLIT_SIZE} to 1000.
 */
@Before
public void setUp() throws Exception {
  Configuration freshConf = new Configuration();
  freshConf.setLong(SplitUtil.COMBINE_SPLIT_SIZE, 1000);
  conf = freshConf;
}
/**
 * Input lengths 500, 400, 400. Expects two composite splits: the first holding
 * the 500 and a 400 split (locations l1, l2, l3), the second holding the
 * remaining 400 split (locations l1, l4, l5).
 */
@Test
public void test1() throws IOException, InterruptedException {
  List<InputSplit> rawSplits = new ArrayList<InputSplit>();
  rawSplits.add(new DummyInputSplit(500, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(400, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(400, new String[] { "l1", "l4", "l5" }));

  List<CompositeInputSplit> result = SplitUtil.getCombinedCompositeSplits(rawSplits, conf);
  // JUnit's contract is assertEquals(expected, actual); keeping that order
  // makes a failure report the right value as "expected".
  Assert.assertEquals(2, result.size());

  CompositeInputSplit first = result.get(0);
  Assert.assertEquals(2, first.getSplits().size());
  checkLocations(first.getLocations(), new String[] { "l1", "l2", "l3" });
  Assert.assertEquals(500, first.getLength(0));
  Assert.assertEquals(400, first.getLength(1));

  CompositeInputSplit second = result.get(1);
  Assert.assertEquals(1, second.getSplits().size());
  checkLocations(second.getLocations(), new String[] { "l1", "l4", "l5" });
  Assert.assertEquals(400, second.getLength(0));
}
/**
 * Input lengths 600, 700, 800. Expects three composite splits of one raw split
 * each, returned in the order 800, 700, 600.
 */
@Test
public void test2() throws IOException, InterruptedException {
  ArrayList<InputSplit> rawSplits = new ArrayList<InputSplit>();
  rawSplits.add(new DummyInputSplit(600, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(700, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(800, new String[] { "l1", "l4", "l5" }));

  List<CompositeInputSplit> result = SplitUtil.getCombinedCompositeSplits(rawSplits, conf);
  // Expected value goes first so a failure message is not inverted.
  Assert.assertEquals(3, result.size());

  CompositeInputSplit first = result.get(0);
  int firstCount = first.getSplits().size();
  checkLocations(first.getLocations(), new String[] { "l1", "l4", "l5" });
  Assert.assertEquals(1, firstCount);
  Assert.assertEquals(800, first.getLength(0));

  CompositeInputSplit second = result.get(1);
  int secondCount = second.getSplits().size();
  checkLocations(second.getLocations(), new String[] { "l1", "l2", "l3" });
  Assert.assertEquals(1, secondCount);
  Assert.assertEquals(700, second.getLength(0));

  CompositeInputSplit third = result.get(2);
  int thirdCount = third.getSplits().size();
  checkLocations(third.getLocations(), new String[] { "l1", "l2", "l3" });
  Assert.assertEquals(1, thirdCount);
  Assert.assertEquals(600, third.getLength(0));
}
/**
 * Input lengths 500, 200, 100. Expects a single composite split holding all
 * three raw splits in that order, with locations covering l1 through l5.
 */
@Test
public void test3() throws IOException, InterruptedException {
  ArrayList<InputSplit> rawSplits = new ArrayList<InputSplit>();
  rawSplits.add(new DummyInputSplit(500, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(200, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(100, new String[] { "l1", "l4", "l5" }));

  List<CompositeInputSplit> result = SplitUtil.getCombinedCompositeSplits(rawSplits, conf);
  Assert.assertEquals(1, result.size());

  // Exactly one element is guaranteed by the assertion above.
  CompositeInputSplit only = result.get(0);
  int childCount = only.getSplits().size();
  Assert.assertEquals(3, childCount);
  checkLocations(only.getLocations(), new String[] { "l1", "l2", "l3", "l4", "l5" });
  Assert.assertEquals(500, only.getLength(0));
  Assert.assertEquals(200, only.getLength(1));
  Assert.assertEquals(100, only.getLength(2));
}
/**
 * Six input splits (500, 200, 100, 100, 200, 500). Expects two composite
 * splits: the first with lengths 500 and 100 (locations l1, l4, l5), the
 * second with lengths 500, 200, 200, 100 (locations l1, l2, l3).
 */
@Test
public void test4() throws IOException, InterruptedException {
  ArrayList<InputSplit> rawSplits = new ArrayList<InputSplit>();
  rawSplits.add(new DummyInputSplit(500, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(200, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(100, new String[] { "l1", "l4", "l5" }));
  rawSplits.add(new DummyInputSplit(100, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(200, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(500, new String[] { "l1", "l4", "l5" }));

  List<CompositeInputSplit> result = SplitUtil.getCombinedCompositeSplits(rawSplits, conf);
  Assert.assertEquals(2, result.size());

  CompositeInputSplit first = result.get(0);
  Assert.assertEquals(2, first.getSplits().size());
  checkLocations(first.getLocations(), new String[] { "l1", "l4", "l5" });
  Assert.assertEquals(500, first.getLength(0));
  Assert.assertEquals(100, first.getLength(1));

  CompositeInputSplit second = result.get(1);
  Assert.assertEquals(4, second.getSplits().size());
  Assert.assertEquals(500, second.getLength(0));
  checkLocations(second.getLocations(), new String[] { "l1", "l2", "l3" });
  Assert.assertEquals(200, second.getLength(1));
  Assert.assertEquals(200, second.getLength(2));
  Assert.assertEquals(100, second.getLength(3));
}
/**
 * Six same-location splits supplied in descending length (600 down to 100).
 * Expects three composite splits with lengths [600, 400], [500, 300, 200]
 * and [100].
 */
@Test
public void test5() throws IOException, InterruptedException {
  ArrayList<InputSplit> rawSplits = new ArrayList<InputSplit>();
  rawSplits.add(new DummyInputSplit(600, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(500, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(400, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(300, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(200, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(100, new String[] { "l1", "l2", "l3" }));

  List<CompositeInputSplit> result = SplitUtil.getCombinedCompositeSplits(rawSplits, conf);
  Assert.assertEquals(3, result.size());

  CompositeInputSplit first = result.get(0);
  Assert.assertEquals(2, first.getSplits().size());
  checkLocations(first.getLocations(), new String[] { "l1", "l2", "l3" });
  Assert.assertEquals(600, first.getLength(0));
  Assert.assertEquals(400, first.getLength(1));

  CompositeInputSplit second = result.get(1);
  Assert.assertEquals(3, second.getSplits().size());
  checkLocations(second.getLocations(), new String[] { "l1", "l2", "l3" });
  Assert.assertEquals(500, second.getLength(0));
  Assert.assertEquals(300, second.getLength(1));
  Assert.assertEquals(200, second.getLength(2));

  CompositeInputSplit third = result.get(2);
  Assert.assertEquals(1, third.getSplits().size());
  checkLocations(third.getLocations(), new String[] { "l1", "l2", "l3" });
  Assert.assertEquals(100, third.getLength(0));
}
/**
 * Six same-location splits supplied in ascending length (100 up to 600).
 * Expects three composite splits with lengths [600, 400], [500, 300, 200]
 * and [100].
 */
@Test
public void test6() throws IOException, InterruptedException {
  ArrayList<InputSplit> rawSplits = new ArrayList<InputSplit>();
  rawSplits.add(new DummyInputSplit(100, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(200, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(300, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(400, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(500, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(600, new String[] { "l1", "l2", "l3" }));

  List<CompositeInputSplit> result = SplitUtil.getCombinedCompositeSplits(rawSplits, conf);
  Assert.assertEquals(3, result.size());

  CompositeInputSplit first = result.get(0);
  Assert.assertEquals(2, first.getSplits().size());
  checkLocations(first.getLocations(), new String[] { "l1", "l2", "l3" });
  Assert.assertEquals(600, first.getLength(0));
  Assert.assertEquals(400, first.getLength(1));

  CompositeInputSplit second = result.get(1);
  Assert.assertEquals(3, second.getSplits().size());
  checkLocations(second.getLocations(), new String[] { "l1", "l2", "l3" });
  Assert.assertEquals(500, second.getLength(0));
  Assert.assertEquals(300, second.getLength(1));
  Assert.assertEquals(200, second.getLength(2));

  CompositeInputSplit third = result.get(2);
  Assert.assertEquals(1, third.getSplits().size());
  checkLocations(third.getLocations(), new String[] { "l1", "l2", "l3" });
  Assert.assertEquals(100, third.getLength(0));
}
/**
 * Input lengths 100, 100, 200. Expects a single composite split whose children
 * are reported with lengths 200, 100, 100 and whose locations cover l1 through l5.
 */
@Test
public void test7() throws IOException, InterruptedException {
  ArrayList<InputSplit> rawSplits = new ArrayList<InputSplit>();
  rawSplits.add(new DummyInputSplit(100, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(100, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(200, new String[] { "l1", "l4", "l5" }));

  List<CompositeInputSplit> result = SplitUtil.getCombinedCompositeSplits(rawSplits, conf);
  // assertEquals takes (expected, actual); the literal is the expectation.
  Assert.assertEquals(1, result.size());

  CompositeInputSplit only = result.get(0);
  int childCount = only.getSplits().size();
  Assert.assertEquals(3, childCount);
  checkLocations(only.getLocations(), new String[] { "l1", "l2", "l3", "l4", "l5" });
  Assert.assertEquals(200, only.getLength(0));
  Assert.assertEquals(100, only.getLength(1));
  Assert.assertEquals(100, only.getLength(2));
}
/**
 * Input lengths 100, 200, 400 with partially overlapping locations. Expects a
 * single composite split with child lengths 400, 200, 100 whose reported
 * locations are exactly l1, l2, l3, l4 and l5.
 */
@Test
public void test8() throws IOException, InterruptedException {
  // Verifies which locations are reported. checkLocationOrdering compares
  // sorted copies of both arrays, so the sequence returned by getLocations()
  // is not itself constrained by this test.
  ArrayList<InputSplit> rawSplits = new ArrayList<InputSplit>();
  rawSplits.add(new DummyInputSplit(100, new String[] { "l1", "l2", "l3" }));
  rawSplits.add(new DummyInputSplit(200, new String[] { "l3", "l4", "l5" }));
  rawSplits.add(new DummyInputSplit(400, new String[] { "l5", "l6", "l1" }));

  List<CompositeInputSplit> result = SplitUtil.getCombinedCompositeSplits(rawSplits, conf);
  // Expected value first, per JUnit's assertEquals(expected, actual).
  Assert.assertEquals(1, result.size());

  CompositeInputSplit only = result.get(0);
  int childCount = only.getSplits().size();
  Assert.assertEquals(3, childCount);
  checkLocationOrdering(only.getLocations(), new String[] { "l5", "l3", "l1", "l2", "l4" });
  Assert.assertEquals(400, only.getLength(0));
  Assert.assertEquals(200, only.getLength(1));
  Assert.assertEquals(100, only.getLength(2));
}
/**
 * Asserts that the number of entries in {@code actual} that also appear in
 * {@code expected} equals {@code expected.length}.
 *
 * <p>The check is deliberately lenient: entries of {@code actual} that are not
 * in {@code expected} are ignored, and a repeated matching entry is counted
 * each time it occurs.
 *
 * @param actual   locations reported by the split under test
 * @param expected locations that must be found among {@code actual}
 */
private void checkLocations(String[] actual, String[] expected) {
  Set<String> expectedSet = new HashSet<String>(Arrays.asList(expected));
  int matched = 0;
  for (String location : actual) {
    if (expectedSet.contains(location)) {
      matched++;
    }
  }
  // Expected value first (JUnit's assertEquals(expected, actual)), plus the
  // raw arrays so a failure shows which locations were involved.
  Assert.assertEquals(
      "matching locations; expected " + Arrays.toString(expected)
          + " within " + Arrays.toString(actual),
      expected.length, matched);
}
/**
 * Asserts that {@code actual} and {@code expected} hold the same strings once
 * each has been sorted: equal lengths, then equal elements position by position.
 *
 * <p>NOTE(review): despite the name, both inputs are sorted before comparison,
 * so the original order of either array does not affect the outcome. The
 * callers' arrays are left untouched because sorting is done on copies.
 *
 * @param actual   locations reported by the split under test
 * @param expected locations the split is expected to report
 */
private void checkLocationOrdering(String[] actual, String[] expected) {
  String[] sortedActual = actual.clone();
  Arrays.sort(sortedActual);
  String[] sortedExpected = expected.clone();
  Arrays.sort(sortedExpected);

  Assert.assertEquals(sortedExpected.length, sortedActual.length);
  int position = 0;
  for (String found : sortedActual) {
    Assert.assertEquals(sortedExpected[position], found);
    position++;
  }
}
}