package com.skp.experiment.common.join;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;
import org.junit.Test;
import com.skp.experiment.cf.als.hadoop.ALSMatrixUtil;
import com.skp.experiment.cf.evaluate.hadoop.EvaluatorUtil;
import com.skp.experiment.common.MahoutTestCase;
import com.skp.experiment.common.join.ImprovedRepartitionJoinAndFilterJob.ImprovedRepartitionJoinMapper;
import com.skp.experiment.common.join.ImprovedRepartitionJoinAndFilterJob.ImprovedRepartitionJoinReducer;
/**
 * End-to-end test for {@link ImprovedRepartitionJoinAndFilterJob}.
 *
 * <p>Two small comma-separated tables (a "source" and a "target") are written to temp files.
 * The job is then run once for every combination of join type ({@code inner}, {@code filter},
 * {@code outer}) and {@code --mapOnly} setting, and the rows read back from the output
 * directory are compared with a hand-written expectation for that join type.
 *
 * <p>NOTE(review): the MRUnit drivers below are configured in {@link #setUp()} but no test in
 * this class uses them, and {@code mapReduceDriver} is never assigned.
 */
public class ImprovedRepartitionJoinAndFilterJobTest extends MahoutTestCase {

  /** Source table fed to the job through {@code --input}. */
  private File inputFile;
  /** Target table referenced from {@code --tgtTableOptions}. */
  private File tgtFile;
  /** Output directory of the scenario currently being run. */
  private File outputDir;

  ImprovedRepartitionJoinMapper mapper = new ImprovedRepartitionJoinMapper();
  ImprovedRepartitionJoinReducer reducer = new ImprovedRepartitionJoinReducer();
  MapDriver<LongWritable, Text, CompositeJoinKey, CompositeJoinValue> mapDriver;
  ReduceDriver<CompositeJoinKey, CompositeJoinValue, NullWritable, Text> reduceDriver;
  MapReduceDriver<LongWritable, Text, CompositeJoinKey, CompositeJoinValue, NullWritable, Text> mapReduceDriver;

  /** Expected output rows per join type; the inner map is used as a set (value is always true). */
  Map<String, Map<String, Boolean>> expected;

  /**
   * Wires the mapper/reducer into their MRUnit drivers, allocates the two temp input files,
   * writes the fixture data and builds the expected-output table.
   */
  @Before
  public void setUp() throws Exception {
    super.setUp();

    mapDriver = new MapDriver<LongWritable, Text, CompositeJoinKey, CompositeJoinValue>();
    mapDriver.setMapper(mapper);
    reduceDriver = new ReduceDriver<CompositeJoinKey, CompositeJoinValue, NullWritable, Text>();
    reduceDriver.setReducer(reducer);

    inputFile = getTestTempFile("join_input.txt");
    tgtFile = getTestTempFile("join_tgt_input.txt");

    initTestData();
    expectedOutput();
  }

  /**
   * Writes the source table (13 rows, 5 columns) and the target table (5 rows, 2 columns) to
   * {@code inputFile} and {@code tgtFile}.
   *
   * @throws IOException if either file cannot be written
   */
  protected void initTestData() throws IOException {
    String testString =
        "A,I1,0.43,-1,4\n" +
        "A,I2,0.32,0,4\n" +
        "A,I3,0.23,0,4\n" +
        "B,I2,0.32,0,2\n" +
        "B,I4,0.21,0,2\n" +
        "B,I5,0.12,-1,2\n" +
        "B,I6,0.11,-1,2\n" +
        "B,I7,0.09,-1,2\n" +
        "C,I2,0.32,0,3\n" +
        "C,I4,0.21,0,3\n" +
        "C,I5,0.12,-1,3\n" +
        "C,I6,0.11,2,3\n" +
        "C,I7,0.09,-1,3";
    // "B,I3" has no counterpart in the source table above.
    String tgtString =
        "A,I1\n" +
        "B,I3\n" +
        "B,I4\n" +
        "A,I2\n" +
        "C,I2\n";
    writeLines(inputFile, testString);
    writeLines(tgtFile, tgtString);
  }

  /**
   * Populates {@code expected} with the rows each join type should produce for the fixture
   * data: matched rows with the target's column 0 appended ("inner"), every source row with
   * either that column or the literal default {@code null} appended ("outer"), and the
   * unmatched source rows unchanged ("filter").
   */
  protected void expectedOutput() {
    expected = new HashMap<String, Map<String, Boolean>>();

    expected.put("inner", rowSet(
        "A,I1,0.43,-1,4,A",
        "B,I4,0.21,0,2,B",
        "A,I2,0.32,0,4,A",
        "C,I2,0.32,0,3,C"));

    expected.put("outer", rowSet(
        "A,I1,0.43,-1,4,A",
        "A,I2,0.32,0,4,A",
        "A,I3,0.23,0,4,null",
        "B,I2,0.32,0,2,null",
        "B,I4,0.21,0,2,B",
        "B,I5,0.12,-1,2,null",
        "B,I6,0.11,-1,2,null",
        "B,I7,0.09,-1,2,null",
        "C,I2,0.32,0,3,C",
        "C,I4,0.21,0,3,null",
        "C,I5,0.12,-1,3,null",
        "C,I6,0.11,2,3,null",
        "C,I7,0.09,-1,3,null"));

    expected.put("filter", rowSet(
        "A,I3,0.23,0,4",
        "B,I2,0.32,0,2",
        "B,I5,0.12,-1,2",
        "B,I6,0.11,-1,2",
        "B,I7,0.09,-1,2",
        "C,I4,0.21,0,3",
        "C,I5,0.12,-1,3",
        "C,I6,0.11,2,3",
        "C,I7,0.09,-1,3"));
  }

  /** Builds a map that marks every given row as expected (value {@code true}). */
  private static Map<String, Boolean> rowSet(String... rows) {
    Map<String, Boolean> set = new HashMap<String, Boolean>();
    for (String row : rows) {
      set.put(row, true);
    }
    return set;
  }

  /**
   * Runs the job for every (mapOnly, joinType) pair and verifies the produced rows.
   *
   * <p>Each assertion message names the failing scenario, so a failure in one of the six runs
   * can be identified without a debugger.
   */
  @Test
  public void ImprovedRepartitionJoinMapperTest() throws Exception {
    String[] joinTypes = {"inner", "filter", "outer"};
    String[] mapOnlyOptions = {"true", "false"};

    for (String mapOnly : mapOnlyOptions) {
      for (String joinType : joinTypes) {
        String scenario = "joinType=" + joinType + ", mapOnly=" + mapOnly;

        outputDir = getTestTempDir(joinType + "_" + mapOnly);
        // Remove the directory before the run; presumably the job has to create its own
        // output path -- TODO confirm against the job implementation.
        outputDir.delete();

        ImprovedRepartitionJoinAndFilterJob job = new ImprovedRepartitionJoinAndFilterJob();
        job.setConf(new Configuration());
        job.run(new String[] {
            "--input", inputFile.toString(),
            "--output", outputDir.toString(),
            "--srcKeyIndex", "0,1",
            "--tgtTableOptions", tgtFile.toString() + ":0,1:0,1:0:" + joinType,
            "--defaultValue", "null",
            "--mapOnly", mapOnly
        });

        // Keys of the returned map are compared with the expected rows; the values are not
        // inspected by this test.
        Map<String, String> lines = ALSMatrixUtil.fetchTextFiles(
            new Path(outputDir.toString()), ",", Arrays.asList(0, 1, 2, 3, 4, 5), Arrays.asList(0));

        Map<String, Boolean> expectedRows = expected.get(joinType);

        // Equal sizes plus "every actual row is expected" means the two key sets are equal.
        assertTrue(
            scenario + ": expected " + expectedRows.size() + " rows but got " + lines.size()
                + " " + lines.keySet(),
            expectedRows.size() == lines.size());
        for (String row : lines.keySet()) {
          assertTrue(scenario + ": unexpected output row '" + row + "'",
              expectedRows.containsKey(row));
        }
      }
    }
  }
}