package com.skp.experiment.common;
import java.io.File;
import java.util.Arrays;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;
import org.junit.Test;
import com.skp.experiment.cf.als.hadoop.ALSMatrixUtil;
import com.skp.experiment.cf.evaluate.hadoop.EvaluatorUtil;
import com.skp.experiment.common.DistinctColumnValuesJob.DistinctColumnValuesMapper;
import com.skp.experiment.common.DistinctColumnValuesJob.DistinctColumnValuesReducer;
/**
 * Tests for {@link DistinctColumnValuesJob}: an MRUnit test of the mapper in isolation and an
 * end-to-end run of the job against a small comma-separated input file.
 *
 * <p>Both tests request column indexes {@code "0,1,5"} against rows that only have five
 * comma-separated fields (indexes 0-4). Neither test expects any output for index 5.
 */
public class DistinctColumnValuesJobTest extends MahoutTestCase {
  /** Input file that {@link #testJob()} writes its rows into. */
  private File inputFile;
  /** Output location handed to the job by {@link #testJob()}. */
  private File outputDir;

  // Re-created for every test in setUp(), so no field initializers are needed here.
  DistinctColumnValuesMapper mapper;
  DistinctColumnValuesReducer reducer;
  MapDriver<LongWritable, Text, Text, NullWritable> mapDriver;
  ReduceDriver<Text, NullWritable, NullWritable, Text> reduceDriver;
  MapReduceDriver<LongWritable, Text, Text, NullWritable, NullWritable, Text> mapReduceDriver;

  /**
   * Builds fresh mapper/reducer instances with their MRUnit drivers and prepares the temporary
   * input file and output directory used by {@link #testJob()}.
   *
   * @throws Exception if the superclass set-up or the temp file/dir helpers fail
   */
  @Before
  public void setUp() throws Exception {
    super.setUp();
    mapper = new DistinctColumnValuesMapper();
    reducer = new DistinctColumnValuesReducer();

    mapDriver = new MapDriver<LongWritable, Text, Text, NullWritable>();
    mapDriver.setMapper(mapper);
    reduceDriver = new ReduceDriver<Text, NullWritable, NullWritable, Text>();
    reduceDriver.setReducer(reducer);
    mapReduceDriver =
        new MapReduceDriver<LongWritable, Text, Text, NullWritable, NullWritable, Text>(
            mapper, reducer);

    inputFile = getTestTempFile("distinct_column_values_job_input.txt");
    outputDir = getTestTempDir("distinct_column_values_job_ouput");
    // NOTE(review): presumably getTestTempDir() creates the directory and the job needs a
    // non-existent output path, hence the delete; confirm against MahoutTestCase.
    outputDir.delete();
  }

  /**
   * Feeds one five-field row to the mapper and expects exactly one record per requested column
   * that exists in the row, keyed as {@code value + DELIMETER + columnIndex}.
   */
  @Test
  public void testMapper() {
    String row = "A,1,0.3,30,XY";
    // Index 5 is past the last field of the row; no output is expected for it.
    mapDriver.getConfiguration().set(DistinctColumnValuesJob.COLUMN_INDEXS, "0,1,5");
    mapDriver.withInput(new LongWritable(), new Text(row));
    mapDriver.withOutput(new Text("A" + DistinctColumnValuesJob.DELIMETER + 0), NullWritable.get());
    mapDriver.withOutput(new Text("1" + DistinctColumnValuesJob.DELIMETER + 1), NullWritable.get());
    mapDriver.runTest();
  }

  /**
   * Runs the whole job on a two-row input and checks the values read back from the
   * {@code <output>/0} and {@code <output>/1} paths.
   *
   * @throws Exception if writing the input, running the job, or reading the output fails
   */
  @Test
  public void testJob() throws Exception {
    Configuration conf = new Configuration();
    writeLines(inputFile, "A,1,0.3,30,XY\nB,2,0.6,23,Z");

    DistinctColumnValuesJob job = new DistinctColumnValuesJob();
    job.setConf(conf);
    int exitCode = job.run(new String[] {
      "--input", inputFile.toString(), "--output", outputDir.toString(),
      "--columnIndexs", "0,1,5"
    });
    // Fail here, with a clear cause, rather than later while reading missing output.
    // Assumes the usual Hadoop Tool convention that run() returns 0 on success.
    assertTrue(exitCode == 0);

    assertDistinctValues("0", "A", "B");
    assertDistinctValues("1", "1", "2");
  }

  /**
   * Reads the job output stored under {@code outputDir/<column>} and asserts that it holds
   * exactly two entries, keyed by the two given values.
   *
   * @param column name of the sub-path under the output directory to read
   * @param first one expected distinct value
   * @param second the other expected distinct value
   * @throws Exception if the output cannot be read
   */
  private void assertDistinctValues(String column, String first, String second) throws Exception {
    Map<String, String> lines = ALSMatrixUtil.fetchTextFiles(
        new Path(outputDir + "/" + column), ",", Arrays.asList(0), Arrays.asList(0));
    assertTrue(lines.size() == 2);
    assertTrue(lines.containsKey(first));
    assertTrue(lines.containsKey(second));
  }
}