package com.skp.experiment.integration.common; import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.mapreduce.ReduceDriver; import org.apache.mahout.common.Pair; import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterator; import org.junit.Before; import org.junit.Test; import com.skp.experiment.cf.als.hadoop.ALSMatrixUtil; import com.skp.experiment.cf.evaluate.hadoop.EvaluatorUtil; import com.skp.experiment.common.MahoutTestCase; import com.skp.experiment.integeration.common.SequentialIdGeneratorJob; import com.skp.experiment.integeration.common.SequentialIdGeneratorJob.CountPartitionRecordNumMapper; import com.skp.experiment.integeration.common.SequentialIdGeneratorJob.CountPartitionRecordNumReducer; public class SequentialIdGeneratorJobTest extends MahoutTestCase { private File inputFile; private File outputDir; private File tempDir; private Map<String, String> expected; MapDriver<LongWritable, Text, IntWritable, LongWritable> mapDriver; ReduceDriver<IntWritable, LongWritable, IntWritable, LongWritable> reduceDriver; CountPartitionRecordNumMapper mapper = new CountPartitionRecordNumMapper(); CountPartitionRecordNumReducer reducer = new CountPartitionRecordNumReducer(); @Before public void setUp() throws IOException { mapDriver = new MapDriver<LongWritable, Text, IntWritable, LongWritable>(); reduceDriver = new ReduceDriver<IntWritable, LongWritable, IntWritable, LongWritable>(); mapDriver.setMapper(mapper); reduceDriver.setReducer(reducer); inputFile = getTestTempFile("sequential_id_input.txt"); outputDir = getTestTempDir("sequential_id_output"); tempDir = getTestTempDir("sequential_id_tmp"); outputDir.delete(); tempDir.delete(); } private void writeTestCase(File testFile, int startIndex) throws IOException { expected = new HashMap<String, String>(); StringBuffer testStr = new StringBuffer(); int testSize = 10; int offset = 1000; for (int i = 0; i < testSize; i++) { if (i != 0) { testStr.append("\n"); } testStr.append(i + offset); expected.put(String.valueOf(i + startIndex), String.valueOf(i + offset)); } writeLines(testFile, testStr.toString()); } @Test public void testCountPartitionRecordNumMapper() throws IOException { int partitionId = 11111; File recordPath = getTestTempDir("records"); Configuration conf = new Configuration(); conf.setInt("mapred.task.partition", partitionId); conf.set(SequentialIdGeneratorJob.RECORDS_PATH, recordPath.toString()); mapDriver.withConfiguration(conf); mapDriver.withInput(new LongWritable(), new Text("line")); mapDriver.withOutput(new IntWritable(partitionId), new LongWritable(1)); mapDriver.runTest(); mapDriver.run(); Path mapOutputPath = new Path(recordPath + "/" + String.format("records%05d", partitionId)); SequenceFileIterator<Text, Text> iter = new SequenceFileIterator<Text, Text>(mapOutputPath, true, conf); Pair<Text, Text> pair = iter.next(); System.out.println(pair.toString()); assertTrue(pair.getFirst().toString().equals(partitionId + "," + 0)); assertTrue(pair.getSecond().toString().equals("line")); } private void runTestWithStartIndex(int startIndex) throws Exception { inputFile = getTestTempFile("sequential_id_input" + startIndex); outputDir = getTestTempDir("sequential_id_output" + startIndex); tempDir = getTestTempDir("sequential_id_tmp" + startIndex); inputFile.delete(); outputDir.delete(); tempDir.delete(); writeTestCase(inputFile, startIndex); SequentialIdGeneratorJob job = new SequentialIdGeneratorJob(); job.setConf(new Configuration()); job.run(new String[]{ "-i", inputFile.toString(), "-o", outputDir.toString(), "--cleanUp", "true", "--startIndex", String.valueOf(startIndex), "--tempDir", tempDir.toString() }); Map<String, String> lines = ALSMatrixUtil.fetchTextFiles(new Path(outputDir.toString()), ",", Arrays.asList(0), Arrays.asList(1)); for (Entry<String, String> line : lines.entrySet()) { assertTrue(expected.containsKey(line.getKey())); assertTrue(expected.get(line.getKey()).equals(line.getValue())); } } /** * test sequential id generate job with various startIndex */ @Test public void testJob() throws Exception { int[] startIndexs = new int[]{0, 1, 100, 923}; for (int i = 0; i < startIndexs.length; i++) { runTestWithStartIndex(startIndexs[i]); } } }