/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.job;

import static org.testng.AssertJUnit.assertEquals;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.sqoop.common.Direction;
import org.apache.sqoop.connector.common.EmptyConfiguration;
import org.apache.sqoop.connector.idf.CSVIntermediateDataFormat;
import org.apache.sqoop.connector.idf.IntermediateDataFormat;
import org.apache.sqoop.job.etl.Destroyer;
import org.apache.sqoop.job.etl.DestroyerContext;
import org.apache.sqoop.job.etl.Extractor;
import org.apache.sqoop.job.etl.ExtractorContext;
import org.apache.sqoop.job.etl.Loader;
import org.apache.sqoop.job.etl.LoaderContext;
import org.apache.sqoop.job.etl.Partition;
import org.apache.sqoop.job.etl.Partitioner;
import org.apache.sqoop.job.etl.PartitionerContext;
import org.apache.sqoop.job.mr.MRConfigurationUtils;
import org.apache.sqoop.job.mr.SqoopInputFormat;
import org.apache.sqoop.job.mr.SqoopMapper;
import org.apache.sqoop.job.mr.SqoopOutputFormat;
import org.apache.sqoop.job.mr.SqoopSplit;
import org.apache.sqoop.job.util.MRJobTestUtil;
import org.testng.annotations.Test;

/**
 * End-to-end tests of the Sqoop MR execution engine, wired up with the dummy
 * ETL components (partitioner, extractor, loader, destroyers) defined below.
 */
public class TestMapReduce {

  private static final int START_PARTITION = 1;
  private static final int NUMBER_OF_PARTITIONS = 9;
  private static final int NUMBER_OF_ROWS_PER_PARTITION = 10;

  @Test
  public void testSqoopInputFormat() throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConstants.JOB_ETL_PARTITIONER, DummyPartitioner.class.getName());
    conf.set(MRJobConstants.FROM_INTERMEDIATE_DATA_FORMAT,
        CSVIntermediateDataFormat.class.getName());
    conf.set(MRJobConstants.TO_INTERMEDIATE_DATA_FORMAT,
        CSVIntermediateDataFormat.class.getName());
    Job job = new Job(conf);

    SqoopInputFormat inputformat = new SqoopInputFormat();
    List<InputSplit> splits = inputformat.getSplits(job);
    // One split per partition produced by DummyPartitioner.
    assertEquals(NUMBER_OF_PARTITIONS, splits.size());

    for (int id = START_PARTITION; id <= NUMBER_OF_PARTITIONS; id++) {
      SqoopSplit split = (SqoopSplit) splits.get(id - 1);
      DummyPartition partition = (DummyPartition) split.getPartition();
      assertEquals(id, partition.getId());
    }
  }

  @Test
  public void testSqoopMapper() throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConstants.JOB_ETL_PARTITIONER,
        DummyPartitioner.class.getName());
    conf.set(MRJobConstants.JOB_ETL_EXTRACTOR, DummyExtractor.class.getName());
    conf.set(MRJobConstants.FROM_INTERMEDIATE_DATA_FORMAT,
        CSVIntermediateDataFormat.class.getName());
    conf.set(MRJobConstants.TO_INTERMEDIATE_DATA_FORMAT,
        CSVIntermediateDataFormat.class.getName());
    Job job = new Job(conf);
    // FROM and TO use the same schema in this test case.
    MRConfigurationUtils.setConnectorSchema(Direction.FROM, job, MRJobTestUtil.getTestSchema());
    MRConfigurationUtils.setConnectorSchema(Direction.TO, job, MRJobTestUtil.getTestSchema());

    boolean success = MRJobTestUtil.runJob(job.getConfiguration(),
        SqoopInputFormat.class, SqoopMapper.class, DummyOutputFormat.class);
    assertEquals("Job failed!", true, success);
  }

  @Test
  public void testNullOutputFormat() throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConstants.JOB_ETL_PARTITIONER, DummyPartitioner.class.getName());
    conf.set(MRJobConstants.JOB_ETL_EXTRACTOR, DummyExtractor.class.getName());
    conf.set(MRJobConstants.JOB_ETL_LOADER, DummyLoader.class.getName());
    conf.set(MRJobConstants.JOB_ETL_FROM_DESTROYER, DummyFromDestroyer.class.getName());
    conf.set(MRJobConstants.JOB_ETL_TO_DESTROYER, DummyToDestroyer.class.getName());
    conf.set(MRJobConstants.FROM_INTERMEDIATE_DATA_FORMAT,
        CSVIntermediateDataFormat.class.getName());
    conf.set(MRJobConstants.TO_INTERMEDIATE_DATA_FORMAT,
        CSVIntermediateDataFormat.class.getName());
    Job job = new Job(conf);
    // FROM and TO use the same schema in this test case.
    MRConfigurationUtils.setConnectorSchema(Direction.FROM, job, MRJobTestUtil.getTestSchema());
    MRConfigurationUtils.setConnectorSchema(Direction.TO, job, MRJobTestUtil.getTestSchema());

    boolean success = MRJobTestUtil.runJob(job.getConfiguration(),
        SqoopInputFormat.class, SqoopMapper.class, SqoopOutputFormat.class);
    assertEquals("Job failed!", true, success);

    // Make sure both destroyers get called.
    assertEquals(1, DummyFromDestroyer.count);
    assertEquals(1, DummyToDestroyer.count);
  }

  public static class DummyPartition extends Partition {
    private int id;

    public void setId(int id) {
      this.id = id;
    }

    public int getId() {
      return id;
    }

    @Override
    public void readFields(DataInput in) throws IOException {
      id = in.readInt();
    }

    @Override
    public void write(DataOutput out) throws IOException {
      out.writeInt(id);
    }

    @Override
    public String toString() {
      return Integer.toString(id);
    }
  }

  public static class DummyPartitioner extends Partitioner {
    @Override
    public List<Partition> getPartitions(PartitionerContext context, Object oc, Object oj) {
      List<Partition> partitions = new LinkedList<Partition>();
      for (int id = START_PARTITION; id <= NUMBER_OF_PARTITIONS; id++) {
        DummyPartition partition = new DummyPartition();
        partition.setId(id);
        partitions.add(partition);
      }
      return partitions;
    }
  }

  public static class DummyExtractor
      extends Extractor<EmptyConfiguration, EmptyConfiguration, DummyPartition> {
    @Override
    public void extract(ExtractorContext context, EmptyConfiguration oc, EmptyConfiguration oj,
        DummyPartition partition) {
      int id = partition.getId();
      for (int row = 0; row < NUMBER_OF_ROWS_PER_PARTITION; row++) {
        context.getDataWriter().writeArrayRecord(new Object[] {
            id * NUMBER_OF_ROWS_PER_PARTITION + row,
            (double) (id * NUMBER_OF_ROWS_PER_PARTITION + row),
            String.valueOf(id * NUMBER_OF_ROWS_PER_PARTITION + row) });
      }
    }

    @Override
    public long getRowsRead() {
      return NUMBER_OF_ROWS_PER_PARTITION;
    }
  }

  public static class DummyOutputFormat extends OutputFormat<Text, NullWritable> {
    @Override
    public void checkOutputSpecs(JobContext context) {
      // do nothing
    }

    @Override
    public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext context) {
      return new DummyRecordWriter();
    }

    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context) {
      return new DummyOutputCommitter();
    }

    public static class DummyRecordWriter extends RecordWriter<Text, NullWritable> {
      private int index = START_PARTITION * NUMBER_OF_ROWS_PER_PARTITION;
      // should I use a dummy IDF for testing?
      private IntermediateDataFormat<?> dataFormat = MRJobTestUtil.getTestIDF();

      @Override
      public void write(Text key, NullWritable value) {
        // Each record coming out of the mapper must match the expected CSV text.
        String testData = "" + index + "," + (double) index + ",'" + String.valueOf(index) + "'";
        dataFormat.setCSVTextData(testData);
        index++;
        assertEquals(dataFormat.getCSVTextData().toString(), key.toString());
      }

      @Override
      public void close(TaskAttemptContext context) {
        // do nothing
      }
    }

    public static class DummyOutputCommitter extends OutputCommitter {
      @Override
      public void setupJob(JobContext jobContext) {
      }

      @Override
      public void setupTask(TaskAttemptContext taskContext) {
      }

      @Override
      public void commitTask(TaskAttemptContext taskContext) {
      }

      @Override
      public void abortTask(TaskAttemptContext taskContext) {
      }

      @Override
      public boolean needsTaskCommit(TaskAttemptContext taskContext) {
        return false;
      }
    }
  }

  // Dummy loader that verifies what is written to the target.
  public static class DummyLoader extends Loader<EmptyConfiguration, EmptyConfiguration> {
    private int index = START_PARTITION * NUMBER_OF_ROWS_PER_PARTITION;
    private IntermediateDataFormat<?> dataFormat = MRJobTestUtil.getTestIDF();
    private long rowsWritten = 0;

    @Override
    public void load(LoaderContext context, EmptyConfiguration oc, EmptyConfiguration oj)
        throws Exception {
      String data;
      while ((data = context.getDataReader().readTextRecord()) != null) {
        String testData = "" + index + "," + (double) index + ",'" + String.valueOf(index) + "'";
        dataFormat.setCSVTextData(testData);
        index++;
        rowsWritten++;
        assertEquals(dataFormat.getCSVTextData().toString(), data);
      }
    }

    @Override
    public long getRowsWritten() {
      return rowsWritten;
    }
  }

  public static class DummyFromDestroyer extends Destroyer<EmptyConfiguration, EmptyConfiguration> {
    public static int count = 0;

    @Override
    public void destroy(DestroyerContext context, EmptyConfiguration o, EmptyConfiguration o2) {
      count++;
    }
  }

  public static class DummyToDestroyer extends Destroyer<EmptyConfiguration, EmptyConfiguration> {
    public static int count = 0;

    @Override
    public void destroy(DestroyerContext context, EmptyConfiguration o, EmptyConfiguration o2) {
      count++;
    }
  }
}