/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.job;
import static org.apache.sqoop.connector.common.SqoopIDFUtils.BYTE_FIELD_CHARSET;
import static org.apache.sqoop.connector.common.SqoopIDFUtils.toText;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.sqoop.common.Direction;
import org.apache.sqoop.connector.common.EmptyConfiguration;
import org.apache.sqoop.connector.idf.CSVIntermediateDataFormat;
import org.apache.sqoop.connector.idf.IntermediateDataFormat;
import org.apache.sqoop.connector.matcher.Matcher;
import org.apache.sqoop.connector.matcher.MatcherFactory;
import org.apache.sqoop.job.etl.Extractor;
import org.apache.sqoop.job.etl.ExtractorContext;
import org.apache.sqoop.job.etl.Partition;
import org.apache.sqoop.job.etl.Partitioner;
import org.apache.sqoop.job.etl.PartitionerContext;
import org.apache.sqoop.job.mr.MRConfigurationUtils;
import org.apache.sqoop.job.mr.SqoopInputFormat;
import org.apache.sqoop.job.mr.SqoopMapper;
import org.apache.sqoop.job.util.MRJobTestUtil;
import org.apache.sqoop.schema.NullSchema;
import org.apache.sqoop.schema.Schema;
import org.apache.sqoop.schema.type.FixedPoint;
import org.apache.sqoop.schema.type.FloatingPoint;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Factory;
import org.testng.annotations.Test;
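
/**
 * Tests schema matching between the FROM and TO side of a Sqoop MapReduce job,
 * using dummy partitioner, extractor and output format implementations.
 */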
public class TestMatching {
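  // A single partition with one row is enough to exercise schema matching end to end.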
private static final int START_PARTITION = 1;
private static final int NUMBER_OF_PARTITIONS = 1;
private static final int NUMBER_OF_ROWS_PER_PARTITION = 1;
private Schema from;
private Schema to;
@Factory(dataProvider="test-matching-data")
public TestMatching(Schema from, Schema to) {
this.from = from;
this.to = to;
}
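
  /**
   * Supplies FROM/TO schema pairs: empty on both sides, empty on one side,
   * identical three-column schemas, and mismatched two- vs. three-column schemas.
   */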
@DataProvider(name="test-matching-data", parallel=true)
public static Object[][] data() {
List<Object[]> parameters = new ArrayList<Object[]>();
Schema emptyFrom = new Schema("FROM-EMPTY");
Schema emptyTo = new Schema("TO-EMPTY");
Schema from1 = new Schema("FROM-1");
Schema to1 = new Schema("TO-1");
Schema from2 = new Schema("FROM-2");
Schema to2 = new Schema("TO-2");
from1.addColumn(new FixedPoint("1", 4L, true)).addColumn(new FloatingPoint("2", 8L))
.addColumn(new org.apache.sqoop.schema.type.Text("3"));
to1.addColumn(new FixedPoint("1", 4L, true)).addColumn(new FloatingPoint("2", 8L))
.addColumn(new org.apache.sqoop.schema.type.Text("3"));
from2.addColumn(new FixedPoint("1", 4L, true)).addColumn(new FloatingPoint("2", 8L));
to2.addColumn(new FixedPoint("1", 4L, true)).addColumn(new FloatingPoint("2", 8L));
parameters.add(new Object[]{
emptyFrom,
emptyTo
});
parameters.add(new Object[]{
from1,
emptyTo
});
parameters.add(new Object[]{
emptyTo,
to1
});
parameters.add(new Object[]{
from1,
to1
});
parameters.add(new Object[]{
from2,
to1
});
parameters.add(new Object[]{
from1,
to2
});
return parameters.toArray(new Object[0][]);
}
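
  /**
   * Runs a dummy MapReduce job with the FROM/TO schema pair supplied by the factory
   * and checks whether schema matching lets the job succeed or fail as expected.
   */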
@SuppressWarnings("deprecation")
@Test
public void testSchemaMatching() throws Exception {
Configuration conf = new Configuration();
conf.set(MRJobConstants.JOB_ETL_PARTITIONER, DummyPartitioner.class.getName());
conf.set(MRJobConstants.JOB_ETL_EXTRACTOR, DummyExtractor.class.getName());
conf.set(MRJobConstants.FROM_INTERMEDIATE_DATA_FORMAT, CSVIntermediateDataFormat.class.getName());
conf.set(MRJobConstants.TO_INTERMEDIATE_DATA_FORMAT, CSVIntermediateDataFormat.class.getName());
Job job = new Job(conf);
MRConfigurationUtils.setConnectorSchema(Direction.FROM, job, from);
MRConfigurationUtils.setConnectorSchema(Direction.TO, job, to);
    boolean success = MRJobTestUtil.runJob(job.getConfiguration(),
        SqoopInputFormat.class, SqoopMapper.class,
        DummyOutputFormat.class);
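
    // The job should succeed when exactly one schema is empty or when both schemas
    // have the same column layout; it should fail when both schemas are empty or
    // when the layouts differ.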
if (from.getName().split("-")[1].equals("EMPTY")) {
if (to.getName().split("-")[1].equals("EMPTY")) {
assertEquals("Job succeeded!", false, success);
} else {
assertEquals("Job failed!", true, success);
}
} else {
if (to.getName().split("-")[1].equals("EMPTY")) {
assertEquals("Job failed!", true, success);
} else if (from.getName().split("-")[1].equals(to.getName().split("-")[1])) {
assertEquals("Job failed!", true, success);
} else {
assertEquals("Job succeeded!", false, success);
}
}
}
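
  /**
   * Round-trips unstructured data (a single byte-array field) through
   * CSVIntermediateDataFormat when both schemas are NullSchema.
   */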
@Test
public void testSchemalessFromAndTo() throws UnsupportedEncodingException {
String testData = "\"This is the data you are looking for. It has no structure.\"";
Object[] testObject = new Object[] {testData.getBytes(BYTE_FIELD_CHARSET)};
Object[] testObjectCopy = new Object[1];
    System.arraycopy(testObject, 0, testObjectCopy, 0, testObject.length);
Matcher matcher = MatcherFactory.getMatcher(NullSchema.getInstance(),
NullSchema.getInstance());
    // Check the FROM side only, since that is currently the only IDF in use
CSVIntermediateDataFormat dataFormat = new CSVIntermediateDataFormat(matcher.getFromSchema());
    // Set the data as CSV text and validate both the CSV and object representations
dataFormat.setCSVTextData(testData);
String validateCSV = dataFormat.getCSVTextData();
Object[] validateObj = dataFormat.getObjectData();
assertEquals(testData, validateCSV);
assertArrayEquals(testObject, validateObj);
// Setting data as Object
dataFormat.setObjectData(testObject);
validateCSV = toText(dataFormat.getCSVTextData());
validateObj = dataFormat.getObjectData();
assertEquals(testData, validateCSV);
assertArrayEquals(testObjectCopy, validateObj);
}
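
  /**
   * Minimal Partition that carries a single integer id and serializes it
   * through readFields/write.
   */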
public static class DummyPartition extends Partition {
private int id;
public void setId(int id) {
this.id = id;
}
public int getId() {
return id;
}
@Override
public void readFields(DataInput in) throws IOException {
id = in.readInt();
}
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(id);
}
@Override
public String toString() {
return Integer.toString(id);
}
}
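
  /**
   * Creates NUMBER_OF_PARTITIONS DummyPartition instances with consecutive ids
   * starting at START_PARTITION.
   */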
public static class DummyPartitioner extends Partitioner {
@Override
public List<Partition> getPartitions(PartitionerContext context, Object oc, Object oj) {
List<Partition> partitions = new LinkedList<Partition>();
for (int id = START_PARTITION; id <= NUMBER_OF_PARTITIONS; id++) {
DummyPartition partition = new DummyPartition();
partition.setId(id);
partitions.add(partition);
}
return partitions;
}
}
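
  /**
   * Emits NUMBER_OF_ROWS_PER_PARTITION rows per partition; each row is an
   * (integer, double, text) triple derived from the partition id.
   */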
public static class DummyExtractor extends Extractor<EmptyConfiguration, EmptyConfiguration, Partition> {
@Override
public void extract(ExtractorContext context, EmptyConfiguration oc, EmptyConfiguration oj, Partition partition) {
      int id = ((DummyPartition) partition).getId();
      for (int row = 0; row < NUMBER_OF_ROWS_PER_PARTITION; row++) {
        context.getDataWriter().writeArrayRecord(new Object[] {
            id * NUMBER_OF_ROWS_PER_PARTITION + row,
            (double) (id * NUMBER_OF_ROWS_PER_PARTITION + row),
            String.valueOf(id * NUMBER_OF_ROWS_PER_PARTITION + row) });
}
}
@Override
public long getRowsRead() {
return NUMBER_OF_ROWS_PER_PARTITION;
}
}
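
  /**
   * Output format whose record writer verifies that every mapper output key
   * matches the CSV text the test IDF produces for the expected row.
   */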
public static class DummyOutputFormat
extends OutputFormat<Text, NullWritable> {
@Override
public void checkOutputSpecs(JobContext context) {
// do nothing
}
@Override
public RecordWriter<Text, NullWritable> getRecordWriter(
TaskAttemptContext context) {
return new DummyRecordWriter();
}
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) {
return new DummyOutputCommitter();
}
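
    /**
     * Rebuilds the CSV line expected for the current row index and asserts that
     * it matches the mapper output key.
     */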
public static class DummyRecordWriter
extends RecordWriter<Text, NullWritable> {
      private int index = START_PARTITION * NUMBER_OF_ROWS_PER_PARTITION;
private IntermediateDataFormat<?> dataFormat = MRJobTestUtil.getTestIDF();
@Override
public void write(Text key, NullWritable value) {
String testData = "" + index + "," + (double) index + ",'" + String.valueOf(index) + "'";
dataFormat.setCSVTextData(testData);
index++;
assertEquals(dataFormat.getCSVTextData().toString(), key.toString());
}
@Override
public void close(TaskAttemptContext context) {
// do nothing
}
}
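
    /** No-op committer; the dummy output format never writes to a real location. */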
public static class DummyOutputCommitter extends OutputCommitter {
@Override
public void setupJob(JobContext jobContext) { }
@Override
public void setupTask(TaskAttemptContext taskContext) { }
@Override
public void commitTask(TaskAttemptContext taskContext) { }
@Override
public void abortTask(TaskAttemptContext taskContext) { }
@Override
public boolean needsTaskCommit(TaskAttemptContext taskContext) {
return false;
}
}
}
}