package org.apache.sqoop.job.mr;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.log4j.Logger;
import org.apache.sqoop.common.SqoopException;
import org.apache.sqoop.connector.idf.IntermediateDataFormat;
import org.apache.sqoop.connector.matcher.Matcher;
import org.apache.sqoop.error.code.MRExecutionError;
import org.apache.sqoop.etl.io.DataWriter;

/**
 * There are two IDF (IntermediateDataFormat) objects carried in memory during
 * Sqoop job execution: the fromIDF holds the fromSchema and the toIDF holds
 * the toSchema. Before data is written to the toIDF object, the matching
 * process negotiates between the two schemas and their corresponding column
 * types.
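 * <p>
 * A minimal usage sketch, assuming the surrounding mapper has already created
 * the context, IDFs, and matcher (all variable names here are placeholders):
 * <pre>{@code
 * DataWriter writer = new SqoopDataWriter(context, fromIDF, toIDF, matcher);
 * writer.writeArrayRecord(new Object[] {1, "example"}); // one extracted row
 * }</pre>
 */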
public class SqoopDataWriter extends DataWriter {
  public static final Logger LOG = Logger.getLogger(SqoopDataWriter.class);

  private final Context context;
  private final IntermediateDataFormat<Object> fromIDF;
  private final IntermediateDataFormat<Object> toIDF;
  private final Matcher matcher;
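
  /**
   * @param context mapper context used to emit the matched records
   * @param f IDF holding extracted data in the from-schema
   * @param t IDF that receives the matched data in the to-schema
   * @param m matcher that maps data between the two schemas
   */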
  public SqoopDataWriter(Context context, IntermediateDataFormat<Object> f,
      IntermediateDataFormat<Object> t, Matcher m) {
    this.context = context;
    fromIDF = f;
    toIDF = t;
    matcher = m;
  }
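
  /**
   * Writes one extracted record supplied as an array of column values.
   */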
  @Override
  public void writeArrayRecord(Object[] array) {
    fromIDF.setObjectData(array);
    writeContent();
  }
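
  /**
   * Writes one extracted record supplied as CSV text.
   */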
  @Override
  public void writeStringRecord(String text) {
    fromIDF.setCSVTextData(text);
    writeContent();
  }
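
  /**
   * Writes one extracted record supplied in the connector's native format.
   */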
  @Override
  public void writeRecord(Object obj) {
    fromIDF.setData(obj);
    writeContent();
  }
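
  /**
   * Matches the extracted data in the fromIDF into the toIDF and emits it
   * through the mapper context as CSV text.
   */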
  private void writeContent() {
    try {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Extracted data: " + fromIDF.getCSVTextData());
      }
      // NOTE: The fromIDF and its corresponding fromSchema are used only for
      // the matching process; the mapper output is written from the toIDF
      // object once matching is complete.
      toIDF.setObjectData(matcher.getMatchingData(fromIDF.getObjectData()));
      // NOTE: The reducer is not used for the writing (a.k.a. LOAD in ETL), so
      // the mapper emits the matched record itself, as the CSV text of the
      // toIDF.
      context.write(new Text(toIDF.getCSVTextData()), NullWritable.get());
    } catch (Exception e) {
      throw new SqoopException(MRExecutionError.MAPRED_EXEC_0013, e);
    }
  }
}