package org.lab41.sample.etl.mapreduce;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.avro.mapreduce.AvroMultipleOutputs;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.DeflateCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.lab41.sample.etl.domain.SampleRecord;
/**
 * Driver that configures and runs the "DataIngest Example" MapReduce job.
 *
 * <p>The job uses {@link MapperRawToAvro} as its mapper and {@link ReducerByDateTime} as its
 * reducer, with map output keyed by an Avro {@code long} and valued by {@code SampleRecord}.
 * Output compression is enabled with the deflate codec, and a named output called
 * {@code sampleRecord} is registered for the reducer side.
 *
 * @author lab41
 */
public class Driver extends Configured implements Tool {

    /** Pattern of the timestamp suffix appended to the output path given on the command line. */
    private static final String DIRECTORY_SUFFIX_PATTERN = "_yyyy-MM-dd_HH-mm-ss.SSSS";

    /**
     * Configures the job from the command-line arguments and waits for it to complete.
     *
     * @param args exactly two elements: the input path, and the output path prefix to which a
     *     timestamp suffix is appended so each run writes to a fresh directory name
     * @return {@code 0} if the job completed successfully; {@code 1} if the arguments are
     *     invalid or the job failed
     * @throws Exception if job configuration or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        // Reject both too few and too many arguments, and say what is expected.
        if (args == null || args.length != 2) {
            System.err.println("Usage: " + Driver.class.getSimpleName()
                    + " [generic options] <input path> <output path prefix>");
            ToolRunner.printGenericCommandUsage(System.err);
            return 1;
        }

        // SimpleDateFormat is not thread-safe, so build one per call instead of sharing a
        // static instance across potentially concurrent runs.
        String timestampSuffix = new SimpleDateFormat(DIRECTORY_SUFFIX_PATTERN).format(new Date());
        Path mrInput = new Path(args[0]);
        Path mrOutput = new Path(args[1] + timestampSuffix);

        // NOTE(review): newer Hadoop releases deprecate this constructor in favour of
        // Job.getInstance(conf, name) — switch once the minimum Hadoop version is confirmed.
        Job job = new Job(getConf(), "DataIngest Example");
        job.setJarByClass(Driver.class);
        job.setUserClassesTakesPrecedence(true);

        FileInputFormat.setInputPaths(job, mrInput);
        FileOutputFormat.setOutputPath(job, mrOutput);

        job.setMapperClass(MapperRawToAvro.class);
        job.setReducerClass(ReducerByDateTime.class);

        // Intermediate (map output) records: long key, SampleRecord value.
        AvroJob.setMapOutputKeySchema(job, Schema.create(Schema.Type.LONG));
        AvroJob.setMapOutputValueSchema(job, SampleRecord.SCHEMA$);

        // Compress job output using the deflate codec.
        AvroKeyOutputFormat.setCompressOutput(job, true);
        AvroKeyOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);

        // Register the "sampleRecord" named output, written as Avro keys with the SampleRecord schema.
        AvroMultipleOutputs.addNamedOutput(job, "sampleRecord",
                AvroKeyOutputFormat.class, SampleRecord.SCHEMA$);

        // NOTE(review): this enables counters on Hadoop's MultipleOutputs; if the reducer writes
        // through AvroMultipleOutputs, AvroMultipleOutputs.setCountersEnabled may have been
        // intended — confirm against the reducer.
        MultipleOutputs.setCountersEnabled(job, true);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point: runs the driver through {@link ToolRunner} and exits the JVM
     * with the code returned by {@link #run(String[])}.
     *
     * @param args command-line arguments, passed to {@link ToolRunner#run(Tool, String[])}
     * @throws Exception if the tool run fails
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Driver(), args);
        System.exit(exitCode);
    }
}