package org.lab41.sample.etl.mapreduce;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapreduce.AvroMultipleOutputs;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.lab41.sample.etl.domain.SampleRecord;
/**
 * ReducerByDateTime
 *
 * Reducer that writes every {@link SampleRecord} it receives to an Avro
 * output whose relative path is derived from the year of the record key.
 * The key is interpreted as a timestamp in milliseconds since the epoch and
 * formatted with {@link SimpleDateFormat} (JVM default time zone), giving a
 * path of the form {@code sample-record/yyyy/yyyy_processtime_<suffix>},
 * where {@code <suffix>} is the time this class was loaded, encoded in
 * radix 36.
 *
 * Records are emitted through the {@code "sampleRecord"} named output of
 * {@link AvroMultipleOutputs}; nothing is written to the regular reducer
 * context.
 *
 * @author lab41.org
 */
public class ReducerByDateTime extends
        Reducer<AvroKey<Long>, AvroValue<SampleRecord>, Text, NullWritable> {

    private static final Log log = LogFactory.getLog(ReducerByDateTime.class);

    /** Pattern of the output directory; note that it ends with a separator. */
    private static final String DIRECTORY_PATTERN = "'sample-record'/yyyy/";

    /**
     * Pattern of the output file name. The process-time suffix is evaluated
     * once, when the class is initialised, so every reducer instance in the
     * same JVM shares the same suffix.
     */
    private static final String FILE_NAME_PATTERN = "yyyy_'processtime'_'"
            + Long.toString(System.currentTimeMillis(), Character.MAX_RADIX)
            + "'";

    // SimpleDateFormat is not thread-safe, so each reducer instance owns its
    // formatters instead of sharing static ones across tasks that may run
    // concurrently inside one JVM.
    private final SimpleDateFormat directoryFormat = new SimpleDateFormat(
            DIRECTORY_PATTERN);
    private final SimpleDateFormat fileNameFormat = new SimpleDateFormat(
            FILE_NAME_PATTERN);

    /** Named-output writer; created in setup and closed in cleanup. */
    private AvroMultipleOutputs multipleOutputs;

    /** Scratch date re-used for every key to avoid per-call allocation. */
    private Date outputKeyDate;

    /**
     * Directory (relative, ending with a separator) computed for the key most
     * recently passed to {@code reduce}.
     */
    protected String outputDirectory;

    /**
     * Allocates the scratch date and opens the multiple-outputs writer.
     *
     * @param context the task context supplied by the framework
     * @throws IOException          propagated from the superclass
     * @throws InterruptedException propagated from the superclass
     */
    @Override
    protected void setup(Context context) throws IOException,
            InterruptedException {
        super.setup(context);
        outputKeyDate = new Date();
        multipleOutputs = new AvroMultipleOutputs(context);
    }

    /**
     * Writes all values of {@code key} to the Avro output derived from the
     * key's timestamp.
     *
     * @param key     timestamp, passed to {@link Date#setTime(long)}
     * @param values  records grouped under {@code key}
     * @param context the task context (not written to directly)
     * @throws IOException          if writing a record fails
     * @throws InterruptedException if writing a record is interrupted
     */
    @Override
    protected void reduce(AvroKey<Long> key,
            Iterable<AvroValue<SampleRecord>> values, Context context)
            throws IOException, InterruptedException {
        outputKeyDate.setTime(key.datum());
        outputDirectory = directoryFormat.format(outputKeyDate);

        // sample-record/yyyy/yyyy_processtime_<suffix>
        String outputFileName = joinPath(outputDirectory,
                fileNameFormat.format(outputKeyDate));
        log.info("Writing avro file " + outputFileName);

        for (AvroValue<SampleRecord> value : values) {
            SampleRecord sampleRecord = value.datum();
            multipleOutputs.write("sampleRecord", new AvroKey<SampleRecord>(
                    sampleRecord), NullWritable.get(), outputFileName);
        }
    }

    /**
     * Closes the multiple-outputs writer so buffered records are flushed.
     *
     * @param context the task context supplied by the framework
     * @throws IOException          if closing the writer fails
     * @throws InterruptedException if closing the writer is interrupted
     */
    @Override
    protected void cleanup(Context context) throws IOException,
            InterruptedException {
        multipleOutputs.close();
        super.cleanup(context);
    }

    /**
     * Joins a directory and a file name with exactly one path separator,
     * whether or not the directory already ends with one.
     */
    private static String joinPath(String directory, String fileName) {
        if (directory.endsWith(Path.SEPARATOR)) {
            return directory + fileName;
        }
        return directory + Path.SEPARATOR + fileName;
    }
}