package org.lab41.sample.etl.mapreduce;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.commons.lang.time.DateUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.lab41.sample.etl.domain.SampleRecord;

/**
 * MapperRawToAvro
 *
 * A sample mapper class that reads a line from a log file and converts
 * it to an Avro record, pivoting on the hour since the epoch.
 *
 * <p>Each input line is split on commas into at least six fields:
 * required name, optional name, a long, an int, a start time and an end
 * time (both formatted as {@code yyyy-MM-dd HH:mm:ss}). Lines that do not
 * match this layout are logged and skipped.
 *
 * @author lab41.org
 */
public class MapperRawToAvro extends
        Mapper<LongWritable, Text, AvroKey<Long>, AvroValue<SampleRecord>> {

    private static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";
    private static final String splitToken = ",";
    /** Highest field index read by {@link #map} is 5, so six fields are needed. */
    private static final int REQUIRED_FIELD_COUNT = 6;
    private static final Log log = LogFactory.getLog(MapperRawToAvro.class);

    // SimpleDateFormat is not thread-safe, so every mapper instance owns its
    // own formatter instead of sharing a static one.
    private final SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_PATTERN);

    // Reused for every input line; all fields are overwritten before each write.
    private SampleRecord sampleRecord;

    /**
     * Allocates the record instance that is reused across {@link #map} calls.
     */
    @Override
    protected void setup(Context context) throws IOException,
            InterruptedException {
        super.setup(context);
        sampleRecord = new SampleRecord();
    }

    /**
     * Drives the mapper over every key/value pair of the split.
     *
     * <p>An {@link IOException} raised while iterating is logged and ends the
     * loop without being rethrown. {@code cleanup} is always invoked, even
     * when the loop terminates abnormally.
     */
    @Override
    public void run(Context context) throws IOException, InterruptedException {
        setup(context);
        try {
            while (context.nextKeyValue()) {
                map(context.getCurrentKey(), context.getCurrentValue(), context);
            }
        } catch (IOException ex) {
            // NOTE(review): the exception is deliberately not rethrown, so the
            // task finishes without failing even though the remaining input was
            // not processed. Confirm this best-effort behavior is intended.
            log.error("Exception caught in the mapper.", ex);
        } finally {
            cleanup(context);
        }
    }

    /**
     * Converts one comma-separated line into a {@link SampleRecord} and emits
     * it keyed by its start time truncated to the hour (milliseconds since
     * the epoch).
     *
     * <p>Malformed lines (too few fields, unparseable dates or numbers) are
     * logged and skipped; nothing is written for them.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   the raw input line
     * @param context task context used to emit the Avro key/value pair
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // limit -1 keeps trailing empty fields so positions stay stable.
        String[] fields = value.toString().split(splitToken, -1);
        if (fields.length < REQUIRED_FIELD_COUNT) {
            log.error("Parse Error: " + value.toString());
            return;
        }

        Date startTime;
        try {
            startTime = populateRecord(fields);
        } catch (ParseException e) {
            log.error("Parse Error: " + value.toString(), e);
            return;
        } catch (NumberFormatException e) {
            log.error("Parse Error: " + value.toString(), e);
            return;
        }

        /**
         * Pivot on the milliseconds since the epoch rounded down to the
         * nearest hour
         */
        AvroKey<Long> avroKey = new AvroKey<Long>(
                DateUtils.truncate(startTime, Calendar.HOUR_OF_DAY).getTime());
        AvroValue<SampleRecord> avroValue = new AvroValue<SampleRecord>(
                sampleRecord);
        context.write(avroKey, avroValue);
    }

    /**
     * Parses every field first and only then copies the values into the
     * shared record, so a failure never leaves it half-updated.
     *
     * @param fields the split input line; must hold at least six entries
     * @return the parsed start time
     * @throws ParseException        if the start or end time is malformed
     * @throws NumberFormatException if a numeric field is malformed
     */
    private Date populateRecord(String[] fields) throws ParseException {
        Date startTime = dateFormat.parse(fields[4]);
        Date endTime = dateFormat.parse(fields[5]);
        long dataItemLong = Long.parseLong(fields[2]);
        int dataItemInt = Integer.parseInt(fields[3]);

        sampleRecord.setRequiredName(fields[0]);
        sampleRecord.setOptionalName(fields[1]);
        sampleRecord.setDataItemLong(dataItemLong);
        sampleRecord.setDataItemInt(dataItemInt);
        sampleRecord.setStartTime(startTime.getTime());
        sampleRecord.setEndTime(endTime.getTime());
        return startTime;
    }
}