package hip.util; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.LineRecordReader; import java.io.IOException; /** * Assumes one line per log entry object */ public class CommonLogInputFormat extends FileInputFormat<LongWritable, CommonLogEntry> { @Override public RecordReader<LongWritable, CommonLogEntry> createRecordReader( InputSplit split, TaskAttemptContext context) { return new CommonLogRecordReader(); } @Override protected boolean isSplitable(JobContext context, Path file) { CompressionCodec codec = new CompressionCodecFactory(HadoopCompat.getConfiguration(context)) .getCodec(file); return codec == null; } public static class CommonLogRecordReader extends RecordReader<LongWritable, CommonLogEntry> { private LineRecordReader reader = new LineRecordReader(); private ApacheCommonLogReader logReader = new ApacheCommonLogReader(); private CommonLogEntry value_ = new CommonLogEntry(); @Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { reader.initialize(split, context); } @Override public synchronized void close() throws IOException { reader.close(); } @Override public LongWritable getCurrentKey() throws IOException, InterruptedException { return reader.getCurrentKey(); } @Override public CommonLogEntry getCurrentValue() throws IOException, InterruptedException { return value_; } @Override public float getProgress() throws IOException, InterruptedException { return reader.getProgress(); } @Override public boolean nextKeyValue() throws IOException, InterruptedException { while (reader.nextKeyValue()) { if ((value_ = logReader.decodeLine(reader.getCurrentValue())) != null) { return true; } } return false; } } }