package com.caseystella.util.common.hadoop.input.fixed;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
/**
* Created by cstella on 9/3/14.
*/
/**
 * An input format that presents a file as a sequence of fixed-width byte records.
 *
 * <p>Keys are the byte offset of each record ({@link LongWritable}) and values are the
 * record bytes ({@link BytesWritable}). The record width may be supplied either through
 * the {@link #FixedWidthInputFormat(int)} constructor or — when Hadoop instantiates this
 * class reflectively via the no-arg constructor — through the {@link #WIDTH_KEY}
 * configuration property.
 */
public class FixedWidthInputFormat extends FileInputFormat<LongWritable, BytesWritable> {
    /** Configuration property holding the record width in bytes; must be a positive integer. */
    public static final String WIDTH_KEY = "fwif.record.width";

    // Record width in bytes. Stays null until provided by the constructor or first
    // resolved from the job configuration in createRecordReader.
    Integer width = null;

    /**
     * Creates an input format with an explicit record width, bypassing configuration lookup.
     *
     * @param width record width in bytes
     */
    public FixedWidthInputFormat(int width)
    {
        this();
        this.width = width;
    }

    /**
     * No-arg constructor used by the Hadoop framework; the record width is read lazily
     * from the job configuration ({@link #WIDTH_KEY}) when a record reader is created.
     */
    public FixedWidthInputFormat()
    {
        super();
    }

    /**
     * Creates a record reader for the given split, resolving the record width from the
     * job configuration if it was not supplied via the constructor.
     *
     * @param split   the split to read (unused here; the reader receives it via initialize)
     * @param context task context providing the job configuration
     * @return a {@link FixedWidthRecordReader} for the configured width
     * @throws IllegalStateException if the width is unset, non-numeric, or not positive
     */
    @Override
    public RecordReader<LongWritable, BytesWritable>
    createRecordReader(InputSplit split,
                       TaskAttemptContext context) {
        if(width == null)
        {
            // Configuration.getInt returns the default when the key is absent and
            // throws NumberFormatException only for a malformed value; validating here
            // yields a clear, actionable message instead of "NumberFormatException: null".
            int configuredWidth;
            try {
                configuredWidth = context.getConfiguration().getInt(WIDTH_KEY, -1);
            } catch (NumberFormatException e) {
                throw new IllegalStateException("Configuration property " + WIDTH_KEY
                        + " is not a valid integer: "
                        + context.getConfiguration().get(WIDTH_KEY), e);
            }
            if (configuredWidth <= 0) {
                throw new IllegalStateException("Record width must be a positive integer; "
                        + "set the " + WIDTH_KEY + " configuration property or use the "
                        + "FixedWidthInputFormat(int) constructor.");
            }
            width = configuredWidth;
        }
        return new FixedWidthRecordReader(width);
    }

    /**
     * A file is splittable when it is uncompressed, or compressed with a codec that
     * supports splitting (e.g. bzip2).
     *
     * @param context job context providing the configuration for codec lookup
     * @param file    the file being considered for splitting
     * @return true if the file may be divided into multiple input splits
     */
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        final CompressionCodec codec =
                new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
        if (null == codec) {
            // No codec matched the file name: plain data, safe to split anywhere.
            return true;
        }
        return codec instanceof SplittableCompressionCodec;
    }
}