package edu.isi.karma.mapreduce.inputformat;
import java.io.IOException;
import java.lang.reflect.Field;
import java.nio.charset.Charset;
import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A {@link FileInputFormat} that produces {@code Text}/{@code Text} records through an
 * {@link AvroBatchRecordReader} built from the job's configured Avro input key schema.
 *
 * <p>Loading this class has a JVM-wide side effect: a static initializer attempts to
 * overwrite the private {@code defaultCharset} field of {@link Charset} with UTF-8 via
 * reflection. The attempt is best-effort; any failure is logged and otherwise ignored.
 *
 * @param <T> type argument passed through to the {@link AvroBatchRecordReader} this
 *            format creates
 */
public class AvroKeyBatchInputFormat<T> extends FileInputFormat<Text, Text> {

    private static final Logger LOG = LoggerFactory.getLogger(AvroKeyBatchInputFormat.class);

    // Must stay below the LOG declaration: static initializers run in textual order and
    // the helper logs through LOG on failure.
    static {
        forceUtf8DefaultCharset();
    }

    /**
     * Creates the record reader for a split, using the reader schema obtained from
     * {@link AvroJob#getInputKeySchema} on the task's configuration.
     *
     * @param split   the split to be read; not inspected by this method
     * @param context the task attempt context whose configuration supplies the schema
     * @return a new {@link AvroBatchRecordReader} constructed with that schema
     * @throws IOException          declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public RecordReader<Text, Text> createRecordReader(
            InputSplit split, TaskAttemptContext context) throws IOException,
            InterruptedException {
        Schema readerSchema = AvroJob.getInputKeySchema(context.getConfiguration());
        return new AvroBatchRecordReader<T>(readerSchema);
    }

    /**
     * Best-effort attempt to set the private static {@code defaultCharset} field of
     * {@link Charset} to UTF-8 through reflection.
     *
     * <p>This relies on a private field of the JDK class, so it can fail when the field
     * does not exist or when reflective access to it is refused. The broad catch is
     * deliberate: a failure here must not prevent this class from loading, so it is
     * logged and the method returns normally.
     */
    private static void forceUtf8DefaultCharset() {
        try {
            Field defaultCharsetField = Charset.class.getDeclaredField("defaultCharset");
            defaultCharsetField.setAccessible(true);
            // null target: the field is written as a static field.
            defaultCharsetField.set(null, Charset.forName("UTF-8"));
        } catch (Exception e) {
            LOG.error("Unable to force the JVM default charset to UTF-8 via reflection on "
                    + "java.nio.charset.Charset.defaultCharset; the default charset was not changed", e);
        }
    }
}