/** * */ package org.archive.hadoop.mapreduce; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; /** * InputFormat for reading CDXes. * It is a {@link TextInputFormat} with just one modification: uses LFOnlyLineRecordReader * instead of standard LineRecordReader. * * We may see more modifications that facilitates processing CDXes with Hadoop. * * @author kenji * */ public class CDXInputFormat extends TextInputFormat { @Override public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) { return new LFOnlyLineRecordReader(); } }