package com.manning.hip.ch8;

import com.manning.hip.ch3.passwd.Passwd;
import com.manning.hip.common.HadoopCompat;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

import java.io.IOException;

/**
 * An {@link org.apache.hadoop.mapreduce.InputFormat} that reads
 * colon-delimited text lines and turns each one into a
 * {@link com.manning.hip.ch3.passwd.Passwd} object.
 * <p/>
 * Keys are the keys produced by the wrapped {@link LineRecordReader}
 * (the position of the line within the file), and values are the
 * {@link com.manning.hip.ch3.passwd.Passwd} objects parsed from each line.
 */
public class BinaryFilenameInputFormat
    extends FileInputFormat<LongWritable, Passwd> {

  /**
   * Creates the reader that converts lines of a split into
   * {@link Passwd} records.
   *
   * @param split   the split to be read (unused here; it is handed to the
   *                reader later through {@code initialize})
   * @param context the task context (unused here)
   * @return a new, uninitialized {@link PasswdRecordReader}
   */
  @Override
  public RecordReader<LongWritable, Passwd> createRecordReader(
      InputSplit split, TaskAttemptContext context) {
    return new PasswdRecordReader();
  }

  /**
   * Reports whether the given file may be divided into several splits.
   *
   * @param context the job context used to look up the configuration
   * @param file    the input file under consideration
   * @return {@code true} only when no compression codec is registered for
   *         the file
   */
  @Override
  protected boolean isSplitable(JobContext context, Path file) {
    CompressionCodec codec =
        new CompressionCodecFactory(HadoopCompat.getConfiguration(context))
            .getCodec(file);
    return codec == null;
  }

  /**
   * A {@link RecordReader} that delegates line reading to a
   * {@link LineRecordReader} and parses every line into a {@link Passwd}.
   */
  public static class PasswdRecordReader
      extends RecordReader<LongWritable, Passwd> {

    /** Separator between the fields of one input line. */
    public static final String PASSWD_LINE_SEPARATOR = ":";

    /** Number of leading fields that are consumed from every line. */
    private static final int PASSWD_FIELD_COUNT = 7;

    /** Underlying reader that supplies the raw lines and their keys. */
    private final LineRecordReader reader = new LineRecordReader();

    /** Record parsed from the most recently read line, or null at end of input. */
    private Passwd value;

    /**
     * Initializes the wrapped line reader for the given split.
     *
     * @throws IOException          if the wrapped reader fails to initialize
     * @throws InterruptedException if the wrapped reader is interrupted
     */
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
      reader.initialize(split, context);
    }

    /**
     * Advances to the next line and parses it.
     *
     * @return {@code true} if a record was read, {@code false} when the
     *         wrapped reader has no more lines (the current value is then
     *         reset to {@code null})
     * @throws IOException          if reading fails or the line is malformed
     * @throws InterruptedException if the wrapped reader is interrupted
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      if (!reader.nextKeyValue()) {
        value = null;
        return false;
      }
      parseLine();
      return true;
    }

    /**
     * Splits the current line on {@link #PASSWD_LINE_SEPARATOR} and builds
     * the {@link Passwd} value from the first seven fields. Blank fields
     * become {@code null}; the third and fourth fields are parsed as longs.
     *
     * @throws IOException if the line has fewer than seven fields or one of
     *                     the numeric fields is not a valid long
     */
    private void parseLine() throws IOException {
      String line = reader.getCurrentValue().toString();
      String[] tokens =
          StringUtils.splitPreserveAllTokens(line, PASSWD_LINE_SEPARATOR);

      // Guard against short/blank lines instead of failing with an
      // uninformative ArrayIndexOutOfBoundsException further down.
      if (tokens == null || tokens.length < PASSWD_FIELD_COUNT) {
        int found = (tokens == null) ? 0 : tokens.length;
        throw new IOException("Malformed passwd line at offset "
            + reader.getCurrentKey() + ": expected at least "
            + PASSWD_FIELD_COUNT + " fields separated by '"
            + PASSWD_LINE_SEPARATOR + "' but found " + found
            + " in \"" + line + "\"");
      }

      try {
        value = new Passwd(
            StringUtils.trimToNull(tokens[0]),
            StringUtils.trimToNull(tokens[1]),
            parseLong(tokens[2]),
            parseLong(tokens[3]),
            StringUtils.trimToNull(tokens[4]),
            StringUtils.trimToNull(tokens[5]),
            StringUtils.trimToNull(tokens[6]));
      } catch (NumberFormatException e) {
        throw new IOException("Malformed numeric field in passwd line at"
            + " offset " + reader.getCurrentKey() + ": \"" + line + "\"", e);
      }
    }

    /**
     * Converts a numeric field to a {@link Long}, parsing the trimmed text
     * so that surrounding whitespace does not break the conversion.
     *
     * @param token the raw field text
     * @return the parsed value, or {@code null} if the field is blank
     * @throws NumberFormatException if the trimmed text is not a valid long
     */
    private static Long parseLong(String token) {
      String trimmed = StringUtils.trimToNull(token);
      return trimmed == null ? null : Long.valueOf(trimmed);
    }

    /**
     * @return the key of the current line as reported by the wrapped reader
     */
    @Override
    public LongWritable getCurrentKey()
        throws IOException, InterruptedException {
      return reader.getCurrentKey();
    }

    /**
     * @return the record parsed from the current line, or {@code null} once
     *         the input is exhausted
     */
    @Override
    public Passwd getCurrentValue()
        throws IOException, InterruptedException {
      return value;
    }

    /**
     * @return the progress reported by the wrapped reader
     */
    @Override
    public float getProgress() throws IOException, InterruptedException {
      return reader.getProgress();
    }

    /**
     * Closes the wrapped reader.
     */
    @Override
    public void close() throws IOException {
      reader.close();
    }
  }
}