package com.manning.hip.ch3.binary; import com.manning.hip.common.HadoopCompat; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.FileSplit; import java.io.DataInputStream; import java.io.IOException; public class CustomBinaryRecordReader extends RecordReader<LongWritable, BytesWritable> { private DataInputStream in; private LongWritable key; private BytesWritable value; private long start; private long end; private long pos; @Override public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { FileSplit split = (FileSplit) genericSplit; Configuration job = HadoopCompat.getConfiguration(context); System.out.println("Start = " + split.getStart()); System.out.println("Length = " + split.getLength()); start = split.getStart(); end = start + split.getLength(); final Path file = split.getPath(); FileSystem fs = file.getFileSystem(job); FSDataInputStream fileIn = fs.open(split.getPath()); fileIn.seek(start); in = new DataInputStream(fileIn); this.pos = start; } @Override public boolean nextKeyValue() throws IOException, InterruptedException { System.out.println("nextKeyValue with pos " + pos); if(pos >= end) { key = null; value = null; return false; } if (key == null) { key = new LongWritable(); } key.set(pos); if (value == null) { value = new BytesWritable(); } int len = in.readInt(); System.out.println("len = " + len); byte[] data = new byte[len]; int read = in.read(data); System.out.println("read = " + read); value.set(data, 0, data.length); pos += 4 + len; return true; } @Override public LongWritable getCurrentKey() throws IOException, InterruptedException { return key; } @Override public BytesWritable getCurrentValue() throws IOException, InterruptedException { return value; } @Override public float getProgress() throws IOException, InterruptedException { if (start == end) { return 0.0f; } else { return Math.min(1.0f, (pos - start) / (float)(end - start)); } } @Override public void close() throws IOException { if (in != null) { in.close(); } } }