package ch.unibe.scg.cells.benchmarks;
import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import com.google.common.base.Charsets;
import com.google.common.io.ByteStreams;
class RawFileFormat extends FileInputFormat<ImmutableBytesWritable, ImmutableBytesWritable> {
@Override
public RecordReader<ImmutableBytesWritable, ImmutableBytesWritable> createRecordReader(
InputSplit is, TaskAttemptContext c) throws IOException, InterruptedException {
return new RawFileRecordReader();
}
/** Input format for a directory of files. No recursion. */
static class RawFileRecordReader extends
RecordReader<ImmutableBytesWritable, ImmutableBytesWritable> {
private ImmutableBytesWritable currentKey;
private ImmutableBytesWritable currentValue;
private boolean isFinished;
private FileSplit split;
private FileSystem fs;
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
throws IOException, InterruptedException {
split = (FileSplit) inputSplit; // Cast will always hold for FileInputFormats.
// Taken from SequenceFileRecordReader#initialize.
fs = split.getPath().getFileSystem(taskAttemptContext.getConfiguration());
// Again, this line is stolen from SequenceFileRecordReader.
}
@Override
public void close() throws IOException {
if (fs != null) {
fs.close();
fs = null;
}
}
@Override
public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
return currentKey;
}
@Override
public ImmutableBytesWritable getCurrentValue() throws IOException, InterruptedException {
return currentValue;
}
@Override
public float getProgress() throws IOException, InterruptedException {
if (isFinished) {
return 1.0f;
}
return 0.0f;
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
if (isFinished) {
return false;
}
isFinished = true;
FileStatus stat = fs.getFileStatus(split.getPath());
if (stat.isDirectory()) {
throw new IOException("This input format is for a directory of files. No recursion.");
}
currentKey = new ImmutableBytesWritable(split.getPath().toString().getBytes(Charsets.UTF_8));
try (FSDataInputStream fsin = fs.open(split.getPath())) {
currentValue = new ImmutableBytesWritable(ByteStreams.toByteArray(fsin));
}
return true;
}
}
}