package org.archive.hadoop.streaming;
import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.LineRecordReader;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.lib.CombineFileSplit;
import org.archive.util.zip.OpenJDK7GZIPInputStream;
/**
 * Reads one gzip-compressed file line by line and exposes each line through the
 * old-style {@link RecordReader} API.
 *
 * <p>The text of each line is delivered in the <em>key</em>; the value is always
 * cleared and left empty (see {@link #next(Text, Text)}).
 *
 * <p>Failure tolerance: the attempt number of the running task is read from the
 * {@code mapred.task.id} property. While that number is below
 * {@link #MAX_ALLOW_FAILURES}, any error while opening or reading the file is
 * rethrown as an {@link IOException}. From that attempt number onwards errors
 * are swallowed and the file is treated as if it had ended, so that whatever
 * was read before the error is still processed.
 */
public class GzipSingleFileRecordReader implements RecordReader<Text, Text>
{
    /** Attempt number from which open/read errors are tolerated instead of rethrown. */
    final static int MAX_ALLOW_FAILURES = 2;

    /** Underlying line reader; stays {@code null} when initialisation failed and the failure was tolerated. */
    LineRecordReader reader;

    /** Reusable key object handed to the underlying line reader; its value is never exposed to callers. */
    LongWritable longKey;

    /** String form of the file path, used in error messages. */
    String currPath;

    /** Attempt number of the current task; 0 when it cannot be determined. */
    int currAttempt;

    /**
     * Creates a reader for the {@code index}-th file of a combined split.
     *
     * @param split    the combined split holding the file
     * @param conf     job configuration
     * @param reporter not used by this reader
     * @param index    position of the file inside {@code split}
     * @throws IOException if the file cannot be opened and the failure is not tolerated
     */
    public GzipSingleFileRecordReader(CombineFileSplit split, Configuration conf, Reporter reporter, Integer index) throws IOException
    {
        this(split.getPath(index), split.getOffset(index), conf);
    }

    /**
     * Opens {@code file}, wraps it in a gzip decompressor and prepares line reading.
     *
     * @param file        path of the gzip file
     * @param startOffset start offset handed to the underlying {@link LineRecordReader}
     * @param conf        job configuration; {@code textinputformat.record.delimiter}
     *                    optionally sets a custom record delimiter
     * @throws IOException if initialisation fails and the current attempt number is
     *                     below {@link #MAX_ALLOW_FAILURES}
     */
    public GzipSingleFileRecordReader(Path file, long startOffset, Configuration conf) throws IOException
    {
        currPath = file.toString();
        // Outermost stream opened so far; closed if initialisation fails part-way.
        InputStream opened = null;
        try {
            currAttempt = resolveAttempt(conf);
            FileSystem fs = file.getFileSystem(conf);
            FSDataInputStream fileIn = fs.open(file);
            opened = fileIn;

            String delimiter = conf.get("textinputformat.record.delimiter");
            byte[] recordDelimiter = null;
            if (null != delimiter) {
                // Explicit charset so the delimiter bytes do not depend on the JVM's platform default.
                recordDelimiter = delimiter.getBytes("UTF-8");
            }

            InputStream in = new OpenJDK7GZIPInputStream(fileIn);
            opened = in;

            // The decompressed length is unknown up front, so read until the stream ends.
            long endOffset = Long.MAX_VALUE;
            LineRecordReader lineReader = new LineRecordReader(in, startOffset, endOffset, conf, recordDelimiter);
            longKey = lineReader.createKey();
            // Publish the reader only once everything has succeeded.
            reader = lineReader;
        } catch (Exception e) {
            // Do not leak the file handle when the gzip header or reader setup fails.
            closeQuietly(opened);
            if (currAttempt < MAX_ALLOW_FAILURES) {
                throw new IOException("Failed init for: " + currPath, e);
            }
            // Tolerated failure: reader stays null and this file yields no records.
        }
    }

    /**
     * Determines the attempt number of the running task.
     *
     * @param conf job configuration
     * @return the attempt number, or 0 when {@code mapred.task.id} yields no attempt id
     */
    private static int resolveAttempt(Configuration conf) {
        // forName is documented by Hadoop to return null for a null string, e.g. when
        // the reader is used outside a running task; treat that as the first attempt.
        TaskAttemptID attemptId = TaskAttemptID.forName(conf.get("mapred.task.id"));
        return (attemptId == null) ? 0 : attemptId.getId();
    }

    /**
     * Closes {@code stream} if it is non-null, ignoring any error raised by the close.
     *
     * @param stream stream to close; may be {@code null}
     */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // The original failure is the one worth reporting; a close error adds nothing.
        }
    }

    /**
     * Reads the next line into {@code key}; {@code value} is always cleared.
     *
     * @param key   receives the text of the next line
     * @param value cleared on every call and otherwise unused
     * @return {@code true} if a line was read; {@code false} at end of input, when the
     *         reader failed to initialise, or after a tolerated read error
     * @throws IOException if reading fails and the current attempt number is below
     *                     {@link #MAX_ALLOW_FAILURES}
     */
    @Override
    public boolean next(Text key, Text value) throws IOException {
        if (reader == null) {
            return false;
        }
        try {
            value.clear();
            return reader.next(longKey, key);
        } catch (Exception e) {
            if (currAttempt < MAX_ALLOW_FAILURES) {
                throw new IOException("Failed reading from " + currPath, e);
            } else {
                // Tolerated failure: drop the partial record and report end of input.
                key.clear();
                return false;
            }
        }
    }

    /** @return a new, empty key object */
    @Override
    public Text createKey() {
        return new Text();
    }

    /** @return a new value object; taken from the underlying reader when one exists */
    @Override
    public Text createValue() {
        if (reader == null) {
            return new Text();
        }
        return reader.createValue();
    }

    /**
     * @return the position reported by the underlying reader, or 0 when there is none
     * @throws IOException if the underlying reader fails to report its position
     */
    @Override
    public long getPos() throws IOException {
        if (reader == null) {
            return 0;
        }
        return reader.getPos();
    }

    /**
     * Closes the underlying reader, if any.
     *
     * @throws IOException if closing the underlying reader fails
     */
    @Override
    public void close() throws IOException {
        if (reader != null) {
            reader.close();
        }
    }

    /**
     * @return the progress reported by the underlying reader, or 0 when there is none
     * @throws IOException if the underlying reader fails to report progress
     */
    @Override
    public float getProgress() throws IOException {
        if (reader == null) {
            return 0;
        }
        return reader.getProgress();
    }
}