// Copyright (C) 2011-2012 CRS4.
//
// This file is part of Seal.
//
// Seal is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// Seal is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with Seal. If not, see <http://www.gnu.org/licenses/>.
package it.crs4.seal.common;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.IOException;
public class SamInputFormat extends FileInputFormat<LongWritable, ReadPair>
{
public static class SamRecordReader extends RecordReader<LongWritable, ReadPair>
{
private LineRecordReader lineReader;
private ReadPair value;
private FileSplit split; // memorize it for error messages
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException
{
lineReader = new LineRecordReader();
lineReader.initialize(genericSplit, context);
split = (FileSplit)genericSplit;
value = null;
}
@Override
public void close() throws IOException
{
lineReader.close();
}
@Override
public LongWritable getCurrentKey()
{
return lineReader.getCurrentKey();
}
@Override
public ReadPair getCurrentValue()
{
return value;
}
@Override
public float getProgress() throws IOException
{
return lineReader.getProgress();
}
@Override
public boolean nextKeyValue() throws IOException
{
if (lineReader.nextKeyValue())
{
Text line = lineReader.getCurrentValue();
value = new ReadPair();
try
{
TextSamMapping mapping = new TextSamMapping(line);
if (mapping.isRead2())
value.setRead2(mapping);
else // anything that's not explicitly labelled as "read 2" goes in as read 1.
value.setRead1(mapping);
return true;
}
catch (FormatException e) {
throw new FormatException(e.getMessage() + ". File: " + split.getPath() + "; Position: " + lineReader.getCurrentKey().get());
}
}
else
return false;
}
}
@Override
public RecordReader<LongWritable,ReadPair> createRecordReader(InputSplit split, TaskAttemptContext context)
{
FileSplit fsplit = (FileSplit)split;
if (fsplit.getStart() > 0 && !isSplitable(context, fsplit.getPath()))
throw new RuntimeException("Trying to split non-splittable file " + fsplit.getPath());
return new SamRecordReader();
}
@Override
protected boolean isSplitable(JobContext context, Path file)
{
CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
return codec == null;
}
}