// Copyright (C) 2011-2012 CRS4.
//
// This file is part of Seal.
//
// Seal is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// Seal is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with Seal. If not, see <http://www.gnu.org/licenses/>.
package it.crs4.seal.demux;
import it.crs4.seal.common.IMRContext;
import it.crs4.seal.common.SequenceId;
import org.seqdoop.hadoop_bam.SequencedFragment;
import org.apache.hadoop.io.Text;
import java.io.IOException;
/**
 * Mapper for demultiplexing reads.
 *
 * Maps each sequence record into a SequenceId (key) and the same sequence record (value).
 *
 * The key location is generated with (instrument, run number, lane, tile, xpos, ypos),
 * and the read number is stored alongside it in the SequenceId.
 * Thus, the reducer gets all the reads for the specific position on the
 * flowcell: read 1, read 2 (barcode), and read 3.
 */
public class DemuxMapper
{
    // The key object and the string builder are instance fields that are reset and
    // reused on every call to map().
    // NOTE(review): this presumes context.write() copies or serializes the key before
    // the next call overwrites it -- confirm against the IMRContext implementation.
    private final SequenceId key = new SequenceId();
    private final StringBuilder sBuilder = new StringBuilder(500);

    /**
     * Forms a key for the given sequence and writes the (key, sequence) pair to the context.
     *
     * The key's location is a colon-delimited string containing
     * (instrument, run number, lane, tile, xpos, ypos); the read number is
     * passed to the key separately.
     *
     * @param inputKey The input key; not used by this method.
     * @param seq      The sequence record to map.  It must define instrument, run number,
     *                 lane, read, tile, xpos and ypos, and its read number must be positive.
     * @param context  The context to which the (key, sequence) pair is written.
     *
     * @throws RuntimeException If the sequence doesn't define its necessary location fields
     *                          or its read number is not greater than zero.
     */
    public void map(Text inputKey, SequencedFragment seq, IMRContext<SequenceId, SequencedFragment> context) throws IOException, InterruptedException
    {
        // Guarantees that none of the getters used below returns null.
        checkFields(seq);

        sBuilder.setLength(0);

        if (seq.getRead() <= 0)
            throw new RuntimeException("Invalid read number " + seq.getRead() + " in sequence record: " + seq);

        // The key location is: instrument, run number, lane, tile, xpos, ypos, delimited by ':' characters.
        // The read number is not part of this string; it is given to the key as a separate argument.
        sBuilder.append(seq.getInstrument()).append(':');
        sBuilder.append(seq.getRunNumber()).append(':');
        sBuilder.append(seq.getLane()).append(':');
        sBuilder.append(seq.getTile()).append(':');
        sBuilder.append(seq.getXpos()).append(':');
        sBuilder.append(seq.getYpos());

        key.set(sBuilder.toString(), seq.getRead());
        context.write(key, seq);
    }

    /**
     * Verifies that the sequence defines every field needed to build its key.
     *
     * Fields are checked in this order: instrument, run number, lane, read, tile,
     * xpos, ypos.  Only the first missing field is reported.
     *
     * @param seq The sequence record to check.
     *
     * @throws RuntimeException If any of the required fields is null.  The message names
     *                          the missing field and includes the record itself.
     */
    private void checkFields(SequencedFragment seq)
    {
        final String missingField;

        if (seq.getInstrument() == null)
            missingField = "instrument name";
        else if (seq.getRunNumber() == null)
            missingField = "run number";
        else if (seq.getLane() == null)
            missingField = "lane";
        else if (seq.getRead() == null)
            missingField = "read";
        else if (seq.getTile() == null)
            missingField = "tile";
        else if (seq.getXpos() == null)
            missingField = "xpos";
        else if (seq.getYpos() == null)
            missingField = "ypos";
        else
            return; // all required fields are present

        // Throw a single exception with a clean message, rather than throwing,
        // catching and re-wrapping one via toString().
        throw new RuntimeException("missing " + missingField + " in sequence record: " + seq);
    }
}