// Copyright (C) 2011-2012 CRS4.
//
// This file is part of Seal.
//
// Seal is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// Seal is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with Seal. If not, see <http://www.gnu.org/licenses/>.
package it.crs4.seal.common;
import it.crs4.seal.common.CutText;
import it.crs4.seal.common.AbstractSamMapping;
import org.apache.hadoop.io.Text;
import java.nio.ByteBuffer;
/**
* Implements a SAM mapping read from a Text object.
*/
public class TextSamMapping extends AbstractSamMapping
{
protected static final String Delim = "\t";
protected CutText cutter;
protected Text unparsedData; // part of the record we don't read unless we need to
protected int flag;
protected int pos5 = 0;
protected byte mapQ;
protected int matePos5 = 0;
protected int insertSize = 0;
protected int seqLen;
protected int qualityStart;
protected int tagsStart;
// samples SAM record
// 0 1 2 3 4 5 6 7 8 9 10
// DCW97JN1_252:1:1105:15329:186955#GGCTAC 89 chr1 12134 30 51M = 12134 0 TTGTCTGCATGTAACTTAATACCACAACCAGGCATAGGGGAAAGATTGGAG IJJJJIJJJJJJJIIGJIHFEIHFJJJJJJJJJJJJJJHHHHHFFFFFCCC XT:A:R NM:i:0 SM:i:0 AM:i:0 X0:i:6 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:51
public TextSamMapping(Text sam) throws FormatException
{
unparsedData = new Text();
cutter = new CutText(Delim, 0, 1, 2, 3, 4, 5, 6, 7, 8); // all fields up to and including insert size
try
{
cutter.loadRecord(sam);
flag = Integer.parseInt(cutter.getField(1)); // set flag first so we can use the flag methods
mapQ = Byte.parseByte(cutter.getField(4));
if (isMapped())
pos5 = Integer.parseInt(cutter.getField(3));
if (isMateMapped())
matePos5 = Integer.parseInt(cutter.getField(7));
if (isMapped() && isMateMapped())
insertSize = Integer.parseInt(cutter.getField(8));
}
catch (CutText.FormatException e) {
throw new FormatException("sam formatting problem: " + e + ". Record: " + sam);
}
catch (NumberFormatException e) {
throw new FormatException("sam formatting problem. Found text in place of a number. Record: " + sam);
}
int seqStart = cutter.getFieldPos(8) + cutter.getField(8).length() + 1;
if (seqStart > sam.getLength())
throw new FormatException("Incomplete SAM record -- missing fields. Record: " + sam);
// copy the sequence and tag data to our internal buffer
unparsedData.set(sam.getBytes(), seqStart, sam.getLength() - seqStart);
// Find the end of the sequence field. Search for a Delim after the insert size field.
int end = unparsedData.find(Delim);
if (end < 0)
throw new FormatException("Bad SAM format. Missing terminator for sequence field. SAM: " + sam);
seqLen = end;
// now repeat for the quality field
qualityStart = end + 1;
if (qualityStart > unparsedData.getLength())
throw new FormatException("Incomplete SAM record -- missing quality field. Record: " + sam);
end = unparsedData.find(Delim, qualityStart);
if (end < 0)
end = unparsedData.getLength();
if (seqLen != end - qualityStart)
{
throw new FormatException("Length of sequence (" + seqLen + ") is different from length of quality string ("
+ (end - qualityStart) + "). Record: " + sam);
}
tagsStart = end + 1;
}
public String getName() { return cutter.getField(0); }
public int getFlag() { return flag; }
public String getContig()
{
if (isUnmapped())
throw new IllegalStateException();
return cutter.getField(2);
}
public int get5Position()
{
if (isUnmapped())
throw new IllegalStateException();
return pos5;
}
public int getMapQ() { return mapQ; }
public String getCigarStr()
{
if (isUnmapped())
throw new IllegalStateException();
return cutter.getField(5);
}
public boolean isTemplateLengthAvailable()
{
return insertSize != 0;
}
public int getTemplateLength()
{
int abs = Math.abs(insertSize);
if (abs > 0)
return abs;
else
throw new IllegalStateException();
}
public ByteBuffer getSequence() { return (ByteBuffer)ByteBuffer.wrap(unparsedData.getBytes(), 0, seqLen).mark(); }
public ByteBuffer getBaseQualities() { return (ByteBuffer)ByteBuffer.wrap(unparsedData.getBytes(), qualityStart, seqLen).mark(); }
public int getLength() { return seqLen; }
protected String getTagText(String name)
{
if (tagsStart >= unparsedData.getLength()) // no tags
return null;
String text = null;
try {
int pos = unparsedData.find(Delim + name, tagsStart - 1);
if (pos >= 0)
{
int fieldEnd = unparsedData.find(Delim, pos + 1); // fieldEnd: index one position beyond the last char of the field
if (fieldEnd < 0)
fieldEnd = unparsedData.getLength();
// decode n bytes from start
// start = pos + 1 (+1 to skip the delimiter)
// n = fieldEnd - start
// = fieldEnd - (pos + 1)
// = fieldEnd - pos - 1
text = Text.decode(unparsedData.getBytes(), pos + 1, fieldEnd - pos - 1);
}
}
catch (java.nio.charset.CharacterCodingException e) {
throw new RuntimeException("character coding error retrieving tag '" + name + "' from SAM record " + this.toString());
}
return text;
}
public String toString()
{
StringBuilder builder = new StringBuilder(1000);
for (int i = 0; i <= 8; ++i)
builder.append(cutter.getField(i)).append('\t');
builder.append(unparsedData.toString());
return builder.toString();
}
}