// Copyright (C) 2011-2012 CRS4.
//
// This file is part of Seal.
//
// Seal is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// Seal is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with Seal. If not, see <http://www.gnu.org/licenses/>.
package it.crs4.seal.recab;
import it.crs4.seal.common.AbstractTaggedMapping;
import java.util.HashMap;
import java.nio.ByteBuffer;
/**
* Dinucleotide covariate.
* Assumes reads mapped to the reverse strand have been reversed and complemented.
*
* TODO: profile!
*/
public class DinucCovariate implements Covariate
{
private static final String NN = "NN";
private int readLength = -1;
private boolean forwardStrand;
private ByteBuffer sequence;
private ByteBuffer revComplement;
private String[] values;
private static final int BUFFER_SIZE = 200;
private static final HashMap<Byte, Byte> complementByte;
static {
complementByte = new HashMap<Byte, Byte>();
complementByte.put((byte)'N', (byte)'N');
complementByte.put((byte)'A', (byte)'T');
complementByte.put((byte)'C', (byte)'G');
complementByte.put((byte)'G', (byte)'C');
complementByte.put((byte)'T', (byte)'A');
}
public DinucCovariate()
{
revComplement = ByteBuffer.allocate(BUFFER_SIZE);
revComplement.mark();
values = new String[BUFFER_SIZE];
}
public void applyToMapping(AbstractTaggedMapping m)
{
readLength = m.getLength();
forwardStrand = !m.isOnReverse();
sequence = m.getSequence();
if (!forwardStrand)
{
// calculate the reversed complement
if (revComplement.capacity() < readLength) // ensure space is sufficient
{
revComplement = ByteBuffer.allocate(readLength);
revComplement.mark();
}
// now revComplement the read into the buffer
revComplement.reset();
int startPos = sequence.position();
for (int pos = startPos + readLength - 1; pos >= startPos; --pos)
revComplement.put( complementByte.get( sequence.get(pos)) );
revComplement.reset();
sequence = revComplement;
}
// now sequence is what was read by the sequencer
// Calculate the dinucleotides. We calculate them starting from the beginning
// of 'sequence', but the order in which they are inserted into 'values' must
// match the original base order.
final byte[] seqArray = sequence.array();
final int sequenceStart = sequence.position();
int valuesPtr;
int valuesIncrement;
if (forwardStrand)
{
values[0] = NN;
valuesPtr = 1;
valuesIncrement = 1;
}
else
{
values[readLength - 1] = NN;
valuesPtr = readLength - 2;
valuesIncrement = -1;
}
// for each "previous" base, if it's an N insert a value of NN, otherwise the
// dinucleotide (previous, previous+1)
for (int previous = 0; previous < readLength - 1; ++previous)
{
if (seqArray[sequenceStart + previous] == 'N') // if previous base is an N
values[ valuesPtr ] = NN;
else
values[ valuesPtr ] = new String(seqArray, sequenceStart + previous, 2); // string length 2 starting from seqArray[previous]
valuesPtr += valuesIncrement;
}
}
public String getValue(int pos)
{
if (readLength < 0)
throw new RuntimeException("BUG! readLength == " + readLength + ". applyToMapping not called before DinucCovariate.getValue");
else if (pos < 0 || pos >= readLength)
throw new IndexOutOfBoundsException("pos " + pos + " is out of read boundaries [0," + readLength + ")");
return values[pos];
}
}