/*
* The MIT License
*
* Copyright (c) 2015 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.analysis;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import java.io.File;
/** Utilities to calculate GC Bias
* Created by kbergin on 9/23/15.
*/
public class GcBiasUtils {
/////////////////////////////////////////////////////////////////////////////
// Calculates GC as a number from 0 to 100 in the specified window.
// If the window includes more than five no-calls then -1 is returned.
/////////////////////////////////////////////////////////////////////////////
public static int calculateGc(final byte[] bases, final int startIndex, final int endIndex, final CalculateGcState state) {
if (state.init) {
state.init = false;
state.gcCount = 0;
state.nCount = 0;
for (int i = startIndex; i < endIndex; ++i) {
final byte base = bases[i];
if (SequenceUtil.basesEqual(base, (byte)'G') || SequenceUtil.basesEqual(base, (byte)'C')) ++state.gcCount;
else if (SequenceUtil.basesEqual(base, (byte)'N')) ++state.nCount;
}
} else {
final byte newBase = bases[endIndex - 1];
if (SequenceUtil.basesEqual(newBase, (byte)'G') || SequenceUtil.basesEqual(newBase, (byte)'C')) ++state.gcCount;
else if (newBase == 'N') ++state.nCount;
if (SequenceUtil.basesEqual(state.priorBase, (byte)'G') || SequenceUtil.basesEqual(state.priorBase, (byte)'C')) --state.gcCount;
else if (SequenceUtil.basesEqual(state.priorBase, (byte)'N')) --state.nCount;
}
state.priorBase = bases[startIndex];
if (state.nCount > 4) return -1;
else return (state.gcCount * 100) / (endIndex - startIndex);
}
/////////////////////////////////////////////////////////////////////////////
// Calculate number of 100bp windows in the refBases passed in that fall into
// each gc content bin (0-100% gc)
/////////////////////////////////////////////////////////////////////////////
public static int[] calculateRefWindowsByGc(final int windows, final File referenceSequence, final int windowSize) {
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceSequence);
ReferenceSequence ref;
final int [] windowsByGc = new int [windows];
while ((ref = refFile.nextSequence()) != null) {
final byte[] refBases = ref.getBases();
StringUtil.toUpperCase(refBases);
final int refLength = refBases.length;
final int lastWindowStart = refLength - windowSize;
final CalculateGcState state = new GcBiasUtils().new CalculateGcState();
for (int i = 1; i < lastWindowStart; ++i) {
final int windowEnd = i + windowSize;
final int gcBin = calculateGc(refBases, i, windowEnd, state);
if (gcBin != -1) windowsByGc[gcBin]++;
}
}
return windowsByGc;
}
/////////////////////////////////////////////////////////////////////////////
// Calculate all the GC values for all windows
/////////////////////////////////////////////////////////////////////////////
public static byte [] calculateAllGcs(final byte[] refBases, final int lastWindowStart, final int windowSize) {
final CalculateGcState state = new GcBiasUtils().new CalculateGcState();
final int refLength = refBases.length;
final byte[] gc = new byte[refLength + 1];
for (int i = 1; i < lastWindowStart; ++i) {
final int windowEnd = i + windowSize;
final int windowGc = calculateGc(refBases, i, windowEnd, state);
gc[i] = (byte) windowGc;
}
return gc;
}
/////////////////////////////////////////////////////////////////////////////
// Keeps track of current GC calculation state
/////////////////////////////////////////////////////////////////////////////
class CalculateGcState {
boolean init = true;
int nCount;
int gcCount;
byte priorBase;
}
}