/* * The MIT License (MIT) * * Copyright (c) 2016 University of California San Diego * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * */ package org.broad.igv.sam; import org.apache.commons.math.stat.StatUtils; import org.broad.igv.util.collections.DoubleArrayList; /** * Created by jrobinso on 12/20/16. * <p> * Some simple statistics to distinguish dna, rna, and 3rd gen alignment datasets. */ public class ReadStats { private int readCount = 0; private int indelCount = 0; private int nCount = 0; private DoubleArrayList readLengths = new DoubleArrayList(10000); private DoubleArrayList refToReadRatios = new DoubleArrayList(10000); public double medianReadLength = 0; public double readLengthStdDev = 0; public double medianRefToReadRatio = 0; public double fracReadsWithIndels; public double fracReadsWithNs; public void addAlignment(Alignment alignment) { if (alignment.isMapped()) { readCount++; final int readLength = alignment.getReadSequence().length(); final char[] cigarString = alignment.getCigarString().toCharArray(); readLengths.add(readLength); if (containsChar(cigarString, 'N')) { nCount++; refToReadRatios.add(((double) (alignment.getAlignmentEnd() - alignment.getAlignmentStart())) / readLength); } if (containsChar(cigarString, 'D') || containsChar(cigarString, 'I')) { indelCount++; } } } public void compute() { if (readLengths.size() > 0) { medianReadLength = StatUtils.percentile(readLengths.toArray(), 50); readLengthStdDev = Math.sqrt(StatUtils.variance(readLengths.toArray())); } if (refToReadRatios.size() > 0) { medianRefToReadRatio = StatUtils.percentile(refToReadRatios.toArray(), 50); } fracReadsWithIndels = ((double) indelCount) / readCount; fracReadsWithNs = ((double) nCount) / readCount; } // Hand coded for speed private boolean containsChar(char[] charArray, char c) { for (int i = 0; i < charArray.length; i++) { if (charArray[i] == c) return true; } return false; } }