/*
* The MIT License (MIT)
*
* Copyright (c) 2007-2015 Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broad.igv.tools;
import htsjdk.samtools.util.CloseableIterator;
import org.apache.commons.math.stat.StatUtils;
import org.apache.log4j.Logger;
import org.broad.igv.sam.Alignment;
import org.broad.igv.sam.ReadMate;
import org.broad.igv.sam.reader.AlignmentReader;
import org.broad.igv.sam.reader.AlignmentReaderFactory;
import java.io.IOException;
import java.util.Iterator;
/**
* @author jrobinso
* @date Jan 14, 2011
*/
public class PairedEndStats {
static private Logger log = Logger.getLogger(PairedEndStats.class);
private double minPercentileInsertSize;
private double maxPercentileInsertSize;
private double averageInsertSize;
private double medianInsertSize;
private double stddevInsertSize;
private double madInsertSize;
private static final int MAX_PAIRS = 10000;
public static void main(String[] args) throws IOException {
AlignmentReader reader = AlignmentReaderFactory.getReader(args[0], false);
CloseableIterator<Alignment> iter = reader.iterator();
PairedEndStats stats = compute(iter, .1, 99.9);
iter.close();
reader.close();
System.out.println(args[0] + "\t" + stats.averageInsertSize + "\t" + stats.medianInsertSize +
"\t" + stats.stddevInsertSize + "\t" + stats.madInsertSize);
}
public PairedEndStats(double averageInsertSize, double medianInsertSize, double insertSizeStdev, double madInsertSize, double secondPercentileSize, double maxPercentileInsertSize) {
this.averageInsertSize = averageInsertSize;
this.medianInsertSize = medianInsertSize;
this.stddevInsertSize = insertSizeStdev;
this.madInsertSize = madInsertSize;
this.minPercentileInsertSize = secondPercentileSize;
this.maxPercentileInsertSize = maxPercentileInsertSize;
}
public static PairedEndStats compute(String bamFile) {
AlignmentReader reader = null;
try {
reader = AlignmentReaderFactory.getReader(bamFile, false);
final CloseableIterator<Alignment> alignmentCloseableIterator = reader.iterator();
PairedEndStats stats = compute(alignmentCloseableIterator, .1, 99.9);
alignmentCloseableIterator.close();
return stats;
} catch (IOException e) {
log.error("Error reading sam file: " + e.getMessage(), e);
return null;
}
finally {
try {
if (reader != null)
reader.close();
} catch (IOException e) {
log.error(e.getMessage(), e);
}
}
}
public static PairedEndStats compute(AlignmentReader reader, String chr, int start, int end) {
try {
PairedEndStats stats = compute(reader.query(chr, start, end, false), .1, 99.9);
return stats;
} catch (IOException e) {
log.error("Error computing alignment stats: " + e.getMessage(), e);
return null;
}
}
public static PairedEndStats compute(Iterator<Alignment> alignments, double minPercentile, double maxPercentile) {
double[] insertSizes = new double[MAX_PAIRS];
int nPairs = 0;
while (alignments.hasNext()) {
Alignment al = alignments.next();
if (isProperPair(al)) {
insertSizes[nPairs] = Math.abs(al.getInferredInsertSize());
nPairs++;
}
if (nPairs >= MAX_PAIRS) {
break;
}
}
if(nPairs == 0) {
log.error("Error computing insert size distribution. No alignments in sample interval.");
return null;
}
double mean = StatUtils.mean(insertSizes, 0, nPairs);
double median = StatUtils.percentile(insertSizes, 0, nPairs, 50);
double stdDev = Math.sqrt(StatUtils.variance(insertSizes, 0, nPairs));
double[] deviations = new double[nPairs];
for (int i = 0; i < nPairs; i++) {
deviations[i] = Math.abs(insertSizes[i] - median);
}
// MAD, as defined at http://stat.ethz.ch/R-manual/R-devel/library/stats/html/mad.html
double mad = 1.4826 * StatUtils.percentile(deviations, 50);
double sec = StatUtils.percentile(insertSizes, 0, nPairs, minPercentile);
double max = StatUtils.percentile(insertSizes, 0, nPairs, maxPercentile);
PairedEndStats stats = new PairedEndStats(mean, median, stdDev, mad, sec, max);
return stats;
}
static boolean isProperPair(Alignment alignment) {
if (alignment.isMapped() &&
alignment.isPaired() &&
alignment.isProperPair() &&
!alignment.isDuplicate() &&
alignment.getMappingQuality() > 0 &&
!alignment.isVendorFailedRead() &&
alignment.getInferredInsertSize() > 0) {
ReadMate mate = alignment.getMate();
boolean mateMapped = mate != null && mate.isMapped();
boolean sameChromosome = mateMapped && mate.getChr().equals(alignment.getChr());
return mateMapped && sameChromosome;
}
return false;
}
public double getAverageInsertSize() {
return averageInsertSize;
}
public double getMedianInsertSize() {
return medianInsertSize;
}
public double getStddevInsertSize() {
return stddevInsertSize;
}
public double getMadInsertSize() {
return madInsertSize;
}
public double getMinPercentileInsertSize() {
return minPercentileInsertSize;
}
public double getMaxPercentileInsertSize() {
return maxPercentileInsertSize;
}
}