/** * Copyright Copyright 2014 Simon Andrews * * This file is part of BamQC. * * BamQC is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * BamQC is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with FastQC; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * Changelog: * - Piero Dalle Pezze: Added y axis label, antialiasing, axes numbers resizing to avoid overlapping, reports. * - Bart Ailey: Class creation. */ package uk.ac.babraham.BamQC.Modules; import java.io.IOException; import java.util.ArrayList; import java.util.List; import javax.swing.JPanel; import javax.xml.stream.XMLStreamException; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import uk.ac.babraham.BamQC.DataTypes.Genome.AnnotationSet; import uk.ac.babraham.BamQC.Graphs.BarGraph; import uk.ac.babraham.BamQC.Report.HTMLReportArchive; import uk.ac.babraham.BamQC.Sequence.SequenceFile; import uk.ac.babraham.BamQC.Utilities.CalculateDistribution; /** * @author Bart Ailey * @author Piero Dalle Pezze * */ public class InsertLengthDistribution extends AbstractQCModule { public final static int MAX_INSERT_SIZE = ModuleConfig.getParam("InsertLengthDistribution_max_insert_size", "ignore").intValue(); public final static int BIN_SIZE = ModuleConfig.getParam("InsertLengthDistribution_bin_size", "ignore").intValue(); public final static double PERCENTAGE_DEVIATION_ERROR = ModuleConfig.getParam("InsertLengthDistribution_percentage_deviation", "error"); public final static double PERCENTAGE_DEVIATION_WARN = ModuleConfig.getParam("InsertLengthDistribution_percentage_deviation", "warn"); private static Logger log = Logger.getLogger(InsertLengthDistribution.class); private ArrayList<Long> insertLengthCounts = new ArrayList<Long>(); private double[] distributionDouble = null; private double aboveMaxInsertLengthCount = 0L; private double [] graphCounts = null; private String [] xCategories = null; private double max = 0.0d; private boolean calculated = false; private long unpairedReads = 0; private long reads = 0; private double percentageDeviation = 0.0; private boolean percentageDeviationCalculated = false; public InsertLengthDistribution() {} @Override public void processFile(SequenceFile file) { } @Override public void processAnnotationSet(AnnotationSet annotation) { } @Override public void processSequence(SAMRecord read) { int inferredInsertSize = Math.abs(read.getInferredInsertSize()); reads++; if (read.getReadPairedFlag() && read.getProperPairFlag()) { if (inferredInsertSize > MAX_INSERT_SIZE) { log.debug("inferredInsertSize = " + inferredInsertSize); aboveMaxInsertLengthCount++; } else { if (inferredInsertSize >= insertLengthCounts.size()) { for (long i = insertLengthCounts.size(); i < inferredInsertSize; i++) { insertLengthCounts.add(0L); } insertLengthCounts.add(1L); } else { long existingValue = insertLengthCounts.get(inferredInsertSize); insertLengthCounts.set(inferredInsertSize, ++existingValue); } } } else { unpairedReads++; } } @Override public JPanel getResultsPanel() { log.debug("Number of inferred insert sizes above the maximum allowed = " + aboveMaxInsertLengthCount); log.debug("Number of unpaired reads = " + unpairedReads); if (!calculated) { double[] rawCounts = new double[insertLengthCounts.size()]; for(int i=0; i<rawCounts.length; i++) { rawCounts[i] = insertLengthCounts.get(i); } CalculateDistribution cd = new CalculateDistribution(rawCounts, aboveMaxInsertLengthCount, BIN_SIZE); graphCounts = cd.getGraphCounts(); xCategories = cd.getXCategories(); max = cd.getMax(); distributionDouble = cd.getDistributionDouble(); calculated = true; } String title = String.format("Paired Read Insert Length Distrib ( %d bp max size and %.3f %% unpaired reads )", MAX_INSERT_SIZE, (((double) unpairedReads / reads) * 100.0)); String xLabel = "Inferred Insert Length bp"; String yLabel = "Percent of Reads"; return new BarGraph(graphCounts, 0.0d, max, xLabel, yLabel, xCategories, title); } @Override public String name() { return "Insert Length Distribution"; } @Override public String description() { return "Distribution of the read insert length"; } @Override public void reset() { insertLengthCounts = new ArrayList<Long>(); aboveMaxInsertLengthCount = 0L; percentageDeviationCalculated = false; percentageDeviation = 0.0; } private double calculatePercentageDeviation() { if (!percentageDeviationCalculated) { List<Double> distributionDouble = new ArrayList<Double>(); for (long count : insertLengthCounts) { distributionDouble.add((double) count); } NormalDistributionModeler normalDistributionModeler = new NormalDistributionModeler(); normalDistributionModeler.setDistribution(distributionDouble); normalDistributionModeler.calculateDistribution(); percentageDeviation = normalDistributionModeler.getDeviationPercent(); if (Double.isNaN(percentageDeviation)) percentageDeviation = 100.0; log.debug("percentageDeviation = " + percentageDeviation); percentageDeviationCalculated = true; } return percentageDeviation; } @Override public boolean raisesError() { return calculatePercentageDeviation() > PERCENTAGE_DEVIATION_ERROR; } @Override public boolean raisesWarning() { return calculatePercentageDeviation() > PERCENTAGE_DEVIATION_WARN; } @Override public boolean needsToSeeSequences() { return true; } @Override public boolean needsToSeeAnnotation() { return false; } @Override public boolean ignoreInReport() { if(ModuleConfig.getParam("InsertLengthDistribution", "ignore") > 0 || insertLengthCounts == null || insertLengthCounts.size() == 0) { return true; } return false; } private String[] buildLabels(int binNumber) { String[] label = new String[binNumber]; for (int i = 0; i < label.length; i++) { label[i] = Integer.toString(i * 10); } return label; } @Override public void makeReport(HTMLReportArchive report) throws XMLStreamException, IOException { String title = String.format("Paired Read Insert Length Distribution ( %d bp max size and %.3f %% unpaired reads )", MAX_INSERT_SIZE, (((double) unpairedReads / reads) * 100.0)); super.writeDefaultImage(report, "InsertLengthDistribution.png", title, 800, 600); if(insertLengthCounts == null || insertLengthCounts.size() == 0) { return; } int binNumber = (insertLengthCounts.size() / BIN_SIZE) + 2; String[] label = buildLabels(binNumber); StringBuffer sb = report.dataDocument(); sb.append("Inferred_insert_length(bp)\tPaired_read_insert_length_distribution\n"); for (int i=0;i<distributionDouble.length;i++) { sb.append(label[i]).append("\t").append(distributionDouble[i]).append("\n"); } } public ArrayList<Long> getInsertLengthCounts() { return insertLengthCounts; } public long getUnpairedReads() { return unpairedReads; } }