/** * Copyright Copyright 2015 Simon Andrews * * This file is part of BamQC. * * BamQC is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * BamQC is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with BamQC; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * Changelog: * - Piero Dalle Pezze: Class creation. */ package uk.ac.babraham.BamQC.Modules; import java.awt.GridLayout; import java.io.IOException; import java.util.HashMap; import javax.swing.JPanel; import javax.xml.stream.XMLStreamException; import org.apache.log4j.Logger; import net.sf.samtools.SAMRecord; import uk.ac.babraham.BamQC.DataTypes.Genome.AnnotationSet; import uk.ac.babraham.BamQC.Graphs.LineGraph; import uk.ac.babraham.BamQC.Report.HTMLReportArchive; import uk.ac.babraham.BamQC.Sequence.SequenceFile; /** * This class re-uses the computation collected by the class VariantCallDetection * and plots the SNP Frequencies. * @author Piero Dalle Pezze */ public class SNPFrequencies extends AbstractQCModule { private static Logger log = Logger.getLogger(SNPFrequencies.class); double[] dFirstSNPPos = null; double[] dSecondSNPPos = null; // threshold for the plot y axis. private double firstMaxY=0.0d; private double secondMaxY=0.0d; // The analysis collecting all the results. VariantCallDetection variantCallDetection = null; // data fields for plotting private static String[] snpName = {"SNPs"}; // Constructors // /** // * Default constructor // */ // public SNPFrequencies() { } /** * Constructor. Reuse of the computation provided by VariantCallDetection analysis. */ public SNPFrequencies(VariantCallDetection vcd) { super(); variantCallDetection = vcd; } // Private methods /** * Computes the maximum value for the x axis. * @return xMaxValue */ private int computeXMaxValue() { HashMap<Integer, Long> hm = variantCallDetection.getContributingReadsPerPos(); Integer[] readLengths = hm.keySet().toArray(new Integer[0]); Long[] readCounts = hm.values().toArray(new Long[0]); int xMaxValue = 5; // sequences long at least 5. long moreFrequentReadLength = 0; // Computes a variable threshold depending on the read length distribution of read library for(int i=0; i<readCounts.length; i++) { if(readCounts[i] > moreFrequentReadLength) { moreFrequentReadLength = readCounts[i]; } } double threshold = moreFrequentReadLength * ModuleConfig.getParam("VariantCallPosition_snp_seqpercent_xaxis_threshold", "ignore").intValue() / 100d; // Filters the reads to show based on a the threshold computed previously. for(int i=0; i<readCounts.length; i++) { if(readCounts[i] >= threshold && xMaxValue < readLengths[i]) { xMaxValue = readLengths[i]; } log.debug("Read Length: " + readLengths[i] + ", Num Reads: " + readCounts[i] + ", Min Accepted Length: " + threshold); } return xMaxValue+1; //this will be used for array sizes (so +1). } // @Override methods @Override public void processSequence(SAMRecord read) { } @Override public void processFile(SequenceFile file) { } @Override public void processAnnotationSet(AnnotationSet annotation) { } @Override public JPanel getResultsPanel() { // compute the totals variantCallDetection.computeTotals(); long totSNPs = variantCallDetection.getTotalMutations(), totBases = variantCallDetection.getTotal(); log.debug("Total SNPs: " + totSNPs + " ( " + totSNPs*100f/totBases + "% )"); JPanel resultsPanel = new JPanel(); // We do not need a BaseGroup here // These two arrays have same length. // first/second identify the first or second segments respectively. long[] totalPos = variantCallDetection.getTotalPos(); // initialise and configure the LineGraph // compute the maximum value for the X axis int maxX = computeXMaxValue(); String[] xCategories = new String[maxX]; // compute statistics from the FIRST segment data // We do not need a BaseGroup here long[] firstSNPPos = variantCallDetection.getFirstSNPPos(); dFirstSNPPos = new double[maxX]; for(int i=0; i<maxX && i<firstSNPPos.length; i++) { dFirstSNPPos[i]= (firstSNPPos[i] * 100d) / totalPos[i]; if(dFirstSNPPos[i] > firstMaxY) { firstMaxY = dFirstSNPPos[i]; } xCategories[i] = String.valueOf(i+1); } double[][] firstSNPData = new double [][] {dFirstSNPPos}; // compute statistics from the SECOND segment data if there are paired reads. if(variantCallDetection.existPairedReads()) { resultsPanel.setLayout(new GridLayout(2,1)); // We do not need a BaseGroup here long[] secondSNPPos = variantCallDetection.getSecondSNPPos(); dSecondSNPPos = new double[maxX]; for(int i=0; i<maxX && i<secondSNPPos.length; i++) { dSecondSNPPos[i]= (secondSNPPos[i] * 100d) / totalPos[i]; if(dSecondSNPPos[i] > secondMaxY) { secondMaxY = dSecondSNPPos[i]; } } double[][] secondSNPData = new double [][] {dSecondSNPPos}; String title = String.format("First Read SNP frequencies ( total SNPs: %.3f %% )", totSNPs*100.0f/totBases); // add 10% to the top for improving the visualisation of the plot. resultsPanel.add(new LineGraph(firstSNPData, 0d, firstMaxY+firstMaxY*0.1, "Position in Read (bp)", "Frequency (%)", snpName, xCategories, title)); String title2 = "Second Read SNP frequencies"; // add 10% to the top for improving the visualisation of the plot. resultsPanel.add(new LineGraph(secondSNPData, 0d, secondMaxY+secondMaxY*0.1, "Position in Read (bp)", "Frequency (%)", snpName, xCategories, title2)); } else { resultsPanel.setLayout(new GridLayout(1,1)); String title = String.format("Read SNP frequencies ( total SNPs: %.3f %% )", totSNPs*100.0f/totBases); // add 10% to the top for improving the visualisation of the plot. resultsPanel.add(new LineGraph(firstSNPData, 0d, firstMaxY+firstMaxY*0.1, "Position in Read (bp)", "Frequency (%)", snpName, xCategories, title)); } return resultsPanel; } @Override public String name() { return "SNP Frequencies"; } @Override public String description() { return "Looks at the SNP frequencies in the data"; } @Override public void reset() { } @Override public boolean raisesError() { double snpPercent = 100.0d*(variantCallDetection.getTotalMutations()) / variantCallDetection.getTotal(); if(snpPercent > ModuleConfig.getParam("VariantCallPosition_snp_threshold", "error").doubleValue()) return true; return false; } @Override public boolean raisesWarning() { double snpPercent = 100.0d*(variantCallDetection.getTotalMutations()) / variantCallDetection.getTotal(); if(snpPercent > ModuleConfig.getParam("VariantCallPosition_snp_threshold", "warn").doubleValue()) return true; return false; } @Override public boolean needsToSeeSequences() { return false; } @Override public boolean needsToSeeAnnotation() { return false; } @Override public boolean ignoreInReport() { if(ModuleConfig.getParam("SNPFrequencies", "ignore") > 0 || variantCallDetection == null) return true; // compute the totals variantCallDetection.computeTotals(); if(variantCallDetection.getTotalMutations() == 0) return true; return false; } @Override public void makeReport(HTMLReportArchive report) throws XMLStreamException, IOException { super.writeDefaultImage(report, "snp_frequencies.png", "SNP Frequencies", 800, 600); // write raw data in a report if(dFirstSNPPos == null) { return; } StringBuffer sb = report.dataDocument(); if(dSecondSNPPos != null) { sb.append("Position\t1st_read_SNP_freq\t2nd_read_SNP_freq\n"); for (int i=0;i<dFirstSNPPos.length;i++) { sb.append((i+1)); sb.append("\t"); sb.append(dFirstSNPPos[i]); sb.append("\t"); sb.append(dSecondSNPPos[i]); sb.append("\n"); } } else { sb.append("Position\tRead_SNP_freq\n"); for (int i=0;i<dFirstSNPPos.length;i++) { sb.append((i+1)); sb.append("\t"); sb.append(dFirstSNPPos[i]); sb.append("\n"); } } } }