/**
* Copyright Copyright 2015 Simon Andrews
*
* This file is part of BamQC.
*
* BamQC is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* BamQC is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with BamQC; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* Changelog:
* - Piero Dalle Pezze: Class creation.
*/
package uk.ac.babraham.BamQC.Modules;
import java.awt.GridLayout;
import java.io.IOException;
import java.util.HashMap;
import javax.swing.JPanel;
import javax.xml.stream.XMLStreamException;
import org.apache.log4j.Logger;
import net.sf.samtools.SAMRecord;
import uk.ac.babraham.BamQC.DataTypes.Genome.AnnotationSet;
import uk.ac.babraham.BamQC.Report.HTMLReportArchive;
import uk.ac.babraham.BamQC.Sequence.SequenceFile;
import uk.ac.babraham.BamQC.Graphs.LineGraph;
/**
* This class re-uses the computation collected by the class VariantCallDetection
* and plots the Indel Frequencies.
* @author Piero Dalle Pezze
*/
public class IndelFrequencies extends AbstractQCModule {
private static Logger log = Logger.getLogger(IndelFrequencies.class);
private String[] indelNames = {"Deletions", "Insertions"};
double[] dFirstDeletionPos = null;
double[] dFirstInsertionPos = null;
double[] dSecondDeletionPos = null;
double[] dSecondInsertionPos = null;
// threshold for the plot y axis.
private double firstMaxY=0.0d;
private double secondMaxY=0.0d;
// The analysis collecting all the results.
VariantCallDetection variantCallDetection = null;
// Constructors
// /**
// * Default constructor
// */
// public IndelFrequencies() { }
/**
* Constructor. Reuse of the computation provided by VariantCallDetection analysis.
*/
public IndelFrequencies(VariantCallDetection vcd) {
super();
variantCallDetection = vcd;
}
// Private methods
/**
* Computes the maximum value for the x axis.
* @return xMaxValue
*/
private int computeXMaxValue() {
HashMap<Integer, Long> hm = variantCallDetection.getContributingReadsPerPos();
Integer[] readLengths = hm.keySet().toArray(new Integer[0]);
Long[] readCounts = hm.values().toArray(new Long[0]);
int xMaxValue = 5; // sequences long at least 5.
long moreFrequentReadLength = 0;
// Computes a variable threshold depending on the read length distribution of read library
for(int i=0; i<readCounts.length; i++) {
if(readCounts[i] > moreFrequentReadLength) {
moreFrequentReadLength = readCounts[i];
}
}
double threshold = moreFrequentReadLength * ModuleConfig.getParam("VariantCallPosition_indel_seqpercent_xaxis_threshold", "ignore").intValue() / 100d;
// Filters the reads to show based on a the threshold computed previously.
for(int i=0; i<readCounts.length; i++) {
if(readCounts[i] >= threshold && xMaxValue < readLengths[i]) {
xMaxValue = readLengths[i];
}
log.debug("Read Length: " + readLengths[i] + ", Num Reads: " + readCounts[i] + ", Min Accepted Length: " + threshold);
}
return xMaxValue+1; //this will be used for array sizes (so +1).
}
// @Override methods
@Override
public void processSequence(SAMRecord read) { }
@Override
public void processFile(SequenceFile file) { }
@Override
public void processAnnotationSet(AnnotationSet annotation) {
}
@Override
public JPanel getResultsPanel() {
// compute the totals
variantCallDetection.computeTotals();
long totDel = variantCallDetection.getTotalDeletions(),
totIns = variantCallDetection.getTotalInsertions(),
totBases = variantCallDetection.getTotal();
log.debug("Total deletions: " + totDel + " ( " + totDel*100f/totBases + "% )");
log.debug("Total insertions: " + totIns + " ( " + totIns*100f/totBases + "% )");
log.debug("Skipped reads: " + variantCallDetection.getSkippedReads() + " ( "+ (variantCallDetection.getSkippedReads()*100.0f)/variantCallDetection.getTotalReads() + "% )");
JPanel resultsPanel = new JPanel();
// We do not need a BaseGroup here
// These two arrays have same length.
// first/second identify the first or second segments respectively.
long[] totalPos = variantCallDetection.getTotalPos();
// initialise and configure the LineGraph
// compute the maximum value for the X axis
int maxX = computeXMaxValue();
String[] xCategories = new String[maxX];
// compute statistics from the FIRST segment data
long[] firstDeletionPos = variantCallDetection.getFirstDeletionPos();
long[] firstInsertionPos = variantCallDetection.getFirstInsertionPos();
dFirstDeletionPos = new double[maxX];
dFirstInsertionPos = new double[maxX];
for(int i=0; i<maxX && i<firstDeletionPos.length; i++) {
dFirstDeletionPos[i]= (firstDeletionPos[i] * 100d) / totalPos[i];
dFirstInsertionPos[i]= (firstInsertionPos[i] * 100d) / totalPos[i];
if(dFirstDeletionPos[i] > firstMaxY) { firstMaxY = dFirstDeletionPos[i]; }
if(dFirstInsertionPos[i] > firstMaxY) { firstMaxY = dFirstInsertionPos[i]; }
xCategories[i] = String.valueOf(i+1);
}
double[][] firstIndelData = new double [][] {dFirstDeletionPos, dFirstInsertionPos};
// compute statistics from the SECOND segment data if there are paired reads.
if(variantCallDetection.existPairedReads()) {
resultsPanel.setLayout(new GridLayout(2,1));
long[] secondDeletionPos = variantCallDetection.getSecondDeletionPos();
long[] secondInsertionPos = variantCallDetection.getSecondInsertionPos();
dSecondDeletionPos = new double[maxX];
dSecondInsertionPos = new double[maxX];
for(int i=0; i<maxX && i<secondDeletionPos.length; i++) {
dSecondDeletionPos[i]= (secondDeletionPos[i] * 100d) / totalPos[i];
dSecondInsertionPos[i]= (secondInsertionPos[i] * 100d) / totalPos[i];
if(dSecondDeletionPos[i] > secondMaxY) { secondMaxY = dSecondDeletionPos[i]; }
if(dSecondInsertionPos[i] > secondMaxY) { secondMaxY = dSecondInsertionPos[i]; }
}
double[][] secondIndelData = new double [][] {dSecondDeletionPos, dSecondInsertionPos};
String title = String.format("First Read Indel Frequencies ( total deletions: %.3f %%, total insertions: %.3f %% )",
totDel*100.0f/totBases, totIns*100.0f/totBases);
// add 10% to the top for improving the visualisation of the plot.
resultsPanel.add(new LineGraph(firstIndelData, 0d, firstMaxY+firstMaxY*0.1, "Position in Read (bp)", "Frequency (%)", indelNames, xCategories, title));
String title2 = "Second Read Indel Frequencies";
// add 10% to the top for improving the visualisation of the plot.
resultsPanel.add(new LineGraph(secondIndelData, 0d, secondMaxY+secondMaxY*0.1, "Position in Read (bp)", "Frequency (%)", indelNames, xCategories, title2));
} else {
resultsPanel.setLayout(new GridLayout(1,1));
String title = String.format("Read Indel Frequencies ( total deletions: %.3f %%, total insertions: %.3f %% )",
totDel*100.0f/totBases, totIns*100.0f/totBases);
// add 10% to the top for improving the visualisation of the plot.
resultsPanel.add(new LineGraph(firstIndelData, 0d, firstMaxY+firstMaxY*0.1, "Position in Read (bp)", "Frequency (%)", indelNames, xCategories, title));
}
return resultsPanel;
}
@Override
public String name() {
return "Indel Frequencies";
}
@Override
public String description() {
return "Looks at the indel frequencies in the data";
}
@Override
public void reset() { }
@Override
public boolean raisesError() {
double indelPercent = 100.0d*(variantCallDetection.getTotalDeletions() + variantCallDetection.getTotalInsertions() )
/ variantCallDetection.getTotal();
if(indelPercent > ModuleConfig.getParam("VariantCallPosition_indel_threshold", "error").doubleValue())
return true;
return false;
}
@Override
public boolean raisesWarning() {
double indelPercent = 100.0d*(variantCallDetection.getTotalDeletions() + variantCallDetection.getTotalInsertions() )
/ variantCallDetection.getTotal();
if(indelPercent > ModuleConfig.getParam("VariantCallPosition_indel_threshold", "warn").doubleValue())
return true;
return false;
}
@Override
public boolean needsToSeeSequences() {
return false;
}
@Override
public boolean needsToSeeAnnotation() {
return false;
}
@Override
public boolean ignoreInReport() {
if(ModuleConfig.getParam("IndelFrequencies", "ignore") > 0 || variantCallDetection == null)
return true;
// compute the totals
variantCallDetection.computeTotals();
if(variantCallDetection.getTotalDeletions() == 0 && variantCallDetection.getTotalInsertions() == 0)
return true;
return false;
}
@Override
public void makeReport(HTMLReportArchive report) throws XMLStreamException, IOException {
super.writeDefaultImage(report, "indel_frequencies.png", "Indel Frequencies", 800, 600);
// write raw data in a report
if(dFirstDeletionPos == null) { return; }
StringBuffer sb = report.dataDocument();
if(dSecondInsertionPos != null) {
sb.append("Position\t1st_read_del_freq\t1st_read_ins_freq\t2nd_read_del_freq\t2nd_read_ins_freq\n");
for (int i=0;i<dFirstInsertionPos.length;i++) {
sb.append((i+1));
sb.append("\t");
sb.append(dFirstDeletionPos[i]);
sb.append("\t");
sb.append(dFirstInsertionPos[i]);
sb.append("\t");
sb.append(dSecondDeletionPos[i]);
sb.append("\t");
sb.append(dSecondInsertionPos[i]);
sb.append("\n");
}
} else {
sb.append("Position\tRead_del_freq\tRead_ins_freq\n");
for (int i=0;i<dFirstInsertionPos.length;i++) {
sb.append((i+1));
sb.append("\t");
sb.append(dFirstDeletionPos[i]);
sb.append("\t");
sb.append(dFirstInsertionPos[i]);
sb.append("\n");
}
}
}
}