/**
* Copyright Copyright 2010-14 Simon Andrews
*
* This file is part of BamQC.
*
* BamQC is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* BamQC is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with BamQC; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* Changelog:
* - Piero Dalle Pezze: annotation, command, variant calls, splices, creation of new table having multilines.
* - Simon Andrews: Class creation.
*/
package uk.ac.babraham.BamQC.Modules;
import java.awt.BorderLayout;
import java.io.IOException;
import java.util.ArrayList;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTable;
import javax.swing.table.AbstractTableModel;
import javax.swing.table.TableModel;
import javax.xml.stream.XMLStreamException;
import net.sf.samtools.SAMRecord;
import uk.ac.babraham.BamQC.BamQCConfig;
import uk.ac.babraham.BamQC.DataTypes.Genome.AnnotationSet;
import uk.ac.babraham.BamQC.DataTypes.Genome.Chromosome;
import uk.ac.babraham.BamQC.Report.HTMLReportArchive;
import uk.ac.babraham.BamQC.Sequence.SequenceFile;
import uk.ac.babraham.BamQC.Utilities.MultiLineTableCellRenderer;
/**
* @author Simon Andrews
* @author Piero Dalle Pezze
*
*/
public class BasicStatistics extends AbstractQCModule {
private String filename = "";
private boolean headerParsed = false;
private String command = "";
private boolean hasAnnotation = false;
private String annotationFile = "";
private long featureTypeCount = 0;
private int chromosomeCount = 0;
private long actualCount = 0;
private long primaryCount = 0;
private long pairedCount = 0;
private long properPairCount = 0;
private long unmappedCount = 0;
private long duplicateCount = 0;
private long qcFailCount = 0;
private long singletonCount = 0;
private long totalSplicedReads = 0;
private long totalSkippedReads = 0;
private long totalReadsWithoutMDString = 0;
private long totalReadsWithoutCigarString = 0;
private long totalInconsistenReadsCigarMDString = 0;
private long variantCallDetectionTotalReads = 0;
private long totalSoftClips = 0;
private long totalInsertions = 0;
private long totalDeletions = 0;
private long totalMutations = 0;
private long totalBases = 0;
VariantCallDetection vcd = null;
private boolean genomeCoverage = false;
/**
* Constructor. Reuse of the computation provided by VariantCallDetection analysis.
*/
public BasicStatistics(VariantCallDetection vcd) {
super();
this.vcd = vcd;
}
@Override
public String description() {
return "Calculates some basic statistics about the file";
}
@Override
public void processAnnotationSet(AnnotationSet annotation) {
if (annotation.hasFeatures()) {
hasAnnotation = true;
if(annotation.getFile() != null) {
annotationFile = annotation.getFile().getName();
} else {
annotationFile = "Genome: " + BamQCConfig.getInstance().genome;
}
featureTypeCount = annotation.getAllFeatures().length;
}
chromosomeCount = annotation.chromosomeFactory().getAllChromosomes().length;
Chromosome[] chromosomes = annotation.chromosomeFactory().getAllChromosomes();
if(chromosomes != null && chromosomes.length > 0) {
for(int c = 0; c < chromosomes.length && !genomeCoverage; c++)
for(int i = 0; i< chromosomes[c].getBinCountData().length && !genomeCoverage; i++)
if(chromosomes[c].getBinCountData().length > 1)
genomeCoverage = true;
}
}
@Override
public boolean needsToSeeSequences() {
return true;
}
@Override
public boolean needsToSeeAnnotation() {
return true;
}
@Override
public JPanel getResultsPanel() {
JPanel returnPanel = new JPanel();
returnPanel.setLayout(new BorderLayout());
returnPanel.add(new JLabel("Basic Statistics",JLabel.CENTER),BorderLayout.NORTH);
TableModel model = new ResultsTable();
JTable table = new JTable(model);
// add multi line per cell renderer to this table.
table.setDefaultRenderer(String.class, new MultiLineTableCellRenderer());
returnPanel.add(new JScrollPane(table),BorderLayout.CENTER);
return returnPanel;
}
@Override
public void reset () {
}
@Override
public String name() {
return "Basic Statistics";
}
@Override
public void processSequence(SAMRecord sequence) {
// extract the method used for generating the SAM/BAM file if present in the header file.
if(!headerParsed) {
String fullHeader = sequence.getHeader().getTextHeader();
if(fullHeader != null) {
String[] headerLines = fullHeader.split("@");
for(int i=0; i<headerLines.length; i++) {
if(headerLines[i].startsWith("PG")) {
command += headerLines[i].replace("PG\t", "").replace('\t', ' ');
}
}
}
headerParsed = true;
}
actualCount++;
if (!sequence.isSecondaryOrSupplementary()) {
++primaryCount;
}
if (sequence.getReadPairedFlag()) {
pairedCount++;
if (sequence.getProperPairFlag()) properPairCount++;
if (sequence.getMateUnmappedFlag() && ! sequence.getReadUnmappedFlag()) singletonCount++;
}
if (sequence.getReadUnmappedFlag()) unmappedCount++;
if (sequence.getReadFailsVendorQualityCheckFlag()) qcFailCount++;
if (sequence.getDuplicateReadFlag()) duplicateCount++;
}
@Override
public void processFile (SequenceFile file) {
this.filename = file.name();
}
@Override
public boolean raisesError() {
return false;
}
@Override
public boolean raisesWarning() {
return false;
}
@Override
public boolean ignoreInReport () {
return false;
}
@Override
public void makeReport(HTMLReportArchive report) throws XMLStreamException,IOException {
// note: the following method generates both the HTML code and the text report.
// Therefore no text report code is required here.
super.writeTable(report, new ResultsTable());
}
private String formatPercentage(long a, long b) {
return String.format("%6.3f", 100 * a / (double) b);
}
/**
* The Table containing the statistics and additional information about the SAM/Bam file.
* @author Piero Dalle Pezze
*/
private class ResultsTable extends AbstractTableModel {
private static final long serialVersionUID = 4444508216021418468L;
private ArrayList<String> rowNames = new ArrayList<String>();
private ArrayList<String> rowValues = new ArrayList<String>();
public ResultsTable() {
super();
extractVariantCallStatistics();
rowNames.add("File name");
rowValues.add(filename);
if(!command.equals("")) {
rowNames.add("Command generating Sam/Bam file");
rowValues.add(command);
}
rowNames.add("Has annotation");
if(hasAnnotation) {
rowValues.add("Yes");
rowNames.add("Annotation file name");
rowValues.add(annotationFile);
rowNames.add("Total feature types");
rowValues.add("" + featureTypeCount);
}
else {
rowValues.add("No");
}
rowNames.add("Total chromosomes");
rowValues.add("" + chromosomeCount);
// Genome Coverage
rowNames.add("Sufficient genome coverage");
if(genomeCoverage) {
rowValues.add("Yes");
} else {
rowValues.add("No");
}
rowNames.add("Total sequences");
rowValues.add("" + actualCount);
rowNames.add("Percent primary alignments");
rowValues.add(formatPercentage(primaryCount, actualCount));
rowNames.add("Percent sequences failed vendor QC");
rowValues.add(formatPercentage(qcFailCount, actualCount));
rowNames.add("Percent marked duplicate");
rowValues.add(formatPercentage(duplicateCount, actualCount));
rowNames.add("Percent sequences spliced");
rowValues.add(formatPercentage(totalSplicedReads, variantCallDetectionTotalReads));
rowNames.add("Percent sequences paired");
rowValues.add(formatPercentage(pairedCount, actualCount));
if(pairedCount > 0) {
rowNames.add("Percent sequences properly paired");
rowValues.add(formatPercentage(properPairCount, actualCount));
rowNames.add("Percent singletons");
rowValues.add(formatPercentage(singletonCount, actualCount));
}
rowNames.add("Percent sequences unmapped");
rowValues.add(formatPercentage(unmappedCount, actualCount));
// we do not consider here the unmapped sequences which are naturally discarded.
rowNames.add("Percent sequences without MD Tag String");
rowValues.add(formatPercentage(totalReadsWithoutMDString, variantCallDetectionTotalReads));
rowNames.add("Percent sequences without Cigar String");
rowValues.add(formatPercentage(totalReadsWithoutCigarString, variantCallDetectionTotalReads));
rowNames.add("Percent sequences with inconsistent Cigar or MD Tag Strings");
rowValues.add(formatPercentage(totalInconsistenReadsCigarMDString, variantCallDetectionTotalReads));
rowNames.add("Percent sequences discarded for variant call detection");
rowValues.add(formatPercentage(totalSkippedReads, variantCallDetectionTotalReads));
if(totalSkippedReads < variantCallDetectionTotalReads) {
rowNames.add("Percent indels");
if(totalBases > 0) {
rowValues.add(formatPercentage(totalInsertions+totalDeletions, totalBases) +
" (Ins:" + formatPercentage(totalInsertions, totalBases) +
"; Del:" + formatPercentage(totalDeletions, totalBases) + ")");
} else {
rowValues.add("NaN");
}
rowNames.add("Percent SNPs");
if(totalBases > 0) {
rowValues.add(formatPercentage(totalMutations, totalBases));
} else {
rowValues.add("NaN");
}
rowNames.add("Percent soft clips");
if(totalBases > 0) {
rowValues.add(formatPercentage(totalSoftClips, totalBases));
} else {
rowValues.add("NaN");
}
}
}
// Sequence - Count - Percentage
@Override
public int getColumnCount() {
return 2;
}
@Override
public int getRowCount() {
return rowNames.size();
}
@Override
public Object getValueAt(int rowIndex, int columnIndex) {
if(columnIndex == 0)
return rowNames.get(rowIndex);
else if(columnIndex == 1)
return rowValues.get(rowIndex);
else
return null;
}
@Override
public String getColumnName (int columnIndex) {
switch (columnIndex) {
case 0: return "Measure";
case 1: return "Value";
}
return null;
}
@Override
public Class<?> getColumnClass (int columnIndex) {
return String.class;
}
@Override
public boolean isCellEditable(int row, int column) {
return false;
}
}
private void extractVariantCallStatistics() {
// extract these results
// Variant Call Detection Summaries
if(vcd != null) {
// compute the totals
vcd.computeTotals();
totalSplicedReads = vcd.getTotalSplicedReads();
totalSkippedReads = vcd.getSkippedReads();
totalReadsWithoutMDString = vcd.getReadWithoutMDString();
totalReadsWithoutCigarString = vcd.getReadWithoutCigarString();
totalInconsistenReadsCigarMDString = vcd.getInconsistentCigarMDStrings();
variantCallDetectionTotalReads = vcd.getTotalReads();
totalSoftClips = vcd.getTotalSoftClips();
totalInsertions = vcd.getTotalInsertions();
totalDeletions = vcd.getTotalDeletions();
totalMutations = vcd.getTotalMutations();
totalBases = vcd.getTotal();
}
}
public String getFilename() {
return filename;
}
public boolean isHeaderParsed() {
return headerParsed;
}
public String getCommand() {
return command;
}
public boolean isHasAnnotation() {
return hasAnnotation;
}
public String getAnnotationFile() {
return annotationFile;
}
public long getFeatureTypeCount() {
return featureTypeCount;
}
public int getChromosomeCount() {
return chromosomeCount;
}
public long getActualCount() {
return actualCount;
}
public long getPrimaryCount() {
return primaryCount;
}
public long getPairedCount() {
return pairedCount;
}
public long getProperPairCount() {
return properPairCount;
}
public long getUnmappedCount() {
return unmappedCount;
}
public long getDuplicateCount() {
return duplicateCount;
}
public long getQcFailCount() {
return qcFailCount;
}
public long getSingletonCount() {
return singletonCount;
}
public long getTotalSplicedReads() {
return totalSplicedReads;
}
public long getTotalSkippedReads() {
return totalSkippedReads;
}
public long getVariantCallDetectionTotalReads() {
return variantCallDetectionTotalReads;
}
public long getTotalInsertions() {
return totalInsertions;
}
public long getTotalDeletions() {
return totalDeletions;
}
public long getTotalMutations() {
return totalMutations;
}
public long getTotalBases() {
return totalBases;
}
}