/* * Changelog: * - Piero Dalle Pezze: Added plot and reports. * - Bart Ailey: Class creation. */ package uk.ac.babraham.BamQC.Modules; import java.io.IOException; import java.util.ArrayList; import java.util.List; import javax.swing.JPanel; import javax.xml.stream.XMLStreamException; import org.apache.log4j.Logger; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import uk.ac.babraham.BamQC.DataTypes.Genome.AnnotationSet; import uk.ac.babraham.BamQC.Graphs.BarGraph; import uk.ac.babraham.BamQC.Report.HTMLReportArchive; import uk.ac.babraham.BamQC.Sequence.SequenceFile; import uk.ac.babraham.BamQC.Utilities.CalculateDistribution; /** * @author Bart Ailey * @author Piero Dalle Pezze * */ public class RpkmReference extends AbstractQCModule { public final static int BIN_SIZE = ModuleConfig.getParam("RpkmReference_bin_size", "ignore").intValue(); private static final int MEGABASE = 1000000; private static final int KILOBASE = 1000; private static Logger log = Logger.getLogger(RpkmReference.class); private boolean raiseError = false; private boolean raiseWarning = false; private int binNumber = 0; private double[] coverage; private double[] distributionDouble = null; private double aboveMaxThreshold = ModuleConfig.getParam("RpkmReference_max_size", "ignore").intValue(); private double [] graphCounts = null; private String [] xCategories = null; private ArrayList<Long> sequenceStarts = new ArrayList<Long>(); private long binNucleotides = MEGABASE; private int readNumber = 0; private int errorReads = 0; private double maxCoverage; private boolean isBinNucleotidesSet = false; public RpkmReference() { // TODO Auto-generated constructor stub } @Override public void reset() { raiseError = false; raiseWarning = false; readNumber = 0; errorReads = 0; } private void setBinNumber(SAMSequenceDictionary samSequenceDictionary) { List<SAMSequenceRecord> samSequenceRecords = samSequenceDictionary.getSequences(); long totalNucleotideNumber = 0; for (SAMSequenceRecord samSequenceRecord : samSequenceRecords) { sequenceStarts.add(totalNucleotideNumber); totalNucleotideNumber += samSequenceRecord.getSequenceLength(); log.debug(String.format("%s sequence length = %d, total = %d", samSequenceRecord.getSequenceName(), samSequenceRecord.getSequenceLength(), totalNucleotideNumber)); } binNumber = (int) (totalNucleotideNumber / MEGABASE); binNumber++; coverage = new double[binNumber]; log.debug(String.format("%d / %d = %d", totalNucleotideNumber, binNumber, binNucleotides)); isBinNucleotidesSet = true; } private void recordCoverage(long alignmentStartAbsolute, long alignmentEndAbsolute) { int startIndex = (int) (alignmentStartAbsolute / binNucleotides); int endIndex = (int) (alignmentEndAbsolute / binNucleotides); int index = startIndex; log.debug(String.format("startIndex %d endIndex %d", startIndex, endIndex)); while (index <= endIndex) { long binStart = index * binNucleotides; long binEnd = (index + 1) * binNucleotides; long start = alignmentStartAbsolute > binStart ? alignmentStartAbsolute : binStart; long end = alignmentEndAbsolute > binEnd ? binEnd : alignmentEndAbsolute; double length = end - start; log.debug(String.format("binStart %d binEnd %d, start = %d, end %d, length = %d", binStart, binEnd, start, end, (end - start))); log.debug("index = " + index); double binCoverage = length / binNucleotides; coverage[index] += binCoverage; if (coverage[index] > maxCoverage) maxCoverage = coverage[index]; log.debug(String.format("Start %d - End %d, length %d, index %d, binCoverage %f, ", alignmentStartAbsolute, alignmentEndAbsolute, (end - start), index, binCoverage, coverage[index])); if (binCoverage < 0.0) throw new RuntimeException("negative binCoverage"); index++; } } @Override public void processSequence(SAMRecord read) { SAMFileHeader header = read.getHeader(); SAMSequenceDictionary samSequenceDictionary = header.getSequenceDictionary(); int referenceIndex = read.getReferenceIndex(); long alignmentStart = read.getAlignmentStart(); long alignmentEnd = read.getAlignmentEnd(); log.debug("header = " + header); log.debug("referenceIndex = " + referenceIndex); readNumber++; if (referenceIndex > -1) { if (!isBinNucleotidesSet) { setBinNumber(samSequenceDictionary); } if (alignmentEnd > alignmentStart) { long referenceStart = sequenceStarts.get(referenceIndex); long alignmentStartAbsolute = alignmentStart + referenceStart; long alignmentEndAbsolute = alignmentEnd + referenceStart; recordCoverage(alignmentStartAbsolute, alignmentEndAbsolute); } else { errorReads++; } } } public double[] getCoverage() { return coverage; } @Override public void processFile(SequenceFile file) { } @Override public void processAnnotationSet(AnnotationSet annotation) { } @Override public JPanel getResultsPanel() { CalculateDistribution cd = new CalculateDistribution(coverage, aboveMaxThreshold, BIN_SIZE); graphCounts = cd.getGraphCounts(); xCategories = cd.getXCategories(); //double max = cd.getMax(); distributionDouble = cd.getDistributionDouble(); String title = String.format("Reads per kB per MB"); String xLabel = "Bases bp"; String yLabel = "Reads"; double min=Double.MAX_VALUE, max=Double.MIN_VALUE; for(int i=0; i<graphCounts.length; i++) { if(min>graphCounts[i]) min = graphCounts[i]; else if(max<graphCounts[i]) max = graphCounts[i]; } return new BarGraph(graphCounts, 0.0, max, xLabel, yLabel, xCategories, title); // int[] xCategories = new int[coverage.length]; // for(int i=0; i<coverage.length; i++) { // if(min>coverage[i]) // min = coverage[i]; // else if(max<coverage[i]) // max = coverage[i]; // xCategories[i] = i; // } // return new BarGraph(coverage, min, max, xLabel, yLabel, xCategories, title); } @Override public String name() { return "Reads per kB per MB"; } @Override public String description() { return "Reads per kilobase per megabase"; } @Override public boolean raisesError() { return raiseError; } @Override public boolean raisesWarning() { return raiseWarning; } @Override public boolean needsToSeeSequences() { return true; } @Override public boolean needsToSeeAnnotation() { return false; } @Override public boolean ignoreInReport() { if(ModuleConfig.getParam("RpkmReference", "ignore") > 0 || coverage == null || coverage.length==0) return true; return false; } @Override public void makeReport(HTMLReportArchive report) throws XMLStreamException, IOException { // TODO Auto-generated method stub } }