/* * The MIT License * * Copyright (c) 2011 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.analysis; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.reference.ReferenceSequence; import htsjdk.samtools.util.CollectionUtil; import htsjdk.samtools.util.Histogram; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.OverlapDetector; import picard.PicardException; import picard.analysis.directed.RnaSeqMetricsCollector; import picard.annotation.Gene; import picard.annotation.GeneAnnotationReader; import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; import picard.cmdline.programgroups.Metrics; import picard.util.RExecutor; import java.io.File; import java.util.HashSet; import java.util.List; import java.util.Set; @CommandLineProgramProperties( usage = "Collect metrics about the alignment of RNA to various functional classes of loci in the genome:" + "coding, intronic, UTR, intergenic, ribosomal. Also determines strand-specificity for strand-specific libraries.", usageShort = "Produces RNA alignment metrics for a SAM or BAM file", programGroup = Metrics.class ) public class CollectRnaSeqMetrics extends SinglePassSamProgram { private static final Log LOG = Log.getInstance(CollectRnaSeqMetrics.class); @Option(doc="Gene annotations in refFlat form. Format described here: http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat") public File REF_FLAT; @Option(doc="Location of rRNA sequences in genome, in interval_list format. " + "If not specified no bases will be identified as being ribosomal. " + "Format described here: http://samtools.github.io/htsjdk/javadoc/htsjdk/htsjdk/samtools/util/IntervalList.html", optional = true) public File RIBOSOMAL_INTERVALS; @Option(shortName = "STRAND", doc="For strand-specific library prep. " + "For unpaired reads, use FIRST_READ_TRANSCRIPTION_STRAND if the reads are expected to be on the transcription strand.") public RnaSeqMetricsCollector.StrandSpecificity STRAND_SPECIFICITY; @Option(doc="When calculating coverage based values (e.g. CV of coverage) only use transcripts of this length or greater.") public int MINIMUM_LENGTH = 500; @Option(doc="The PDF file to write out a plot of normalized position vs. coverage.", shortName="CHART", optional = true) public File CHART_OUTPUT; @Option(doc="If a read maps to a sequence specified with this option, all the bases in the read are counted as ignored bases. " + "These reads are not counted as ") public Set<String> IGNORE_SEQUENCE = new HashSet<String>(); @Option(doc="This percentage of the length of a fragment must overlap one of the ribosomal intervals for a read or read pair by this must in order to be considered rRNA.") public double RRNA_FRAGMENT_PERCENTAGE = 0.8; @Option(shortName="LEVEL", doc="The level(s) at which to accumulate metrics. ") private Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS); private RnaSeqMetricsCollector collector; /** * A subtitle for the plot, usually corresponding to a library. */ private String plotSubtitle = ""; /** Required main method implementation. */ public static void main(final String[] argv) { new CollectRnaSeqMetrics().instanceMainWithExit(argv); } @Override protected void setup(final SAMFileHeader header, final File samFile) { if (CHART_OUTPUT != null) IOUtil.assertFileIsWritable(CHART_OUTPUT); final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadRefFlat(REF_FLAT, header.getSequenceDictionary()); LOG.info("Loaded " + geneOverlapDetector.getAll().size() + " genes."); final Long ribosomalBasesInitialValue = RIBOSOMAL_INTERVALS != null ? 0L : null; final OverlapDetector<Interval> ribosomalSequenceOverlapDetector = RnaSeqMetricsCollector.makeOverlapDetector(samFile, header, RIBOSOMAL_INTERVALS); final HashSet<Integer> ignoredSequenceIndices = RnaSeqMetricsCollector.makeIgnoredSequenceIndicesSet(header, IGNORE_SEQUENCE); collector = new RnaSeqMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), ribosomalBasesInitialValue, geneOverlapDetector, ribosomalSequenceOverlapDetector, ignoredSequenceIndices, MINIMUM_LENGTH, STRAND_SPECIFICITY, RRNA_FRAGMENT_PERCENTAGE, true); // If we're working with a single library, assign that library's name as a suffix to the plot title final List<SAMReadGroupRecord> readGroups = header.getReadGroups(); if (readGroups.size() == 1) { this.plotSubtitle = readGroups.get(0).getLibrary(); if (null == this.plotSubtitle) this.plotSubtitle = ""; } } @Override protected void acceptRead(final SAMRecord rec, final ReferenceSequence refSeq) { collector.acceptRecord(rec, refSeq); } @Override protected void finish() { collector.finish(); final MetricsFile<RnaSeqMetrics, Integer> file = getMetricsFile(); collector.addAllLevelsToFile(file); file.write(OUTPUT); boolean atLeastOneHistogram = false; for (Histogram<Integer> histo : file.getAllHistograms()) { atLeastOneHistogram = atLeastOneHistogram || !histo.isEmpty(); } // Generate the coverage by position plot if (CHART_OUTPUT != null && atLeastOneHistogram) { final int rResult = RExecutor.executeFromClasspath("picard/analysis/rnaSeqCoverage.R", OUTPUT.getAbsolutePath(), CHART_OUTPUT.getAbsolutePath(), INPUT.getName(), this.plotSubtitle); if (rResult != 0) { throw new PicardException("Problem invoking R to generate plot."); } } } }