/* * The MIT License * * Copyright (c) 2015 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.analysis; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordSetBuilder; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.SAMException; import htsjdk.variant.utils.SAMSequenceDictionaryExtractor; import org.testng.Assert; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; import picard.sam.SortSam; import static picard.analysis.GcBiasMetricsCollector.PerUnitGcBiasMetricsCollector.*; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Random; /** * Created by kbergin on 3/26/15 to test GcBias MultiLevel Collector. */ public class CollectGcBiasMetricsTest extends CommandLineProgramTest { private final static String sample1 = "TestSample1"; private final static String sample2 = "TestSample2"; private final static String readGroupId1 = "TestReadGroup1"; private final static String readGroupId2 = "TestReadGroup2"; private final static String readGroupId3 = "TestReadGroup3"; private final static String platform = "ILLUMINA"; private final static String library1 = "TestLibrary1"; private final static String library2 = "TestLibrary2"; private final static String library3 = "TestLibrary3"; private final static int LENGTH = 99; private final static int NUM_READS = 100; private final static String READ_NAME = "TESTBARCODE"; private final static File TEST_DIR = new File("testdata/picard/sam/CollectGcBiasMetrics/"); private final File dict = new File(TEST_DIR, "MNOheader.dict"); private final String REFERENCE_FILE_1 = "testdata/picard/metrics/chrMNO.reference.fasta"; private final String REFERENCE_FILE_2 = "testdata/picard/metrics/chrM.reference.fasta"; File tempSamFileChrM_O; File tempSamFileAllChr; SAMRecordSetBuilder setBuilder1 = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate); SAMRecordSetBuilder setBuilder2 = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate); SAMRecordSetBuilder setBuilder3 = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate); SAMRecordSetBuilder setBuilder4 = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate); SAMReadGroupRecord readGroupRecord1 = new SAMReadGroupRecord(readGroupId1); SAMReadGroupRecord readGroupRecord2 = new SAMReadGroupRecord(readGroupId2); SAMReadGroupRecord readGroupRecord3 = new SAMReadGroupRecord(readGroupId3); ///////////////////////////////////////////////////////////////////////////// //create two Sam Files. //One with different samples, read groups and libraries that overlap for runGcBiasMultiLevelTest. Reads will align to chrM and O, not N. //Second Sam file is one sample/read group/library but has reads that align to all three chr (M,N,O). For runWindowsComparisonTest. ///////////////////////////////////////////////////////////////////////////// @BeforeTest void setupBuilder() throws IOException { tempSamFileChrM_O = File.createTempFile("CollectGcBias", ".bam", TEST_DIR); tempSamFileAllChr = File.createTempFile("CollectGcBias", ".bam", TEST_DIR); tempSamFileChrM_O.deleteOnExit(); tempSamFileAllChr.deleteOnExit(); final File tempSamFileUnsorted = File.createTempFile("CollectGcBias", ".bam", TEST_DIR); tempSamFileUnsorted.deleteOnExit(); final SAMFileHeader header = new SAMFileHeader(); try { header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(dict)); header.setSortOrder(SAMFileHeader.SortOrder.unsorted); } catch (final SAMException e) { e.printStackTrace(); } //build different levels to put into the same bam file for testing multi level collection setupTest1(1, readGroupId1, readGroupRecord1, sample1, library1, header, setBuilder1); //Sample 1, Library 1, RG 1 setupTest1(2, readGroupId2, readGroupRecord2, sample1, library2, header, setBuilder2); //Sample 1, Library 2, RG 2 setupTest1(3, readGroupId3, readGroupRecord3, sample2, library3, header, setBuilder3); //Sample 2, Library 3, RG 3 //build one last readgroup for comparing that window count stays the same whether you use all contigs or not setupTest2(1, readGroupId1, readGroupRecord1, sample1, library1, header, setBuilder4); final List<SAMRecordSetBuilder> test1Builders = new ArrayList<SAMRecordSetBuilder>(); test1Builders.add(setBuilder1); test1Builders.add(setBuilder2); test1Builders.add(setBuilder3); final List<SAMRecordSetBuilder> test2Builders = new ArrayList<SAMRecordSetBuilder>(); test2Builders.add(setBuilder4); tempSamFileChrM_O = build(test1Builders, tempSamFileUnsorted, header); tempSamFileAllChr = build(test2Builders, tempSamFileUnsorted, header); } public String getCommandLineProgramName() { return CollectGcBiasMetrics.class.getSimpleName(); } ///////////////////////////////////////////////////////////////////////////// //This test checks the functionality of the gc bias code. Compares values from running a generated temporary Sam file through // CollectGcBiasMetrics to manually-calculated values. ///////////////////////////////////////////////////////////////////////////// @Test public void runGcBiasMultiLevelTest() throws IOException { final File outfile = File.createTempFile("test", ".gc_bias.summary_metrics"); final File detailsOutfile = File.createTempFile("test", ".gc_bias.detail_metrics"); outfile.deleteOnExit(); detailsOutfile.deleteOnExit(); runGcBias(tempSamFileChrM_O, REFERENCE_FILE_1, outfile, detailsOutfile, false); final MetricsFile<GcBiasSummaryMetrics, Comparable<?>> output = new MetricsFile<GcBiasSummaryMetrics, Comparable<?>>(); output.read(new FileReader(outfile)); for (final GcBiasSummaryMetrics metrics : output.getMetrics()) { if (metrics.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS)) { //ALL_READS level Assert.assertEquals(metrics.TOTAL_CLUSTERS, 300); Assert.assertEquals(metrics.ALIGNED_READS, 600); Assert.assertEquals(metrics.AT_DROPOUT, 21.624498); Assert.assertEquals(metrics.GC_DROPOUT, 3.525922); Assert.assertEquals(metrics.GC_NC_0_19, 0.0); Assert.assertEquals(metrics.GC_NC_20_39, 0.831374); Assert.assertEquals(metrics.GC_NC_40_59, 1.049672); Assert.assertEquals(metrics.GC_NC_60_79, 0.0); Assert.assertEquals(metrics.GC_NC_80_100, 0.0); } else if (metrics.READ_GROUP != null && metrics.READ_GROUP.equals("TestReadGroup1")) { //Library 1 Assert.assertEquals(metrics.TOTAL_CLUSTERS, 100); Assert.assertEquals(metrics.ALIGNED_READS, 200); Assert.assertEquals(metrics.AT_DROPOUT, 23.627784); Assert.assertEquals(metrics.GC_DROPOUT, 2.582877); Assert.assertEquals(metrics.GC_NC_0_19, 0.0); Assert.assertEquals(metrics.GC_NC_20_39, 0.793584); Assert.assertEquals(metrics.GC_NC_40_59, 1.060382); Assert.assertEquals(metrics.GC_NC_60_79, 0.0); Assert.assertEquals(metrics.GC_NC_80_100, 0.0); } else if (metrics.READ_GROUP != null && metrics.READ_GROUP.equals("TestReadGroup2")) {//Library 2 Assert.assertEquals(metrics.TOTAL_CLUSTERS, 100); Assert.assertEquals(metrics.ALIGNED_READS, 200); Assert.assertEquals(metrics.AT_DROPOUT, 23.784958); Assert.assertEquals(metrics.GC_DROPOUT, 4.025922); Assert.assertEquals(metrics.GC_NC_0_19, 0.0); Assert.assertEquals(metrics.GC_NC_20_39, 0.816258); Assert.assertEquals(metrics.GC_NC_40_59, 1.053956); Assert.assertEquals(metrics.GC_NC_60_79, 0.0); Assert.assertEquals(metrics.GC_NC_80_100, 0.0); } else if (metrics.READ_GROUP != null && metrics.READ_GROUP.equals("TestReadGroup3")) {//Library 3 Assert.assertEquals(metrics.TOTAL_CLUSTERS, 100); Assert.assertEquals(metrics.ALIGNED_READS, 200); Assert.assertEquals(metrics.AT_DROPOUT, 21.962578); Assert.assertEquals(metrics.GC_DROPOUT, 4.559328); Assert.assertEquals(metrics.GC_NC_0_19, 0.0); Assert.assertEquals(metrics.GC_NC_20_39, 0.88428); Assert.assertEquals(metrics.GC_NC_40_59, 1.034676); Assert.assertEquals(metrics.GC_NC_60_79, 0.0); Assert.assertEquals(metrics.GC_NC_80_100, 0.0); } else if (metrics.SAMPLE != null && metrics.SAMPLE.equals("TestSample1")) {//Library 1 and 2 Assert.assertEquals(metrics.TOTAL_CLUSTERS, 200); Assert.assertEquals(metrics.ALIGNED_READS, 400); Assert.assertEquals(metrics.AT_DROPOUT, 23.194597); Assert.assertEquals(metrics.GC_DROPOUT, 3.275922); Assert.assertEquals(metrics.GC_NC_0_19, 0.0); Assert.assertEquals(metrics.GC_NC_20_39, 0.804921); Assert.assertEquals(metrics.GC_NC_40_59, 1.057169); Assert.assertEquals(metrics.GC_NC_60_79, 0.0); Assert.assertEquals(metrics.GC_NC_80_100, 0.0); } else if (metrics.SAMPLE != null && metrics.SAMPLE.equals("TestSample2")) {//Library 3 Assert.assertEquals(metrics.TOTAL_CLUSTERS, 100); Assert.assertEquals(metrics.ALIGNED_READS, 200); Assert.assertEquals(metrics.AT_DROPOUT, 21.962578); Assert.assertEquals(metrics.GC_DROPOUT, 4.559328); Assert.assertEquals(metrics.GC_NC_0_19, 0.0); Assert.assertEquals(metrics.GC_NC_20_39, 0.88428); Assert.assertEquals(metrics.GC_NC_40_59, 1.034676); Assert.assertEquals(metrics.GC_NC_60_79, 0.0); Assert.assertEquals(metrics.GC_NC_80_100, 0.0); } else { Assert.fail("Unexpected metric: " + metrics); } } } ///////////////////////////////////////////////////////////////////////////// //Compare GcBiasDetailMetrics output file from test1 which only has reads that align to chrM and chrO, but not chrN (in the middle) to // a GcBiasDetailMetrics output file that has reads aligned to all three chromosomes in this reference file. The number of 100bp windows // found across the whole reference should be the same regardless of where records align. //This test ensures that there is not a bug in calculating the gc windows. ///////////////////////////////////////////////////////////////////////////// @Test public void runWindowsComparisonTest() throws IOException { final File outfile = File.createTempFile("test", ".gc_bias_summary_metrics"); final File allChrOutFile = File.createTempFile("testAllChr", ".gc_bias_summary_metrics"); final File detailsOutfile = File.createTempFile("test", ".gc_bias_detail_metrics"); final File allChrDetailsOutfile = File.createTempFile("testAllChrDetails", ".gc_bias_detail_metrics"); outfile.deleteOnExit(); allChrOutFile.deleteOnExit(); detailsOutfile.deleteOnExit(); allChrDetailsOutfile.deleteOnExit(); runGcBias(tempSamFileChrM_O, REFERENCE_FILE_1, outfile, detailsOutfile, false); runGcBias(tempSamFileAllChr, REFERENCE_FILE_1, allChrOutFile, allChrDetailsOutfile, false); final MetricsFile<GcBiasDetailMetrics, Comparable<?>> outputDetails = new MetricsFile<GcBiasDetailMetrics, Comparable<?>>(); outputDetails.read(new FileReader(detailsOutfile)); final List<GcBiasDetailMetrics> details = outputDetails.getMetrics(); final MetricsFile<GcBiasDetailMetrics, Comparable<?>> outputAllChrDetails = new MetricsFile<GcBiasDetailMetrics, Comparable<?>>(); outputAllChrDetails.read(new FileReader(allChrDetailsOutfile)); int i = 0; //Output for the two sam files are only the same for the "All Reads" level for (final GcBiasDetailMetrics metrics : outputAllChrDetails.getMetrics()) { if (metrics.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS)) { Assert.assertEquals(metrics.WINDOWS, details.get(i).WINDOWS); i++; } else {break;} } } ///////////////////////////////////////////////////////////////////////////// // Writes the setBuilders to a SAMFileWriter and sorts the sam. // Takes in a list of SAMRecordSetBuilders because of the multi-level collection: setBuilders cannot take in more than one read group // or library or sample, so there are separate ones for each type when testing multi-level collection. ///////////////////////////////////////////////////////////////////////////// public File build (final List<SAMRecordSetBuilder> setBuilder, final File unsortedSam, final SAMFileHeader header) throws IOException { final File sortedSam = File.createTempFile("CollectGcBias", ".bam", TEST_DIR); sortedSam.deleteOnExit(); final SAMFileWriter writer = new SAMFileWriterFactory() .setCreateIndex(true).makeBAMWriter(header, false, unsortedSam); for (final SAMRecordSetBuilder subSetBuilder : setBuilder) { for (final SAMRecord record : subSetBuilder) { writer.addAlignment(record); } } writer.close(); final SortSam sorter = new SortSam(); final String[] args = new String[] { "INPUT=" + unsortedSam.getAbsolutePath(), "OUTPUT=" + sortedSam.getAbsolutePath(), "SORT_ORDER=coordinate" }; sorter.instanceMain(args); return sortedSam; } ///////////////////////////////////////////////////////////////////////////// // Runs CollectGcBias with input Sam file and outputs details and summary files for truth assertion. ///////////////////////////////////////////////////////////////////////////// public void runGcBias (final File input, final String referenceFile, final File summaryOutfile, final File detailsOutfile, final boolean nonDups) throws IOException { final File pdf = File.createTempFile("test", ".pdf"); pdf.deleteOnExit(); final int windowSize = 100; final double minGenFraction = 1.0E-5; final boolean biSulfiteSeq = false; final boolean assumeSorted = false; final String[] args = new String[]{ "INPUT=" + input.getAbsolutePath(), "OUTPUT=" + detailsOutfile.getAbsolutePath(), "REFERENCE_SEQUENCE=" + referenceFile, "SUMMARY_OUTPUT=" + summaryOutfile.getAbsolutePath(), "CHART_OUTPUT=" + pdf.getAbsolutePath(), "SCAN_WINDOW_SIZE=" + windowSize, "MINIMUM_GENOME_FRACTION=" + minGenFraction, "IS_BISULFITE_SEQUENCED=" + biSulfiteSeq, "LEVEL=ALL_READS", "LEVEL=SAMPLE", "LEVEL=READ_GROUP", "ASSUME_SORTED=" + assumeSorted, "ALSO_IGNORE_DUPLICATES=" + nonDups }; runPicardCommandLine(args); } /** * Compares metric's results by summary files without duplicates. * @throws IOException */ @Test public void runNonDupsComparisonTest() throws IOException { final File inputFileWithDuplicates = new File("testdata/picard/metrics/chrMReads.sam"); final File detailsOutfile = File.createTempFile("test", ".gc_bias_detail_metrics"); final File summaryOutfile = File.createTempFile("test", ".gc_bias_summary_metrics"); detailsOutfile.deleteOnExit(); summaryOutfile.deleteOnExit(); runGcBias(inputFileWithDuplicates, REFERENCE_FILE_2, summaryOutfile, detailsOutfile, true); final MetricsFile<GcBiasSummaryMetrics, Comparable<?>> outputSummary = new MetricsFile<>(); outputSummary.read(new FileReader(summaryOutfile)); for (final GcBiasSummaryMetrics summary : outputSummary.getMetrics()) { if (summary.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS) && summary.READS_USED.equals(READS_USED_UNIQUE)) { //ALL_READS level for case without duplicates Assert.assertEquals(summary.TOTAL_CLUSTERS, 3); Assert.assertEquals(summary.ALIGNED_READS, 3); Assert.assertEquals(summary.AT_DROPOUT, 79.180328); Assert.assertEquals(summary.GC_DROPOUT, 12.28901); Assert.assertEquals(summary.GC_NC_0_19, 0.0); Assert.assertEquals(summary.GC_NC_20_39, 0.0); Assert.assertEquals(summary.GC_NC_40_59, 1.246783); Assert.assertEquals(summary.GC_NC_60_79, 0.0); Assert.assertEquals(summary.GC_NC_80_100, 0.0); } if (summary.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS) && summary.READS_USED.equals(READS_USED_ALL)) { //ALL_READS level Assert.assertEquals(summary.TOTAL_CLUSTERS, 5); Assert.assertEquals(summary.ALIGNED_READS, 5); Assert.assertEquals(summary.AT_DROPOUT, 79.180328); Assert.assertEquals(summary.GC_DROPOUT, 10.37037); Assert.assertEquals(summary.GC_NC_0_19, 0.0); Assert.assertEquals(summary.GC_NC_20_39, 0.0); Assert.assertEquals(summary.GC_NC_40_59, 1.246783); Assert.assertEquals(summary.GC_NC_60_79, 0.0); Assert.assertEquals(summary.GC_NC_80_100, 0.0); } } } /** * If SAM/BAM file with '*' in SEQ field omit this read. */ @Test public void runCheckingNoSEQTest() throws IOException { final File input = new File("testdata/picard/metrics/chrM_NO_SEQ.sam"); final File summaryOutfile = File.createTempFile("test", ".gc_bias.summary_metrics"); final File detailsOutfile = File.createTempFile("test", ".gc_bias.detail_metrics"); summaryOutfile.deleteOnExit(); detailsOutfile.deleteOnExit(); runGcBias(input, REFERENCE_FILE_2, summaryOutfile, detailsOutfile, false); final MetricsFile<GcBiasSummaryMetrics, Comparable<?>> output = new MetricsFile<>(); output.read(new FileReader(summaryOutfile)); for (final GcBiasSummaryMetrics metrics : output.getMetrics()) { if (metrics.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS)) { //ALL_READS level Assert.assertEquals(metrics.TOTAL_CLUSTERS, 0); Assert.assertEquals(metrics.ALIGNED_READS, 1); Assert.assertEquals(metrics.AT_DROPOUT, 78.682453); Assert.assertEquals(metrics.GC_DROPOUT, 14.693382); Assert.assertEquals(metrics.GC_NC_0_19, 0.0); Assert.assertEquals(metrics.GC_NC_20_39, 0.0); Assert.assertEquals(metrics.GC_NC_40_59, 1.246783); Assert.assertEquals(metrics.GC_NC_60_79, 0.0); Assert.assertEquals(metrics.GC_NC_80_100, 0.0); } } } ///////////////////////////////////////////////////////////////////////////// //Used to generate the Sam Record Sets with SamRecordSetBuilder.addPair(). //testNumber 1: runGcBiasMultiLevelTest, generates records aligning to chrM and chrO //testNumber 2: runWindowsComparisonTest, generates records aligning to chrM,N,O. ///////////////////////////////////////////////////////////////////////////// public void setupTest1(final int ID, final String readGroupId, final SAMReadGroupRecord readGroupRecord, final String sample, final String library, final SAMFileHeader header, final SAMRecordSetBuilder setBuilder) throws IOException { final String separator = ":"; final int contig1 = 0; final int contig2 = 1; readGroupRecord.setSample(sample); readGroupRecord.setPlatform(platform); readGroupRecord.setLibrary(library); readGroupRecord.setPlatformUnit(readGroupId); header.addReadGroup(readGroupRecord); setBuilder.setReadGroup(readGroupRecord); setBuilder.setUseNmFlag(true); setBuilder.setHeader(header); final int max = 800; final int min = 1; final Random rg = new Random(5); //add records that align to chrM and O but not N for (int i = 0; i < NUM_READS; i++) { final int start = rg.nextInt(max) + min; final String newReadName = READ_NAME + separator + ID + separator + i; if (i != NUM_READS - 1) { setBuilder.addPair(newReadName, contig1, start + ID, start + ID + LENGTH); } else { setBuilder.addPair(newReadName, contig2, start + ID, start + ID + LENGTH); } } } public void setupTest2(final int ID, final String readGroupId, final SAMReadGroupRecord readGroupRecord, final String sample, final String library, final SAMFileHeader header, final SAMRecordSetBuilder setBuilder) throws IOException { final String separator = ":"; final int contig1 = 0; final int contig2 = 1; final int contig3 = 2; readGroupRecord.setSample(sample); readGroupRecord.setPlatform(platform); readGroupRecord.setLibrary(library); readGroupRecord.setPlatformUnit(readGroupId); setBuilder.setReadGroup(readGroupRecord); setBuilder.setUseNmFlag(true); setBuilder.setHeader(header); final int max = 800; final int min = 1; final Random rg = new Random(5); //add records that align to all 3 chr in reference file for (int i = 0; i < NUM_READS; i++) { final int start = rg.nextInt(max) + min; final String newReadName = READ_NAME + separator + ID + separator + i; if (i<=NUM_READS/3) { setBuilder.addPair(newReadName, contig1, start + ID, start + ID + LENGTH); } else if (i< (NUM_READS - (NUM_READS/3))) { setBuilder.addPair(newReadName, contig2, start + ID, start + ID + LENGTH); } else { setBuilder.addPair(newReadName, contig3, start + ID, start + ID + LENGTH); } } } }