package picard.analysis; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordSetBuilder; import htsjdk.samtools.SAMTextHeaderCodec; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.util.BufferedLineReader; import org.testng.Assert; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; import picard.sam.SortSam; import static picard.analysis.GcBiasMetricsCollector.PerUnitGcBiasMetricsCollector.*; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.Random; /** * Tests the two default "programs" that have tests in CollectMultipleMetrics * * * @author Yossi farjoun */ public class CollectMultipleMetricsTest extends CommandLineProgramTest { private static final File TEST_DATA_DIR = new File("testdata/picard/sam"); public String getCommandLineProgramName() { return CollectMultipleMetrics.class.getSimpleName(); } @Test public void testAlignmentSummaryViaMultipleMetrics() throws IOException { final File input = new File(TEST_DATA_DIR, "summary_alignment_stats_test.sam"); final File reference = new File(TEST_DATA_DIR, "summary_alignment_stats_test.fasta"); final File outfile = File.createTempFile("alignmentMetrics", ""); outfile.deleteOnExit(); final String[] args = new String[] { "INPUT=" + input.getAbsolutePath(), "OUTPUT=" + outfile.getAbsolutePath(), "REFERENCE_SEQUENCE=" + reference.getAbsolutePath(), "METRIC_ACCUMULATION_LEVEL="+MetricAccumulationLevel.ALL_READS.name(), "PROGRAM=null", "PROGRAM="+CollectMultipleMetrics.Program.CollectAlignmentSummaryMetrics.name(), "PROGRAM="+CollectMultipleMetrics.Program.CollectInsertSizeMetrics.name() }; Assert.assertEquals(runPicardCommandLine(args), 0); final MetricsFile<AlignmentSummaryMetrics, Comparable<?>> output = new MetricsFile<AlignmentSummaryMetrics, Comparable<?>>(); output.read(new FileReader(outfile + ".alignment_summary_metrics")); for (final AlignmentSummaryMetrics metrics : output.getMetrics()) { Assert.assertEquals(metrics.MEAN_READ_LENGTH, 101.0); switch (metrics.CATEGORY) { case FIRST_OF_PAIR: Assert.assertEquals(metrics.TOTAL_READS, 9); Assert.assertEquals(metrics.PF_READS, 7); Assert.assertEquals(metrics.PF_NOISE_READS, 1); Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 3); Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 59); Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 19.0); Assert.assertEquals(metrics.PF_ALIGNED_BASES, 303); Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*58D/303D*/0.191419); Assert.assertEquals(metrics.BAD_CYCLES, 19); break; case SECOND_OF_PAIR: Assert.assertEquals(metrics.TOTAL_READS, 9); Assert.assertEquals(metrics.PF_READS, 9); Assert.assertEquals(metrics.PF_NOISE_READS, 1); Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 7); Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 239); Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 3.0); Assert.assertEquals(metrics.PF_ALIGNED_BASES, 707); Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*19D/707D*/0.026874); Assert.assertEquals(metrics.BAD_CYCLES, 3); break; case PAIR: Assert.assertEquals(metrics.TOTAL_READS, 18); Assert.assertEquals(metrics.PF_READS, 16); Assert.assertEquals(metrics.PF_NOISE_READS, 2); Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 10); Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 298); Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 3.0); Assert.assertEquals(metrics.PF_ALIGNED_BASES, 1010); Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*77D/1010D*/0.076238); Assert.assertEquals(metrics.BAD_CYCLES, 22); break; case UNPAIRED: default: Assert.fail("Data does not contain this category: " + metrics.CATEGORY); } } } @Test public void testInsertSize() throws IOException { final File input = new File(TEST_DATA_DIR, "insert_size_metrics_test.sam"); final File outfile = File.createTempFile("test", ""); final File reference = new File(TEST_DATA_DIR, "summary_alignment_stats_test.fasta"); final File pdf = File.createTempFile("test", ".pdf"); outfile.deleteOnExit(); pdf.deleteOnExit(); final String[] args = new String[] { "INPUT=" + input.getAbsolutePath(), "OUTPUT=" + outfile.getAbsolutePath(), "REFERENCE_SEQUENCE=" + reference.getAbsolutePath(), "METRIC_ACCUMULATION_LEVEL="+MetricAccumulationLevel.ALL_READS.name(), "PROGRAM=null", "PROGRAM="+CollectMultipleMetrics.Program.CollectAlignmentSummaryMetrics.name(), "PROGRAM="+CollectMultipleMetrics.Program.CollectInsertSizeMetrics.name() }; Assert.assertEquals(runPicardCommandLine(args), 0); final MetricsFile<InsertSizeMetrics, Comparable<?>> output = new MetricsFile<InsertSizeMetrics, Comparable<?>>(); output.read(new FileReader(outfile + ".insert_size_metrics")); for (final InsertSizeMetrics metrics : output.getMetrics()) { Assert.assertEquals(metrics.PAIR_ORIENTATION.name(), "FR"); if (metrics.LIBRARY==null) { // SAMPLE or ALL_READS level Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 41); Assert.assertEquals(metrics.MIN_INSERT_SIZE, 36); Assert.assertEquals(metrics.MAX_INSERT_SIZE, 45); Assert.assertEquals(metrics.READ_PAIRS, 13); Assert.assertEquals(metrics.WIDTH_OF_10_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_20_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_30_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_40_PERCENT, 7); Assert.assertEquals(metrics.WIDTH_OF_50_PERCENT, 7); Assert.assertEquals(metrics.WIDTH_OF_60_PERCENT, 7); Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 9); Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 11); Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 11); Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 11); } else if (metrics.LIBRARY.equals("Solexa-41753")) { // one LIBRARY and one READ_GROUP Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 44); Assert.assertEquals(metrics.MIN_INSERT_SIZE, 44); Assert.assertEquals(metrics.MAX_INSERT_SIZE, 44); Assert.assertEquals(metrics.READ_PAIRS, 2); Assert.assertEquals(metrics.WIDTH_OF_10_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_20_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_30_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_40_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_50_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_60_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 1); } else if (metrics.LIBRARY.equals("Solexa-41748") && metrics.READ_GROUP == null) { Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 40); Assert.assertEquals(metrics.MIN_INSERT_SIZE, 36); Assert.assertEquals(metrics.MAX_INSERT_SIZE, 45); Assert.assertEquals(metrics.READ_PAIRS, 9); Assert.assertEquals(metrics.WIDTH_OF_10_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_20_PERCENT, 3); Assert.assertEquals(metrics.WIDTH_OF_30_PERCENT, 3); Assert.assertEquals(metrics.WIDTH_OF_40_PERCENT, 3); Assert.assertEquals(metrics.WIDTH_OF_50_PERCENT, 5); Assert.assertEquals(metrics.WIDTH_OF_60_PERCENT, 5); Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 9); Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 9); Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 11); Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 11); } else if (metrics.LIBRARY.equals("Solexa-41734") && metrics.READ_GROUP == null) { Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 26); Assert.assertEquals(metrics.MIN_INSERT_SIZE, 36); Assert.assertEquals(metrics.MAX_INSERT_SIZE, 41); Assert.assertEquals(metrics.READ_PAIRS, 9); Assert.assertEquals(metrics.WIDTH_OF_10_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_20_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_30_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_40_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_50_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_60_PERCENT, 11); Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 11); Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 11); Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 11); Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 11); } else if (metrics.READ_GROUP.equals("62A79AAXX100907.7")) { Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 36); Assert.assertEquals(metrics.MIN_INSERT_SIZE, 36); Assert.assertEquals(metrics.MAX_INSERT_SIZE, 41); Assert.assertEquals(metrics.READ_PAIRS, 4); Assert.assertEquals(metrics.WIDTH_OF_10_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_20_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_30_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_40_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_50_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_60_PERCENT, 5); Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 5); Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 11); Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 11); Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 11); } else if (metrics.READ_GROUP.equals("62A79AAXX100907.6")) { Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 41); Assert.assertEquals(metrics.MIN_INSERT_SIZE, 38); Assert.assertEquals(metrics.MAX_INSERT_SIZE, 45); Assert.assertEquals(metrics.READ_PAIRS, 5); Assert.assertEquals(metrics.WIDTH_OF_10_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_20_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_30_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_40_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_50_PERCENT, 3); Assert.assertEquals(metrics.WIDTH_OF_60_PERCENT, 3); Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 7); Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 7); Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 9); Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 9); } else if (metrics.READ_GROUP.equals("62A79AAXX100907.5")) { Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 41); Assert.assertEquals(metrics.MIN_INSERT_SIZE, 41); Assert.assertEquals(metrics.MAX_INSERT_SIZE, 41); Assert.assertEquals(metrics.READ_PAIRS, 1); Assert.assertEquals(metrics.WIDTH_OF_10_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_20_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_30_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_40_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_50_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_60_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 1); } else if (metrics.READ_GROUP.equals("62A79AAXX100907.3")) { Assert.assertEquals((int)metrics.MEDIAN_INSERT_SIZE, 36); Assert.assertEquals(metrics.MIN_INSERT_SIZE, 36); Assert.assertEquals(metrics.MAX_INSERT_SIZE, 36); Assert.assertEquals(metrics.READ_PAIRS, 1); Assert.assertEquals(metrics.WIDTH_OF_10_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_20_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_30_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_40_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_50_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_60_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_70_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_80_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_90_PERCENT, 1); Assert.assertEquals(metrics.WIDTH_OF_99_PERCENT, 1); } else { Assert.fail("Unexpected metric: " + metrics); } } } @Test //test all gcBias collection levels public void testGcBiasMetrics() throws IOException{ runGcTest(tempSamFile); } public void runGcTest(final File input) throws IOException { final File outfile = File.createTempFile("test", ""); final String referenceFile = "testdata/picard/quality/chrM.reference.fasta"; outfile.deleteOnExit(); final String[] args = new String[]{ "INPUT=" + input.getAbsolutePath(), "OUTPUT=" + outfile.getAbsolutePath(), "REFERENCE_SEQUENCE=" + referenceFile, "METRIC_ACCUMULATION_LEVEL="+MetricAccumulationLevel.ALL_READS.name(), "PROGRAM=null", "PROGRAM="+CollectMultipleMetrics.Program.CollectAlignmentSummaryMetrics.name(), "PROGRAM="+CollectMultipleMetrics.Program.CollectInsertSizeMetrics.name(), "PROGRAM="+CollectMultipleMetrics.Program.CollectGcBiasMetrics.name() }; Assert.assertEquals(runPicardCommandLine(args), 0); final MetricsFile<GcBiasSummaryMetrics, Comparable<?>> output = new MetricsFile<GcBiasSummaryMetrics, Comparable<?>>(); output.read(new FileReader(outfile + ".gc_bias.summary_metrics")); for (final GcBiasSummaryMetrics metrics : output.getMetrics()) { if (metrics.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS)) { //ALL_READS level Assert.assertEquals(metrics.TOTAL_CLUSTERS, 300); Assert.assertEquals(metrics.ALIGNED_READS, 600); Assert.assertEquals(metrics.AT_DROPOUT, 7.234062); Assert.assertEquals(metrics.GC_DROPOUT, 4.086217); Assert.assertEquals(metrics.GC_NC_0_19, 0.0); Assert.assertEquals(metrics.GC_NC_20_39, 1.06826); Assert.assertEquals(metrics.GC_NC_40_59, 0.987036); Assert.assertEquals(metrics.GC_NC_60_79, 0.0); Assert.assertEquals(metrics.GC_NC_80_100, 0.0); } else { Assert.fail("Unexpected metric: " + metrics); } } } //gcBias multi level collector test creates a sam file from chrM for testing purposes //more variables needed for gcbias test to create temp sam file private final static String sample1 = "TestSample1"; private final static String sample2 = "TestSample2"; private final static String readGroupId1 = "TestReadGroup1"; private final static String readGroupId2 = "TestReadGroup2"; private final static String readGroupId3 = "TestReadGroup3"; private final static String platform = "ILLUMINA"; private final static String library1 = "TestLibrary1"; private final static String library2 = "TestLibrary2"; private final static String library3 = "TestLibrary3"; private final static File TEST_DIR = new File("testdata/picard/sam/CollectGcBiasMetrics/"); private final File dict = new File("testdata/picard/quality/chrM.reference.dict"); private File tempSamFile; private final SAMRecordSetBuilder setBuilder1 = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate); private final SAMRecordSetBuilder setBuilder2 = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate); private final SAMRecordSetBuilder setBuilder3 = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate); private final SAMReadGroupRecord readGroupRecord1 = new SAMReadGroupRecord(readGroupId1); private final SAMReadGroupRecord readGroupRecord2 = new SAMReadGroupRecord(readGroupId2); private final SAMReadGroupRecord readGroupRecord3 = new SAMReadGroupRecord(readGroupId3); //create a samfile with different samples, read groups and libraries that overlap for testing. @BeforeTest void setupBuilder() throws IOException { final int numReads = 100; final String flowCellBarcode = "TESTBARCODE"; tempSamFile = File.createTempFile("CollectGcBias", ".bam", TEST_DIR); final File tempSamFileUnsorted = File.createTempFile("CollectGcBias", ".bam", TEST_DIR); tempSamFileUnsorted.deleteOnExit(); tempSamFile.deleteOnExit(); BufferedLineReader bufferedLineReader = null; try { bufferedLineReader = new BufferedLineReader(new FileInputStream(dict)); } catch (FileNotFoundException e) { e.printStackTrace(); } final SAMTextHeaderCodec codec = new SAMTextHeaderCodec(); final SAMFileHeader header = codec.decode(bufferedLineReader, dict.toString()); header.setSortOrder(SAMFileHeader.SortOrder.unsorted); //build different levels to put into the same bam file for testing multi level collection setup(numReads, flowCellBarcode, 1, readGroupId1, readGroupRecord1, sample1, library1, header, setBuilder1); //Sample 1, Library 1, RG 1 setup(numReads, flowCellBarcode, 2, readGroupId2, readGroupRecord2, sample1, library2, header, setBuilder2); //Sample 1, Library 2, RG 2 setup(numReads, flowCellBarcode, 3, readGroupId3, readGroupRecord3, sample2, library3, header, setBuilder3); //Sample 2, Library 3, RG 3 final SAMFileWriter writer = new SAMFileWriterFactory() .setCreateIndex(true).makeBAMWriter(header, false, tempSamFileUnsorted); for (final SAMRecord record : setBuilder1) { writer.addAlignment(record); } for (final SAMRecord record : setBuilder2) { writer.addAlignment(record); } for (final SAMRecord record : setBuilder3) { writer.addAlignment(record); } writer.close(); //sort the temp file final SortSam sorter = new SortSam(); final String[] args = new String[]{"INPUT=" + tempSamFileUnsorted.getAbsolutePath(), "OUTPUT=" + tempSamFile.getAbsolutePath(), "SORT_ORDER=coordinate"}; sorter.instanceMain(args); } void setup(final int numReads, final String readName, final int ID, final String readGroupId, final SAMReadGroupRecord readGroupRecord, final String sample, final String library, final SAMFileHeader header, final SAMRecordSetBuilder setBuilder) throws IOException { final String separator = ":"; readGroupRecord.setSample(sample); readGroupRecord.setPlatform(platform); readGroupRecord.setLibrary(library); readGroupRecord.setPlatformUnit(readGroupId); header.addReadGroup(readGroupRecord); setBuilder.setReadGroup(readGroupRecord); setBuilder.setUseNmFlag(true); setBuilder.setHeader(header); final int max = 15000; final int min = 1; final Random rg = new Random(5); for (int i = 0; i < numReads; i++) { final int start = rg.nextInt(max) + min; final String newReadName = readName + separator + ID + separator + i; setBuilder.addPair(newReadName, 0, start+ID, start+ID+99); } } }