/* * The MIT License * * Copyright (c) 2014 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.sam.markduplicates; import htsjdk.samtools.SAMRecordSetBuilder; import picard.sam.DuplicationMetrics; import picard.cmdline.CommandLineProgram; import htsjdk.samtools.metrics.MetricsFile; import picard.sam.testers.SamFileTester; import htsjdk.samtools.util.FormatUtil; import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.TestUtil; import htsjdk.samtools.DuplicateScoringStrategy.ScoringStrategy; import org.testng.Assert; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; /** * This class is an extension of SamFileTester used to test AbstractMarkDuplicatesCommandLineProgram's with SAM files generated on the fly. * This performs the underlying tests defined by classes such as AbstractMarkDuplicatesCommandLineProgramTest. */ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends SamFileTester { final private File metricsFile; final DuplicationMetrics expectedMetrics; public AbstractMarkDuplicatesCommandLineProgramTester(final ScoringStrategy duplicateScoringStrategy) { super(50, true, SAMRecordSetBuilder.DEFAULT_CHROMOSOME_LENGTH, duplicateScoringStrategy); expectedMetrics = new DuplicationMetrics(); expectedMetrics.READ_PAIR_OPTICAL_DUPLICATES = 0; metricsFile = new File(getOutputDir(), "metrics.txt"); addArg("METRICS_FILE=" + metricsFile); addArg("DUPLICATE_SCORING_STRATEGY=" + duplicateScoringStrategy.name()); } public AbstractMarkDuplicatesCommandLineProgramTester() { this(SAMRecordSetBuilder.DEFAULT_DUPLICATE_SCORING_STRATEGY); } @Override public String getCommandLineProgramName() { return getProgram().getClass().getSimpleName(); } /** * Fill in expected duplication metrics directly from the input records given to this tester */ private void updateExpectedDuplicationMetrics() { final FormatUtil formatter = new FormatUtil(); final CloseableIterator<SAMRecord> inputRecordIterator = this.getRecordIterator(); while (inputRecordIterator.hasNext()) { final SAMRecord record = inputRecordIterator.next(); if (!record.isSecondaryOrSupplementary()) { final String key = samRecordToDuplicatesFlagsKey(record); if (!this.duplicateFlags.containsKey(key)) { System.err.println("DOES NOT CONTAIN KEY: " + key); } final boolean isDuplicate = this.duplicateFlags.get(key); // First bring the simple metricsFile up to date if (record.getReadUnmappedFlag()) { ++expectedMetrics.UNMAPPED_READS; } else if (!record.getReadPairedFlag() || record.getMateUnmappedFlag()) { ++expectedMetrics.UNPAIRED_READS_EXAMINED; if (isDuplicate) ++expectedMetrics.UNPAIRED_READ_DUPLICATES; } else { ++expectedMetrics.READ_PAIRS_EXAMINED; // will need to be divided by 2 at the end if (isDuplicate) ++expectedMetrics.READ_PAIR_DUPLICATES; // will need to be divided by 2 at the end } } } expectedMetrics.READ_PAIR_DUPLICATES = expectedMetrics.READ_PAIR_DUPLICATES / 2; expectedMetrics.READ_PAIRS_EXAMINED = expectedMetrics.READ_PAIRS_EXAMINED / 2; expectedMetrics.calculateDerivedMetrics(); // Have to run this Double value through the same format/parsing operations as during a file write/read expectedMetrics.PERCENT_DUPLICATION = formatter.parseDouble(formatter.format(expectedMetrics.PERCENT_DUPLICATION)); } public void setExpectedOpticalDuplicate(final int expectedOpticalDuplicatePairs) { expectedMetrics.READ_PAIR_OPTICAL_DUPLICATES = expectedOpticalDuplicatePairs; } @Override public void test() { try { updateExpectedDuplicationMetrics(); // Read the output and check the duplicate flag int outputRecords = 0; final SAMFileReader reader = new SAMFileReader(getOutput()); for (final SAMRecord record : reader) { outputRecords++; final String key = samRecordToDuplicatesFlagsKey(record); if (!this.duplicateFlags.containsKey(key)) { System.err.println("DOES NOT CONTAIN KEY: " + key); } Assert.assertTrue(this.duplicateFlags.containsKey(key)); final boolean value = this.duplicateFlags.get(key); this.duplicateFlags.remove(key); if (value != record.getDuplicateReadFlag()) { System.err.println("Mismatching read:"); System.err.print(record.getSAMString()); } Assert.assertEquals(record.getDuplicateReadFlag(), value); } reader.close(); // Ensure the program output the same number of records as were read in Assert.assertEquals(outputRecords, this.getNumberOfRecords(), ("saw " + outputRecords + " output records, vs. " + this.getNumberOfRecords() + " input records")); // Check the values written to metrics.txt against our input expectations final MetricsFile<DuplicationMetrics, Comparable<?>> metricsOutput = new MetricsFile<DuplicationMetrics, Comparable<?>>(); try{ metricsOutput.read(new FileReader(metricsFile)); } catch (final FileNotFoundException ex) { System.err.println("Metrics file not found: " + ex); } // NB: Test writes an initial metrics line with a null entry for LIBRARY and 0 values for all metrics. Why? final DuplicationMetrics observedMetrics = metricsOutput.getMetrics().get(metricsOutput.getMetrics().size() - 1); Assert.assertEquals(observedMetrics.UNPAIRED_READS_EXAMINED, expectedMetrics.UNPAIRED_READS_EXAMINED, "UNPAIRED_READS_EXAMINED does not match expected"); Assert.assertEquals(observedMetrics.READ_PAIRS_EXAMINED, expectedMetrics.READ_PAIRS_EXAMINED, "READ_PAIRS_EXAMINED does not match expected"); Assert.assertEquals(observedMetrics.UNMAPPED_READS, expectedMetrics.UNMAPPED_READS, "UNMAPPED_READS does not match expected"); Assert.assertEquals(observedMetrics.UNPAIRED_READ_DUPLICATES, expectedMetrics.UNPAIRED_READ_DUPLICATES, "UNPAIRED_READ_DUPLICATES does not match expected"); Assert.assertEquals(observedMetrics.READ_PAIR_DUPLICATES, expectedMetrics.READ_PAIR_DUPLICATES, "READ_PAIR_DUPLICATES does not match expected"); Assert.assertEquals(observedMetrics.READ_PAIR_OPTICAL_DUPLICATES, expectedMetrics.READ_PAIR_OPTICAL_DUPLICATES, "READ_PAIR_OPTICAL_DUPLICATES does not match expected"); Assert.assertEquals(observedMetrics.PERCENT_DUPLICATION, expectedMetrics.PERCENT_DUPLICATION, "PERCENT_DUPLICATION does not match expected"); Assert.assertEquals(observedMetrics.ESTIMATED_LIBRARY_SIZE, expectedMetrics.ESTIMATED_LIBRARY_SIZE, "ESTIMATED_LIBRARY_SIZE does not match expected"); } finally { TestUtil.recursiveDelete(getOutputDir()); } } abstract protected CommandLineProgram getProgram(); }