/* * The MIT License * * Copyright (c) 2015 Nils Homer * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.sam.markduplicates; import htsjdk.samtools.metrics.MetricsFile; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; import picard.sam.DuplicationMetrics; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; public class EstimateLibraryComplexityTest extends CommandLineProgramTest { private static final File TEST_DATA_DIR = new File("testdata/picard/sam/EstimateLibraryComplexity"); @DataProvider(name = "testSimpleDuplicate") public Object[][] createDataTestSimpleDuplicate() { return new Object[][]{ {"dupes.sam", 2, 2}, {"big_dupes.sam", 12, 500} }; } @DataProvider(name = "testMaxDiffRate") public Object[][] createDataTestMaxDiffRate() { return new Object[][]{ {"dupes.sam", 0, 2}, {"big_dupes.sam", 8, 500} }; } @DataProvider(name = "testSimpleDuplicateWithMaxReadLength") public Object[][] createDataTestSimpleDuplicateWithMaxReadLength() { return new Object[][]{ {"dupes.sam", 2, 2}, {"big_dupes.sam", 512, 500} }; } @DataProvider(name = "testDefaultMinGroupCount") public Object[][] createDataTestDefaultMinGroupCount() { return new Object[][]{ {"dupes.sam", 0, 0}, {"big_dupes.sam", 8, 497} }; } @DataProvider(name = "testSimpleDuplicatesWithSecondaryAndSupplementaryRecords") public Object[][] createDataTestSimpleDuplicatesWithSecondaryAndSupplementaryRecords() { return new Object[][]{ {"dupes_with_sos.sam", 2, 2}, {"big_dupes_with_sos.sam", 12, 500} }; } public String getCommandLineProgramName() { return EstimateLibraryComplexity.class.getSimpleName(); } private void examineMetricsFile(final File output, final int numDuplicates, final int numReadPairsExamined) { final List<DuplicationMetrics> metricsList = MetricsFile.readBeans(output); Assert.assertEquals(metricsList.size(), 1); final DuplicationMetrics metrics = metricsList.get(0); Assert.assertEquals(metrics.READ_PAIR_DUPLICATES * 2 + metrics.UNPAIRED_READ_DUPLICATES, numDuplicates); Assert.assertEquals(metrics.READ_PAIRS_EXAMINED, numReadPairsExamined); } /** * Finds duplicates as expected. */ @Test(dataProvider = "testSimpleDuplicate") public void testSimpleDuplicate(final String testName, final int numDuplicates, final int numReadPairsExamined) throws IOException { final File input = new File(TEST_DATA_DIR, testName); final File output = File.createTempFile("estimateLibraryComplexity", ".els_metrics"); output.deleteOnExit(); final List<String> args = new ArrayList<>(); args.add("INPUT=" + input.getAbsolutePath()); args.add("OUTPUT=" + output.getAbsolutePath()); args.add("MIN_GROUP_COUNT=1"); Assert.assertEquals(runPicardCommandLine(args), 0); examineMetricsFile(output, numDuplicates, numReadPairsExamined); } /** * Finds duplicates as expected ignoring secondary and supplementary records. */ @Test(dataProvider = "testSimpleDuplicatesWithSecondaryAndSupplementaryRecords") public void testSimpleDuplicatesWithSecondaryAndSupplementaryRecords(final String testName, final int numDuplicates, final int numReadPairsExamined) throws IOException { final File input = new File(TEST_DATA_DIR, testName); final File output = File.createTempFile("estimateLibraryComplexity", ".els_metrics"); output.deleteOnExit(); final List<String> args = new ArrayList<>(); args.add("INPUT=" + input.getAbsolutePath()); args.add("OUTPUT=" + output.getAbsolutePath()); args.add("MIN_GROUP_COUNT=1"); Assert.assertEquals(runPicardCommandLine(args), 0); examineMetricsFile(output, numDuplicates, numReadPairsExamined); } /** * Does not find duplicates since the difference rate was too high across the entire read */ @Test(dataProvider = "testMaxDiffRate") public void testMaxDiffRate(final String testName, final int numDuplicates, final int numReadPairsExamined) throws IOException { final File input = new File(TEST_DATA_DIR, testName); final File output = File.createTempFile("estimateLibraryComplexity", ".els_metrics"); output.deleteOnExit(); final List<String> args = new ArrayList<>(); args.add("INPUT=" + input.getAbsolutePath()); args.add("OUTPUT=" + output.getAbsolutePath()); args.add("MAX_DIFF_RATE=0.0"); args.add("MIN_GROUP_COUNT=1"); Assert.assertEquals(runPicardCommandLine(args), 0); examineMetricsFile(output, numDuplicates, numReadPairsExamined); } /** * Finds duplicates since the we examine only the fist ten bases. */ @Test(dataProvider = "testSimpleDuplicateWithMaxReadLength") public void testSimpleDuplicateWithMaxReadLength(final String testName, final int numDuplicates, final int numReadPairsExamined) throws IOException { final File input = new File(TEST_DATA_DIR, testName); final File output = File.createTempFile("estimateLibraryComplexity", ".els_metrics"); output.deleteOnExit(); final List<String> args = new ArrayList<>(); args.add("INPUT=" + input.getAbsolutePath()); args.add("OUTPUT=" + output.getAbsolutePath()); args.add("MAX_DIFF_RATE=0.0"); args.add("MIN_GROUP_COUNT=1"); args.add("MAX_READ_LENGTH=10"); Assert.assertEquals(runPicardCommandLine(args), 0); examineMetricsFile(output, numDuplicates, numReadPairsExamined); } /** * Does not find any duplicates since there was only one group of duplicates of size one. Also * there are no reads examined due to this filtering step. */ @Test(dataProvider = "testDefaultMinGroupCount") public void testDefaultMinGroupCount(final String testName, final int numDuplicates, final int numReadPairsExamined) throws IOException { final File input = new File(TEST_DATA_DIR, testName); final File output = File.createTempFile("estimateLibraryComplexity", ".els_metrics"); output.deleteOnExit(); final List<String> args = new ArrayList<>(); args.add("INPUT=" + input.getAbsolutePath()); args.add("OUTPUT=" + output.getAbsolutePath()); Assert.assertEquals(runPicardCommandLine(args), 0); examineMetricsFile(output, numDuplicates, numReadPairsExamined); // no read pairs examined!!! } }