/*
* The MIT License
*
* Copyright (c) 2015 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.analysis;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordSetBuilder;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.Histogram;
import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.cmdline.CommandLineProgramTest;
import picard.sam.SortSam;
import java.io.*;
import java.util.Random;
/**
* Tests for methods in CollectWgsMetrics
*
*
* @author Kylee Bergin
*/
public class CollectWgsMetricsTest extends CommandLineProgramTest {
private final static File REF_DICT_DIR = new File("testdata/picard/sam/CollectGcBiasMetrics/");
private final static File TEST_DIR = new File("testdata/picard/sam/");
private final File referenceDict = new File(REF_DICT_DIR, "MSmallHeader.dict");
private File tempSamFile;
private File outfile;
private final static int READ_PAIR_DISTANCE = 99;
private final static String SAMPLE = "TestSample1";
private final static String READ_GROUP_ID = "TestReadGroup1";
private final static String PLATFORM = "ILLUMINA";
private final static String LIBRARY = "TestLibrary1";
private final static int NUM_READS = 40000;
public String getCommandLineProgramName() {
return CollectWgsMetrics.class.getSimpleName();
}
@DataProvider(name = "wgsDataProvider")
public Object[][] wgsDataProvider() {
final String referenceFile = "testdata/picard/quality/chrM.reference.fasta";
return new Object[][] {
{tempSamFile, outfile, referenceFile, "false"},
{tempSamFile, outfile, referenceFile, "true"},
};
}
@Test(dataProvider = "wgsDataProvider")
public void testMetricsFromWGS(final File input, final File outfile, final String referenceFile,
final String useFastAlgorithm) throws IOException {
outfile.deleteOnExit();
final int sampleSize = 1000;
final String[] args = new String[] {
"INPUT=" + input.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + referenceFile,
"SAMPLE_SIZE=" + sampleSize,
"USE_FAST_ALGORITHM=" + useFastAlgorithm
};
Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<CollectWgsMetrics.WgsMetrics, Comparable<?>> output = new MetricsFile<CollectWgsMetrics.WgsMetrics, Comparable<?>>();
output.read(new FileReader(outfile));
for (final CollectWgsMetrics.WgsMetrics metrics : output.getMetrics()) {
Assert.assertEquals(metrics.MEAN_COVERAGE, 13.985155, .02);
Assert.assertEquals(metrics.PCT_EXC_OVERLAP, 0.0); // 52 of 606 bases
Assert.assertEquals(metrics.PCT_EXC_BASEQ, 0.399906, .02); // 114 of 606 bases
Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.0); // 202 of 606 bases
Assert.assertEquals(metrics.SD_COVERAGE, 57.364434, .02);
Assert.assertEquals(metrics.MEDIAN_COVERAGE, 0.0);
Assert.assertEquals(metrics.PCT_EXC_MAPQ, 0.0);
Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.0);
Assert.assertEquals(metrics.PCT_EXC_CAPPED, 0.519542, .001);
Assert.assertEquals(metrics.PCT_EXC_TOTAL, 0.919537, .001);
Assert.assertEquals(metrics.PCT_1X, 0.056364, .0001);
Assert.assertEquals(metrics.PCT_5X, 0.056364, .0001);
Assert.assertEquals(metrics.PCT_10X, 0.056364, .0001);
Assert.assertEquals(metrics.PCT_15X, 0.056364, .0001);
Assert.assertEquals(metrics.PCT_20X, 0.056364, .0001);
Assert.assertEquals(metrics.PCT_25X, 0.056303, .0001);
Assert.assertEquals(metrics.PCT_30X, 0.056303, .0001);
Assert.assertEquals(metrics.PCT_40X, 0.056243, .0001);
Assert.assertEquals(metrics.PCT_50X, 0.056243, .0001);
Assert.assertEquals(metrics.PCT_60X, 0.056182, .0001);
Assert.assertEquals(metrics.PCT_70X, 0.056182, .0001);
Assert.assertEquals(metrics.PCT_80X, 0.056122, .0001);
Assert.assertEquals(metrics.PCT_90X, 0.056062, .0001);
Assert.assertEquals(metrics.PCT_100X, 0.056062, .0001);
Assert.assertEquals(metrics.HET_SNP_SENSITIVITY, 0.056362, .02);
Assert.assertEquals(metrics.HET_SNP_Q, 0.0);
}
}
//create a samfile for testing.
@BeforeTest
void setupBuilder() throws IOException {
final String readName = "TESTBARCODE";
//Create Sam Files
tempSamFile = File.createTempFile("CollectWgsMetrics", ".bam", TEST_DIR);
final File tempSamFileUnsorted = File.createTempFile("CollectWgsMetrics", ".bam", TEST_DIR);
tempSamFileUnsorted.deleteOnExit();
tempSamFile.deleteOnExit();
final SAMFileHeader header = new SAMFileHeader();
//Check that dictionary file is readable and then set header dictionary
try {
header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(referenceDict));
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
} catch (final SAMException e) {
e.printStackTrace();
}
//Set readGroupRecord
final SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(READ_GROUP_ID);
readGroupRecord.setSample(SAMPLE);
readGroupRecord.setPlatform(PLATFORM);
readGroupRecord.setLibrary(LIBRARY);
readGroupRecord.setPlatformUnit(READ_GROUP_ID);
header.addReadGroup(readGroupRecord);
//Add to setBuilder
final SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate);
setBuilder.setReadGroup(readGroupRecord);
setBuilder.setUseNmFlag(true);
setBuilder.setHeader(header);
//Read settings
final String separator = ":";
final int ID = 1;
final int maxReadStart = 800;
final int minReadStart = 1;
final Random rg = new Random(5);
for (int i = 0; i < NUM_READS; i++) {
final int start = rg.nextInt(maxReadStart) + minReadStart;
final String newReadName = readName + separator + ID + separator + i;
setBuilder.addPair(newReadName, 0, start + ID, start + ID + READ_PAIR_DISTANCE);
}
//Write SAM file
final SAMFileWriter writer = new SAMFileWriterFactory()
.setCreateIndex(true).makeBAMWriter(header, false, tempSamFileUnsorted);
for (final SAMRecord record : setBuilder) {
writer.addAlignment(record);
}
writer.close();
//sort the temp file
final SortSam sorter = new SortSam();
final String[] args = new String[]{
"INPUT=" + tempSamFileUnsorted.getAbsolutePath(),
"OUTPUT=" + tempSamFile.getAbsolutePath(),
"SORT_ORDER=coordinate"
};
sorter.instanceMain(args);
//create output files for tests
outfile = File.createTempFile("testWgsMetrics", ".txt", TEST_DIR);
outfile.deleteOnExit();
}
@DataProvider(name = "wgsAlgorithm")
public Object[][] wgsAlgorithm() {
return new Object[][] {
{"false"},
{"true"},
};
}
@Test(dataProvider = "wgsAlgorithm")
public void testLargeIntervals(final String useFastAlgorithm) throws IOException {
final File input = new File(TEST_DIR, "forMetrics.sam");
final File outfile = File.createTempFile("test", ".wgs_metrics");
final File ref = new File(TEST_DIR, "merger.fasta");
final File intervals = new File(TEST_DIR, "largeIntervals.interval_list");
final int sampleSize = 1000;
outfile.deleteOnExit();
final String[] args = new String[] {
"INPUT=" + input.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + ref.getAbsolutePath(),
"INTERVALS=" + intervals.getAbsolutePath(),
"SAMPLE_SIZE=" + sampleSize,
"USE_FAST_ALGORITHM=" + useFastAlgorithm
};
Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<CollectWgsMetrics.WgsMetrics, Comparable<?>> output = new MetricsFile<>();
output.read(new FileReader(outfile));
for (final CollectWgsMetrics.WgsMetrics metrics : output.getMetrics()) {
Assert.assertEquals(metrics.GENOME_TERRITORY, 404);
Assert.assertEquals(metrics.PCT_EXC_MAPQ, 0.271403);
Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.182149);
Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.091075);
}
}
@Test(dataProvider = "wgsAlgorithm")
public void testExclusions(final String useFastAlgorithm) throws IOException {
final File reference = new File("testdata/picard/sam/merger.fasta");
final File tempSamFile = File.createTempFile("CollectWgsMetrics", ".bam", TEST_DIR);
tempSamFile.deleteOnExit();
final SAMRecordSetBuilder setBuilder = CollectWgsMetricsTestUtils.createTestSAMBuilder(reference, READ_GROUP_ID, SAMPLE, PLATFORM, LIBRARY);
setBuilder.setReadLength(10);
int expectedSingletonCoverage = 0;
expectedSingletonCoverage += 13;
setBuilder.addPair("overlappingReads", 0, 2, 5, false, false, "10M", "10M", true, false, 30);
expectedSingletonCoverage += 2 * 5; // 5 bases for each mate are good (see AAA!!!AA!! below).
setBuilder.addPair("poorQualityReads", 1, 2, 20, false, false, "10M", "10M", true, false, -1);
for(int i = 1; i < 5; i++) {
setBuilder.addPair("deepStack-" + i, 2, 2, 20, false, false, "10M", "10M", true, false, 30);
}
// modify quality of reads
setBuilder.getRecords().stream()
.filter(samRecord -> samRecord.getReadName().equals("poorQualityReads"))
.forEach(record -> record.setBaseQualityString("AAA!!!AA!!"));
setBuilder.getSamReader();
// Write SAM file
final SAMFileWriter writer = new SAMFileWriterFactory()
.setCreateIndex(true).makeBAMWriter(setBuilder.getHeader(), false, tempSamFile);
for (final SAMRecord record : setBuilder) {
writer.addAlignment(record);
}
writer.close();
// create output files for tests
final File outfile = File.createTempFile("testWgsMetrics", ".txt");
outfile.deleteOnExit();
final String[] args = new String[] {
"INPUT=" + tempSamFile.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + reference.getAbsolutePath(),
"INCLUDE_BQ_HISTOGRAM=true",
"COVERAGE_CAP=3",
"USE_FAST_ALGORITHM=" + useFastAlgorithm
};
Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<CollectWgsMetrics.WgsMetrics, Integer> output = new MetricsFile<>();
output.read(new FileReader(outfile));
final CollectWgsMetrics.WgsMetrics metrics = output.getMetrics().get(0);
final Histogram<Integer> highQualityDepthHistogram = output.getAllHistograms().get(0);
final Histogram<Integer> baseQHistogram = output.getAllHistograms().get(1);
Assert.assertEquals((long) highQualityDepthHistogram.getSumOfValues(), metrics.GENOME_TERRITORY);
Assert.assertEquals((long) highQualityDepthHistogram.get(1).getValue(), expectedSingletonCoverage);
Assert.assertEquals((long) highQualityDepthHistogram.get(3).getValue(), 2*10);
}
@Test(dataProvider = "wgsAlgorithm")
public void testPoorQualityBases(final String useFastAlgorithm) throws IOException {
final File reference = new File("testdata/picard/quality/chrM.reference.fasta");
final File testSamFile = File.createTempFile("CollectWgsMetrics", ".bam", TEST_DIR);
testSamFile.deleteOnExit();
/**
* Our test SAM looks as follows:
*
* ---------- <- reads with great base qualities (60) -> ----------
* ---------- ----------
* ---------- ----------
* ********** <- reads with poor base qualities (10) -> **********
* ********** **********
* ********** **********
*
* We exclude half of the bases because they are low quality.
* We do not exceed the coverage cap (3), thus none of the bases is excluded as such.
*
*/
final SAMRecordSetBuilder setBuilder = CollectWgsMetricsTestUtils.createTestSAMBuilder(reference, READ_GROUP_ID, SAMPLE, PLATFORM, LIBRARY);
setBuilder.setReadLength(10);
for (int i = 0; i < 3; i++){
setBuilder.addPair("GreatBQRead:" + i, 0, 1, 30, false, false, "10M", "10M", false, true, 40);
}
for (int i = 0; i < 3; i++){
setBuilder.addPair("PoorBQRead:" + i, 0, 1, 30, false, false, "10M", "10M", false, true, 10);
}
final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(setBuilder.getHeader(), false, testSamFile);
for (final SAMRecord record : setBuilder) {
writer.addAlignment(record);
}
writer.close();
final File outfile = File.createTempFile("testExcludedBases-metrics", ".txt");
outfile.deleteOnExit();
final String[] args = new String[] {
"INPUT=" + testSamFile.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + reference.getAbsolutePath(),
"INCLUDE_BQ_HISTOGRAM=true",
"COVERAGE_CAP=3",
"USE_FAST_ALGORITHM=" + useFastAlgorithm
};
Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<CollectWgsMetrics.WgsMetrics, Integer> output = new MetricsFile<>();
output.read(new FileReader(outfile));
final CollectWgsMetrics.WgsMetrics metrics = output.getMetrics().get(0);
Assert.assertEquals(metrics.PCT_EXC_BASEQ, 0.5);
Assert.assertEquals(metrics.PCT_EXC_CAPPED, 0.0);
}
@Test(dataProvider = "wgsAlgorithm")
public void testGiantDeletion(final String useFastAlgorithm) throws IOException {
final File reference = new File("testdata/picard/quality/chrM.reference.fasta");
final File testSamFile = File.createTempFile("CollectWgsMetrics", ".bam", TEST_DIR);
testSamFile.deleteOnExit();
final SAMRecordSetBuilder setBuilder = CollectWgsMetricsTestUtils.createTestSAMBuilder(reference, READ_GROUP_ID, SAMPLE, PLATFORM, LIBRARY);
setBuilder.setReadLength(10);
for (int i = 0; i < 3; i++){
setBuilder.addPair("Read:" + i, 0, 1, 30, false, false, "5M10000D5M", "1000D10M", false, true, 40);
}
final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(setBuilder.getHeader(), false, testSamFile);
for (final SAMRecord record : setBuilder) {
writer.addAlignment(record);
}
writer.close();
final File outfile = File.createTempFile("testGiantDeletion", ".txt");
outfile.deleteOnExit();
final String[] args = new String[] {
"INPUT=" + testSamFile.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + reference.getAbsolutePath(),
"INCLUDE_BQ_HISTOGRAM=true",
"COVERAGE_CAP=3",
"USE_FAST_ALGORITHM=" + useFastAlgorithm
};
Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<CollectWgsMetrics.WgsMetrics, Integer> output = new MetricsFile<>();
output.read(new FileReader(outfile));
final CollectWgsMetrics.WgsMetrics metrics = output.getMetrics().get(0);
Assert.assertEquals(metrics.PCT_EXC_BASEQ, 0.0);
Assert.assertEquals(metrics.PCT_EXC_CAPPED, 0.0);
}
}