package picard.analysis;
import picard.analysis.CollectWgsMetricsWithNonZeroCoverage.*;
import htsjdk.samtools.*;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.metrics.MetricsFile;
import org.testng.Assert;
import org.testng.annotations.Test;
import picard.cmdline.CommandLineProgramTest;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
public class CollectWgsMetricsWithNonZeroCoverageTest extends CommandLineProgramTest {
private final static File TEST_DIR = new File("testdata/picard/sam/");
private final static String SAMPLE = "TestSample1";
private final static String READ_GROUP_ID = "TestReadGroup1";
private final static String PLATFORM = "ILLUMINA";
private final static String LIBRARY = "TestLibrary1";
public String getCommandLineProgramName() {
return CollectWgsMetricsWithNonZeroCoverage.class.getSimpleName();
}
@Test
public void testWithoutIntervals() throws IOException {
final File input = new File(TEST_DIR, "forMetrics.sam");
final File outfile = File.createTempFile("test", ".wgs_metrics");
final File pdffile = File.createTempFile("test", ".wgs_metrics.pdf");
final File ref = new File(TEST_DIR, "merger.fasta");
final int sampleSize = 1000;
outfile.deleteOnExit();
pdffile.deleteOnExit();
final String[] args = new String[] {
"INPUT=" + input.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + ref.getAbsolutePath(),
"SAMPLE_SIZE=" + sampleSize,
"CHART=" + pdffile.getAbsolutePath()
};
Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<WgsMetricsWithNonZeroCoverage , Integer> output = new MetricsFile<>();
output.read(new FileReader(outfile));
for (final WgsMetricsWithNonZeroCoverage metrics : output.getMetrics()) {
if (metrics.CATEGORY == WgsMetricsWithNonZeroCoverage.Category.WHOLE_GENOME) {
Assert.assertEquals(metrics.GENOME_TERRITORY, 1210);
Assert.assertEquals(metrics.PCT_EXC_MAPQ, 0.271403);
Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.182149);
Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.091075);
Assert.assertEquals(metrics.PCT_1X, 0.107438);
} else {
Assert.assertEquals(metrics.GENOME_TERRITORY, 130);
Assert.assertEquals(metrics.PCT_EXC_MAPQ, 0.271403);
Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.182149);
Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.091075);
Assert.assertEquals(metrics.PCT_1X, 1.0);
}
}
for (final Histogram<Integer> histogram : output.getAllHistograms()) {
if (histogram.getValueLabel().equals("count_WHOLE_GENOME")) {
Assert.assertEquals(histogram.get(0).getValue(), 1080d);
} else {
Assert.assertEquals(histogram.get(0).getValue(), 0d);
}
Assert.assertEquals(histogram.get(1).getValue(), 9d);
Assert.assertEquals(histogram.get(2).getValue(), 35d);
Assert.assertEquals(histogram.get(3).getValue(), 86d);
Assert.assertEquals(histogram.get(4).getValue(), 0d);
}
}
@Test
public void testWithIntervals() throws IOException {
final File input = new File(TEST_DIR, "forMetrics.sam");
final File outfile = File.createTempFile("test", ".wgs_metrics");
final File pdffile = File.createTempFile("test", ".wgs_metrics.pdf");
final File ref = new File(TEST_DIR, "merger.fasta");
final File intervals = new File(TEST_DIR, "largeIntervals.interval_list");
final int sampleSize = 1000;
outfile.deleteOnExit();
pdffile.deleteOnExit();
final String[] args = new String[] {
"INPUT=" + input.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + ref.getAbsolutePath(),
"INTERVALS=" + intervals.getAbsolutePath(),
"SAMPLE_SIZE=" + sampleSize,
"CHART=" + pdffile.getAbsolutePath()
};
Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<WgsMetricsWithNonZeroCoverage , Integer> output = new MetricsFile<>();
output.read(new FileReader(outfile));
for (final WgsMetricsWithNonZeroCoverage metrics : output.getMetrics()) {
if (metrics.CATEGORY == WgsMetricsWithNonZeroCoverage.Category.WHOLE_GENOME) {
Assert.assertEquals(metrics.GENOME_TERRITORY, 404);
Assert.assertEquals(metrics.PCT_EXC_MAPQ, 0.271403);
Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.182149);
Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.091075);
Assert.assertEquals(metrics.PCT_1X, 0.321782);
} else {
Assert.assertEquals(metrics.GENOME_TERRITORY, 130);
Assert.assertEquals(metrics.PCT_EXC_MAPQ, 0.271403);
Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.182149);
Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.091075);
Assert.assertEquals(metrics.PCT_1X, 1.0);
}
}
for (final Histogram<Integer> histogram : output.getAllHistograms()) {
if (histogram.getValueLabel().equals("count_WHOLE_GENOME")) {
Assert.assertEquals(histogram.get(0).getValue(), 274d);
} else {
Assert.assertEquals(histogram.get(0).getValue(), 0d);
}
Assert.assertEquals(histogram.get(1).getValue(), 9d);
Assert.assertEquals(histogram.get(2).getValue(), 35d);
Assert.assertEquals(histogram.get(3).getValue(), 86d);
Assert.assertEquals(histogram.get(4).getValue(), 0d);
}
}
@Test
public void testPoorQualityBases() throws IOException {
final File reference = new File("testdata/picard/quality/chrM.reference.fasta");
final File testSamFile = File.createTempFile("CollectWgsMetrics", ".bam", TEST_DIR);
testSamFile.deleteOnExit();
/**
* Our test SAM looks as follows:
*
* ---------- <- reads with great base qualities (60) -> ----------
* ---------- ----------
* ---------- ----------
* ********** <- reads with poor base qualities (10) -> **********
* ********** **********
* ********** **********
*
* We exclude half of the bases because they are low quality.
* We do not exceed the coverage cap (3), thus none of the bases is excluded as such.
*
*/
final SAMRecordSetBuilder setBuilder = CollectWgsMetricsTestUtils.createTestSAMBuilder(reference, READ_GROUP_ID, SAMPLE, PLATFORM, LIBRARY);
setBuilder.setReadLength(10);
for (int i = 0; i < 3; i++){
setBuilder.addPair("GreatBQRead:" + i, 0, 1, 30, false, false, "10M", "10M", false, true, 60);
}
for (int i = 0; i < 3; i++){
setBuilder.addPair("PoorBQRead:" + i, 0, 1, 30, false, false, "10M", "10M", false, true, 10);
}
final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(setBuilder.getHeader(), false, testSamFile);
setBuilder.forEach(writer::addAlignment);
writer.close();
final File outfile = File.createTempFile("testExcludedBases-metrics", ".txt");
outfile.deleteOnExit();
final File chartOutFile = File.createTempFile("testExcludedBases",".pdf");
chartOutFile.deleteOnExit();
final String[] args = new String[] {
"INPUT=" + testSamFile.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + reference.getAbsolutePath(),
"INCLUDE_BQ_HISTOGRAM=true",
"COVERAGE_CAP=3",
"CHART_OUTPUT=" + chartOutFile.getAbsolutePath()
};
Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<CollectWgsMetrics.WgsMetrics, Integer> output = new MetricsFile<>();
output.read(new FileReader(outfile));
final CollectWgsMetrics.WgsMetrics metrics = output.getMetrics().get(0);
final CollectWgsMetrics.WgsMetrics nonZeroMetrics = output.getMetrics().get(1);
// Some metrics should not change between with and without zero
Assert.assertEquals(nonZeroMetrics.PCT_EXC_BASEQ, metrics.PCT_EXC_BASEQ);
Assert.assertEquals(nonZeroMetrics.PCT_EXC_CAPPED, metrics.PCT_EXC_CAPPED);
// Other metrics change when we ignore the zero depth bin
Assert.assertEquals(nonZeroMetrics.GENOME_TERRITORY, 20);
Assert.assertEquals(nonZeroMetrics.MEAN_COVERAGE, 3.0);
}
}