/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.StringUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.LineNumberReader;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Arrays;
import java.util.Iterator;
/**
* Tests almost all error conditions detected by the sam file validator. The
* conditions not tested are proactively prevented by sam generation code.
*
* @author Doug Voet
*/
public class ValidateSamFileTest {
private static final File TEST_DATA_DIR = new File("testdata/htsjdk/samtools/ValidateSamFileTest");
@Test
public void testValidSamFile() throws Exception {
final ValidationStringency saveStringency = SAMFileReader.getDefaultValidationStringency();
SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
try {
final SAMFileReader samReader = new SAMFileReader(new File(TEST_DATA_DIR, "valid.sam"));
final Histogram<String> results = executeValidation(samReader, null);
Assert.assertTrue(results.isEmpty());
} finally {
SAMFileReader.setDefaultValidationStringency(saveStringency);
}
}
@Test
public void testSortOrder() throws IOException {
Histogram<String> results = executeValidation(new SAMFileReader(new File(TEST_DATA_DIR, "invalid_coord_sort_order.sam")), null);
Assert.assertEquals(results.get(SAMValidationError.Type.RECORD_OUT_OF_ORDER.getHistogramString()).getValue(), 1.0);
results = executeValidation(new SAMFileReader(new File(TEST_DATA_DIR, "invalid_queryname_sort_order.sam")), null);
Assert.assertEquals(results.get(SAMValidationError.Type.RECORD_OUT_OF_ORDER.getHistogramString()).getValue(), 5.0);
}
@Test
public void testVerbose() throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
for (int i=0; i<20; i++) {
samBuilder.addFrag(String.valueOf(i), 1, i, false);
}
for (final SAMRecord record : samBuilder) {
record.setProperPairFlag(true);
}
final StringWriter results = new StringWriter();
final SamFileValidator validator = new SamFileValidator(new PrintWriter(results), 8000);
validator.setVerbose(true, 10);
validator.validateSamFileVerbose(samBuilder.getSamReader(), null);
final int lineCount = results.toString().split("\n").length;
Assert.assertEquals(lineCount, 11);
}
@Test
public void testUnpairedRecords() throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
for (int i=0; i<6; i++) {
samBuilder.addFrag(String.valueOf(i), i, i, false);
}
final Iterator<SAMRecord> records = samBuilder.iterator();
records.next().setProperPairFlag(true);
records.next().setMateUnmappedFlag(true);
records.next().setMateNegativeStrandFlag(true);
records.next().setFirstOfPairFlag(true);
records.next().setSecondOfPairFlag(true);
records.next().setMateReferenceIndex(1);
final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_PROPER_PAIR.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_MATE_UNMAPPED.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_MATE_NEG_STRAND.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_FIRST_OF_PAIR.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_SECOND_OF_PAIR.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_MATE_REF_INDEX.getHistogramString()).getValue(), 1.0);
}
@Test
public void testPairedRecords() throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
for (int i=0; i<5; i++) {
samBuilder.addPair(String.valueOf(i), i, i, i+100);
}
final Iterator<SAMRecord> records = samBuilder.iterator();
records.next().setMateReferenceName("*");
records.next().setMateAlignmentStart(Integer.MAX_VALUE);
records.next().setMateAlignmentStart(records.next().getAlignmentStart()+1);
records.next().setMateNegativeStrandFlag(!records.next().getReadNegativeStrandFlag());
records.next().setMateReferenceIndex(records.next().getReferenceIndex() + 1);
records.next().setMateUnmappedFlag(!records.next().getReadUnmappedFlag());
final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_ALIGNMENT_START.getHistogramString()).getValue(), 3.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_MATE_UNMAPPED.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.MISMATCH_FLAG_MATE_NEG_STRAND.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.MISMATCH_FLAG_MATE_UNMAPPED.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.MISMATCH_MATE_ALIGNMENT_START.getHistogramString()).getValue(), 2.0);
Assert.assertEquals(results.get(SAMValidationError.Type.MISMATCH_MATE_REF_INDEX.getHistogramString()).getValue(), 2.0);
}
@Test(dataProvider = "missingMateTestCases")
public void testMissingMate(final SAMFileHeader.SortOrder sortOrder) throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder(true, sortOrder);
samBuilder.addPair(String.valueOf(1), 1, 1, 101);
final Iterator<SAMRecord> records = samBuilder.iterator();
records.next();
records.remove();
final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
Assert.assertEquals(results.get(SAMValidationError.Type.MATE_NOT_FOUND.getHistogramString()).getValue(), 1.0);
}
@DataProvider(name = "missingMateTestCases")
public Object[][] missingMateTestCases() {
return new Object[][] {
{SAMFileHeader.SortOrder.coordinate},
{SAMFileHeader.SortOrder.queryname},
{SAMFileHeader.SortOrder.unsorted},
};
}
@Test
public void testUnmappedRecords() throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
for (int i=0; i<4; i++) {
samBuilder.addUnmappedFragment(String.valueOf(i));
}
final Iterator<SAMRecord> records = samBuilder.iterator();
records.next().setReadNegativeStrandFlag(true);
records.next().setNotPrimaryAlignmentFlag(true);
records.next().setMappingQuality(10);
records.next().setCigarString("36M");
final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_NOT_PRIM_ALIGNMENT.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_MAPPING_QUALITY.getHistogramString()).getValue(), 1.0);
}
@Test
public void testMappedRecords() throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
for (int i=0; i<2; i++) {
samBuilder.addFrag(String.valueOf(i), i, i, false);
}
final Iterator<SAMRecord> records = samBuilder.iterator();
records.next().setCigarString("25M3S25M");
records.next().setReferenceName("*");
final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_CIGAR.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_READ_UNMAPPED.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.MISSING_TAG_NM.getHistogramString()).getValue(), 1.0);
}
@Test
public void testNmFlagValidation() throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
for (int i=0; i<3; i++) {
samBuilder.addFrag(String.valueOf(i), i, i+1, false);
}
final Iterator<SAMRecord> records = samBuilder.iterator();
records.next().setAttribute(ReservedTagConstants.NM, 4);
// PIC-215: Confirm correct NM value when there is an insertion and a deletion.
final SAMRecord recordWithInsert = records.next();
final byte[] sequence = recordWithInsert.getReadBases();
Arrays.fill(sequence, (byte)'A');
recordWithInsert.setReadBases(sequence);
recordWithInsert.setCigarString("1D" + Integer.toString(sequence.length-1) + "M1I");
recordWithInsert.setAttribute(ReservedTagConstants.NM, 2);
final Histogram<String> results = executeValidation(samBuilder.getSamReader(), new ReferenceSequenceFile() {
private int index=0;
public SAMSequenceDictionary getSequenceDictionary() {
return null;
}
public ReferenceSequence nextSequence() {
final byte[] bases = new byte[10000];
Arrays.fill(bases, (byte) 'A');
return new ReferenceSequence("foo", index++, bases);
}
public void reset() {
this.index = 0;
}
public boolean isIndexed() { return false; }
public ReferenceSequence getSequence(final String contig) {
throw new UnsupportedOperationException();
}
public ReferenceSequence getSubsequenceAt(final String contig, final long start, final long stop) {
throw new UnsupportedOperationException();
}
@Override
public void close() throws IOException {
//no-op
}
});
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_TAG_NM.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.MISSING_TAG_NM.getHistogramString()).getValue(), 1.0);
}
@Test(dataProvider = "testMateCigarScenarios")
public void testMateCigarScenarios(final String scenario, final String inputFile, final SAMValidationError.Type expectedError)
throws Exception {
final SAMFileReader reader = new SAMFileReader(new File(TEST_DATA_DIR, inputFile));
final Histogram<String> results = executeValidation(reader, null);
Assert.assertNotNull(results.get(expectedError.getHistogramString()));
Assert.assertEquals(results.get(expectedError.getHistogramString()).getValue(), 1.0);
}
@DataProvider(name = "testMateCigarScenarios")
public Object[][] testMateCigarScenarios() {
return new Object[][] {
{"invalid mate cigar", "invalid_mate_cigar_string.sam", SAMValidationError.Type.MISMATCH_MATE_CIGAR_STRING},
{"inappropriate mate cigar", "inappropriate_mate_cigar_string.sam", SAMValidationError.Type.MATE_CIGAR_STRING_INVALID_PRESENCE}
};
}
@Test(dataProvider = "testTruncatedScenarios")
public void testTruncated(final String scenario, final String inputFile, final SAMValidationError.Type expectedError)
throws Exception {
final SAMFileReader reader = new SAMFileReader(new File(TEST_DATA_DIR, inputFile));
final Histogram<String> results = executeValidation(reader, null);
Assert.assertNotNull(results.get(expectedError.getHistogramString()));
Assert.assertEquals(results.get(expectedError.getHistogramString()).getValue(), 1.0);
}
@DataProvider(name = "testTruncatedScenarios")
public Object[][] testTruncatedScenarios() {
return new Object[][] {
{"truncated bam", "truncated.bam", SAMValidationError.Type.TRUNCATED_FILE},
{"truncated quals", "truncated_quals.sam", SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_QUALS_LENGTH},
// TODO: Because validation is turned off when parsing, this error is not detectable currently by validator.
//{"truncated tag", "truncated_tag.sam", SAMValidationError.Type.TRUNCATED_FILE},
// TODO: Currently, this is not considered an error. Should it be?
//{"hanging tab", "hanging_tab.sam", SAMValidationError.Type.TRUNCATED_FILE},
};
}
@Test(expectedExceptions = SAMException.class, dataProvider = "testFatalParsingErrors")
public void testFatalParsingErrors(final String scenario, final String inputFile) throws Exception {
final SAMFileReader reader = new SAMFileReader(new File(TEST_DATA_DIR, inputFile));
executeValidation(reader, null);
Assert.fail("Exception should have been thrown.");
}
@DataProvider(name = "testFatalParsingErrors")
public Object[][] testFatalParsingErrorScenarios() {
return new Object[][] {
{"missing fields", "missing_fields.sam"},
{"zero length read", "zero_length_read.sam"}
};
}
@Test
public void testHeaderVersionValidation() throws Exception {
final String header = "@HD VN:Hi,Mom! SO:queryname";
final InputStream strm = new ByteArrayInputStream(StringUtil.stringToBytes(header));
final SAMFileReader samReader = new SAMFileReader(strm);
final Histogram<String> results = executeValidation(samReader, null);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_VERSION_NUMBER.getHistogramString()).getValue(), 1.0);
}
@Test(enabled=false, description="File is actually valid for Standard quality scores so this test fails with an NPE.")
public void testQualityFormatValidation() throws Exception {
final SAMFileReader samReader = new SAMFileReader(new File("./testdata/htsjdk/samtools/util/QualityEncodingDetectorTest/illumina-as-standard.bam"));
final Histogram<String> results = executeValidation(samReader, null);
final Histogram<String>.Bin bin = results.get(SAMValidationError.Type.INVALID_QUALITY_FORMAT.getHistogramString());
final double value = bin.getValue();
Assert.assertEquals(value, 1.0);
}
@Test
public void testCigarOffEndOfReferenceValidation() throws Exception {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
samBuilder.addFrag(String.valueOf(0), 0, 1, false);
final int contigLength = samBuilder.getHeader().getSequence(0).getSequenceLength();
// Should hang off the end.
samBuilder.addFrag(String.valueOf(1), 0, contigLength - 1, false);
final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
Assert.assertNotNull(results.get(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE.getHistogramString()));
Assert.assertEquals(results.get(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE.getHistogramString()).getValue(), 1.0);
}
@Test(expectedExceptions = SAMFormatException.class)
public void testConflictingTags() throws Exception {
final String header = "@HD VN:1.0 SO:queryname SO:coordinate";
final InputStream strm = new ByteArrayInputStream(StringUtil.stringToBytes(header));
final SAMFileReader samReader = new SAMFileReader(strm);
Assert.fail("Exception should have been thrown.");
}
@Test
public void testRedundantTags() throws Exception {
final String header = "@HD VN:1.0 SO:coordinate SO:coordinate";
final InputStream strm = new ByteArrayInputStream(StringUtil.stringToBytes(header));
final SAMFileReader samReader = new SAMFileReader(strm);
Assert.assertEquals(SAMFileHeader.SortOrder.coordinate, samReader.getFileHeader().getSortOrder());
}
@Test
public void testHeaderValidation() throws Exception {
final ValidationStringency saveStringency = SAMFileReader.getDefaultValidationStringency();
SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
try {
final SAMFileReader samReader = new SAMFileReader(new File(TEST_DATA_DIR, "buggyHeader.sam"));
final Histogram<String> results = executeValidation(samReader, null);
Assert.assertEquals(results.get(SAMValidationError.Type.UNRECOGNIZED_HEADER_TYPE.getHistogramString()).getValue(), 3.0);
Assert.assertEquals(results.get(SAMValidationError.Type.HEADER_TAG_MULTIPLY_DEFINED.getHistogramString()).getValue(), 1.0);
} finally {
SAMFileReader.setDefaultValidationStringency(saveStringency);
}
}
@Test
public void testPlatformMissing() throws Exception {
final ValidationStringency saveStringency = SAMFileReader.getDefaultValidationStringency();
SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
try {
final SAMFileReader samReader = new SAMFileReader(new File(TEST_DATA_DIR, "missing_platform_unit.sam"));
final Histogram<String> results = executeValidation(samReader, null);
Assert.assertEquals(results.get(SAMValidationError.Type.MISSING_PLATFORM_VALUE.getHistogramString()).getValue(), 1.0);
} finally {
SAMFileReader.setDefaultValidationStringency(saveStringency);
}
}
@Test
public void testDuplicateRGIDs() throws Exception {
final ValidationStringency saveStringency = SAMFileReader.getDefaultValidationStringency();
SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
try {
final SAMFileReader samReader = new SAMFileReader(new File(TEST_DATA_DIR, "duplicate_rg.sam"));
final Histogram<String> results = executeValidation(samReader, null);
Assert.assertEquals(results.get(SAMValidationError.Type.DUPLICATE_READ_GROUP_ID.getHistogramString()).getValue(), 1.0);
} finally {
SAMFileReader.setDefaultValidationStringency(saveStringency);
}
}
@Test
public void testIndexFileValidation() throws Exception {
final ValidationStringency saveStringency = SAMFileReader.getDefaultValidationStringency();
SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
try {
final SAMFileReader samReader = new SAMFileReader(new File(TEST_DATA_DIR, "bad_index.bam"));
samReader.enableIndexCaching(true);
final Histogram<String> results = executeValidation(samReader, null);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_INDEX_FILE_POINTER.getHistogramString()).getValue(), 1.0);
} finally {
SAMFileReader.setDefaultValidationStringency(saveStringency);
}
}
private Histogram<String> executeValidation(final SAMFileReader samReader, final ReferenceSequenceFile reference) throws IOException {
final File outFile = File.createTempFile("validation", ".txt");
outFile.deleteOnExit();
final PrintWriter out = new PrintWriter(outFile);
new SamFileValidator(out, 8000).setValidateIndex(true).validateSamFileSummary(samReader, reference);
final LineNumberReader reader = new LineNumberReader(new FileReader(outFile));
if (reader.readLine().equals("No errors found")) {
return new Histogram<String>();
}
final MetricsFile<MetricBase, String> outputFile = new MetricsFile<MetricBase, String>();
outputFile.read(new FileReader(outFile));
Assert.assertNotNull(outputFile.getHistogram());
return outputFile.getHistogram();
}
@Test(dataProvider = "headerVersions")
public void testHeaderVersion(final String version, final boolean expectValid) throws Exception {
final File samFile = File.createTempFile("validateHeader.", ".sam");
samFile.deleteOnExit();
final PrintWriter pw = new PrintWriter(samFile);
pw.println("@HD\tVN:" + version);
pw.close();
final SAMFileReader reader = new SAMFileReader(samFile);
final Histogram<String> results = executeValidation(reader, null);
if (expectValid) Assert.assertNull(results.get(SAMValidationError.Type.INVALID_VERSION_NUMBER.getHistogramString()));
else {
Assert.assertNotNull(results.get(SAMValidationError.Type.INVALID_VERSION_NUMBER.getHistogramString()));
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_VERSION_NUMBER.getHistogramString()).getValue(), 1.0);
}
}
@DataProvider(name = "headerVersions")
public Object[][] testHeaderVersionScenarios() {
return new Object[][] {
{"1.0", true},
{"1.3", true},
{"1.4", true},
{"1.5", false},
};
}
}