/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools.util;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.SAMTextHeaderCodec;
import htsjdk.samtools.TextCigarCodec;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
/**
 * TestNG tests for {@link SequenceUtil}: sequence dictionary comparison, reconstruction of
 * reference bases from an alignment (read bases + CIGAR + MD tag), and counting of inserted and
 * deleted bases in a CIGAR.
 *
 * @author alecw@broadinstitute.org
 */
public class SequenceUtilTest {
    /** SAM text header line placed ahead of the single {@code @SQ} line in every test dictionary. */
    private static final String HEADER = "@HD\tVN:1.0\tSO:unsorted\n";

    /** Sequence length (LN) shared by the dictionary-comparison tests. */
    private static final int PHIX_LENGTH = 5386;

    /** Baseline UR attribute value shared by the dictionary-comparison tests. */
    private static final String PHIX_UR = "/seq/references/PhiX174/v0/PhiX174.fasta";

    /** Baseline M5 attribute value shared by the dictionary-comparison tests. */
    private static final String PHIX_MD5 = "3332ed720ac7eaa9b3655c06f6b9e196";

    /** Two dictionaries built from identical LN, UR and M5 values must compare as equal. */
    @Test
    public void testExactMatch() {
        final SAMSequenceDictionary sd1 = makeSequenceDictionary(PHIX_LENGTH, PHIX_UR, PHIX_MD5);
        final SAMSequenceDictionary sd2 = makeSequenceDictionary(PHIX_LENGTH, PHIX_UR, PHIX_MD5);
        SequenceUtil.assertSequenceDictionariesEqual(sd1, sd2);
    }

    /** Dictionaries whose M5 values differ must be rejected with a SequenceListsDifferException. */
    @Test(expectedExceptions = SequenceUtil.SequenceListsDifferException.class)
    public void testMismatch() {
        final SAMSequenceDictionary sd1 = makeSequenceDictionary(PHIX_LENGTH, PHIX_UR, PHIX_MD5);
        final SAMSequenceDictionary sd2 = makeSequenceDictionary(PHIX_LENGTH, PHIX_UR, "deadbeef");
        // TestNG fails this test by itself when the expected exception is not thrown,
        // so no trailing Assert.fail() is required here.
        SequenceUtil.assertSequenceDictionariesEqual(sd1, sd2);
    }

    /** UR values that differ only by a leading {@code file:} prefix must not cause a mismatch. */
    @Test
    public void testFileColonDifference() {
        final SAMSequenceDictionary sd1 = makeSequenceDictionary(PHIX_LENGTH, PHIX_UR, PHIX_MD5);
        final SAMSequenceDictionary sd2 = makeSequenceDictionary(PHIX_LENGTH, "file:" + PHIX_UR, PHIX_MD5);
        SequenceUtil.assertSequenceDictionariesEqual(sd1, sd2);
    }

    /**
     * UR values pointing at different paths ({@code v0} vs. {@code v1}) must not cause a mismatch
     * when LN and M5 agree; this test expects no exception.
     */
    @Test
    public void testURDifferent() {
        final SAMSequenceDictionary sd1 = makeSequenceDictionary(PHIX_LENGTH, PHIX_UR, PHIX_MD5);
        final SAMSequenceDictionary sd2 = makeSequenceDictionary(PHIX_LENGTH,
                "file:/seq/references/PhiX174/v1/PhiX174.fasta", PHIX_MD5);
        SequenceUtil.assertSequenceDictionariesEqual(sd1, sd2);
    }

    /**
     * Builds a one-sequence dictionary by decoding a textual SAM header consisting of
     * {@link #HEADER} followed by a single {@code @SQ} line for {@code phix174.seq}.
     *
     * @param length value written to the LN field
     * @param ur     value written to the UR field
     * @param m5     value written to the M5 field
     * @return the sequence dictionary of the decoded header
     */
    private SAMSequenceDictionary makeSequenceDictionary(final int length, final String ur, final String m5) {
        final String sequenceLine =
                String.format("@SQ\tSN:phix174.seq\tLN:%d\tUR:%s\tAS:PhiX174\tM5:%s\n", length, ur, m5);
        final String headerText = HEADER + sequenceLine;
        return new SAMTextHeaderCodec().decode(new StringLineReader(headerText), null).getSequenceDictionary();
    }

    /**
     * Checks that {@link SequenceUtil#makeReferenceFromAlignment} reconstructs the expected
     * reference string for a record assembled from the given bases, CIGAR and MD tag.
     *
     * @param seq                               read bases
     * @param cigar                             CIGAR string of the record
     * @param md                                value of the MD attribute
     * @param includeReferenceBasesForDeletions flag passed through to the method under test
     * @param expectedReference                 expected reconstructed reference, as a string
     */
    @Test(dataProvider = "makeReferenceFromAlignment")
    public void testMakeReferenceFromAlignment(final String seq, final String cigar, final String md,
                                               final boolean includeReferenceBasesForDeletions,
                                               final String expectedReference) {
        final SAMRecord rec = new SAMRecord(null);
        rec.setReadName("test");
        rec.setReadString(seq);
        rec.setCigarString(cigar);
        rec.setAttribute(SAMTag.MD.name(), md);

        final byte[] refBases = SequenceUtil.makeReferenceFromAlignment(rec, includeReferenceBasesForDeletions);
        Assert.assertEquals(StringUtil.bytesToString(refBases), expectedReference);
    }

    /**
     * Supplies rows of {read bases, CIGAR, MD, includeReferenceBasesForDeletions, expected reference}
     * for {@link #testMakeReferenceFromAlignment}.
     */
    @DataProvider(name = "makeReferenceFromAlignment")
    public Object[][] testMakeReferenceFromAlignmentDataProvider() {
        return new Object[][] {
                {"ACGTACGTACGT", "12M2H", "4GAAA4", true, "ACGTGAAAACGT"},
                {"ACGTACGTACGT", "2H12M", "12", false, "ACGTACGTACGT"},
                {"ACGTACGTACGT", "4M4I4M2H", "8", false, "ACGT----ACGT"},
                {"ACGTACGTACGT", "2S4M2I4M2S", "8", false, "00GTAC--ACGT00"},
                {"ACGTACGTACGT", "6M2D6M2H", "4GA^TT0TG4", true, "ACGTGATTTGACGT"},
                {"ACGTACGTACGT", "6M2D6M2H", "4GA^TT0TG4", false, "ACGTGATGACGT"},
                // When CIGAR has N, MD will not have skipped bases.
                {"ACGTACGTACGT", "6M2N6M2H", "4GA0TG4", true, "ACGTGANNTGACGT"},
                {"ACGTACGTACGT", "6M2N6M2H", "4GA0TG4", false, "ACGTGATGACGT"},
                {"ACGTACGTACGT", "6M2N6M2H", "4GATG4", true, "ACGTGANNTGACGT"},
                {"ACGTACGTACGT", "6M2N6M2H", "4GATG4", false, "ACGTGATGACGT"},
        };
    }

    /**
     * Checks {@link SequenceUtil#countInsertedBases} and {@link SequenceUtil#countDeletedBases}
     * against the expected totals for a decoded CIGAR string.
     *
     * @param cigarString   textual CIGAR to decode
     * @param insertedBases expected number of inserted bases
     * @param deletedBases  expected number of deleted bases
     */
    @Test(dataProvider = "countInsertedAndDeletedBasesTestCases")
    public void testCountInsertedAndDeletedBases(final String cigarString, final int insertedBases, final int deletedBases) {
        final TextCigarCodec codec = new TextCigarCodec();
        final Cigar cigar = codec.decode(cigarString);
        Assert.assertEquals(SequenceUtil.countInsertedBases(cigar), insertedBases);
        Assert.assertEquals(SequenceUtil.countDeletedBases(cigar), deletedBases);
    }

    /**
     * Supplies rows of {CIGAR string, expected inserted bases, expected deleted bases}
     * for {@link #testCountInsertedAndDeletedBases}.
     */
    @DataProvider(name = "countInsertedAndDeletedBasesTestCases")
    public Object[][] countInsertedAndDeletedBasesTestCases() {
        return new Object[][] {
                {"2H2S32M", 0, 0},
                {"2H2S32M12I2M2I3M", 14, 0},
                {"32M2D10M", 0, 2},
                {"32M2D10M3D1M", 0, 5},
                {"2H2S32M12I2M3D1M2I3M2D1M", 14, 5}
        };
    }
}