/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package htsjdk.variant.variantcontext;
// the imports for unit testing.
import htsjdk.variant.VariantBaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public class VariantContextUnitTest extends VariantBaseTest {
// Shared allele fixtures; all of them are assigned in before().
Allele A, Aref, C, T, Tref;
Allele del, delRef, ATC, ATCref;
// SNP site: A [ref] / T at chr1:10
String snpLoc = "chr1";
int snpLocStart = 10;
int snpLocStop = 10;
// Deletion site: ATC [ref] / A (del) spanning chr1:20-22
String delLoc = "chr1";
int delLocStart = 20;
int delLocStop = 22;
// Insertion site: A [ref] (delRef) / ATC at chr1:20-20
String insLoc = "chr1";
int insLocStart = 20;
int insLocStop = 20;
// Builders that beforeTest() re-creates ahead of every test method.
VariantContextBuilder basicBuilder, snpBuilder, insBuilder;
/**
 * Creates the allele fixtures shared by the tests in this class.
 * Annotated with BeforeSuite, so TestNG invokes it before the suite's tests run.
 */
@BeforeSuite
public void before() {
    // non-reference alleles
    A = Allele.create("A");
    C = Allele.create("C");
    T = Allele.create("T");
    ATC = Allele.create("ATC");
    del = Allele.create("A");

    // reference alleles
    Aref = Allele.create("A", true);
    Tref = Allele.create("T", true);
    ATCref = Allele.create("ATC", true);
    delRef = Allele.create("A", true);
}
/**
 * Re-creates the three shared builders before each test method so that
 * changes made to a builder by one test are not seen by the next.
 */
@BeforeMethod
public void beforeTest() {
    final List<Allele> basicAlleles = Arrays.asList(Aref, T);
    final List<Allele> snpAlleles = Arrays.asList(Aref, T);
    final List<Allele> insertionAlleles = Arrays.asList(delRef, ATC);

    basicBuilder = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, basicAlleles);
    snpBuilder = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, snpAlleles);
    insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, insertionAlleles);
}
/**
 * Checks the VariantContext.Type reported for a series of allele combinations:
 * reference-only, SNP, MNP, indel, mixed and symbolic.
 */
@Test
public void testDetermineTypes() {
    final Allele refAC = Allele.create("AC", true);
    final Allele altAC = Allele.create("AC");
    final Allele altAT = Allele.create("AT");
    final Allele altC = Allele.create("C");
    final Allele altCAT = Allele.create("CAT");
    final Allele refTA = Allele.create("TA", true);
    final Allele altTA = Allele.create("TA");
    final Allele altTC = Allele.create("TC");
    final Allele symbolicFoo = Allele.create("<FOO>");

    // stop coordinates for reference alleles of one, two and three bases
    final int stop1 = snpLocStop;
    final int stop2 = snpLocStop + 1;
    final int stop3 = snpLocStop + 2;

    // reference only
    assertDeterminedType(snpBuilder, stop1, VariantContext.Type.NO_VARIATION, Tref);

    // SNPs
    assertDeterminedType(snpBuilder, stop1, VariantContext.Type.SNP, Tref, A);
    assertDeterminedType(snpBuilder, stop1, VariantContext.Type.SNP, Tref, A, altC);

    // MNPs
    assertDeterminedType(snpBuilder, stop2, VariantContext.Type.MNP, refAC, altTA);
    assertDeterminedType(basicBuilder, stop3, VariantContext.Type.MNP, ATCref, altCAT, Allele.create("GGG"));

    // indels
    assertDeterminedType(basicBuilder, stop1, VariantContext.Type.INDEL, Aref, ATC);
    assertDeterminedType(basicBuilder, stop3, VariantContext.Type.INDEL, ATCref, A);
    assertDeterminedType(basicBuilder, stop1, VariantContext.Type.INDEL, Tref, altTA, altTC);
    assertDeterminedType(basicBuilder, stop3, VariantContext.Type.INDEL, ATCref, A, altAC);
    assertDeterminedType(basicBuilder, stop3, VariantContext.Type.INDEL, ATCref, A, Allele.create("ATCTC"));

    // mixed
    assertDeterminedType(basicBuilder, stop2, VariantContext.Type.MIXED, refTA, T, altTC);
    assertDeterminedType(basicBuilder, stop2, VariantContext.Type.MIXED, refTA, T, altAC);
    assertDeterminedType(basicBuilder, stop2, VariantContext.Type.MIXED, refAC, ATC, altAT);
    assertDeterminedType(basicBuilder, stop1, VariantContext.Type.MIXED, Aref, T, symbolicFoo);

    // symbolic
    assertDeterminedType(basicBuilder, stop1, VariantContext.Type.SYMBOLIC, Tref, symbolicFoo);
}

/**
 * Sets the given alleles and stop on the builder, makes the context and asserts its type.
 *
 * @param builder  builder to reuse; its alleles and stop are overwritten
 * @param stop     stop coordinate to set on the builder
 * @param expected type the resulting context must report
 * @param alleles  alleles for the context, reference allele first
 */
private static void assertDeterminedType(final VariantContextBuilder builder, final int stop,
                                         final VariantContext.Type expected, final Allele... alleles) {
    final List<Allele> alleleList = Arrays.asList(alleles);
    final VariantContext built = builder.alleles(alleleList).stop(stop).make();
    Assert.assertEquals(built.getType(), expected);
}
/**
 * Checks that a multi-allelic SNP context returns its alleles in the order
 * they were supplied, for both A/C/T and A/T/C orderings.
 */
@Test
public void testMultipleSNPAlleleOrdering() {
    final List<Allele> alphabetical = Arrays.asList(Aref, C, T);
    final VariantContext alphabeticalVC = snpBuilder.alleles(alphabetical).make();

    final List<Allele> swapped = Arrays.asList(Aref, T, C);
    final VariantContext swappedVC = snpBuilder.alleles(swapped).make();

    Assert.assertEquals(new ArrayList<Allele>(alphabeticalVC.getAlleles()), alphabetical);
    Assert.assertEquals(new ArrayList<Allele>(swappedVC.getAlleles()), swapped);
}
/**
 * Builds a biallelic SNP (A reference, T alternate) from snpBuilder and checks
 * its location, type predicates, allele accessors and lack of genotypes.
 */
@Test
public void testCreatingSNPVariantContext() {
    final List<Allele> snpAlleles = Arrays.asList(Aref, T);
    final VariantContext snp = snpBuilder.alleles(snpAlleles).make();

    // location
    Assert.assertEquals(snp.getChr(), snpLoc);
    Assert.assertEquals(snp.getStart(), snpLocStart);
    Assert.assertEquals(snp.getEnd(), snpLocStop);

    // type and type predicates
    Assert.assertEquals(snp.getType(), VariantContext.Type.SNP);
    Assert.assertTrue(snp.isSNP());
    Assert.assertFalse(snp.isIndel());
    Assert.assertFalse(snp.isSimpleInsertion());
    Assert.assertFalse(snp.isSimpleDeletion());
    Assert.assertFalse(snp.isSimpleIndel());
    Assert.assertFalse(snp.isMixed());
    Assert.assertTrue(snp.isBiallelic());

    // alleles
    Assert.assertEquals(snp.getNAlleles(), 2);
    Assert.assertEquals(snp.getReference(), Aref);
    Assert.assertEquals(snp.getAlleles().size(), 2);
    Assert.assertEquals(snp.getAlternateAlleles().size(), 1);
    Assert.assertEquals(snp.getAlternateAllele(0), T);

    // genotypes
    Assert.assertFalse(snp.hasGenotypes());
    Assert.assertEquals(snp.getSampleNames().size(), 0);
}
/**
 * Builds a context holding only the A reference allele and checks that it is
 * reported as NO_VARIATION, with no alternate alleles and no genotypes.
 */
@Test
public void testCreatingRefVariantContext() {
    List<Allele> alleles = Arrays.asList(Aref);
    VariantContext vc = snpBuilder.alleles(alleles).make();

    Assert.assertEquals(vc.getChr(), snpLoc);
    Assert.assertEquals(vc.getStart(), snpLocStart);
    Assert.assertEquals(vc.getEnd(), snpLocStop);
    // TestNG's assertEquals takes (actual, expected), as the other tests in this class do
    Assert.assertEquals(vc.getType(), VariantContext.Type.NO_VARIATION);

    Assert.assertFalse(vc.isSNP());
    Assert.assertFalse(vc.isIndel());
    Assert.assertFalse(vc.isSimpleInsertion());
    Assert.assertFalse(vc.isSimpleDeletion());
    Assert.assertFalse(vc.isSimpleIndel());
    Assert.assertFalse(vc.isMixed());
    Assert.assertFalse(vc.isBiallelic());
    Assert.assertEquals(vc.getNAlleles(), 1);

    Assert.assertEquals(vc.getReference(), Aref);
    Assert.assertEquals(vc.getAlleles().size(), 1);
    Assert.assertEquals(vc.getAlternateAlleles().size(), 0);

    Assert.assertFalse(vc.hasGenotypes());
    Assert.assertEquals(vc.getSampleNames().size(), 0);
}
/**
 * Builds a deletion (ATC reference, del alternate) on the deletion interval and
 * checks its location, type predicates, allele accessors and lack of genotypes.
 */
@Test
public void testCreatingDeletionVariantContext() {
    final List<Allele> deletionAlleles = Arrays.asList(ATCref, del);
    final VariantContextBuilder builder =
            new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, deletionAlleles);
    final VariantContext deletion = builder.make();

    // location
    Assert.assertEquals(deletion.getChr(), delLoc);
    Assert.assertEquals(deletion.getStart(), delLocStart);
    Assert.assertEquals(deletion.getEnd(), delLocStop);

    // type and type predicates
    Assert.assertEquals(deletion.getType(), VariantContext.Type.INDEL);
    Assert.assertFalse(deletion.isSNP());
    Assert.assertTrue(deletion.isIndel());
    Assert.assertFalse(deletion.isSimpleInsertion());
    Assert.assertTrue(deletion.isSimpleDeletion());
    Assert.assertTrue(deletion.isSimpleIndel());
    Assert.assertFalse(deletion.isMixed());
    Assert.assertTrue(deletion.isBiallelic());

    // alleles
    Assert.assertEquals(deletion.getNAlleles(), 2);
    Assert.assertEquals(deletion.getReference(), ATCref);
    Assert.assertEquals(deletion.getAlleles().size(), 2);
    Assert.assertEquals(deletion.getAlternateAlleles().size(), 1);
    Assert.assertEquals(deletion.getAlternateAllele(0), del);

    // genotypes
    Assert.assertFalse(deletion.hasGenotypes());
    Assert.assertEquals(deletion.getSampleNames().size(), 0);
}
/**
 * Builds a T reference / ATC alternate context on the insertion interval and
 * checks that it is an indel but neither a simple insertion nor a simple deletion.
 */
@Test
public void testCreatingComplexSubstitutionVariantContext() {
    final List<Allele> substitutionAlleles = Arrays.asList(Tref, ATC);
    final VariantContextBuilder builder =
            new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, substitutionAlleles);
    final VariantContext substitution = builder.make();

    // location
    Assert.assertEquals(substitution.getChr(), insLoc);
    Assert.assertEquals(substitution.getStart(), insLocStart);
    Assert.assertEquals(substitution.getEnd(), insLocStop);

    // type and type predicates
    Assert.assertEquals(substitution.getType(), VariantContext.Type.INDEL);
    Assert.assertFalse(substitution.isSNP());
    Assert.assertTrue(substitution.isIndel());
    Assert.assertFalse(substitution.isSimpleInsertion());
    Assert.assertFalse(substitution.isSimpleDeletion());
    Assert.assertFalse(substitution.isSimpleIndel());
    Assert.assertFalse(substitution.isMixed());
    Assert.assertTrue(substitution.isBiallelic());

    // alleles
    Assert.assertEquals(substitution.getNAlleles(), 2);
    Assert.assertEquals(substitution.getReference(), Tref);
    Assert.assertEquals(substitution.getAlleles().size(), 2);
    Assert.assertEquals(substitution.getAlternateAlleles().size(), 1);
    Assert.assertEquals(substitution.getAlternateAllele(0), ATC);

    // genotypes
    Assert.assertFalse(substitution.hasGenotypes());
    Assert.assertEquals(substitution.getSampleNames().size(), 0);
}
/**
 * Checks that two contexts built from the same allele list at different
 * positions report the same alleles and the same alternate alleles.
 */
@Test
public void testMatchingAlleles() {
    final List<Allele> sharedAlleles = Arrays.asList(ATCref, del);
    final int shift = 12;

    final VariantContext first =
            new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, sharedAlleles).make();
    final VariantContext shifted =
            new VariantContextBuilder("test2", delLoc, delLocStart + shift, delLocStop + shift, sharedAlleles).make();

    Assert.assertTrue(first.hasSameAllelesAs(shifted));
    Assert.assertTrue(first.hasSameAlternateAllelesAs(shifted));
}
/**
 * Builds an insertion (delRef reference, ATC alternate) from insBuilder and checks
 * its location, type predicates, allele accessors and lack of genotypes.
 */
@Test
public void testCreatingInsertionVariantContext() {
    final List<Allele> insertionAlleles = Arrays.asList(delRef, ATC);
    final VariantContext insertion = insBuilder.alleles(insertionAlleles).make();

    // location
    Assert.assertEquals(insertion.getChr(), insLoc);
    Assert.assertEquals(insertion.getStart(), insLocStart);
    Assert.assertEquals(insertion.getEnd(), insLocStop);

    // type and type predicates
    Assert.assertEquals(insertion.getType(), VariantContext.Type.INDEL);
    Assert.assertFalse(insertion.isSNP());
    Assert.assertTrue(insertion.isIndel());
    Assert.assertTrue(insertion.isSimpleInsertion());
    Assert.assertFalse(insertion.isSimpleDeletion());
    Assert.assertTrue(insertion.isSimpleIndel());
    Assert.assertFalse(insertion.isMixed());
    Assert.assertTrue(insertion.isBiallelic());

    // alleles
    Assert.assertEquals(insertion.getNAlleles(), 2);
    Assert.assertEquals(insertion.getReference(), delRef);
    Assert.assertEquals(insertion.getAlleles().size(), 2);
    Assert.assertEquals(insertion.getAlternateAlleles().size(), 1);
    Assert.assertEquals(insertion.getAlternateAllele(0), ATC);

    // genotypes
    Assert.assertFalse(insertion.hasGenotypes());
    Assert.assertEquals(insertion.getSampleNames().size(), 0);
}
/**
 * Builds a SNP context with one sample whose genotype is C plus a no-call allele,
 * then checks the chromosome counts and that the genotype is classified as MIXED.
 */
@Test
public void testCreatingPartiallyCalledGenotype() {
    List<Allele> alleles = Arrays.asList(Aref, C);
    Genotype g = GenotypeBuilder.create("foo", Arrays.asList(C, Allele.NO_CALL));
    VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g).make();

    Assert.assertTrue(vc.isSNP());
    Assert.assertEquals(vc.getNAlleles(), 2);
    Assert.assertTrue(vc.hasGenotypes());
    Assert.assertFalse(vc.isMonomorphicInSamples());
    Assert.assertTrue(vc.isPolymorphicInSamples());
    Assert.assertEquals(vc.getGenotype("foo"), g);

    // only one chromosome is called; the NO_CALL allele is not counted
    Assert.assertEquals(vc.getCalledChrCount(), 1);
    Assert.assertEquals(vc.getCalledChrCount(Aref), 0);
    Assert.assertEquals(vc.getCalledChrCount(C), 1);

    // a genotype with one called and one no-call allele is MIXED and nothing else
    final Genotype foo = vc.getGenotype("foo");
    Assert.assertFalse(foo.isHet());
    Assert.assertFalse(foo.isHom());
    Assert.assertFalse(foo.isNoCall());
    Assert.assertTrue(foo.isMixed());
    Assert.assertEquals(foo.getType(), GenotypeType.MIXED);
}
/** Expects an exception when the allele list holds two reference alleles (delRef and ATCref). */
@Test (expectedExceptions = Exception.class)
public void testBadConstructorArgs1() {
    final List<Allele> twoReferenceAlleles = Arrays.asList(delRef, ATCref);
    final VariantContextBuilder builder =
            new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, twoReferenceAlleles);
    builder.make();
}
/**
 * Expects an exception when the reference and alternate alleles carry the same
 * bases (delRef and del are both created from "A").
 */
@Test (expectedExceptions = Exception.class)
public void testBadConstructorArgs2() {
    final List<Allele> sameBasesTwice = Arrays.asList(delRef, del);
    final VariantContextBuilder builder =
            new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, sameBasesTwice);
    builder.make();
}
/** Expects an exception when the allele list holds only a non-reference allele. */
@Test (expectedExceptions = Exception.class)
public void testBadConstructorArgs3() {
    final List<Allele> noReferenceAllele = Arrays.asList(del);
    final VariantContextBuilder builder =
            new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, noReferenceAllele);
    builder.make();
}
/** Expects a Throwable when the allele list is empty. */
@Test (expectedExceptions = Throwable.class)
public void testBadConstructorArgs4() {
    final List<Allele> noAlleles = Collections.<Allele>emptyList();
    final VariantContextBuilder builder =
            new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, noAlleles);
    builder.make();
}
/** Expects an exception when the same alternate allele (T) is listed twice. */
@Test (expectedExceptions = Exception.class)
public void testBadConstructorArgsDuplicateAlleles1() {
    final List<Allele> repeatedAlternate = Arrays.asList(Aref, T, T);
    final VariantContextBuilder builder =
            new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, repeatedAlternate);
    builder.make();
}
/** Expects an exception when the alternate allele has the same bases as the reference (A). */
@Test (expectedExceptions = Exception.class)
public void testBadConstructorArgsDuplicateAlleles2() {
    final List<Allele> alternateEqualsReference = Arrays.asList(Aref, A);
    final VariantContextBuilder builder =
            new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, alternateEqualsReference);
    builder.make();
}
/**
 * Expects a Throwable when a single-base reference allele is placed on the
 * three-base deletion interval (20-22).
 *
 * NOTE(review): assumes the context validates the interval length against the
 * reference allele length - confirm against VariantContext.
 */
@Test (expectedExceptions = Throwable.class)
public void testBadLoc1() {
    // The allele list previously also contained del, which has the same bases as Aref and
    // therefore already matches the failure covered by testBadConstructorArgsDuplicateAlleles2.
    // Leaving it out keeps the location mismatch as the only invalid input.
    List<Allele> alleles = Arrays.asList(Aref, T);
    new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
}
/**
 * Expects a Throwable when the ID is set to null.
 *
 * Uses the SNP coordinates, which match the single-base reference allele, so the
 * null ID is the only invalid input. The deletion coordinates used before (20-22)
 * are the same mismatch that testBadLoc1 expects to fail on its own.
 */
@Test (expectedExceptions = Throwable.class)
public void testBadID1() {
    new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).id(null).make();
}
/**
 * Expects an exception when the ID is set to the empty string.
 *
 * Uses the SNP coordinates, which match the single-base reference allele, so the
 * empty ID is the only invalid input. The deletion coordinates used before (20-22)
 * are the same mismatch that testBadLoc1 expects to fail on its own.
 */
@Test (expectedExceptions = Exception.class)
public void testBadID2() {
    new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).id("").make();
}
/**
 * Expects a Throwable when log10PError is set to a positive value (0.5).
 *
 * Uses the valid insertion alleles (delRef, ATC) that beforeTest() also uses, so the
 * error value is the only invalid input. The previous allele list (delRef, ATCref)
 * held two reference alleles, which testBadConstructorArgs1 already expects to fail.
 */
@Test (expectedExceptions = Throwable.class)
public void testBadPError() {
    new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)).log10PError(0.5).make();
}
/**
 * Builds a SNP context with three samples (AA, AT, TT) and checks genotype lookup
 * by sample name, case-sensitive name matching and called chromosome counts.
 */
@Test
public void testAccessingSimpleSNPGenotypes() {
    final List<Allele> siteAlleles = Arrays.asList(Aref, T);
    final Genotype homRef = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
    final Genotype het = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
    final Genotype homVar = GenotypeBuilder.create("TT", Arrays.asList(T, T));

    final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, siteAlleles)
            .genotypes(homRef, het, homVar)
            .make();

    Assert.assertTrue(vc.hasGenotypes());
    Assert.assertFalse(vc.isMonomorphicInSamples());
    Assert.assertTrue(vc.isPolymorphicInSamples());
    Assert.assertEquals(vc.getSampleNames().size(), 3);
    Assert.assertEquals(vc.getGenotypes().size(), 3);

    // every sample is reachable by name through both lookup routes
    final Genotype[] present = {homRef, het, homVar};
    final String[] presentNames = {"AA", "AT", "TT"};
    for (int i = 0; i < present.length; i++) {
        Assert.assertEquals(vc.getGenotypes().get(presentNames[i]), present[i]);
        Assert.assertEquals(vc.getGenotype(presentNames[i]), present[i]);
    }
    for (final String sample : presentNames) {
        Assert.assertTrue(vc.hasGenotype(sample));
    }

    // unknown names, including different-case variants of real ones, are not found
    for (final String sample : new String[]{"foo", "TTT", "at", "tt"}) {
        Assert.assertFalse(vc.hasGenotype(sample));
    }

    Assert.assertEquals(vc.getCalledChrCount(), 6);
    Assert.assertEquals(vc.getCalledChrCount(Aref), 3);
    Assert.assertEquals(vc.getCalledChrCount(T), 3);
}
/**
 * Builds a three-allele context (A reference, T, ATC) with six samples, one of them
 * a full no-call, and checks genotype subsetting and per-allele chromosome counts.
 */
@Test
public void testAccessingCompleteGenotypes() {
    List<Allele> alleles = Arrays.asList(Aref, T, ATC);

    Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
    Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
    Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
    Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, ATC));
    Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(ATC, ATC));
    Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));

    VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
            .genotypes(g1, g2, g3, g4, g5, g6).make();

    Assert.assertTrue(vc.hasGenotypes());
    Assert.assertFalse(vc.isMonomorphicInSamples());
    Assert.assertTrue(vc.isPolymorphicInSamples());
    Assert.assertEquals(vc.getGenotypes().size(), 6);

    // TestNG's assertEquals takes (actual, expected)
    Assert.assertEquals(vc.getGenotypes(Arrays.asList("AA", "Td", "dd")).size(), 3);

    Assert.assertEquals(vc.getCalledChrCount(), 10);
    Assert.assertEquals(vc.getCalledChrCount(Aref), 3);
    Assert.assertEquals(vc.getCalledChrCount(T), 4);
    Assert.assertEquals(vc.getCalledChrCount(ATC), 3);
    Assert.assertEquals(vc.getCalledChrCount(Allele.NO_CALL), 2);
}
/**
 * For several allele lists, builds a context whose samples are all homozygous
 * reference or no-call and checks that it is monomorphic with the expected counts.
 */
@Test
public void testAccessingRefGenotypes() {
    List<Allele> alleles1 = Arrays.asList(Aref, T);
    List<Allele> alleles2 = Arrays.asList(Aref);
    // NOTE(review): alleles3 is identical to alleles1 - confirm whether a different list was intended
    List<Allele> alleles3 = Arrays.asList(Aref, T);
    for ( List<Allele> alleles : Arrays.asList(alleles1, alleles2, alleles3)) {
        Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref));
        Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref));
        Genotype g3 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
        VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
                .genotypes(g1, g2, g3).make();

        Assert.assertTrue(vc.hasGenotypes());
        Assert.assertTrue(vc.isMonomorphicInSamples());
        Assert.assertFalse(vc.isPolymorphicInSamples());
        Assert.assertEquals(vc.getGenotypes().size(), 3);

        // TestNG's assertEquals takes (actual, expected)
        Assert.assertEquals(vc.getCalledChrCount(), 4);
        Assert.assertEquals(vc.getCalledChrCount(Aref), 4);
        Assert.assertEquals(vc.getCalledChrCount(T), 0);
        Assert.assertEquals(vc.getCalledChrCount(Allele.NO_CALL), 2);
    }
}
/**
 * Checks the filter accessors on a context with no filters, then after one
 * filter is set, then after a set of three filters is set.
 */
@Test
public void testFilters() {
    List<Allele> alleles = Arrays.asList(Aref, T);
    Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
    Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));

    // no filters set on the builder
    VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1, g2).make();
    Assert.assertTrue(vc.isNotFiltered());
    Assert.assertFalse(vc.isFiltered());
    // TestNG's assertEquals takes (actual, expected)
    Assert.assertEquals(vc.getFilters().size(), 0);
    Assert.assertFalse(vc.filtersWereApplied());
    Assert.assertNull(vc.getFiltersMaybeNull());

    // a single filter
    vc = new VariantContextBuilder(vc).filters("BAD_SNP_BAD!").make();
    Assert.assertFalse(vc.isNotFiltered());
    Assert.assertTrue(vc.isFiltered());
    Assert.assertEquals(vc.getFilters().size(), 1);
    Assert.assertTrue(vc.filtersWereApplied());
    Assert.assertNotNull(vc.getFiltersMaybeNull());

    // a set of three filters
    Set<String> filters = new HashSet<String>(Arrays.asList("BAD_SNP_BAD!", "REALLY_BAD_SNP", "CHRIST_THIS_IS_TERRIBLE"));
    vc = new VariantContextBuilder(vc).filters(filters).make();
    Assert.assertFalse(vc.isNotFiltered());
    Assert.assertTrue(vc.isFiltered());
    Assert.assertEquals(vc.getFilters().size(), 3);
    Assert.assertTrue(vc.filtersWereApplied());
    Assert.assertNotNull(vc.getFiltersMaybeNull());
}
/**
 * Builds a context with one het, one hom-ref, one hom-var, one mixed and one
 * no-call sample and checks that each genotype-count accessor returns 1.
 */
@Test
public void testGetGenotypeCounts() {
    List<Allele> alleles = Arrays.asList(Aref, T);
    Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
    Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
    Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
    Genotype g4 = GenotypeBuilder.create("A.", Arrays.asList(Aref, Allele.NO_CALL));
    Genotype g5 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));

    // we need to create a new VariantContext each time
    // NOTE(review): presumably so each accessor is the first count query on its context - confirm
    // TestNG's assertEquals takes (actual, expected)
    VariantContext vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
    Assert.assertEquals(vc.getHetCount(), 1);
    vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
    Assert.assertEquals(vc.getHomRefCount(), 1);
    vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
    Assert.assertEquals(vc.getHomVarCount(), 1);
    vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
    Assert.assertEquals(vc.getMixedCount(), 1);
    vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
    Assert.assertEquals(vc.getNoCallCount(), 1);
}
/**
 * Builds a three-allele context with five samples, derives several sample
 * sub-contexts (with the second argument of subContextFromSamples set to true) and
 * checks their polymorphism, type predicates and reference chromosome counts.
 */
@Test
public void testVCFfromGenotypes() {
    List<Allele> alleles = Arrays.asList(Aref, C, T);
    Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
    Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
    Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
    Genotype g4 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
    Genotype g5 = GenotypeBuilder.create("AC", Arrays.asList(Aref, C));
    VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();

    // sub-contexts are named after the genotype numbers they keep
    VariantContext vc12 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true);
    VariantContext vc1 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName())), true);
    VariantContext vc23 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true);
    VariantContext vc4 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g4.getSampleName())), true);
    VariantContext vc14 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true);
    VariantContext vc125 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g2.getSampleName(), g5.getSampleName())), true);

    Assert.assertTrue(vc12.isPolymorphicInSamples());
    Assert.assertTrue(vc23.isPolymorphicInSamples());
    Assert.assertTrue(vc1.isMonomorphicInSamples());
    Assert.assertTrue(vc4.isMonomorphicInSamples());
    Assert.assertTrue(vc14.isMonomorphicInSamples());
    Assert.assertTrue(vc125.isPolymorphicInSamples());

    Assert.assertTrue(vc12.isSNP());
    Assert.assertTrue(vc12.isVariant());
    Assert.assertTrue(vc12.isBiallelic());

    Assert.assertFalse(vc1.isSNP());
    Assert.assertFalse(vc1.isVariant());
    Assert.assertFalse(vc1.isBiallelic());

    Assert.assertTrue(vc23.isSNP());
    Assert.assertTrue(vc23.isVariant());
    Assert.assertTrue(vc23.isBiallelic());

    Assert.assertFalse(vc4.isSNP());
    Assert.assertFalse(vc4.isVariant());
    Assert.assertFalse(vc4.isBiallelic());

    Assert.assertFalse(vc14.isSNP());
    Assert.assertFalse(vc14.isVariant());
    Assert.assertFalse(vc14.isBiallelic());

    Assert.assertTrue(vc125.isSNP());
    Assert.assertTrue(vc125.isVariant());
    Assert.assertFalse(vc125.isBiallelic());

    // TestNG's assertEquals takes (actual, expected)
    Assert.assertEquals(vc12.getCalledChrCount(Aref), 3);
    Assert.assertEquals(vc23.getCalledChrCount(Aref), 1);
    Assert.assertEquals(vc1.getCalledChrCount(Aref), 2);
    Assert.assertEquals(vc4.getCalledChrCount(Aref), 0);
    Assert.assertEquals(vc14.getCalledChrCount(Aref), 2);
    Assert.assertEquals(vc125.getCalledChrCount(Aref), 4);
}
/**
 * Checks the genotype accessors of a context built from a GenotypesContext:
 * lookup by sample name, lookup by a collection of names (unknown names are
 * expected to be skipped) and lookup by index.
 *
 * The method previously carried no Test annotation, so TestNG never ran it.
 */
@Test
public void testGetGenotypeMethods() {
    Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
    Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
    Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
    GenotypesContext gc = GenotypesContext.create(g1, g2, g3);
    VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).genotypes(gc).make();

    // by sample name
    Assert.assertEquals(vc.getGenotype("AA"), g1);
    Assert.assertEquals(vc.getGenotype("AT"), g2);
    Assert.assertEquals(vc.getGenotype("TT"), g3);
    Assert.assertNull(vc.getGenotype("CC"));

    // whole context and subsets by name
    Assert.assertEquals(vc.getGenotypes(), gc);
    Assert.assertEquals(vc.getGenotypes(Arrays.asList("AA", "AT")), Arrays.asList(g1, g2));
    Assert.assertEquals(vc.getGenotypes(Arrays.asList("AA", "TT")), Arrays.asList(g1, g3));
    Assert.assertEquals(vc.getGenotypes(Arrays.asList("AA", "AT", "TT")), Arrays.asList(g1, g2, g3));
    Assert.assertEquals(vc.getGenotypes(Arrays.asList("AA", "AT", "CC")), Arrays.asList(g1, g2));

    // by index
    Assert.assertEquals(vc.getGenotype(0), g1);
    Assert.assertEquals(vc.getGenotype(1), g2);
    Assert.assertEquals(vc.getGenotype(2), g3);
}
// --------------------------------------------------------------------------------
//
// Test allele merging
//
// --------------------------------------------------------------------------------
/** One data-provider case for testMergeAlleles: a display name plus an ordered allele list. */
private class GetAllelesTest {
    List<Allele> alleles;
    String name;

    /**
     * @param name label shown in the test report
     * @param arg  alleles of the case, in the order they should be passed to the builder
     */
    private GetAllelesTest(String name, Allele... arg) {
        this.alleles = Arrays.asList(arg);
        this.name = name;
    }

    @Override
    public String toString() {
        return name + " input=" + alleles;
    }
}
/**
 * Supplies the allele lists for testMergeAlleles, from a reference-only list up
 * to four alleles in several orderings.
 *
 * @return one row per case, each holding a single GetAllelesTest
 */
@DataProvider(name = "getAlleles")
public Object[][] mergeAllelesData() {
    final GetAllelesTest[] cases = {
        new GetAllelesTest("A*", Aref),
        new GetAllelesTest("A*/C", Aref, C),
        new GetAllelesTest("A*/C/T", Aref, C, T),
        new GetAllelesTest("A*/T/C", Aref, T, C),
        new GetAllelesTest("A*/C/T/ATC", Aref, C, T, ATC),
        new GetAllelesTest("A*/T/C/ATC", Aref, T, C, ATC),
        new GetAllelesTest("A*/ATC/T/C", Aref, ATC, T, C),
    };

    final Object[][] rows = new Object[cases.length][];
    for (int i = 0; i < cases.length; i++) {
        rows[i] = new Object[]{cases[i]};
    }
    return rows;
}
/**
 * Builds a context from the supplied allele list and checks that the allele
 * accessors return the same alleles in the same order, and that an allele not
 * in the list is not found.
 *
 * @param cfg allele list under test; the first allele is treated as the reference
 */
@Test(dataProvider = "getAlleles")
public void testMergeAlleles(GetAllelesTest cfg) {
    final List<Allele> altAlleles = cfg.alleles.subList(1, cfg.alleles.size());
    final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).make();

    Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles");
    Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size");
    Assert.assertEquals(vc.getAlternateAlleles(), altAlleles, "VC alt alleles not the same as input alt alleles");

    for ( int i = 0; i < cfg.alleles.size(); i++ ) {
        final Allele inputAllele = cfg.alleles.get(i);

        Assert.assertTrue(vc.hasAllele(inputAllele));
        if ( inputAllele.isReference() ) {
            // the non-reference twin of the reference allele is found only when the
            // second argument of hasAllele is true
            final Allele nonRefVersion = Allele.create(inputAllele.getBases(), false);
            Assert.assertTrue(vc.hasAllele(nonRefVersion, true));
            Assert.assertFalse(vc.hasAllele(nonRefVersion, false));
        }

        // TestNG's assertEquals takes (actual, expected)
        Assert.assertEquals(vc.getAllele(inputAllele.getBaseString()), inputAllele);
        Assert.assertEquals(vc.getAllele(inputAllele.getBases()), inputAllele);

        if ( i > 0 ) { // it's an alt allele
            Assert.assertEquals(vc.getAlternateAllele(i-1), inputAllele);
        }
    }

    final Allele missingAllele = Allele.create("AACCGGTT"); // does not exist
    Assert.assertNull(vc.getAllele(missingAllele.getBases()));
    Assert.assertFalse(vc.hasAllele(missingAllele));
    Assert.assertFalse(vc.hasAllele(missingAllele, true));
}
/**
 * One data-provider case for runModifyVCTests: a context together with a copy
 * made from it at construction time, used to check the original is left unchanged.
 */
private class SitesAndGenotypesVC {
    VariantContext vc, copy;
    String name;

    /**
     * @param name     label shown in the test report
     * @param original context under test; a copy is built from it immediately
     */
    private SitesAndGenotypesVC(String name, VariantContext original) {
        this.name = name;
        this.vc = original;
        final VariantContextBuilder copier = new VariantContextBuilder(original);
        this.copy = copier.make();
    }

    @Override
    public String toString() {
        return name + " input=" + vc;
    }
}
/**
 * Supplies two contexts for runModifyVCTests: a sites-only SNP and the same SNP
 * with three genotypes attached.
 *
 * @return one row per case, each holding a single SitesAndGenotypesVC
 */
@DataProvider(name = "SitesAndGenotypesVC")
public Object[][] MakeSitesAndGenotypesVCs() {
    final Genotype homRef = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
    final Genotype het = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
    final Genotype homVar = GenotypeBuilder.create("TT", Arrays.asList(T, T));

    final VariantContext sitesOnly =
            new VariantContextBuilder("sites", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).make();
    final VariantContext withGenotypes =
            new VariantContextBuilder(sitesOnly).source("genotypes").genotypes(homRef, het, homVar).make();

    return new Object[][]{
        {new SitesAndGenotypesVC("sites", sitesOnly)},
        {new SitesAndGenotypesVC("genotypes", withGenotypes)},
    };
}
// --------------------------------------------------------------------------------
//
// Test modifying routines
//
// --------------------------------------------------------------------------------
/**
 * Derives modified contexts from the supplied one (location, ID, filters,
 * attributes, genotypes) and checks each modification, then checks that the
 * original context still matches the copy taken before any modification.
 *
 * @param cfg the context under test and its pre-modification copy
 */
@Test(dataProvider = "SitesAndGenotypesVC")
public void runModifyVCTests(SitesAndGenotypesVC cfg) {
    VariantContext modified = new VariantContextBuilder(cfg.vc).loc("chr2", 123, 123).make();
    Assert.assertEquals(modified.getChr(), "chr2");
    Assert.assertEquals(modified.getStart(), 123);
    Assert.assertEquals(modified.getEnd(), 123);

    modified = new VariantContextBuilder(cfg.vc).id("newID").make();
    Assert.assertEquals(modified.getID(), "newID");

    Set<String> newFilters = Collections.singleton("newFilter");
    modified = new VariantContextBuilder(cfg.vc).filters(newFilters).make();
    Assert.assertEquals(modified.getFilters(), newFilters);

    // test the behavior when the builder's attribute object is null
    modified = new VariantContextBuilder(modified).attributes(null).make();
    Assert.assertTrue(modified.getAttributes().isEmpty());
    modified = new VariantContextBuilder(modified).attributes(null).rmAttribute("AC").make();
    Assert.assertTrue(modified.getAttributes().isEmpty());
    modified = new VariantContextBuilder(modified).attributes(null).attribute("AC", 1).make();
    Assert.assertEquals(modified.getAttribute("AC"), 1);

    // test the behavior when the builder's attribute object is not initialized
    modified = new VariantContextBuilder(modified.getSource(), modified.getChr(), modified.getStart(), modified.getEnd(), modified.getAlleles()).attribute("AC", 1).make();
    // previously this result was overwritten without being checked
    Assert.assertEquals(modified.getAttribute("AC"), 1);

    // test normal attribute modification
    modified = new VariantContextBuilder(cfg.vc).attribute("AC", 1).make();
    Assert.assertEquals(modified.getAttribute("AC"), 1);
    modified = new VariantContextBuilder(modified).attribute("AC", 2).make();
    Assert.assertEquals(modified.getAttribute("AC"), 2);

    Genotype g1 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref));
    Genotype g2 = GenotypeBuilder.create("AT2", Arrays.asList(Aref, T));
    Genotype g3 = GenotypeBuilder.create("TT2", Arrays.asList(T, T));
    GenotypesContext gc = GenotypesContext.create(g1,g2,g3);
    modified = new VariantContextBuilder(cfg.vc).genotypes(gc).make();
    Assert.assertEquals(modified.getGenotypes(), gc);
    modified = new VariantContextBuilder(cfg.vc).noGenotypes().make();
    Assert.assertTrue(modified.getGenotypes().isEmpty());

    // test that original hasn't changed
    Assert.assertEquals(cfg.vc.getChr(), cfg.copy.getChr());
    Assert.assertEquals(cfg.vc.getStart(), cfg.copy.getStart());
    Assert.assertEquals(cfg.vc.getEnd(), cfg.copy.getEnd());
    Assert.assertEquals(cfg.vc.getAlleles(), cfg.copy.getAlleles());
    Assert.assertEquals(cfg.vc.getAttributes(), cfg.copy.getAttributes());
    Assert.assertEquals(cfg.vc.getID(), cfg.copy.getID());
    Assert.assertEquals(cfg.vc.getGenotypes(), cfg.copy.getGenotypes());
    Assert.assertEquals(cfg.vc.getLog10PError(), cfg.copy.getLog10PError());
    Assert.assertEquals(cfg.vc.getFilters(), cfg.copy.getFilters());
}
// --------------------------------------------------------------------------------
//
// Test subcontext
//
// --------------------------------------------------------------------------------
/**
 * One data-provider case for runSubContextTest: the sample names to keep and
 * the flag passed as the second argument of subContextFromSamples.
 */
private class SubContextTest {
    Set<String> samples;
    boolean updateAlleles;

    /**
     * @param samples       sample names to subset to; copied into a new HashSet
     * @param updateAlleles flag forwarded to subContextFromSamples
     */
    private SubContextTest(Collection<String> samples, boolean updateAlleles) {
        this.updateAlleles = updateAlleles;
        this.samples = new HashSet<String>(samples);
    }

    @Override
    public String toString() {
        return "SubContextTest samples=" + samples + " updateAlleles=" + updateAlleles;
    }
}
/**
 * Supplies the cases for runSubContextTest: every sample selection below,
 * first with updateAlleles set to true and then with it set to false.
 *
 * @return one row per case, each holding a single SubContextTest
 */
@DataProvider(name = "SubContextTest")
public Object[][] MakeSubContextTest() {
    final List<Collection<String>> selections = new ArrayList<Collection<String>>();
    selections.add(Collections.<String>emptySet());
    selections.add(Collections.singleton("MISSING"));
    selections.add(Collections.singleton("AA"));
    selections.add(Collections.singleton("AT"));
    selections.add(Collections.singleton("TT"));
    selections.add(Arrays.asList("AA", "AT"));
    selections.add(Arrays.asList("AA", "AT", "TT"));
    selections.add(Arrays.asList("AA", "AT", "MISSING"));
    selections.add(Arrays.asList("AA", "AT", "TT", "MISSING"));
    selections.add(Arrays.asList("AA", "AT", "AC"));

    final List<Object[]> rows = new ArrayList<Object[]>();
    for (final boolean updateAlleles : new boolean[]{true, false}) {
        for (final Collection<String> selection : selections) {
            rows.add(new Object[]{new SubContextTest(selection, updateAlleles)});
        }
    }
    return rows.toArray(new Object[rows.size()][]);
}
/**
 * Builds a three-allele site (Aref, C, T) with genotypes AA, AT, TT and AC, takes the
 * subcontext for the samples in {@code cfg}, and checks that the site-level fields are
 * untouched, that the allele list matches the updateAlleles setting, and that exactly
 * the requested (and present) samples survive.
 */
@Test(dataProvider = "SubContextTest")
public void runSubContextTest(SubContextTest cfg) {
    final Genotype homRef = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
    final Genotype hetT = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
    final Genotype homT = GenotypeBuilder.create("TT", Arrays.asList(T, T));
    final Genotype hetC = GenotypeBuilder.create("AC", Arrays.asList(Aref, C));
    final GenotypesContext gc = GenotypesContext.create(homRef, hetT, homT, hetC);

    final VariantContextBuilder builder =
            new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, C, T));
    final VariantContext vc = builder.genotypes(gc).make();
    final VariantContext sub = vc.subContextFromSamples(cfg.samples, cfg.updateAlleles);

    // fields that subsetting must leave alone
    Assert.assertEquals(sub.getChr(), vc.getChr());
    Assert.assertEquals(sub.getStart(), vc.getStart());
    Assert.assertEquals(sub.getEnd(), vc.getEnd());
    Assert.assertEquals(sub.getLog10PError(), vc.getLog10PError());
    Assert.assertEquals(sub.getFilters(), vc.getFilters());
    Assert.assertEquals(sub.getID(), vc.getID());
    Assert.assertEquals(sub.getAttributes(), vc.getAttributes());

    // keep only the genotypes whose sample was requested
    final Set<Genotype> keptGenotypes = new HashSet<Genotype>();
    for (final Genotype candidate : Arrays.asList(homRef, hetT, homT, hetC)) {
        if (cfg.samples.contains(candidate.getSampleName())) {
            keptGenotypes.add(candidate);
        }
    }
    final GenotypesContext expectedGC = GenotypesContext.copy(keptGenotypes);

    // the expected allele list depends on whether alleles are recomputed
    final List<Allele> expectedAlleles;
    if (cfg.updateAlleles) {
        // reference first, then every alt allele still carried by a kept genotype
        expectedAlleles = new ArrayList<Allele>();
        expectedAlleles.add(Aref);

        final Set<Allele> allelesInUse = new HashSet<Allele>();
        for (final Genotype kept : expectedGC) {
            allelesInUse.addAll(kept.getAlleles());
        }
        allelesInUse.remove(Aref);

        // walk the original allele list so the original ordering is preserved
        for (final Allele original : vc.getAlleles()) {
            if (allelesInUse.contains(original)) {
                expectedAlleles.add(original);
            }
        }
    } else {
        // alleles are not recomputed, so the list must be unchanged
        expectedAlleles = vc.getAlleles();
    }
    Assert.assertEquals(sub.getAlleles(), expectedAlleles);

    // the surviving sample names must be exactly the expected ones
    Assert.assertTrue(sub.getGenotypes().getSampleNames().equals(expectedGC.getSampleNames()));
}
// --------------------------------------------------------------------------------
//
// Test sample name functions
//
// --------------------------------------------------------------------------------
/**
 * One parameterized case for the sample-name test: the sample names in the order
 * they are added, and the same names in the order expected when sorted by name.
 */
private class SampleNamesTest {
    List<String> sampleNames;
    List<String> sampleNamesInOrder;

    private SampleNamesTest(List<String> sampleNames, List<String> sampleNamesInOrder) {
        this.sampleNames = sampleNames;
        this.sampleNamesInOrder = sampleNamesInOrder;
    }

    /** Human-readable label listing both name orderings. */
    @Override
    public String toString() {
        final String description = String.format("%s samples=%s order=%s", "SampleNamesTest", sampleNames, sampleNamesInOrder);
        return description;
    }
}
/**
 * Builds the SampleNamesTest cases from a table of
 * {insertion order, expected name-sorted order} pairs.
 */
@DataProvider(name = "SampleNamesTest")
public Object[][] MakeSampleNamesTest() {
    // each row: { names as inserted, names as expected after sorting }
    final String[][][] insertedAndSorted = {
        {{"1"}, {"1"}},
        {{"2", "1"}, {"1", "2"}},
        {{"1", "2"}, {"1", "2"}},
        {{"1", "2", "3"}, {"1", "2", "3"}},
        {{"2", "1", "3"}, {"1", "2", "3"}},
        {{"2", "3", "1"}, {"1", "2", "3"}},
        {{"3", "1", "2"}, {"1", "2", "3"}},
        {{"3", "2", "1"}, {"1", "2", "3"}},
        {{"NA2", "NA1"}, {"NA1", "NA2"}},
    };

    final List<Object[]> cases = new ArrayList<Object[]>();
    for (final String[][] row : insertedAndSorted) {
        final List<String> inserted = Arrays.asList(row[0]);
        final List<String> sorted = Arrays.asList(row[1]);
        cases.add(new Object[]{new SampleNamesTest(inserted, sorted)});
    }
    return cases.toArray(new Object[cases.size()][]);
}
/**
 * Asserts that iterating {@code gIt} yields genotypes whose sample names equal
 * {@code names} position by position, and that both sequences have the same length.
 *
 * @param gIt   genotypes in the order under test
 * @param names expected sample names, in the expected order
 */
private static void assertGenotypesAreInOrder(Iterable<Genotype> gIt, List<String> names) {
    int i = 0;
    for ( final Genotype g : gIt ) {
        // fail with a clear message instead of an IndexOutOfBoundsException on extra genotypes
        Assert.assertTrue(i < names.size(), "More genotypes than expected sample names " + names);
        Assert.assertEquals(g.getSampleName(), names.get(i), "Unexpected genotype ordering");
        i++;
    }
    // without this check an iterable yielding too few (or zero) genotypes would pass silently
    Assert.assertEquals(i, names.size(), "Unexpected number of genotypes");
}
/**
 * Creates one Aref/T genotype per sample name in {@code cfg}, in the given insertion
 * order, and checks the sample-name accessors and the ordered genotype iterators of
 * the resulting VariantContext.
 */
@Test(dataProvider = "SampleNamesTest")
public void runSampleNamesTest(SampleNamesTest cfg) {
    final int sampleCount = cfg.sampleNames.size();
    final GenotypesContext gc = GenotypesContext.create(sampleCount);
    for (int k = 0; k < sampleCount; k++) {
        final String sampleName = cfg.sampleNames.get(k);
        gc.add(GenotypeBuilder.create(sampleName, Arrays.asList(Aref, T)));
    }

    final VariantContextBuilder builder =
            new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T));
    final VariantContext vc = builder.genotypes(gc).make();

    // the set of sample names must match regardless of insertion order
    final Set<String> expectedNames = new HashSet<String>(cfg.sampleNames);
    Assert.assertTrue(vc.getSampleNames().equals(expectedNames), "vc.getSampleNames() = " + vc.getSampleNames());

    // name-sorted views
    Assert.assertEquals(vc.getSampleNamesOrderedByName(), cfg.sampleNamesInOrder, "vc.getSampleNamesOrderedByName() = " + vc.getSampleNamesOrderedByName());
    assertGenotypesAreInOrder(vc.getGenotypesOrderedByName(), cfg.sampleNamesInOrder);

    // caller-supplied ordering: here, the original insertion order
    assertGenotypesAreInOrder(vc.getGenotypesOrderedBy(cfg.sampleNames), cfg.sampleNames);
}
/**
 * Builds contexts holding 0, 1, 2, ... genotypes drawn round-robin from five templates
 * (no-call, mixed, hom-ref, het, hom-var), tallies the expected counts by hand, and
 * compares them with the counting accessors of VariantContext.
 */
@Test
public void testGenotypeCounting() {
    final List<Genotype> templates = Arrays.asList(
            GenotypeBuilder.create("nocall", Arrays.asList(Allele.NO_CALL)),
            GenotypeBuilder.create("mixed", Arrays.asList(Aref, Allele.NO_CALL)),
            GenotypeBuilder.create("homRef", Arrays.asList(Aref, Aref)),
            GenotypeBuilder.create("het", Arrays.asList(Aref, T)),
            GenotypeBuilder.create("homVar", Arrays.asList(T, T)));

    final int nCycles = templates.size() * 10;
    for (int size = 0; size < nCycles; size++) {
        // hand-maintained expectations for this context
        int noCallGenotypes = 0;
        int noCallAlleles = 0;
        int refAlleles = 0;
        int altAlleles = 0;
        int mixedGenotypes = 0;
        int homRefGenotypes = 0;
        int hetGenotypes = 0;
        int homVarGenotypes = 0;
        int sampleCount = 0;

        final GenotypesContext gc = GenotypesContext.create();
        for (int idx = 0; idx < size; idx++) {
            sampleCount++;
            final Genotype template = templates.get(idx % templates.size());
            // name is unique within this context because idx differs for every sample
            final String name = String.format("%s_%d%d", template.getSampleName(), size, idx);
            gc.add(GenotypeBuilder.create(name, template.getAlleles()));

            switch (template.getType()) {
                case NO_CALL:
                    noCallGenotypes++;
                    noCallAlleles++;
                    break;
                case HOM_REF:
                    refAlleles += 2;
                    homRefGenotypes++;
                    break;
                case HET:
                    refAlleles++;
                    altAlleles++;
                    hetGenotypes++;
                    break;
                case HOM_VAR:
                    altAlleles += 2;
                    homVarGenotypes++;
                    break;
                case MIXED:
                    refAlleles++;
                    noCallAlleles++;
                    mixedGenotypes++;
                    break;
                default:
                    throw new RuntimeException("Unexpected genotype type " + template.getType());
            }
        }

        final VariantContextBuilder builder =
                new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T));
        final VariantContext vc = builder.genotypes(gc).make();

        Assert.assertEquals(vc.getNSamples(), sampleCount);
        // polymorphic/monomorphic status is only checked once there is at least one sample
        if (sampleCount > 0) {
            Assert.assertEquals(vc.isPolymorphicInSamples(), altAlleles > 0);
            Assert.assertEquals(vc.isMonomorphicInSamples(), altAlleles == 0);
        }

        // allele-level counts
        Assert.assertEquals(vc.getCalledChrCount(), refAlleles + altAlleles);
        Assert.assertEquals(vc.getCalledChrCount(Allele.NO_CALL), noCallAlleles);
        Assert.assertEquals(vc.getCalledChrCount(Aref), refAlleles);
        Assert.assertEquals(vc.getCalledChrCount(T), altAlleles);

        // genotype-level counts
        Assert.assertEquals(vc.getNoCallCount(), noCallGenotypes);
        Assert.assertEquals(vc.getHomRefCount(), homRefGenotypes);
        Assert.assertEquals(vc.getHetCount(), hetGenotypes);
        Assert.assertEquals(vc.getHomVarCount(), homVarGenotypes);
        Assert.assertEquals(vc.getMixedCount(), mixedGenotypes);
    }
}
/**
 * Smoke test: setting an attribute on a freshly constructed, otherwise empty builder.
 * There is no assertion; the test passes as long as the call does not throw.
 */
@Test
public void testSetAttribute() {
    new VariantContextBuilder().attribute("Test", "value");
}
}