package htsjdk.variant.vcf;
import htsjdk.tribble.util.ParsingUtils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
 * Tests for {@link VCFEncoder}: textual formatting of doubles and the way genotype
 * FORMAT fields are written when some of a sample's values are absent.
 */
public class VCFEncoderTest {

    /**
     * Supplies {@code (value, expectedText)} pairs for {@code VCFEncoder.formatVCFDouble}.
     * The table expects two decimals for values of 1 and above (and for zero), three
     * decimals for the listed values between 0.01 and 1, exponent notation for the
     * smaller values, and the plain Java names for infinities and NaN.
     */
    @DataProvider(name = "VCFWriterDoubleFormatTestData")
    public Object[][] makeVCFWriterDoubleFormatTestData() {
        return new Object[][]{
                // two decimal places
                {1.0, "1.00"},
                {10.1, "10.10"},
                {10.01, "10.01"},
                {10.012, "10.01"},
                {10.015, "10.02"},
                {0.0, "0.00"},
                // three decimal places
                {0.5, "0.500"},
                {0.55, "0.550"},
                {0.555, "0.555"},
                {0.5555, "0.556"},
                {0.1, "0.100"},
                {0.050, "0.050"},
                {0.010, "0.010"},
                {0.012, "0.012"},
                // exponent notation
                {0.0012, "1.200e-03"},
                {1.2e-4, "1.200e-04"},
                {1.21e-4, "1.210e-04"},
                {1.212e-5, "1.212e-05"},
                {1.2123e-6, "1.212e-06"},
                // non-finite values
                {Double.POSITIVE_INFINITY, "Infinity"},
                {Double.NEGATIVE_INFINITY, "-Infinity"},
                {Double.NaN, "NaN"},
        };
    }

    /**
     * Checks that a double is rendered exactly as the data provider expects.
     *
     * @param d        value to format
     * @param expected text the encoder should produce for {@code d}
     */
    @Test(dataProvider = "VCFWriterDoubleFormatTestData")
    public void testVCFWriterDoubleFormatTestData(final double d, final String expected) {
        final String formatted = VCFEncoder.formatVCFDouble(d);
        Assert.assertEquals(formatted, expected, "Failed to pretty print double in VCFWriter");
    }

    /**
     * Supplies rows of {@code (encoder, variant, expectedLastColumn, alleleMap, formatKeys)}.
     * Each scenario is emitted twice: once for an encoder built with the third constructor
     * flag {@code false} and once with it {@code true}. Per the expected strings, the
     * {@code false} encoder omits trailing FORMAT fields that have no value while the
     * {@code true} encoder writes them as {@code "."}; a missing field followed by a
     * present one is written as {@code "."} by both.
     */
    @DataProvider(name = "MissingFormatTestData")
    public Object[][] makeMissingFormatTestData() {
        final VCFHeader sampleHeader = createSyntheticHeader(Arrays.asList("Sample1"));
        final VCFEncoder dropMissing = new VCFEncoder(sampleHeader, false, false);
        final VCFEncoder keepMissing = new VCFEncoder(sampleHeader, false, true);

        // NOTE(review): log10PError(1) presumably means "no error value recorded" - confirm.
        final VariantContextBuilder siteBuilder = new VariantContextBuilder()
                .chr("1")
                .start(1)
                .stop(1)
                .noID()
                .passFilters()
                .log10PError(1)
                .alleles("A", "C");
        final GenotypeBuilder genotypeBuilder = new GenotypeBuilder("Sample1")
                .alleles(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));

        final List<String> formatKeys = Arrays.asList("GT", "AA", "BB");
        final Map<Allele, String> alleleMap = new HashMap<Allele, String>(3);
        alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE);
        alleleMap.put(Allele.create("A", true), "0");
        alleleMap.put(Allele.create("C", false), "1");

        final List<Object[]> rows = new ArrayList<Object[]>();
        VariantContext vc;

        // Scenario 1: only AA is set, so BB is a trailing missing field.
        genotypeBuilder.attribute("AA", "a");
        vc = siteBuilder.genotypes(genotypeBuilder.make()).make();
        rows.add(new Object[]{dropMissing, vc, "./.:a", alleleMap, formatKeys});
        rows.add(new Object[]{keepMissing, vc, "./.:a:.", alleleMap, formatKeys});
        genotypeBuilder.noAttributes();

        // Scenario 2: both AA and BB are set, nothing is missing.
        genotypeBuilder.attribute("AA", "a");
        genotypeBuilder.attribute("BB", 2);
        vc = siteBuilder.genotypes(genotypeBuilder.make()).make();
        rows.add(new Object[]{dropMissing, vc, "./.:a:2", alleleMap, formatKeys});
        rows.add(new Object[]{keepMissing, vc, "./.:a:2", alleleMap, formatKeys});
        genotypeBuilder.noAttributes();

        // Scenario 3: neither AA nor BB is set.
        vc = siteBuilder.genotypes(genotypeBuilder.make()).make();
        rows.add(new Object[]{dropMissing, vc, "./.", alleleMap, formatKeys});
        rows.add(new Object[]{keepMissing, vc, "./.:.:.", alleleMap, formatKeys});
        genotypeBuilder.noAttributes();

        // Scenario 4: only BB is set, so the missing AA sits before a present field.
        genotypeBuilder.attribute("BB", 2);
        vc = siteBuilder.genotypes(genotypeBuilder.make()).make();
        rows.add(new Object[]{dropMissing, vc, "./.:.:2", alleleMap, formatKeys});
        rows.add(new Object[]{keepMissing, vc, "./.:.:2", alleleMap, formatKeys});
        genotypeBuilder.noAttributes();

        return rows.toArray(new Object[rows.size()][]);
    }

    /**
     * Encodes the genotype data of {@code vc} and compares the last separator-delimited
     * column of the output with the expected text.
     *
     * @param encoder            encoder under test
     * @param vc                 variant whose genotypes are written
     * @param expectedLastColumn expected content of the final column
     * @param alleleMap          mapping from alleles to the strings written for them
     * @param genotypeFormatKeys FORMAT keys handed to the encoder, in order
     */
    @Test(dataProvider = "MissingFormatTestData")
    public void testMissingFormatFields(final VCFEncoder encoder, final VariantContext vc, final String expectedLastColumn, final Map<Allele, String> alleleMap, final List<String> genotypeFormatKeys) {
        final StringBuilder encoded = new StringBuilder();
        encoder.addGenotypeData(vc, alleleMap, genotypeFormatKeys, encoded);

        // Five slots are allocated for the split; only the last filled one is inspected.
        final String[] fields = new String[5];
        final int fieldCount = ParsingUtils.split(encoded.toString(), fields, VCFConstants.FIELD_SEPARATOR_CHAR);
        final String lastField = fields[fieldCount - 1];

        Assert.assertEquals(lastField, expectedLastColumn, "Format fields don't handle missing data in the expected way");
    }

    /**
     * Builds the header lines shared by the synthetic headers: one contig line with ID "1"
     * and FORMAT lines for GT, AA (both String) and BB (Integer).
     */
    private Set<VCFHeaderLine> createSyntheticMetadata() {
        final Set<VCFHeaderLine> headerLines = new TreeSet<VCFHeaderLine>();

        headerLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "1"), 0));

        headerLines.add(new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "x"));
        headerLines.add(new VCFFormatHeaderLine("AA", 1, VCFHeaderLineType.String, "aa"));
        headerLines.add(new VCFFormatHeaderLine("BB", 1, VCFHeaderLineType.Integer, "bb"));

        return headerLines;
    }

    /** Creates a header from the synthetic metadata without passing any sample names. */
    private VCFHeader createSyntheticHeader() {
        final Set<VCFHeaderLine> headerLines = createSyntheticMetadata();
        return new VCFHeader(headerLines);
    }

    /**
     * Creates a header from the synthetic metadata for the given samples.
     *
     * @param samples sample names passed to the header constructor
     */
    private VCFHeader createSyntheticHeader(final List<String> samples) {
        final Set<VCFHeaderLine> headerLines = createSyntheticMetadata();
        return new VCFHeader(headerLines, samples);
    }
}