package edu.mayo.bior.pipeline;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.*;
import com.tinkerpop.pipes.Pipe;
import edu.mayo.pipes.JSON.DrillPipe;
import edu.mayo.pipes.JSON.tabix.SameVariantPipe;
import edu.mayo.pipes.UNIX.CatPipe;
import edu.mayo.pipes.UNIX.GrepEPipe;
import edu.mayo.pipes.UNIX.GrepPipe;
import edu.mayo.pipes.bioinformatics.VCF2VariantPipe;
import edu.mayo.pipes.history.*;
import edu.mayo.pipes.util.metadata.Metadata;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import com.tinkerpop.pipes.util.Pipeline;
import edu.mayo.pipes.PrintPipe;
import edu.mayo.pipes.util.test.FileCompareUtils;
import edu.mayo.pipes.util.test.PipeTestUtils;
/*
* Tests that VCFGeneratorPipe converts tab-delimited, BioR-generated data into VCF
* and that the corresponding ##INFO header lines are added.
*/
public class VCFGeneratorPipeTest {
/** Calls {@link History#clearMetaData()} before every test so no metadata from an earlier test is visible. */
@Before
public void clearStaticHistoryBefore() {
    History.clearMetaData();
}
/** Calls {@link History#clearMetaData()} after every test so this test's metadata does not outlive it. */
@After
public void clearStaticHistoryAfter() {
    History.clearMetaData();
}
/**
 * Feeds a tab-delimited input that carries no ##BIOR metadata through
 * HistoryInPipe, VCFGeneratorPipe and HistoryOutPipe, and expects the
 * BIOR.SNPeff.Effect column to be moved into INFO together with a default ##INFO header line.
 */
@Test
public void testVCFGeneratorPipeWithoutMetadata() {
    List<String> input = Arrays.asList(
        "##Header start",
        "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tBIOR.SNPeff.Effect",
        "1\t10144\trs144773400\tTA\tT\t.\t.\t.\tDeleterious"
    );
    Pipeline pipe = new Pipeline(
        new HistoryInPipe(),
        new VCFGeneratorPipe(),
        new HistoryOutPipe()
    );
    pipe.setStarts(input);
    List<String> expected = Arrays.asList(
        "##Header start",
        "##INFO=<ID=BIOR.SNPeff.Effect,Number=.,Type=String,Description=\"BioR property file missing description\">",
        "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
        "1\t10144\trs144773400\tTA\tT\t.\t.\tBIOR.SNPeff.Effect=Deleterious"
    );
    List<String> actual = PipeTestUtils.getResults(pipe);
    // Check the line count first: without it, extra output lines would go unnoticed and
    // missing ones would show up as an IndexOutOfBoundsException instead of an assertion failure.
    assertEquals("number of output lines", expected.size(), actual.size());
    for (int i = 0; i < expected.size(); i++) {
        assertEquals(expected.get(i), actual.get(i));
    }
}
/**
 * Header lines shared by most tests in this class: one ##fileformat line (index 0),
 * eight ##BIOR metadata lines (indexes 1-8) and the #CHROM column line (index 9), which lists
 * the eight standard VCF columns followed by six "bior." annotation columns.
 * Path-like attribute values start with the literal text REPLACEME.
 */
public final List<String> header = Arrays.asList(
"##fileformat=VCFv4.0",
"##BIOR=<ID=\"bior.ToTJson\",Operation=\"bior_vcf_to_tjson\",DataType=\"JSON\",ShortUniqueName=\"ToTJson\">",
"##BIOR=<ID=\"bior.brca1.dbsnp.tsv.gz\",Operation=\"bior_same_variant\",DataType=\"JSON\",ShortUniqueName=\"brca1.dbsnp.tsv.gz\",Path=\"REPLACEMEbrca1.dbsnp.tsv.gz\">",
"##BIOR=<ID=\"bior.genes\",Operation=\"bior_overlap\",DataType=\"JSON\",ShortUniqueName=\"genes\",Path=\"REPLACEMEgenes.tsv.bgz\">",
"##BIOR=<ID=\"bior.genes.HGNC\",Operation=\"bior_drill\",DataType=\"String\",Field=\"HGNC\",FieldDescription=\"\",ShortUniqueName=\"genes\",Path=\"REPLACEMEgenes.tsv.bgz\">",
"##BIOR=<ID=\"bior.genes.2\",Operation=\"bior_lookup\",DataType=\"JSON\",ShortUniqueName=\"genes\",Path=\"REPLACEMEgenes.tsv.bgz\">",
"##BIOR=<ID=\"bior.vep\",Operation=\"bior_vep\",DataType=\"JSON\",ShortUniqueName=\"vep\",Description=\"Tool from Ensembl that predicts the functional consequences of known and unknown variants.\",Version=\"2.7\",Build=\"Ensembl Release 69\",DataSourceProperties=\"REPLACEMEvep.datasource.properties\",ColumnProperties=\"REPLACEMEvep.column.properties\">",
"##BIOR=<ID=\"bior.snpeff\",Operation=\"bior_snpeff\",DataType=\"JSON\",ShortUniqueName=\"snpeff\",Description=\"Genetic variant annotation and effect prediction toolbox. It annotates and predicts the effects of variants on genes (such as amino acid changes).\",Version=\"2.0.5d\",Build=\"GRCh37.64 (default)\",DataSourceProperties=\"REPLACEMEsnpeff.datasource.properties\",ColumnProperties=\"REPLACEMEsnpeff.column.properties\">",
"##BIOR=<ID=\"bior.snpeff.Effect_impact\",Operation=\"bior_drill\",DataType=\"String\",Field=\"Effect_impact\",FieldDescription=\"\",ShortUniqueName=\"snpeff\",Description=\"Genetic variant annotation and effect prediction toolbox. It annotates and predicts the effects of variants on genes (such as amino acid changes).\",Version=\"2.0.5d\",Build=\"GRCh37.64 (default)\",DataSourceProperties=\"REPLACEMEsnpeff.datasource.properties\",ColumnProperties=\"REPLACEMEsnpeff.column.properties\">",
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tbior.ToTJson\tbior.brca1.dbsnp.tsv.gz\tbior.genes.HGNC\tbior.genes.2\tbior.vep\tbior.snpeff.Effect_impact"
);
/**
 * Every "bior."-prefixed column named on the #CHROM line of {@code header} must be among the
 * columns that getBIORColumnsFromMetadata reports for that same header.
 */
@Test
public void testGetBIORColumnsFromMetadata() {
    VCFGeneratorPipe generator = new VCFGeneratorPipe();
    List<String> fromMetadata = generator.getBIORColumnsFromMetadata(header);
    // header.get(9) is the #CHROM column line
    for (String columnName : header.get(9).split("\t")) {
        if (!columnName.startsWith("bior.")) {
            continue; // standard VCF column, not of interest here
        }
        assertTrue(fromMetadata.contains(columnName));
    }
}
/**
 * Four tab-delimited data rows that go with {@code header}: each row has the eight standard
 * VCF columns followed by six annotation columns (JSON objects, "." placeholders or plain values).
 * The INFO values of rows 2-4 contain free text, including spaces.
 */
public final List<String> data = Arrays.asList(
"21\t26960070\trs116645811\tG\tA\t.\t.\t.\t{\"CHROM\":\"21\",\"POS\":\"26960070\",\"ID\":\"rs116645811\",\"REF\":\"G\",\"ALT\":\"A\",\"QUAL\":\".\",\"FILTER\":\".\",\"INFO\":{\".\":true},\"_id\":\"rs116645811\",\"_type\":\"variant\",\"_landmark\":\"21\",\"_refAllele\":\"G\",\"_altAlleles\":[\"A\"],\"_minBP\":26960070,\"_maxBP\":26960070}\t{}\t.\t{}\t{\"Allele\":\"A\",\"Gene\":\"ENSG00000154719\",\"Feature\":\"ENST00000307301\",\"Feature_type\":\"Transcript\",\"Consequence\":\"missense_variant\",\"cDNA_position\":\"1043\",\"CDS_position\":\"1001\",\"Protein_position\":\"334\",\"Amino_acids\":\"T/M\",\"Codons\":\"aCg/aTg\",\"HGNC\":\"MRPL39\",\"SIFT\":\"tolerated(0.05)\",\"PolyPhen\":\"benign(0.001)\",\"SIFT_TERM\":\"tolerated\",\"SIFT_Score\":0.05,\"PolyPhen_TERM\":\"benign\",\"PolyPhen_Score\":0.001}\tMODERATE",
"22\t29138293\trs17885497\tT\tC\t.\t.\tThisColumnShouldNotBeAltered\t{\"CHROM\":\"22\",\"POS\":\"29138293\",\"ID\":\"rs17885497\",\"REF\":\"T\",\"ALT\":\"C\",\"QUAL\":\".\",\"FILTER\":\".\",\"INFO\":{\"ThisColumnShouldNotBeAltered\":true},\"_id\":\"rs17885497\",\"_type\":\"variant\",\"_landmark\":\"22\",\"_refAllele\":\"T\",\"_altAlleles\":[\"C\"],\"_minBP\":29138293,\"_maxBP\":29138293}\t{}\t.\t{}\t{}\tLOW",
"21\t38439640\trs73901833\tT\tC\t.\t.\tA column with spaces in it (this should not be passed to VEP, otherwise it will cause problems). This line has multiple sift and polyphen scores\t{\"CHROM\":\"21\",\"POS\":\"38439640\",\"ID\":\"rs73901833\",\"REF\":\"T\",\"ALT\":\"C\",\"QUAL\":\".\",\"FILTER\":\".\",\"INFO\":{\"A column with spaces in it (this should not be passed to VEP, otherwise it will cause problems). This line has multiple sift and polyphen scores\":true},\"_id\":\"rs73901833\",\"_type\":\"variant\",\"_landmark\":\"21\",\"_refAllele\":\"T\",\"_altAlleles\":[\"C\"],\"_minBP\":38439640,\"_maxBP\":38439640}\t{}\t.\t{}\t{\"Allele\":\"C\",\"Gene\":\"ENSG00000185808\",\"Feature\":\"ENST00000399098\",\"Feature_type\":\"Transcript\",\"Consequence\":\"missense_variant\",\"cDNA_position\":\"555\",\"CDS_position\":\"118\",\"Protein_position\":\"40\",\"Amino_acids\":\"I/V\",\"Codons\":\"Ata/Gta\",\"HGNC\":\"PIGP\",\"SIFT\":\"tolerated(0.79)\",\"PolyPhen\":\"benign(0.003)\",\"SIFT_TERM\":\"tolerated\",\"SIFT_Score\":0.79,\"PolyPhen_TERM\":\"benign\",\"PolyPhen_Score\":0.003}\tMODERATE",
"17\t41209681\t41209681\tT\tA\t.\t.\tA variant within the BRCA1 gene \t{\"CHROM\":\"17\",\"POS\":\"41209681\",\"ID\":\"41209681\",\"REF\":\"T\",\"ALT\":\"A\",\"QUAL\":\".\",\"FILTER\":\".\",\"INFO\":{\"A variant within the BRCA1 gene\":true},\"_id\":\"41209681\",\"_type\":\"variant\",\"_landmark\":\"17\",\"_refAllele\":\"T\",\"_altAlleles\":[\"A\"],\"_minBP\":41209681,\"_maxBP\":41209681}\t{\"CHROM\":\"17\",\"POS\":\"41209681\",\"ID\":\"rs181436152\",\"REF\":\"T\",\"ALT\":\"A\",\"QUAL\":\".\",\"FILTER\":\".\",\"INFO\":{\"RSPOS\":41209681,\"GMAF\":5.0E-4,\"dbSNPBuildID\":135,\"SSR\":0,\"SAO\":0,\"VP\":\"050000080005000014000100\",\"GENEINFO\":\"BRCA1:672\",\"WGT\":1,\"VC\":\"SNV\",\"INT\":true,\"ASP\":true,\"KGPhase1\":true,\"KGPROD\":true},\"_id\":\"rs181436152\",\"_type\":\"variant\",\"_landmark\":\"17\",\"_refAllele\":\"T\",\"_altAlleles\":[\"A\"],\"_minBP\":41209681,\"_maxBP\":41209681}\t1100\t{\"_type\":\"gene\",\"_landmark\":\"17\",\"_strand\":\"-\",\"_minBP\":41196312,\"_maxBP\":41277500,\"gene\":\"BRCA1\",\"gene_synonym\":\"BRCAI; BRCC1; BROVCA1; IRIS; PNCA4; PPP1R53; PSCP; RNF53\",\"note\":\"breast cancer 1, early onset; Derived by automated computational analysis using gene prediction method: BestRefseq.\",\"GeneID\":\"672\",\"HGNC\":\"1100\",\"HPRD\":\"00218\",\"MIM\":\"113705\"}\t{}\tMODIFIER"
);
/**
 * Names of the six "bior." annotation columns, in the same order in which they appear on the
 * #CHROM line of {@code header}.
 */
public final List<String> colmns = Arrays.asList(
"bior.ToTJson",
"bior.brca1.dbsnp.tsv.gz",
"bior.genes.HGNC",
"bior.genes.2",
"bior.vep",
"bior.snpeff.Effect_impact"
);
/**
 * The columns returned by getBIORColumnsFromHeader must contain exactly the names listed in
 * {@code colmns} (compared as sets; order and duplicates are not checked).
 */
@Test
public void testGetBIORColumnsFromHeader() {
    VCFGeneratorPipe generator = new VCFGeneratorPipe();
    createHistory();
    List<String> found = generator.getBIORColumnsFromHeader(
        History.getMetaData().getColumns(),
        generator.getBIORColumnsFromMetadata(header));
    // nothing unexpected was returned ...
    assertTrue(colmns.containsAll(found));
    // ... and nothing expected is missing
    assertTrue(found.containsAll(colmns));
}
/**
 * getBiorColumnsIndexes must map the column indexes 8, 9, ... (the positions right after the
 * eight standard VCF columns) to the names in {@code colmns}, in that order.
 */
@Test
public void testGetBiorColumnsIndexes() {
    VCFGeneratorPipe v = new VCFGeneratorPipe();
    createHistory();
    List<String> orig = v.getBIORColumnsFromMetadata(header); //all the column headers
    Map<Integer, String> kv = v.getBiorColumnsIndexes(new History(), orig); //init static vars above
    final int firstBiorColumn = 8;
    // An empty or short map must fail the test rather than skip the checks below.
    assertEquals("number of indexed BioR columns", colmns.size(), kv.size());
    // Look each index up directly so the test does not depend on the map's iteration order.
    for (int i = 0; i < colmns.size(); i++) {
        assertEquals(colmns.get(i), kv.get(firstBiorColumn + i));
    }
}
/**
 * Checks the ";key=value" text that infoDataPair builds for the INFO column. The expectations
 * below show ',' and ';' rendered as '|', '=' rendered as ':' and each space rendered as '_'.
 */
@Test
public void testInfoDataPair() {
    VCFGeneratorPipe v = new VCFGeneratorPipe();
    createHistory();
    v.populateHeaderLinesForHeaderKeys(); //need to call this, never a problem in a pipeline
    assertEquals(";foo=bar", v.infoDataPair("foo", "bar"));
    assertEquals(";foo=a|b|c", v.infoDataPair("foo", "a,b,c"));
    // Two spaces between b and c: the other cases map one space to one underscore, so the
    // expected "b__c" needs two spaces in the input (a single space could only give "b_c").
    assertEquals(";foo=a_b__c", v.infoDataPair("foo", "a b  c"));
    assertEquals(";foo=a|b|c", v.infoDataPair("foo", "a;b;c"));
    assertEquals(";foo=a:b:c", v.infoDataPair("foo", "a:b=c"));
    assertEquals(";foo=a_-_b:|c|d", v.infoDataPair("foo", "a -_b=,c;d"));
}
/**
 * After removeColumnHeader, no remaining column name may contain "bior." or "INFO".
 * INFO is added to the removal list by hand to exercise what bior_annotate needs.
 */
@Test
public void testRemoveColumnHeader() {
    VCFGeneratorPipe generator = new VCFGeneratorPipe();
    createHistory();
    List<String> toRemove = generator.getBIORColumnsFromMetadata(header); // every bior column header
    toRemove.add("INFO");
    Map<Integer, String> indexes = generator.getBiorColumnsIndexes(new History(), toRemove);
    HistoryMetaData remaining = generator.removeColumnHeader(History.getMetaData(), indexes);
    for (ColumnMetaData column : remaining.getColumns()) {
        String name = column.getColumnName();
        boolean isBiorColumn = name.contains("bior.");
        boolean isInfoColumn = name.contains("INFO");
        assertTrue(!isBiorColumn);
        assertTrue(!isInfoColumn);
    }
}
/**
 * Header lines expected by testAddColumnheadersHappyPath: the ##fileformat line, two ##INFO
 * lines and the #CHROM line, which here still lists the "bior." columns.
 */
public final List<String> happypath = Arrays.asList(
"##fileformat=VCFv4.0",
"##INFO=<ID=bior.genes.HGNC,Number=.,Type=String,Description=\"BioR property file missing description\">",
"##INFO=<ID=bior.snpeff.Effect_impact,Number=.,Type=String,Description=\"Genetic variant annotation and effect prediction toolbox. It annotates and predicts the effects of variants on genes (such as amino acid changes).\">",
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tbior.ToTJson\tbior.brca1.dbsnp.tsv.gz\tbior.genes.HGNC\tbior.genes.2\tbior.vep\tbior.snpeff.Effect_impact"
);
/**
 * Happy path for addColumnheaders: a ##BIOR line exists for each column header, and the
 * resulting original header must equal {@code happypath} line for line.
 */
@Test
public void testAddColumnheadersHappyPath() {
    VCFGeneratorPipe v = new VCFGeneratorPipe();
    createHistory();
    List<String> orig = v.getBIORColumnsFromMetadata(header); //all the bior column headers
    Map<Integer, String> biorindexes = v.getBiorColumnsIndexes(new History(), orig); //init static vars above
    HistoryMetaData hmd = v.removeColumnHeader(History.getMetaData(), biorindexes);
    //first happy path - there exists a ##BIOR line for each column header
    v.addColumnheaders(hmd.getOriginalHeader(), null, null);
    int compared = 0;
    for (String s : hmd.getOriginalHeader()) {
        //note that it is ok that #CHROM line is wrong, because HistoryOutPipe will clean that up for us.
        assertEquals(happypath.get(compared), s);
        compared++;
    }
    // Without this, a header that came back empty or truncated would pass the loop above.
    assertEquals("number of header lines", happypath.size(), compared);
    // Cases not covered here:
    //  - not in the metadata, but need to build an ##INFO - build default info string
    //  - in the metadata but not in the column header
}
/**
 * Header lines expected by testAddColumnheadersNotInMetadata. The content is currently
 * identical to {@code happypath}.
 */
public final List<String> notmetaInInfo = Arrays.asList(
"##fileformat=VCFv4.0",
"##INFO=<ID=bior.genes.HGNC,Number=.,Type=String,Description=\"BioR property file missing description\">",
"##INFO=<ID=bior.snpeff.Effect_impact,Number=.,Type=String,Description=\"Genetic variant annotation and effect prediction toolbox. It annotates and predicts the effects of variants on genes (such as amino acid changes).\">",
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tbior.ToTJson\tbior.brca1.dbsnp.tsv.gz\tbior.genes.HGNC\tbior.genes.2\tbior.vep\tbior.snpeff.Effect_impact"
);
/**
 * addColumnheaders must produce the header in {@code notmetaInInfo}, including a default
 * ##INFO description where the metadata supplies none.
 */
@Test
public void testAddColumnheadersNotInMetadata() {
    VCFGeneratorPipe v = new VCFGeneratorPipe();
    createHistory();
    List<String> orig = v.getBIORColumnsFromMetadata(header); //all the bior column headers
    Map<Integer, String> biorindexes = v.getBiorColumnsIndexes(new History(), orig); //init the static vars above
    HistoryMetaData hmd = v.removeColumnHeader(History.getMetaData(), biorindexes);
    //not in the metadata, but need to build an ##INFO - build default info string
    v.addColumnheaders(hmd.getOriginalHeader(), null, null);
    int compared = 0;
    for (String s : hmd.getOriginalHeader()) {
        //note that it is ok that #CHROM line is wrong, because HistoryOutPipe will clean that up for us.
        assertEquals(notmetaInInfo.get(compared), s);
        compared++;
    }
    // Without this, a header that came back empty or truncated would pass the loop above.
    assertEquals("number of header lines", notmetaInInfo.size(), compared);
    // Case not covered here: in the metadata but not in the column header
}
/**
 * Pushes {@link #createInput()} through a HistoryInPipe/HistoryOutPipe pipeline and throws the
 * output away. Tests call this before reading {@code History.getMetaData()}; presumably
 * draining the pipeline is what populates that static metadata (confirm against HistoryInPipe).
 */
public void createHistory() {
    Pipeline historyOnly = new Pipeline(new HistoryInPipe(), new HistoryOutPipe());
    historyOnly.setStarts(createInput());
    // drain every line; only the side effects matter
    for (; historyOnly.hasNext(); ) {
        historyOnly.next();
    }
}
/**
 * Builds the standard test input.
 *
 * @return a new mutable list holding the lines of {@code header} followed by the lines of {@code data}
 */
public List<String> createInput() {
    List<String> lines = new ArrayList<String>(header);
    lines.addAll(data);
    return lines;
}
/**
 * Lines that testCurrentInput expects when {@link #createInput()} is run through
 * HistoryInPipe, VCFGeneratorPipe and HistoryOutPipe: the ##BIOR lines are gone, two ##INFO
 * lines are present, and the data rows carry bior.genes.HGNC / bior.snpeff.Effect_impact
 * values in the INFO column (an INFO of "." is replaced, other INFO text is kept and appended to).
 */
public final List<String> output = Arrays.asList(
"##fileformat=VCFv4.0",
"##INFO=<ID=bior.genes.HGNC,Number=.,Type=String,Description=\"BioR property file missing description\">",
"##INFO=<ID=bior.snpeff.Effect_impact,Number=.,Type=String,Description=\"Genetic variant annotation and effect prediction toolbox. It annotates and predicts the effects of variants on genes (such as amino acid changes).\">",
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
"21\t26960070\trs116645811\tG\tA\t.\t.\tbior.snpeff.Effect_impact=MODERATE",
"22\t29138293\trs17885497\tT\tC\t.\t.\tThisColumnShouldNotBeAltered;bior.snpeff.Effect_impact=LOW",
"21\t38439640\trs73901833\tT\tC\t.\t.\tA column with spaces in it (this should not be passed to VEP, otherwise it will cause problems). This line has multiple sift and polyphen scores;bior.snpeff.Effect_impact=MODERATE",
"17\t41209681\t41209681\tT\tA\t.\t.\tA variant within the BRCA1 gene ;bior.genes.HGNC=1100;bior.snpeff.Effect_impact=MODIFIER"
);
/**
 * End-to-end check: {@link #createInput()} run through HistoryInPipe, VCFGeneratorPipe and
 * HistoryOutPipe must yield exactly the lines in {@code output}.
 */
@Test
public void testCurrentInput() {
    Pipeline p = new Pipeline(
        new HistoryInPipe(),
        new VCFGeneratorPipe(),
        new HistoryOutPipe()
    );
    p.setStarts(createInput());
    int produced = 0;
    while (p.hasNext()) {
        String s = (String) p.next();
        assertEquals(output.get(produced), s);
        produced++;
    }
    // Without this, a pipeline that emitted too few lines (or none) would pass the loop above.
    assertEquals("number of output lines", output.size(), produced);
}
/**
 * removeAnnotationColumns must strip the annotation columns from a History row: what is left,
 * re-joined with a tab after every cell, has to equal the second data row cut off at its first '{'.
 */
@Test
public void testRemoveAnnotation() {
    String row = data.get(1);
    // everything from the first '{' onwards is annotation; the tab in front of it is kept
    String expected = row.replaceAll("\\{.*", "");
    createHistory();
    History history = new History();
    VCFGeneratorPipe generator = new VCFGeneratorPipe();
    List<String> biorColumns = generator.getBIORColumnsFromMetadata(header);
    // indexes are computed while the History row is still empty, as in a fresh pipeline
    Map<Integer, String> indexes = generator.getBiorColumnsIndexes(history, biorColumns);
    for (String cell : row.split("\t")) {
        history.add(cell);
    }
    generator.removeAnnotationColumns(history, indexes);
    StringBuilder rebuilt = new StringBuilder();
    for (String cell : history) {
        rebuilt.append(cell).append("\t");
    }
    assertEquals(expected, rebuilt.toString());
}
/**
 * The three ##INFO header lines that testRealData expects for the drilled dbSNP fields,
 * in the order RSPOS (index 0), RV (index 1), GMAF (index 2).
 */
public final List<String> correctHeaderOut = Arrays.asList(
"##INFO=<ID=bior.dbSNP137.INFO.RSPOS,Number=1,Type=Integer,Description=\"Chromosome position reported in dbSNP\">",
"##INFO=<ID=bior.dbSNP137.INFO.RV,Number=1,Type=Flag,Description=\"RV Desc\">",
"##INFO=<ID=bior.dbSNP137.INFO.GMAF,Number=1,Type=Float,Description=\"GMAF Desc\">"
);
/**
 * INFO column value (tab-split index 7) that testRealData expects on the first data line:
 * the existing INFO text followed by the three bior.dbSNP137 entries, RV as a value-less flag.
 */
public final List<String> correctDataOut = Arrays.asList(
"AC=39;AF=0.342;AN=114;BaseQRankSum=-2.185;DP=22;Dels=0.00;FS=4.193;HaplotypeScore=0.0000;MLEAC=37;MLEAF=0.325;MQ=70.00;MQ0=0;MQRankSum=-0.282;QD=20.22;ReadPosRankSum=-1.128;bior.dbSNP137.INFO.GMAF=0.4734;bior.dbSNP137.INFO.RSPOS=28218100;bior.dbSNP137.INFO.RV"
);
//head -n 250 /data/VCFExamples/BATCH4.vcf | bior_vcf_to_tjson | bior_same_variant -d /data/catalogs/dbSNP/137/00-All.tsv.bgz | bior_drill -p INFO.RSPOS -p INFO.RV -p INFO.GMAF
/**
 * Runs the BATCH4 example VCF through cat, history-in, VCF-to-variant, same-variant, drill,
 * VCF generator and history-out, then checks that
 * <ul>
 *   <li>the three drilled fields each get the ##INFO line listed in {@code correctHeaderOut},</li>
 *   <li>the RSPOS ##INFO line directly follows the input's STR ##INFO line, and</li>
 *   <li>the INFO column of the first data line equals {@code correctDataOut.get(0)}.</li>
 * </ul>
 * The same-variant JSON column is expected to be dropped rather than written to INFO.
 *
 * @throws IOException declared by the original test; presumably raised by the file-backed pipes
 */
@Test
public void testRealData() throws IOException {
    String catalog = "src/test/resources/metadata/BATCH4/dbSNP.tsv.bgz";
    Metadata tojson = new Metadata("bior_vcf_to_json");
    Metadata md = new Metadata(catalog, "bior_same_variant");
    History.clearMetaData();
    String[] paths = new String[]{"INFO.RSPOS", "INFO.RV", "INFO.GMAF"};
    Metadata mddrill = new Metadata(-1, "bior_drill", false, paths);
    ArrayList<Metadata> mdlist = new ArrayList<Metadata>();
    mdlist.add(tojson);
    mdlist.add(md);
    mdlist.add(mddrill);
    //In this example, there are 3 drill paths, all need to be added to the info and one bior_same_variant - that gets dropped
    Pipeline p = new Pipeline(
        new CatPipe(),
        new HistoryInPipe(mdlist),
        new VCF2VariantPipe(),
        new SameVariantPipe(catalog),
        new DrillPipe(false, paths),
        new VCFGeneratorPipe(),
        new HistoryOutPipe()
    );
    p.setStarts(Arrays.asList("src/test/resources/metadata/BATCH4/BATCH4.vcf"));

    boolean chromReached = false;
    int dcount = 0; // lines seen from the #CHROM line on; equals 1 when the first data line is read
    boolean sawRspos = false;
    boolean sawRv = false;
    boolean sawGmaf = false;
    boolean sawFirstDataLine = false;
    String prev = "";
    while (p.hasNext()) {
        String s = (String) p.next();
        if (s.startsWith("#CHROM")) {
            chromReached = true;
        }
        //check that the info lines exist, are in the correct location, and are correctly formatted
        if (s.startsWith("##INFO=<ID=bior.dbSNP137.INFO.RSPOS")) {
            assertEquals(correctHeaderOut.get(0), s);
            assertTrue(prev.startsWith("##INFO=<ID=STR,Number=0,Type=Flag,Description=\"Variant is a short tandem repeat\">"));
            sawRspos = true;
        }
        if (s.startsWith("##INFO=<ID=bior.dbSNP137.INFO.RV")) {
            assertEquals(correctHeaderOut.get(1), s);
            sawRv = true;
        }
        if (s.startsWith("##INFO=<ID=bior.dbSNP137.INFO.GMAF")) {
            assertEquals(correctHeaderOut.get(2), s);
            sawGmaf = true;
        }
        //check the data lines
        if (dcount == 1) {
            String[] split = s.split("\t");
            assertEquals(correctDataOut.get(0), split[7]);
            sawFirstDataLine = true;
        }
        if (chromReached) {
            dcount++;
        }
        prev = s;
    }
    //assert that all of the checks were actually done!
    assertTrue("RSPOS ##INFO line was never seen", sawRspos);
    assertTrue("RV ##INFO line was never seen", sawRv);
    assertTrue("GMAF ##INFO line was never seen", sawGmaf);
    assertTrue("first data line was never seen", sawFirstDataLine);
}
/**
 * Input for testSanatizeJSONArray: a single bior.JsonArray annotation column whose three data
 * rows hold a JSON array of strings, of integers and of decimals respectively.
 */
public final List<String> arrayInput = Arrays.asList(
"##fileformat=VCFv4.0",
"##BIOR=<ID=\"bior.JsonArray\",Operation=\"bior_foo\",Number=.,ShortUniqueName=JsonArray>", //data type not defined to make sure test works for float, string and int
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tbior.JsonArray",
"chr1\t10000\trs00020\tA\tC\t0\t.\tInfoData\t[\"A\",\"B\",\"C\"]",
"chr1\t10000\trs00020\tA\tC\t0\t.\tInfoData\t[1,2,3]",
"chr1\t10000\trs00020\tA\tC\t0\t.\tInfoData\t[1.1,2.2,3.3]"
);
/**
 * Output expected for {@code arrayInput}: the array brackets and quotes are gone and the
 * elements appear comma-separated after "bior.JsonArray=" in INFO. No ##INFO line is expected.
 */
public final List<String> arrayOutput = Arrays.asList(
"##fileformat=VCFv4.0",
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
"chr1\t10000\trs00020\tA\tC\t0\t.\tInfoData;bior.JsonArray=A,B,C",
"chr1\t10000\trs00020\tA\tC\t0\t.\tInfoData;bior.JsonArray=1,2,3",
"chr1\t10000\trs00020\tA\tC\t0\t.\tInfoData;bior.JsonArray=1.1,2.2,3.3"
);
/**
 * A JSON array that ends up in an annotation column must be sanitized before it is written
 * into the INFO column: {@code arrayInput} has to come out as {@code arrayOutput}.
 */
@Test
public void testSanatizeJSONArray() {
    System.out.println("Testing Sanatize JSON Array");
    Pipeline pipeline = new Pipeline(new HistoryInPipe(), new VCFGeneratorPipe(), new HistoryOutPipe());
    pipeline.setStarts(arrayInput);
    PipeTestUtils.assertListsEqual(arrayOutput, PipeTestUtils.getResults(pipeline));
}
/**
 * Runs the annotate.vcf resource through the pipeline and compares the result with the
 * stored annotateVcfized.vcf.
 *
 * @throws IOException declared by the original test; presumably from reading the resource files
 */
@Test
public void testVCFizeAnnotate() throws IOException {
    System.out.println("Testing to see if we can VCFize an annotate output");
    Pipeline pipeline = new Pipeline(
        new CatPipe(),
        new HistoryInPipe(),
        new VCFGeneratorPipe(),
        new HistoryOutPipe());
    pipeline.setStarts(Arrays.asList("src/test/resources/vcfizer/annotate.vcf"));
    List<String> wanted = FileCompareUtils.loadFile("src/test/resources/vcfizer/annotateVcfized.vcf");
    PipeTestUtils.assertListsEqual(wanted, PipeTestUtils.getResults(pipeline));
}
/**
 * Input for testVCFizeCompress: two bior_compress columns, one whose ##BIOR line declares the
 * delimiter "|" and one whose ##BIOR line declares ",", with a single data row.
 */
public final List<String> vcfizeCompressInput = Arrays.asList(
"##fileformat=VCFv4.0",
"##BIOR=<ID=\"bior.JsonArray\",Operation=\"bior_compress\",DataType=\"String\",Field=\"JsonArray\",FieldDescription=\"List of Strings\",ShortUniqueName=\"JsonArray\",Delimiter=\"|\",Path=\"REPLACEMEgenes.tsv.bgz\">",
"##BIOR=<ID=\"bior.JsonArray2\",Operation=\"bior_compress\",DataType=\"String\",Field=\"JsonArray2\",FieldDescription=\"List of Numbers\",ShortUniqueName=\"JsonArray2\",Delimiter=\",\",Path=\"REPLACEMEgenes.tsv.bgz\">",
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tbior.JsonArray\tbior.JsonArray2",
"chr1\t10000\trs00020\tA\tC\t0\t.\tInfoData\tA|B|C\t1,2,3"
);
/**
 * Output expected for {@code vcfizeCompressInput}: one ##INFO line per compressed column
 * (described by its FieldDescription) and both values written comma-separated into INFO,
 * including the one that was "|"-delimited in the input.
 */
public final List<String> vcfizeCompressOutput = Arrays.asList(
"##fileformat=VCFv4.0",
"##INFO=<ID=bior.JsonArray,Number=.,Type=String,Description=\"List of Strings\">",
"##INFO=<ID=bior.JsonArray2,Number=.,Type=String,Description=\"List of Numbers\">",
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
"chr1\t10000\trs00020\tA\tC\t0\t.\tInfoData;bior.JsonArray=A,B,C;bior.JsonArray2=1,2,3"
);
/**
 * bior_compress output joins several values into one delimited string (often with a pipe).
 * This checks that such columns are still converted correctly:
 * {@code vcfizeCompressInput} must come out exactly as {@code vcfizeCompressOutput}.
 */
@Test
public void testVCFizeCompress() {
    System.out.println("Compress output converts some number fields into strings (usually with pipe) test to make sure that this works");
    Pipeline p = new Pipeline(
        new HistoryInPipe(),
        new VCFGeneratorPipe(),
        new HistoryOutPipe()
    );
    p.setStarts(vcfizeCompressInput);
    int produced = 0;
    while (p.hasNext()) {
        String s = (String) p.next();
        assertEquals(vcfizeCompressOutput.get(produced), s);
        produced++;
    }
    // Without this, a pipeline that emitted too few lines (or none) would pass the loop above.
    assertEquals("number of output lines", vcfizeCompressOutput.size(), produced);
}
}