/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package edu.mayo.bior.pipeline;

import com.tinkerpop.pipes.util.Pipeline;
import edu.mayo.pipes.InputStreamPipe;
import edu.mayo.pipes.JSON.DrillPipe;
import edu.mayo.pipes.JSON.tabix.OverlapPipe;
import edu.mayo.pipes.MergePipe;
import edu.mayo.pipes.PrintPipe;
import edu.mayo.pipes.SplitPipe;
import edu.mayo.pipes.UNIX.CatGZPipe;
import edu.mayo.pipes.UNIX.CatPipe;
import edu.mayo.pipes.UNIX.GrepEPipe;
import edu.mayo.pipes.bioinformatics.VCF2VariantPipe;
import edu.mayo.pipes.history.HistoryInPipe;
import edu.mayo.pipes.history.HistoryOutPipe;
import edu.mayo.pipes.util.metadata.Metadata;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import static org.junit.Assert.assertEquals;

/**
 * In-JVM integration test that chains the VCF-to-JSON, overlap and drill
 * pipes and compares the leading lines of their output against a fixed
 * expectation.
 *
 * @author m102417
 */
public class IntegrationTestSprint1 {

    /** Gene catalog handed to the {@link OverlapPipe}. */
    public String geneFile = "src/test/resources/genes.tsv.bgz";

    /** dbSNP VCF that is fed into the pipeline as its only start element. */
    public String dbSNP = "src/test/resources/dbsnp20k.vcf.gz";

    /**
     * Expected output, one entry per emitted line and in emission order:
     * the surviving "##" header rows, the "#CHROM" column header, then the
     * first four data rows.
     *
     * NOTE(review): the field separators inside the column-header and data
     * rows are reproduced exactly as they appear in this file (single
     * spaces); confirm they match the delimiter the pipeline really emits.
     */
    public final List<String> outputMeta = Arrays.asList(
            "##fileformat=VCFv4.0",
            "##fileDate=20120616",
            "##source=dbSNP",
            "##dbSNP_BUILD_ID=137",
            "##reference=GRCh37.p5",
            "##phasing=partial",
            "##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf",
            "##FILTER=<ID=NC,Description=\"Inconsistent Genotype Submission For At Least One Sample\">",
            "##BIOR=<ID=\"bior.ToTJson\",Operation=\"bior_vcf_to_json\",DataType=\"JSON\",ShortUniqueName=\"ToTJson\">",
            "##BIOR=<ID=\"bior.dbsnp20k.vcf.gz\",Operation=\"bior_overlap\",DataType=\"JSON\",ShortUniqueName=\"dbsnp20k.vcf.gz\",Path=\"src/test/resources/dbsnp20k.vcf.gz\">",
            "##BIOR=<ID=\"bior.dbsnp20k.vcf.gz.gene\",Operation=\"bior_drill\",DataType=\"STRING\",Field=\"gene\",FieldDescription=\"\",ShortUniqueName=\"dbsnp20k.vcf.gz\",Path=\"src/test/resources/dbsnp20k.vcf.gz\">",
            "#CHROM POS ID REF ALT QUAL FILTER INFO bior.ToTJson bior.dbsnp20k.vcf.gz.gene",
            "1 11014 rs28484712 G A . . RSPOS=11014;dbSNPBuildID=125;SSR=0;SAO=0;VP=050000000005000002000100;WGT=1;VC=SNV;ASP;OTHERKG {\"CHROM\":\"1\",\"POS\":\"11014\",\"ID\":\"rs28484712\",\"REF\":\"G\",\"ALT\":\"A\",\"QUAL\":\".\",\"FILTER\":\".\",\"INFO\":{\"RSPOS\":11014,\"dbSNPBuildID\":125,\"SSR\":0,\"SAO\":0,\"VP\":\"050000000005000002000100\",\"WGT\":1,\"VC\":\"SNV\",\"ASP\":true,\"OTHERKG\":true},\"_id\":\"rs28484712\",\"_type\":\"variant\",\"_landmark\":\"1\",\"_refAllele\":\"G\",\"_altAlleles\":[\"A\"],\"_minBP\":11014,\"_maxBP\":11014} LOC100506145",
            "1 11022 rs28775022 G A . . RSPOS=11022;dbSNPBuildID=125;SSR=0;SAO=0;VP=050000000005000002000100;WGT=1;VC=SNV;ASP;OTHERKG {\"CHROM\":\"1\",\"POS\":\"11022\",\"ID\":\"rs28775022\",\"REF\":\"G\",\"ALT\":\"A\",\"QUAL\":\".\",\"FILTER\":\".\",\"INFO\":{\"RSPOS\":11022,\"dbSNPBuildID\":125,\"SSR\":0,\"SAO\":0,\"VP\":\"050000000005000002000100\",\"WGT\":1,\"VC\":\"SNV\",\"ASP\":true,\"OTHERKG\":true},\"_id\":\"rs28775022\",\"_type\":\"variant\",\"_landmark\":\"1\",\"_refAllele\":\"G\",\"_altAlleles\":[\"A\"],\"_minBP\":11022,\"_maxBP\":11022} LOC100506145",
            "1 11081 rs10218495 G T . . RSPOS=11081;dbSNPBuildID=119;SSR=0;SAO=0;VP=050000000009000102000100;WGT=1;VC=SNV;CFL;GNO;OTHERKG {\"CHROM\":\"1\",\"POS\":\"11081\",\"ID\":\"rs10218495\",\"REF\":\"G\",\"ALT\":\"T\",\"QUAL\":\".\",\"FILTER\":\".\",\"INFO\":{\"RSPOS\":11081,\"dbSNPBuildID\":119,\"SSR\":0,\"SAO\":0,\"VP\":\"050000000009000102000100\",\"WGT\":1,\"VC\":\"SNV\",\"CFL\":true,\"GNO\":true,\"OTHERKG\":true},\"_id\":\"rs10218495\",\"_type\":\"variant\",\"_landmark\":\"1\",\"_refAllele\":\"G\",\"_altAlleles\":[\"T\"],\"_minBP\":11081,\"_maxBP\":11081} LOC100506145",
            "1 12214 rs202068986 C G . . RSPOS=12214;dbSNPBuildID=137;SSR=0;SAO=0;VP=050000000a05000002000100;WGT=1;VC=SNV;NSM;REF;ASP;OTHERKG {\"CHROM\":\"1\",\"POS\":\"12214\",\"ID\":\"rs202068986\",\"REF\":\"C\",\"ALT\":\"G\",\"QUAL\":\".\",\"FILTER\":\".\",\"INFO\":{\"RSPOS\":12214,\"dbSNPBuildID\":137,\"SSR\":0,\"SAO\":0,\"VP\":\"050000000a05000002000100\",\"WGT\":1,\"VC\":\"SNV\",\"NSM\":true,\"REF\":true,\"ASP\":true,\"OTHERKG\":true},\"_id\":\"rs202068986\",\"_type\":\"variant\",\"_landmark\":\"1\",\"_refAllele\":\"C\",\"_altAlleles\":[\"G\"],\"_minBP\":12214,\"_maxBP\":12214} DDX11L1"
    );

    /**
     * Runs, inside a single JVM, a chain of components that is meant to work
     * as shell scripts piped together but that appears to exit prematurely
     * when run from the command line. The script that exits is:
     *
     * zcat 00-All.vcf.gz | bior_vcf_to_tjson | bior_overlap -d /data/catalogs/NCBIGene/GRCh37_p10/genes.tsv.bgz | grep -v "##INFO.*" | grep -v "{}" | bior_drill -p gene
     *
     * Only the first {@code outputMeta.size()} lines produced by the pipeline
     * are compared (after trimming); anything emitted afterwards is not read.
     *
     * @throws IOException declared for the pipe constructors used below
     */
    @Test
    public void testIntegrationOfComponentsInJVM() throws IOException {
        System.out.println("Integration Test of Several Components inside of the JVM");

        String[] drillPath = {"gene"};

        // One metadata entry per operation, in the order the operations run.
        // NOTE(review): the overlap entry is built from dbSNP although the
        // OverlapPipe below reads geneFile; the expected "##BIOR" rows encode
        // the same choice, so confirm this is intentional before changing it.
        Metadata toJsonMeta = new Metadata("bior_vcf_to_json");
        Metadata overlapMeta = new Metadata(dbSNP, "bior_overlap");
        Metadata drillMeta = new Metadata(-1, "bior_drill", false, drillPath);
        ArrayList<Metadata> history =
                new ArrayList<Metadata>(Arrays.asList(toJsonMeta, overlapMeta, drillMeta));

        Pipeline pipeline = new Pipeline(
                new CatPipe(),                // updates from greg allow it to handle zipped files
                new HistoryInPipe(history),
                new VCF2VariantPipe(),
                new OverlapPipe(geneFile),
                new DrillPipe(false, drillPath),
                new HistoryOutPipe(),
                new GrepEPipe("##INFO.*"),    // remove all INFO rows
                new GrepEPipe("\\.$")         // remove non-matching rows
        );
        pipeline.setStarts(Arrays.asList(dbSNP));

        // Pull exactly one output line per expected line and compare in order.
        for (String expected : outputMeta) {
            String actual = (String) pipeline.next();
            assertEquals(expected, actual.trim());
        }
    }
}