package com.github.lindenb.jvarkit.tools.gnomad; import java.io.Closeable; import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.stream.Collectors; import com.beust.jcommander.Parameter; import com.github.lindenb.jvarkit.lang.JvarkitException; import com.github.lindenb.jvarkit.util.jcommander.Launcher; import com.github.lindenb.jvarkit.util.jcommander.Program; import com.github.lindenb.jvarkit.util.log.Logger; import com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress; import com.github.lindenb.jvarkit.util.vcf.ContigPosRef; import com.github.lindenb.jvarkit.util.vcf.TabixVcfFileReader; import com.github.lindenb.jvarkit.util.vcf.VcfIterator; import htsjdk.samtools.util.AbstractIterator; import htsjdk.samtools.util.CloserUtil; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.AbstractVCFCodec; import htsjdk.variant.vcf.VCFFilterHeaderLine; import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLineCount; import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFInfoHeaderLine; /** BEGIN_DOC ## Manifest the manifest is a tab delimited file containing 3 columns. It's used to map a contig to a URI * 1st column is a keyword 'exome' or 'genome' * 2d column is a contig name e.g: '1' . Use '*' for 'any' chromosome * 3d column is a URL or file path where to find the data ## Example: ``` curl -s "https://storage.googleapis.com/gnomad-public/release-170228/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz" |\ gunzip -c | head -n 400 |\ java -jar ~/src/jvarkit-git/dist/vcfgnomad.jar -ac -gf IN_GNOMAD (...) 1 13595 . AGT A 379.68 AC0;IN_GNOMAD;RF AB_HIST_ALL=0|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;AB_HIST_ALT=0|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;AB_MEDIAN=1.44068e-01;AC=0;AC_AFR=0;AC_AMR=0;AC_ASJ=0;AC_EAS=0;AC_FIN=0;AC_Female=0;AC_Male=0;AC_NFE=0;AC_OTH=0;AC_POPMAX=.;AC_SAS=0;AC_raw=1;AF=0.00000e+00;AF_AFR=0.00000e+00;AF_AMR=0.00000e+00;AF_ASJ=0.00000e+00;AF_EAS=0.00000e+00;AF_FIN=0.00000e+00;AF_Female=0.00000e+00;AF_Male=0.00000e+00;AF_NFE=0.00000e+00;AF_OTH=0.00000e+00;AF_POPMAX=.;AF_SAS=0.00000e+00;AF_raw=9.99900e-06;AN=50778;AN_AFR=4986;AN_AMR=10892;AN_ASJ=1274;AN_EAS=7560;AN_FIN=694;AN_Female=24940;AN_Male=25838;AN_NFE=17556;AN_OTH=1486;AN_POPMAX=.;AN_SAS=6330;AN_raw=100010;AS_FilterStatus=RF|AC0;AS_RF=1.49748e-01;BaseQRankSum=-4.60000e-01;CSQ=-|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000423562|unprocessed_pseudogene|||||||||||1|766|-1||deletion|1|HGNC|38034||||||||||||||||||||||||||||||||||||||||||,-|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000438504|unprocessed_pseudogene|||||||||||1|766|-1||deletion|1|HGNC|38034|YES|||||||||||||||||||||||||||||||||||||||||,-|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000450305|transcribed_unprocessed_pseudogene|6/6||ENST00000450305.2:n.561_562delTG||558-559||||||1||1||deletion|1|HGNC|37102|||||||||||||3|||||||||||||||||||||||||||||,-|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000456328|processed_transcript|3/3||ENST00000456328.2:n.847_848delTG||844-845||||||1||1||deletion|1|HGNC|37102|YES||||||||||||3|||||||||||||||||||||||||||||,-|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000488147|unprocessed_pseudogene|||||||||||1|807|-1||deletion|1|HGNC|38034||||||||||||||||||||||||||||||||||||||||||,-|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000515242|transcribed_unprocessed_pseudogene|3/3||ENST00000515242.2:n.840_841delTG||837-838||||||1||1||deletion|1|HGNC|37102|||||||||||||3|||||||||||||||||||||||||||||,-|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000518655|transcribed_unprocessed_pseudogene|3/4||ENST00000518655.2:n.678_679delTG||675-676||||||1||1||deletion|1|HGNC|37102|||||||||||||3|||||||||||||||||||||||||||||,-|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000538476|unprocessed_pseudogene|||||||||||1|814|-1||deletion|1|HGNC|38034||||||||||||||||||||||||||||||||||||||||||,-|downstream_gene_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000541675|unprocessed_pseudogene|||||||||||1|766|-1||deletion|1|HGNC|38034||||||||||||||||||||||||||||||||||||||||||,-|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001576075|CTCF_binding_site|||||||||||1||||deletion|1||||||||||||||||||||||||||||||||||||||||||||;ClippingRankSum=5.63000e-01;DP=2519792;DP_HIST_ALL=20921|3680|466|85|62|97|652|4365|4551|3656|2891|2039|1464|1114|954|811|688|497|352|310;DP_HIST_ALT=0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;DP_MEDIAN=118;DREF_MEDIAN=3.98107e-38;FS=1.59250e+01;GC=25389,0,0;GC_AFR=2493,0,0;GC_AMR=5446,0,0;GC_ASJ=637,0,0;GC_EAS=3780,0,0;GC_FIN=347,0,0;GC_Female=12470,0,0;GC_Male=12919,0,0;GC_NFE=8778,0,0;GC_OTH=743,0,0;GC_SAS=3165,0,0;GC_raw=50004,1,0;GQ_HIST_ALL=11211|8535|2038|2055|803|203|195|95|28|49|65|37|115|64|88|117|164|34|237|23872;GQ_HIST_ALT=0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1;GQ_MEDIAN=99;Hom=0;Hom_AFR=0;Hom_AMR=0;Hom_ASJ=0;Hom_EAS=0;Hom_FIN=0;Hom_Female=0;Hom_Male=0;Hom_NFE=0;Hom_OTH=0;Hom_SAS=0;Hom_raw=0;InbreedingCoeff=-4.37000e-02;MQ=3.15600e+01;MQRankSum=-8.97000e-01;POPMAX=.;QD=3.22000e+00;ReadPosRankSum=-1.23200e+00;SOR=1.09000e-01;VQSLOD=-1.83100e+00;VQSR_NEGATIVE_TRAIN_SITE;VQSR_culprit=QD;gnomad.exome.AC_AFR=0;gnomad.exome.AC_AMR=0;gnomad.exome.AC_ASJ=0;gnomad.exome.AC_EAS=0;gnomad.exome.AC_FIN=0;gnomad.exome.AC_Female=0;gnomad.exome.AC_Male=0;gnomad.exome.AC_NFE=0;gnomad.exome.AC_OTH=0;gnomad.exome.AC_raw=1;gnomad.exome.AN_AFR=4986;gnomad.exome.AN_AMR=10892;gnomad.exome.AN_ASJ=1274;gnomad.exome.AN_EAS=7560;gnomad.exome.AN_FIN=694;gnomad.exome.AN_Female=24940;gnomad.exome.AN_Male=25838;gnomad.exome.AN_NFE=17556;gnomad.exome.AN_OTH=1486;gnomad.exome.AN_raw=100010;gnomad.genome.AC_AFR=0;gnomad.genome.AC_AMR=0;gnomad.genome.AC_ASJ=0;gnomad.genome.AC_EAS=0;gnomad.genome.AC_FIN=0;gnomad.genome.AC_Female=0;gnomad.genome.AC_Male=0;gnomad.genome.AC_NFE=0;gnomad.genome.AC_OTH=0;gnomad.genome.AC_raw=1;gnomad.genome.AN_AFR=8680;gnomad.genome.AN_AMR=794;gnomad.genome.AN_ASJ=224;gnomad.genome.AN_EAS=1592;gnomad.genome.AN_FIN=3490;gnomad.genome.AN_Female=13274;gnomad.genome.AN_Male=16168;gnomad.genome.AN_NFE=13754;gnomad.genome.AN_OTH=908;gnomad.genome.AN_raw=30500 ``` ## Note to self: Another alternative with VariantAnnotator, but I think it slower... (javascript / Makefile generation) ```javascript out.print(" ${java.exe} -jar ${gatk.jar} -R $(REF) -L $(addsuffix .tmp.vcf,$@) -T VariantAnnotator --variant $(addsuffix .tmp.vcf,$@) -o $(addsuffix .tmp2.vcf,$@) --resourceAlleleConcordance "); out.print(" --resource:gnomad_exome /commun/data/pubdb/broadinstitute.org/gnomad/release-170228/vcf/exome/gnomad.exomes.r2.0.1.sites.vcf.gz "); out.print("$(foreach A,${GFIELDS}, -E gnomad_exome.${A} ) "); var genome="/commun/data/pubdb/broadinstitute.org/gnomad/release-170228/vcf/genome/gnomad.genomes.r2.0.1.sites."+chrom+".vcf.gz"; out.print("$(if $(realpath "+genome+"), --resource:gnomad_genome "+genome+" $(foreach A,${GFIELDS}, -E gnomad_genome.${A} ) )"); ``` ## Generating jar helper for knime (for the people in my lab) generate big jar ``` $ cd jvarkit $ rm -rf tmp && mkdir tmp && echo '1.jar:2.jar:...N.jar:vcfgnomad.jar' | tr ":" "\n" | sort | uniq | while read F; do unzip -o $F -d tmp ; done && jar cvf vcfgnomad4knime.jar -C tmp . && rm -rf tmp ``` Open KNIME we're going to create the following workflow : http://imgur.com/a/QcrKW * create a new Node `java Snippet` * in the tab 'additional libraries', add 'vcfgnomad4knime.jar'. * in the tab 'java snippet'. Declare the following inputs: `c_CHROM,c_POS,c_REF,c_ALT`, the output string  `GNOMAD`. And insert the following code: ```java // Your custom imports: import com.github.lindenb.jvarkit.tools.gnomad.VcfGnomad.KnimeAdapter; // Enter your code here: System.setProperty("http.proxyHost","cache.ha.univ-nantes.fr"); System.setProperty("https.proxyHost","cache.ha.univ-nantes.fr"); System.setProperty("http.proxyPort","3128"); System.setProperty("https.proxyPort","3128"); final KnimeAdapter app= new KnimeAdapter(); if(app.instanceMain(new String[]{"-ac",c_CHROM,String.valueOf(c_POS),c_REF,c_ALT})==0) { out_GNOMAD = app.getOutputString(); } else { out_GNOMAD = "."; } // expression end ``` END_DOC */ @Program(name="vcfgnomad",description="Peek annotations from gnomad",keywords={"vcf","annotation","gnomad"}) public class VcfGnomad extends Launcher{ private static final Logger LOG = Logger.build(VcfGnomad.class).make(); /** allele specific population in gnomad */ private final static String POPS[]=new String[]{"AFR", "AMR", "ASJ", "EAS", "FIN", "NFE", "OTH", "Male", "Female","SAS", "raw", "POPMAX"}; /** 'ome'-type section */ private enum OmeType {exome,genome}; /** entries mapping chromosome/type->vcf.gz */ private List<ManifestEntry> manifestEntries=new ArrayList<>(); private final Map<String,Integer> contig2tid; private class ManifestEntry implements Closeable { OmeType omeType; String contig; String uri; TabixVcfFileReader gnomadTabix=null; int buffferChromEnd=0; final Map<ContigPosRef,VariantContext> buffer=new HashMap<>(); @Override public void close() throws IOException { CloserUtil.close(gnomadTabix); this.buffer.clear(); this.buffferChromEnd=0; this.gnomadTabix=null; } public void open() throws IOException { this.gnomadTabix=new TabixVcfFileReader(this.uri); } /** find matching variant in tabix file, use a buffer to avoid multiple random accesses */ VariantContext findMatching(final ContigPosRef userCtx) { //past last buffer ? refill buffer if(this.buffferChromEnd <= userCtx.getPos()) { buffer.clear(); this.buffferChromEnd = userCtx.getPos() + VcfGnomad.this.gnomadBufferSize; final Iterator<VariantContext> iter=this.gnomadTabix.iterator( userCtx.getContig(), Math.max(0,userCtx.getPos()-1), this.buffferChromEnd ); while(iter.hasNext()) { final VariantContext ctx = iter.next(); final ContigPosRef key= new ContigPosRef(ctx); this.buffer.put(key,ctx); } CloserUtil.close(iter); } return this.buffer.get(userCtx); } } @Parameter(names={"-o","--output"},description="Output file. Optional . Default: stdout") private File outputFile = null; @Parameter(names={"-m","--manifest"},description="manifest file descibing how to map a contig to an URI . 3 columns: 1) exome|genome 2) contig 3) path or URL.") private File manifestFile=null; @Parameter(names={"-filtered","--filtered"},description="Skip Filtered") private boolean skipFiltered=false; @Parameter(names={"-gf","--gnomadFilter"},description="if defined, add this FILTER when the variant is found in nomad") private String inGnomadFilterName=null; @Parameter(names={"-ac","--alleleconcordance"},description="ALL Alt allele must be found in gnomad before setting a FILTER") private boolean alleleconcordance=false; @Parameter(names={"--noAlleleCount"},description="do Not Insert AC /Allele Count") private boolean doNotInsertAlleleCount=false; @Parameter(names={"--noAlleleNumber"},description="do Not Insert AN /Allele Number") private boolean doNotInsertAlleleNumber=false; @Parameter(names={"--noAlleleFreq"},description="do Not Insert AF /Allele Freq.") private boolean doNotInsertAlleleFreq=false; @Parameter(names={"--bufferSize"},description="When we're looking for variant in Exac, load the variants for 'N' bases instead of doing a random access for each variant") private int gnomadBufferSize=100000; private class InfoField { final OmeType ome; final String tag; final boolean is_AC; final VCFHeaderLineType lineType; final List<Object> attributes=new ArrayList<>(); InfoField(String tag, OmeType ome,boolean is_AC,final VCFHeaderLineType lineType) { this.tag=tag; this.ome=ome; this.is_AC = is_AC; this.lineType=lineType; } public String getOutputTag() { return "gnomad_"+ this.ome.name()+"_"+this.tag; } VCFInfoHeaderLine makeVCFInfoHeaderLine() { if(!is_AC) { return new VCFInfoHeaderLine( getOutputTag(),1, this.lineType, "Field "+this.tag+" extracted from Gnomad ("+ome.name()+")" ); } else { return new VCFInfoHeaderLine( getOutputTag(),VCFHeaderLineCount.A, this.lineType, "Field "+this.tag+" extracted from Gnomad ("+ome.name()+")" ); } } void fill(final VariantContext ctx,final VariantContext gnomadCtx) { this.attributes.clear(); if(!is_AC) { int att=gnomadCtx.getAttributeAsInt(this.tag, -9999); if(att>=0) { this.attributes.add(att); } else { this.attributes.add(null); } } else { final List<Allele> galts=gnomadCtx.getAlternateAlleles(); final List<String> gatts = gnomadCtx.getAttributeAsStringList(this.tag,null); for(final Allele a:ctx.getAlternateAlleles()) { Object found=null; //final int idx=gnomadCtx.getAlleleIndex(a);//non idx(REF)==0 final int idx=galts.indexOf(a); if(idx>=0) { if(idx<gatts.size() && gatts.get(idx)!=null && !gatts.get(idx).equals(".")) { switch(this.lineType) { case Integer: found=Integer.parseInt(gatts.get(idx));break; case Float: found=Float.parseFloat(gatts.get(idx));break; default: throw new JvarkitException.ShouldNeverHappen(this.lineType.name()); } } } this.attributes.add(found); } } } } private String normalizeContig(final String contig) { if(contig.startsWith("chr")) return contig.substring(3); return contig; } @Override protected int doVcfToVcf( final String inputName, final VcfIterator iter, final VariantContextWriter out ) { final ManifestEntry ome2manifest[]=new ManifestEntry[OmeType.values().length]; Arrays.fill(ome2manifest,null); try { final List<InfoField> infoFields=new ArrayList<>(); for(OmeType ome:OmeType.values()) { for(final String pop: POPS) { if(!doNotInsertAlleleCount) infoFields.add(new InfoField("AC_"+pop,ome,true,VCFHeaderLineType.Integer)); if(!doNotInsertAlleleFreq) infoFields.add(new InfoField("AF_"+pop,ome,true,VCFHeaderLineType.Float)); if(!doNotInsertAlleleNumber) infoFields.add(new InfoField("AN_"+pop,ome,pop.equals("POPMAX"),VCFHeaderLineType.Integer)); } if(!doNotInsertAlleleCount) infoFields.add(new InfoField("AC",ome,true,VCFHeaderLineType.Integer)); if(!doNotInsertAlleleFreq) infoFields.add(new InfoField("AF",ome,true,VCFHeaderLineType.Float)); if(!doNotInsertAlleleNumber) infoFields.add(new InfoField("AN",ome,false,VCFHeaderLineType.Integer)); } String prevContig=null; final VCFHeader h2=new VCFHeader(iter.getHeader()); if(inGnomadFilterName!=null) { h2.addMetaDataLine(new VCFFilterHeaderLine(inGnomadFilterName,"Variant is in Gnomad")); } for(final InfoField infoField: infoFields) { h2.addMetaDataLine(infoField.makeVCFInfoHeaderLine()); } out.writeHeader(h2); final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(h2); while(iter.hasNext()) { final VariantContext ctx = progress.watch(iter.next()); if(this.skipFiltered && ctx.isFiltered() ) { out.add(ctx); continue; } final String ensemblContig=normalizeContig(ctx.getContig()); /* CONTIG has changed, update the CONTIG */ if(prevContig==null || !prevContig.equals(ctx.getContig())) { LOG.debug("Data for "+ctx.getContig()); prevContig=ctx.getContig(); for(OmeType ome: OmeType.values()) { ManifestEntry newEntry = null; for(final ManifestEntry e: this.manifestEntries) { if(e.omeType!=ome) continue; if(e.contig.equals("*")) { //accept } else if(!e.contig.equals(ensemblContig)){ continue; } newEntry=e; break; } if(newEntry==null) { LOG.warn("No Gnomad Data for "+ctx.getContig()+" / "+ome); } final ManifestEntry prevEntry = ome2manifest[ome.ordinal()]; if(prevEntry==null && newEntry==null){ ome2manifest[ome.ordinal()]=null; } else if(newEntry!=null && prevEntry!=null && prevEntry.uri.equals(newEntry.uri)) { // no need to re-open //continue with prev entry } else if(newEntry==null && prevEntry!=null) { prevEntry.close(); ome2manifest[ome.ordinal()]=null; } else { if(prevEntry!=null) prevEntry.close(); ome2manifest[ome.ordinal()]=newEntry; LOG.info("opening "+newEntry.uri); newEntry.open(); } } } /** END UPDATE CONTIG */ for(final InfoField infoField: infoFields) { infoField.attributes.clear(); } boolean setfilter=false; // lopp over exome and genome data for(int i=0;i< ome2manifest.length;++i) { ManifestEntry entry = ome2manifest[i]; if(entry==null) continue; final VariantContext ctx2=entry.findMatching(new ContigPosRef(ctx)); if(ctx2==null) continue; for(final InfoField infoField: infoFields) { if(infoField.ome!=entry.omeType) continue; infoField.fill(ctx, ctx2); } if(this.alleleconcordance) { //stream all ALT. return false if we found one ALT that is not found in Gnomad setfilter = !ctx.getAlternateAlleles().stream(). filter(A->!ctx2.getAlternateAlleles().contains(A)). findAny().isPresent(); } else { setfilter=true; } } final VariantContextBuilder vcb=new VariantContextBuilder(ctx); if(setfilter && this.inGnomadFilterName!=null) { vcb.filter(inGnomadFilterName); } for(final InfoField infoField: infoFields) { if(infoField.attributes.isEmpty()) continue; if(!infoField.attributes.stream().filter(N->N!=null).findAny().isPresent()) continue; vcb.attribute(infoField.getOutputTag(), infoField.attributes); } out.add(vcb.make()); } progress.finish(); return 0; } catch (final Exception e) { LOG.error(e); return -1; } finally { CloserUtil.close(Arrays.asList(ome2manifest)); } } public VcfGnomad() { contig2tid=new HashMap<>(25); for(int i=1;i<=22;++i) contig2tid.put(String.valueOf(i), i); contig2tid.put("X",23); contig2tid.put("Y",24); } @Override public int doWork(final List<String> args) { if(this.gnomadBufferSize < 10) { this.gnomadBufferSize = 10; } if(this.manifestFile==null) { LOG.info("Building default manifest file..."); for(OmeType ot: OmeType.values()) { for(int i=1;i<= 23;++i) { final ManifestEntry entry=new ManifestEntry(); entry.omeType=ot; switch(i) { default: entry.contig=String.valueOf(i);break; case 23: entry.contig="X";break; } if(ot==OmeType.genome) { entry.uri = "https://storage.googleapis.com/gnomad-public/release-170228/vcf/genomes/gnomad.genomes.r2.0.1.sites."+entry.contig+".vcf.gz"; } else { entry.uri = "https://storage.googleapis.com/gnomad-public/release-170228/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz"; } this.manifestEntries.add(entry); } } LOG.info("Building default manifest file... Done"); } else { try { Files.lines(this.manifestFile.toPath()).forEach(L->{ if(L.startsWith("#") || L.trim().isEmpty()) return; final String tokens[]=L.split("[\t]"); if(tokens.length<3) throw new JvarkitException.TokenErrors("Expected 3 words",tokens); final ManifestEntry entry=new ManifestEntry(); entry.omeType=OmeType.valueOf(tokens[0]); entry.contig = tokens[1].trim(); entry.uri=tokens[2].trim(); VcfGnomad.this.manifestEntries.add(entry); }); } catch(final IOException err) { LOG.error(err); return -1; } } return doVcfToVcf(args, outputFile); } /** For SolenaLS & Julien in knime. temporary solution */ @Deprecated public static final class KnimeAdapter extends VcfGnomad { private String outputString=""; private static class SingletonVcfIterator extends AbstractIterator<VariantContext> implements VcfIterator { final VCFHeader header=new VCFHeader(); final Iterator<VariantContext> delegate; SingletonVcfIterator(VariantContext ctx) { this.delegate = Collections.singletonList(ctx).iterator(); } @Override public AbstractVCFCodec getCodec() { throw new JvarkitException.ShouldNeverHappen("getCodec"); } @Override public VCFHeader getHeader() { return header; } @Override protected VariantContext advance() { return this.delegate.hasNext()?this.delegate.next():null; } @Override public void close() throws IOException { } } private static class SingletonWriter implements VariantContextWriter { VariantContext variant=null; @Override public void writeHeader(VCFHeader arg0) { } @Override public void add(VariantContext ctx) { this.variant=ctx; } @Override public void close() { } @Override public boolean checkError() {return false;} } @Override protected int doVcfToVcf(final List<String> inputs,final File outorNull) { super.gnomadBufferSize=5; if(inputs.size()!=4) throw new JvarkitException.UserError("expected 4 fields : CHROM/POS/REF/ALTS"); final List<Allele> alleles=new ArrayList<>(); final Allele ref= Allele.create(inputs.get(2),true); final long start = Long.parseLong(inputs.get(1)); alleles.add(ref); alleles.addAll(Arrays.stream(inputs.get(3).split("[,]")).map( S->Allele.create(S,false) ).collect(Collectors.toList())); final VariantContext vc=new VariantContextBuilder("knime", inputs.get(0), start, start + ref.length() -1, alleles ).make(); final SingletonWriter w=new SingletonWriter(); int ret= doVcfToVcf("knime", new SingletonVcfIterator(vc), w); if(ret!=0 || w.variant==null) return -1; for(final String key:w.variant.getAttributes().keySet()) { if(!(this.outputString==null || this.outputString.isEmpty())) { this.outputString+=";"; } this.outputString+=key+"="+String.join(",",w.variant.getAttributeAsStringList(key, ".")); } return ret; } public String getOutputString() { return this.outputString; } public static void main(String[] args) { final KnimeAdapter app= new KnimeAdapter(); if(app.instanceMain(args)==0) { app.stdout().println(app.outputString); }; } } public static void main(String[] args) { new VcfGnomad().instanceMainWithExit(args); } }