package com.github.lindenb.jvarkit.tools.vcfdo; import java.io.File; import java.io.IOException; import java.util.HashSet; import java.util.List; import java.util.Set; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFFilterHeaderLine; import htsjdk.variant.vcf.VCFHeader; import com.beust.jcommander.Parameter; import com.github.lindenb.jvarkit.util.doid.DiseaseOntoglogyTree; import com.github.lindenb.jvarkit.util.jcommander.Program; import com.github.lindenb.jvarkit.util.log.Logger; import com.github.lindenb.jvarkit.util.vcf.VcfIterator; @Program(name="vcffilterdoid",description="Set Filters for VCF annotated with SNPEFF or VEP tesing if Genes are part / are not part of a GeneOntology tree. ") public class VcfFilterDoid extends AbstractVCFDiseaseOntology { private static final Logger LOG= Logger.build(AbstractVCFDiseaseOntology.class).make(); @Parameter(names={"-o","--out"},description="Output File") private File outputFile=null; /* list of DOID accessions for gene having a DOID-term children of the user output. */ @Parameter(names="-C",description="list of DOID accessions for gene having a DOID-term children of the user output") private Set<String> CHILD_OF=new HashSet<String>(); /* Filter name */ private String FILTER="DOID"; @Override protected int doVcfToVcf(String inputName, VcfIterator r, VariantContextWriter w) { try { super.readDiseaseOntoglogyTree(); Set<DiseaseOntoglogyTree.Term> positive_terms=new HashSet<DiseaseOntoglogyTree.Term>(CHILD_OF.size()); for(String acn:CHILD_OF) { DiseaseOntoglogyTree.Term t=super.diseaseOntoglogyTree.getTermByAccession(acn); if(t==null) { throw new IOException("Cannot find GO acn "+acn); } positive_terms.add(t); } LOG.info("positive_terms:"+positive_terms); VCFHeader header=r.getHeader(); super.readDiseaseOntoglogyAnnotations(); super.loadEntrezGenes(header.getSequenceDictionary()); VCFHeader h2=new VCFHeader(header.getMetaDataInInputOrder(),header.getSampleNamesInOrder()); h2.addMetaDataLine( new VCFFilterHeaderLine( FILTER ,"Flag DOID terms "+CHILD_OF+" using "+super.DOI_INPUT+" and "+super.DOI_ANN )); w.writeHeader(h2); while(r.hasNext()) { VariantContext ctx=r.next(); VariantContextBuilder b=new VariantContextBuilder(ctx); Set<Integer> geneIds=getGeneIds(ctx); boolean set_positive=false; for(Integer GN:geneIds) { Set<DiseaseOntoglogyTree.Term> goset=super.gene2doid.get(GN); if(goset==null) continue; for(DiseaseOntoglogyTree.Term goParent:positive_terms) { for(DiseaseOntoglogyTree.Term t:goset) { if(goParent.hasDescendant(t.getAcn())) { set_positive=true; break; } } if(set_positive) break; } if(set_positive) break; } Set<String> filters=new HashSet<String>(ctx.getFilters()); if(set_positive) { filters.add(FILTER); } b.filters(filters); w.add(b.make()); } return 0; } catch(Exception err) { LOG.error(err); return -1; } } @Override public int doWork(List<String> args) { System.err.println("I'm working on this. Sorry. Exiting"); if("A".equals("A")) System.exit(-1); return doVcfToVcf(args,outputFile); } public static void main(String[] args) { new VcfFilterDoid().instanceMainWithExit(args); } }