package com.github.lindenb.jvarkit.tools.vcfcomposite; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.File; import java.io.IOException; import java.io.PrintWriter; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.function.Predicate; import java.util.stream.Collectors; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.SortingCollection; import htsjdk.tribble.readers.LineIterator; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFCodec; import htsjdk.variant.vcf.VCFHeader; import com.beust.jcommander.Parameter; import com.beust.jcommander.ParametersDelegate; import com.github.lindenb.jvarkit.io.IOUtils; import com.github.lindenb.jvarkit.util.EqualRangeIterator; import com.github.lindenb.jvarkit.util.Pedigree; import com.github.lindenb.jvarkit.util.jcommander.Launcher; import com.github.lindenb.jvarkit.util.jcommander.Program; import com.github.lindenb.jvarkit.util.log.Logger; import com.github.lindenb.jvarkit.util.picard.AbstractDataCodec; import com.github.lindenb.jvarkit.util.vcf.ContigPosRef; import com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser; import com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory; import com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser; import com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory; @Program(name="vcfcomposite",description="TODO",keywords={"vcf","disease","annotation","pedigree"}) public class VCFComposite extends Launcher { private static final Logger LOG= Logger.build(VCFComposite.class).make(); @Parameter(names={"-ped","--pedigree"},description="Pedigree file",required=true) private File pedigreeFile=null; @Parameter(names={"-o","--out"},description="OUtput file") private File outputFile=null; @Parameter(names={"-acceptFiltered","--acceptFiltered"},description="Accept FILTERED variants") private boolean acceptFiltered=false; @Parameter(names={"-acceptID","--acceptID"},description="Accept variants with ID") private boolean acceptID=false; @Parameter(names={"-m","--model"},description="Model type",required=true) private Type modelType=null; @Parameter(names={"-models"},description="List the available models and exits",help=true) private boolean listModels=false; @ParametersDelegate private WritingSortingCollection writingSortingCollection = new WritingSortingCollection(); private Pedigree pedigree=null; private static class GeneAndVariant { String gene; String ctxLine; } private static class GeneAndVariantCodec extends AbstractDataCodec<GeneAndVariant> { public GeneAndVariantCodec() { } @Override public GeneAndVariant decode(DataInputStream dis) throws IOException { final GeneAndVariant gav=new GeneAndVariant(); try { gav.gene=dis.readUTF(); } catch(IOException err) { return null; } gav.ctxLine = super.readString(dis); return gav; } @Override public void encode(DataOutputStream dos, GeneAndVariant o) throws IOException { dos.writeUTF(o.gene); super.writeString(dos, o.ctxLine); } @Override public AbstractDataCodec<GeneAndVariant> clone() { return new GeneAndVariantCodec(); } } private final Predicate<Genotype> deNovoGenotype = new Predicate<Genotype>() { @Override public boolean test(Genotype t) { Pedigree.Person c = pedigree.getPersonById(t.getSampleName()); if(c==null) return false; return false; } }; public static enum Type { //RecessiveHomVar(""), RecessiveComposite("child carry at least two HET variant in the same gene unit.") ; private final String desc; Type(final String desc ) { this.desc = desc; } public String getDescription() { return desc; } }; private abstract class DiseaseModel { public List<Genotype> getGenotypesOfAffected(final VariantContext ctx) { return ctx.getGenotypes().stream().filter(G->{ final Pedigree.Person p= pedigree.getPersonById(G.getSampleName()); if(p==null) return false; if(!p.isAffected()) return false; return true; }).collect(Collectors.toList()); } public List<Genotype> getGenotypesOfUnaffected(final VariantContext ctx) { return ctx.getGenotypes().stream().filter(G->{ final Pedigree.Person p= pedigree.getPersonById(G.getSampleName()); if(p==null) return false; if(!p.isUnaffected()) return false; return true; }).collect(Collectors.toList()); } public abstract boolean accept(final VariantContext ctx); public abstract void scan(final String geneName,List<VariantContext> variants,PrintWriter out); } private class RecessiveHomVar extends DiseaseModel { @Override public boolean accept(final VariantContext ctx) { if(!getGenotypesOfAffected(ctx).stream().anyMatch(G->G.isHomVar())) return false; if(getGenotypesOfUnaffected(ctx).stream().anyMatch(G->G.isHomVar())) return false; return true; } @Override public void scan(final String geneName,List<VariantContext> variants,PrintWriter out) { LOG.info(geneName); } } /* affected individual contains at least two HET variants */ private class RecessiveComposite extends DiseaseModel { /** affected can be HET or HOM_VAR */ private boolean isGenotypeForAffected(final Genotype g) { if(g==null) return false; return g.isHet() || g.isHomVar(); } @Override public boolean accept(final VariantContext ctx) { if(!getGenotypesOfAffected(ctx).stream(). anyMatch(G->isGenotypeForAffected(G))) return false; return true; } @Override public void scan(final String geneName,final List<VariantContext> variants,final PrintWriter out) { if(variants.size()<2) return; /* loop over affected */ for(final Pedigree.Person c: pedigree.getAffected()) { for(int x=0;x+1< variants.size();++x) { final Genotype gcx = variants.get(x).getGenotype(c.getId()); // child variant n°y must be HOM_VAR or HET if(gcx==null || !isGenotypeForAffected(gcx)) continue; // search for the second snp for(int y=x+1;y< variants.size();++y) { final Genotype gcy = variants.get(y).getGenotype(c.getId()); // child variant n°y must be HOM_VAR or HET if(gcy==null || !isGenotypeForAffected(gcy)) continue; boolean unaffected_are_ok=true; //check unaffected indididual don't have same haplotype for(final Pedigree.Person unaffected: pedigree.getUnaffected()) { final Genotype gux = variants.get(x).getGenotype(unaffected.getId()); final Genotype guy = variants.get(y).getGenotype(unaffected.getId()); if(gux!=null && guy!=null && gux.sameGenotype(gcx, true) && guy.sameGenotype(gcy, true) ) { unaffected_are_ok=false; break; } } if(unaffected_are_ok) { out.print(geneName); out.print("\t"); out.print(c.getFamily()); out.print("\t"); out.print(c.getId()); out.print("\t"); out.print(new ContigPosRef(variants.get(x))); out.print("\t"); out.print(gcx.getAlleles().stream().map(A->A.getDisplayString()).collect(Collectors.joining("/"))); out.print("\t"); out.print(new ContigPosRef(variants.get(y))); out.print("\t"); out.print(gcy.getAlleles().stream().map(A->A.getDisplayString()).collect(Collectors.joining("/"))); out.println(); } } } } } } protected DiseaseModel createModel() { if(this.modelType==null) throw new NullPointerException(); switch(this.modelType) { //case RecessiveHomVar: return new RecessiveHomVar(); case RecessiveComposite: return new RecessiveComposite(); default: throw new IllegalStateException(); } } @Override public int doWork(final List<String> args) { PrintWriter out=null; try { out= super.openFileOrStdoutAsPrintWriter(this.outputFile); if(listModels) { for(final Type t:Type.values()) { out.println(t.name()); out.println("\t"+t.getDescription()); } out.flush(); return 0; } this.pedigree = Pedigree.newParser().parse(pedigreeFile); if(this.pedigree.getAffected().isEmpty()) { LOG.error("No Affected sample in "+this.pedigreeFile); return -1; } if(this.pedigree.getUnaffected().isEmpty()) { LOG.error("No Unaffected sample in "+this.pedigreeFile); return -1; } final DiseaseModel model = this.createModel(); final String inputName= super.oneFileOrNull(args); final LineIterator r= (inputName==null? IOUtils.openStreamForLineIterator(stdin()): IOUtils.openURIForLineIterator(inputName) ); final VCFCodec codec = new VCFCodec(); final VCFHeader header = (VCFHeader)codec.readActualHeader(r); final AnnPredictionParser annParser=new AnnPredictionParserFactory(header).get(); final VepPredictionParser vepParser=new VepPredictionParserFactory(header).get(); //final VCFHeader h2=new VCFHeader(header.getMetaDataInInputOrder(),header.getSampleNamesInOrder()); //h2.addMetaDataLine(new VCFInfoHeaderLine(this.TAG,1,VCFHeaderLineType.String,"Values from bigwig file: "+BIGWIG)); SortingCollection<GeneAndVariant> sorting=null; String prevContig=null; for(;;) { String line; final VariantContext ctx; if(r.hasNext()) { line = r.next(); ctx=codec.decode(line); } else { line=null; ctx=null; } if(ctx==null || !ctx.getContig().equals(prevContig)) { if(sorting!=null) { LOG.debug("Dump contig "+prevContig); sorting.doneAdding(); CloseableIterator<GeneAndVariant> iter2=sorting.iterator(); EqualRangeIterator<GeneAndVariant> eqiter= new EqualRangeIterator<>(iter2,(A,B)->A.gene.compareTo(B.gene)); while(eqiter.hasNext()) { final List<GeneAndVariant> variants=eqiter.next(); model.scan( variants.get(0).gene, variants.stream(). map(L->codec.decode(L.ctxLine)). collect(Collectors.toList()), out ); } eqiter.close(); iter2.close(); sorting.cleanup(); } sorting=null; if(ctx==null) break; prevContig=ctx.getContig(); } if(!ctx.isVariant()) continue; if(!acceptFiltered && ctx.isFiltered()) continue; if(!acceptID && ctx.hasID()) continue; if(!model.accept(ctx)) continue; final Set<String> geneKeys = new HashSet<>(); for(final AnnPredictionParser.AnnPrediction pred: annParser.getPredictions(ctx)) { geneKeys.addAll(pred.getGeneKeys().stream().map(S->ctx.getContig()+"_"+S).collect(Collectors.toSet())); } for(final VepPredictionParser.VepPrediction pred: vepParser.getPredictions(ctx)) { geneKeys.addAll(pred.getGeneKeys().stream().map(S->ctx.getContig()+"_"+S).collect(Collectors.toSet())); } if(sorting==null) { sorting = SortingCollection.newInstance(GeneAndVariant.class, new GeneAndVariantCodec(), (A,B)->{int i=A.gene.compareTo(B.gene);if(i!=0) return i;return A.ctxLine.compareTo(B.ctxLine);}, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpDirectories() ); sorting.setDestructiveIteration(true); } for(final String gk:geneKeys) { final GeneAndVariant gav=new GeneAndVariant(); gav.gene = gk; gav.ctxLine = line; sorting.add(gav); } } out.flush(); out.close(); out=null; return 0; } catch(Exception err) { LOG.error(err); return -1; } finally { CloserUtil.close(out); } } public static void main(String[] args) { new VCFComposite().instanceMainWithExit(args); } }