package com.github.lindenb.jvarkit.tools.biostar;
import java.io.File;
import java.util.List;
import com.beust.jcommander.Parameter;
import com.github.lindenb.jvarkit.util.jcommander.Launcher;
import com.github.lindenb.jvarkit.util.jcommander.Program;
import com.github.lindenb.jvarkit.util.log.Logger;
import com.github.lindenb.jvarkit.util.picard.GenomicSequence;
import com.github.lindenb.jvarkit.util.vcf.VcfIterator;
import com.github.lindenb.semontology.Term;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
/*
BEGIN_DOC
## Example
```
$ java -jar dist/biostar251649.jar -n 10 -R tests/ref.fa tests/mutations.vcf
##INFO=<ID=SEQ3_10,Number=1,Type=String,Description="Sequence on the 3' of mutation">
##INFO=<ID=SEQ5_10,Number=1,Type=String,Description="Sequence on the 5' of mutation">
(...)
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3 S4
rotavirus 51 . A G 22.55 . AC1=2;AF1=0.25;BQB=1;DP=944;DP4=849,0,93,0;FQ=23.7972;G3=0.75,0,0.25;HWE=0.033921;MQ=60;MQ0F=0;MQB=1;PV4=1,1,1,1;RPB=0.993129;SEQ3_10=GATGGTAAGC;SEQ5_10=TCTACTCAGC;SGB=-61.9012;VDB=3.53678e-05 GT:PL 0/0:0,255,134 0/0:0,255,127 0/0:0,255,137 1/1:70,255,0
rotavirus 91 . A T 5.45 . AC1=1;AF1=0.124963;BQB=0.951201;DP=1359;DP4=1134,0,225,0;FQ=5.8713;MQ=60;MQ0F=0;MQB=1;PV4=1,4.80825e-05,1,1;RPB=0.0393173;SEQ3_10=GTTGTTGCTG;SEQ5_10=TTGAAGCTGC;SGB=-369.163;VDB=0.313337 GT:PL 0/0:0,255,133 0/1:40,0,31 0/0:0,255,134 0/0:0,255,82
```
END_DOC
*/
@Program(name="biostar251649",
description=" Annotating the flanking bases of SNPs in a VCF file",
biostars=251649,
keywords={"vcf","annotation","sequence","reference"},
terms = Term.ID_0000015
)
public class Biostar251649 extends Launcher
{
private static final Logger LOG= Logger.build(Biostar251649.class).make();
@Parameter(names={"-o","--out"},description="output file or stdout")
private File outputFile = null;
@Parameter(names="-5",description="Left tag")
private String leftTag="SEQ5_";
@Parameter(names="-3",description="Right tag")
private String rightTag="SEQ3_";
@Parameter(names="-n",description="number of bases")
private int extend=1;
@Parameter(names={"-r","-R","--reference"},
description=INDEXED_FASTA_REFERENCE_DESCRIPTION,
required=true,
converter=Launcher.IndexedFastaSequenceFileConverter.class
)
private IndexedFastaSequenceFile faidx = null;
@Override
protected int doVcfToVcf(String inputName, VcfIterator in,
VariantContextWriter w)
{
try {
final VCFHeader header = new VCFHeader(in.getHeader());
VCFInfoHeaderLine info5 = new VCFInfoHeaderLine(leftTag+extend,
1, VCFHeaderLineType.String,"Sequence on the 5' of mutation");
VCFInfoHeaderLine info3 = new VCFInfoHeaderLine(rightTag+extend,
1, VCFHeaderLineType.String,"Sequence on the 3' of mutation");
if(header.getInfoHeaderLine(info5.getID())!=null)
{
LOG.error("tag "+info5.getID()+" already present in VCF header");
return -1;
}
if(header.getInfoHeaderLine(info3.getID())!=null)
{
LOG.error("tag "+info3.getID()+" already present in VCF header");
return -1;
}
header.addMetaDataLine(info5);
header.addMetaDataLine(info3);
GenomicSequence chrom= null;
w.writeHeader(header);
while(in.hasNext()) {
final VariantContext ctx = in.next();
if(chrom==null || !chrom.getChrom().equals(ctx.getContig())) {
chrom = new GenomicSequence(this.faidx,ctx.getContig());
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
if(ctx.getStart()>0)
{
final StringBuilder sb = new StringBuilder(this.extend);
for(int i=0;i< this.extend;++i)
{
final int pos0 = (ctx.getStart()-2)-i;
if(pos0<= 0) continue;
sb.insert(0, chrom.charAt(pos0));
}
if(sb.length()>0) vcb.attribute(info5.getID(),sb.toString());
}
{
final StringBuilder sb = new StringBuilder(this.extend);
for(int i=0;i< this.extend;++i)
{
int pos0 = ctx.getEnd()+i;
if(pos0>= chrom.length()) break;
sb.append(chrom.charAt(pos0));
}
if(sb.length()>0) vcb.attribute(info3.getID(),sb.toString());
}
w.add(vcb.make());
}
return 0;
}
catch(Exception err)
{
LOG.error(err);
return -1;
}
finally
{
CloserUtil.close(faidx);
}
}
@Override
public int doWork(List<String> args)
{
return doVcfToVcf(args, outputFile);
}
public static void main(String[] args)
{
new Biostar251649().instanceMainWithExit(args);
}
}