/**
*
*/
package com.github.lindenb.jvarkit.tools.misc;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFFilterHeaderLine;
import htsjdk.variant.vcf.VCFFormatHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import com.beust.jcommander.Parameter;
import com.github.lindenb.jvarkit.util.jcommander.Launcher;
import com.github.lindenb.jvarkit.util.jcommander.Program;
import com.github.lindenb.jvarkit.util.log.Logger;
import com.github.lindenb.jvarkit.util.picard.GenomicSequence;
import com.github.lindenb.jvarkit.util.vcf.VCFUtils;
import com.github.lindenb.jvarkit.util.vcf.VcfIterator;
@Program(name="nozerovariationvcf",description="cat a whole VCF, or, if there is no variant, creates a fake one")
public class NoZeroVariationVCF extends Launcher
{
private static final Logger LOG = Logger.build(NoZeroVariationVCF.class).make();
@Parameter(names={"-o","--output"},description="Output file. Optional . Default: stdout")
private File outputFile = null;
@Parameter(names={"-R","-r","--reference"},description=INDEXED_FASTA_REFERENCE_DESCRIPTION)
private File faidx = null;
private IndexedFastaSequenceFile indexedFastaSequenceFile;
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
final VCFHeader header=in.getHeader();
if(in.hasNext())
{
LOG.info("found a variant in VCF.");
VCFUtils.copyHeaderAndVariantsTo(in, out);
}
else
{
LOG.info("no a variant in VCF. Creating a fake Variant");
header.addMetaDataLine(new VCFFilterHeaderLine("FAKESNP", "Fake SNP created because vcf input was empty."));
VCFFormatHeaderLine gtHeaderLine=header.getFormatHeaderLine(VCFConstants.GENOTYPE_KEY);
if(gtHeaderLine==null)
{
LOG.info("Adding GT to header");
header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
}
gtHeaderLine=header.getFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY);
if(gtHeaderLine==null)
{
LOG.info("Adding GQ to header");
header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality"));
}
out.writeHeader(header);
SAMSequenceDictionary dict=this.indexedFastaSequenceFile.getSequenceDictionary();
//choose random chrom, best is 'random' , but not 1...23,X,Y, etc...
String chrom=dict.getSequence(0).getSequenceName();
for(final SAMSequenceRecord ssr:dict.getSequences())
{
String ssn=ssr.getSequenceName();
if(ssn.contains("_")) { chrom=ssn; break;}
}
for(final SAMSequenceRecord ssr:dict.getSequences())
{
String ssn=ssr.getSequenceName();
if(ssn.toLowerCase().contains("random")) { chrom=ssn; break;}
if(ssn.toLowerCase().contains("gl")) { chrom=ssn; break;}
}
GenomicSequence gseq=new GenomicSequence(this.indexedFastaSequenceFile,
chrom
);
char ref='N';
char alt='N';
int POS=0;
for(POS=0;POS< gseq.length();++POS)
{
ref=Character.toUpperCase(gseq.charAt(POS));
if(ref=='N') continue;
switch(ref)
{
case 'A': alt='T'; break;
case 'T': alt='G'; break;
case 'G': alt='C'; break;
case 'C': alt='A'; break;
default:break;
}
if(alt=='N') continue;
break;
}
if(alt=='N') throw new RuntimeException("found only N");
VariantContextBuilder vcb=new VariantContextBuilder();
Allele a1=Allele.create((byte)ref,true);
Allele a2=Allele.create((byte)alt,false);
List<Allele> la1a2=new ArrayList<Allele>(2);
List<Genotype> genotypes=new ArrayList<Genotype>(header.getSampleNamesInOrder().size());
la1a2.add(a1);
la1a2.add(a2);
vcb.chr(gseq.getChrom());
vcb.start(POS+1);
vcb.stop(POS+1);
vcb.filter("FAKESNP");
vcb.alleles(la1a2);
vcb.log10PError(-0.1);
for(String sample:header.getSampleNamesInOrder())
{
final GenotypeBuilder gb=new GenotypeBuilder(sample);
if(genotypes.isEmpty()) {
gb.DP(1);
gb.GQ(1);
gb.alleles(la1a2);
gb.noAD();
gb.noPL();
genotypes.add(gb.make());
}
else
{
genotypes.add(GenotypeBuilder.createMissing(sample, 2));
}
}
vcb.genotypes(genotypes);
vcb.noID();
out.add(vcb.make());
}
return 0;
}
@Override
public int doWork(List<String> args) {
if(faidx==null)
{
LOG.error("Indexed fasta file missing.");
return -1;
}
try
{
this.indexedFastaSequenceFile=new IndexedFastaSequenceFile(faidx);
return doVcfToVcf(args,this.outputFile);
}
catch(Exception err)
{
LOG.error(err);
return -1;
}
finally
{
CloserUtil.close(this.indexedFastaSequenceFile);
this.indexedFastaSequenceFile =null;
}
}
public static void main(String[] args)
{
new NoZeroVariationVCF().instanceMainWithExit(args);
}
}