package com.github.lindenb.jvarkit.tools.misc;
import java.io.BufferedReader;
import java.util.List;
import java.util.regex.Pattern;
import htsjdk.samtools.util.CloserUtil;
import com.github.lindenb.jvarkit.util.jcommander.Launcher;
import com.github.lindenb.jvarkit.util.jcommander.Program;
import com.github.lindenb.jvarkit.util.log.Logger;
/**
 * Command-line tool that copies a VCF to standard output while repairing the
 * per-sample {@code PL} values.
 *
 * <p>For every genotype column, each {@code .} found inside a comma-separated
 * {@code PL} list is replaced with {@code 0}; a {@code PL} that is exactly
 * {@code .} is left untouched. Genotype columns that carry fewer fields than
 * the FORMAT column are padded with {@code .}; genotype columns that carry
 * more fields than FORMAT abort the run.
 *
 * <p>Header lines are copied verbatim, with two extra {@code ##FixVcfFormat*}
 * lines inserted just before the {@code #CHROM} line.
 */
@Program(name="fixvcfformat",description="Fix PL format in VCF. Problem is described in http://gatkforums.broadinstitute.org/discussion/3453")
public class FixVcfFormat extends Launcher
{
    private static final Logger LOG=Logger.build(FixVcfFormat.class).make();

    /** Column separator of a VCF record. */
    private static final Pattern TAB=Pattern.compile("[\t]");
    /** Field separator inside the FORMAT column and the genotype columns. */
    private static final Pattern COLON=Pattern.compile("[\\:]");
    /** Value separator inside a PL field. */
    private static final Pattern COMMA=Pattern.compile("[,]");

    /** Number of fixed leading columns (CHROM..FORMAT) copied unchanged. */
    private static final int FIRST_SAMPLE_COLUMN=9;
    /** Index of the FORMAT column. */
    private static final int FORMAT_COLUMN=8;
    /** Maximum number of "fewer fields than FORMAT" warnings that are logged. */
    private static final int MAX_MISMATCH_WARNINGS=10;

    private FixVcfFormat()
    {
    }

    /**
     * Appends a PL field to {@code out}, writing {@code 0} in place of every
     * value that is exactly {@code .}.
     *
     * @param out buffer receiving the rewritten PL field
     * @param pl  the raw comma-separated PL field
     * @return the number of values that were replaced
     */
    private static int appendFixedPl(final StringBuilder out, final String pl)
    {
        final String values[]=COMMA.split(pl);
        int replaced=0;
        for(int j=0;j< values.length;++j)
        {
            if(j>0) out.append(',');
            if(values[j].equals("."))
            {
                out.append('0');
                ++replaced;
            }
            else
            {
                out.append(values[j]);
            }
        }
        return replaced;
    }

    /**
     * Reads the VCF designated by {@code args} and writes the fixed VCF to
     * standard output.
     *
     * <p>Each record is assembled in memory and printed only once it is
     * complete, so an aborted run never leaves a truncated record on
     * standard output.
     *
     * @param args remaining command-line arguments, passed to
     *             {@code oneFileOrNull} to select the input
     * @return {@code 0} on success, {@code -1} if a genotype has more fields
     *         than FORMAT or if any exception is raised
     */
    @Override
    public int doWork(List<String> args)
    {
        long n_var=0L;
        long n_fix=0L;
        int report_mismatch_sample_call=0;
        BufferedReader in=null;
        try
        {
            in = super.openBufferedReader(oneFileOrNull(args));
            // Reused for every record to avoid one allocation per line.
            final StringBuilder out=new StringBuilder();
            String line;
            while((line=in.readLine())!=null)
            {
                if(line.startsWith("#"))
                {
                    if(line.startsWith("#CHROM"))
                    {
                        // Record how the file was produced, right before the column header.
                        System.out.println("##FixVcfFormatCmd="+getProgramCommandLine().replace('\n', ' '));
                        System.out.println("##FixVcfFormatVersion="+getVersion());
                    }
                    System.out.println(line);
                    continue;
                }

                if(++n_var%10000==0)
                {
                    LOG.info("Variant:"+n_var+" fix:"+n_fix);
                }

                final String tokens[]=TAB.split(line);
                if(tokens.length<FIRST_SAMPLE_COLUMN)
                {
                    // No FORMAT column: nothing to fix, echo the line as-is.
                    LOG.warning("not enought column in "+line);
                    System.out.println(line);
                    continue;
                }

                final String formats[]=COLON.split(tokens[FORMAT_COLUMN]);
                int PL_index=-1;
                for(int i=0;i< formats.length;++i)
                {
                    if(formats[i].equals("PL"))
                    {
                        PL_index=i;
                        break;
                    }
                }

                out.setLength(0);
                for(int i=0;i< FIRST_SAMPLE_COLUMN;++i)
                {
                    if(i>0) out.append('\t');
                    out.append(tokens[i]);
                }

                for(int sample=FIRST_SAMPLE_COLUMN;sample< tokens.length;++sample)
                {
                    out.append('\t');
                    if(tokens[sample].equals("."))
                    {
                        out.append(tokens[sample]);
                        continue;
                    }
                    final String calls[]=COLON.split(tokens[sample]);
                    if(calls.length>formats.length)
                    {
                        // The partially built record is discarded, not printed.
                        LOG.error("not same number of columns between FORMAT and call:"+tokens[8]+" vs "+tokens[sample]);
                        return -1;
                    }
                    else if(calls.length<formats.length)
                    {
                        // Only the first few occurrences are logged to keep the log readable.
                        if(report_mismatch_sample_call<MAX_MISMATCH_WARNINGS)
                        {
                            LOG.warning("not same number of columns between FORMAT and call:"+tokens[8]+" vs "+tokens[sample]);
                        }
                        report_mismatch_sample_call++;
                    }

                    for(int i=0;i< calls.length;++i)
                    {
                        if(i>0) out.append(':');
                        if(i==PL_index && !calls[i].equals("."))
                        {
                            n_fix+=appendFixedPl(out,calls[i]);
                        }
                        else
                        {
                            out.append(calls[i]);
                        }
                    }
                    // Pad the fields declared in FORMAT but absent from this genotype.
                    for(int i=calls.length;i< formats.length;++i)
                    {
                        if(i>0) out.append(':');
                        out.append('.');
                    }
                }

                System.out.println(out.toString());
                // Stop early when standard output is in error (e.g. closed pipe).
                if(System.out.checkError()) break;
            }
            LOG.info("Number of FIX: "+n_fix);
            return 0;
        }
        catch(Exception err)
        {
            LOG.error(err);
            return -1;
        }
        finally
        {
            CloserUtil.close(in);
        }
    }

    /**
     * Program entry point.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        new FixVcfFormat().instanceMainWithExit(args);
    }
}