package com.github.lindenb.jvarkit.tools.cgi;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.readers.LineIteratorImpl;
import htsjdk.tribble.readers.SynchronousLineReader;
import htsjdk.variant.vcf.AbstractVCFCodec;
import htsjdk.variant.vcf.VCFHeader;
import com.github.lindenb.jvarkit.io.IOUtils;
import com.github.lindenb.jvarkit.util.jcommander.Program;
import com.github.lindenb.jvarkit.util.log.Logger;
import com.github.lindenb.jvarkit.util.vcf.VCFUtils;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.tribble.readers.TabixReader;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
@Program(name="vcfregistry.cg",description="")
public class VcfRegistryCGI extends AbstractCGI {
/** Logger used to report errors raised while a page is being written. */
private static final Logger LOG=Logger.build(VcfRegistryCGI.class).make();
/** Name of the request parameter holding the index of the selected group (see doCGI). */
private static final String GROUPID_PARAM="g";
/** Name of the request parameter holding the queried position, parsed as "chrom:position". */
private static final String RGN_PARAM="r";
/** Last error recorded by the helper methods; handed to writeHTMLException() when a page is rendered. */
private Throwable lastException=null;
/**
 * A location on a contig, as produced by parsePosition(String)
 * (which only yields positions greater than or equal to 1).
 */
private static class Position {
    /** Contig / chromosome name. */
    String chrom;

    /** Position on the contig. */
    int pos;
}
/** A file paired with an optional free-text description. */
private static class FileWithComment {
    /** The file on disk. */
    File file;

    /** Description attached to the file; empty until a caller sets it. */
    String _desc = "";

    /**
     * @param file the file to wrap
     */
    FileWithComment(File file) {
        this.file = file;
    }

    /**
     * @return the description when one is set, otherwise the file name
     */
    public String getDesc() {
        if (_desc != null && !_desc.isEmpty()) {
            return _desc;
        }
        return file.getName();
    }
}
/** list of VCF path stored in a file */
private class GroupFile
extends FileWithComment
{
int index=0;
GroupFile(File file)
{
super(file);
}
}
private class VcfFile
extends FileWithComment
{
VcfFile(File file)
{
super(file);
}
}
/** Private constructor: the only instance is created by main(String[]). */
private VcfRegistryCGI() {
    super();
}
/**
 * Parses the position found in the RGN_PARAM request parameter.
 *
 * @return the parsed position, or null when the parameter is missing or malformed
 */
private Position parsePosition() {
    final String region = getString(RGN_PARAM);
    return parsePosition(region);
}
/**
 * Parses a string of the form "chrom:position".
 *
 * @param s the text to parse, may be null
 * @return the parsed position, or null when s is null, has no colon after
 *         its first character, has a blank contig name, or has a position
 *         that is not an integer greater than or equal to 1
 */
private Position parsePosition(String s) {
    if (s == null) {
        return null;
    }
    final int colonIndex = s.indexOf(':');
    if (colonIndex < 1) {
        return null;
    }

    int parsed;
    try {
        parsed = Integer.parseInt(s.substring(colonIndex + 1).trim());
    } catch (Exception err) {
        // not a number
        return null;
    }
    if (parsed < 1) {
        return null;
    }

    final String contig = s.substring(0, colonIndex).trim();
    if (contig.isEmpty()) {
        return null;
    }

    final Position result = new Position();
    result.chrom = contig;
    result.pos = parsed;
    return result;
}
/**
 * Locates the main configuration file, whose path is stored in the
 * preferences under the key "vcf.registry.group.file".
 *
 * @return the file when the preference is set and points to a readable
 *         regular file; null otherwise. An IOException raised while
 *         reading the preferences is stored in lastException.
 */
private File getGroupFile() {
    final String configuredPath;
    try {
        configuredPath = getPreferences().get("vcf.registry.group.file", null);
    } catch (IOException err) {
        lastException = err;
        return null;
    }
    if (configuredPath == null) {
        return null;
    }
    final File candidate = new File(configuredPath);
    if (!candidate.isFile() || !candidate.canRead()) {
        return null;
    }
    return candidate;
}
/**
 * Reads the main configuration file and returns the groups it declares.
 * Each useful line is "path&lt;TAB&gt;description"; lines that are empty,
 * start with '#', or have no tab after their first character are skipped.
 *
 * @return the groups in file order, each carrying its index in the list;
 *         an empty list when there is no configuration file or when an
 *         error occurs (the error is then stored in lastException)
 */
private List<GroupFile> getGroupFiles() {
    final File config = getGroupFile();
    if (config == null) {
        return Collections.emptyList();
    }
    final List<GroupFile> groups = new ArrayList<GroupFile>();
    BufferedReader reader = null;
    try {
        reader = IOUtils.openFileForBufferedReading(config);
        for (String row = reader.readLine(); row != null; row = reader.readLine()) {
            if (row.startsWith("#") || row.isEmpty()) {
                continue;
            }
            final int tabIndex = row.indexOf('\t');
            if (tabIndex < 1) {
                continue;
            }
            final GroupFile group = new GroupFile(new File(row.substring(0, tabIndex).trim()));
            final boolean readable = group.file.exists() && group.file.isFile() && group.file.canRead();
            if (!readable) {
                // a single unusable entry invalidates the whole configuration
                reader.close();
                reader = null;
                throw new IOException("Bad file in " + row + " / " + config);
            }
            group._desc = row.substring(tabIndex + 1).trim();
            group.index = groups.size();
            groups.add(group);
        }
        return groups;
    } catch (Exception err) {
        this.lastException = err;
        return Collections.emptyList();
    } finally {
        CloserUtil.close(reader);
    }
}
/**
 * Returns the VCF files listed in a group file.
 * Each useful line is either "path" or "path&lt;TAB&gt;description"; empty
 * lines, lines starting with '#' and lines with a blank path are skipped.
 * A VCF is only returned when a readable index named "&lt;vcf path&gt;.tbi"
 * exists; VCFs without such an index are silently ignored.
 *
 * @param gf the group to read, may be null
 * @return the indexed VCF files of the group in file order; an empty list
 *         when gf (or its file) is null or when an I/O error occurs,
 *         including a listed VCF that is missing or unreadable (the error
 *         is then stored in lastException)
 */
private List<VcfFile> getVcfFiles(GroupFile gf) {
    if (gf == null || gf.file == null) {
        return Collections.emptyList();
    }
    BufferedReader in = null;
    final List<VcfFile> vcfs = new ArrayList<VcfFile>();
    try {
        in = IOUtils.openFileForBufferedReading(gf.file);
        String line;
        while ((line = in.readLine()) != null) {
            if (line.startsWith("#") || line.isEmpty()) {
                continue;
            }
            final int tab = line.indexOf('\t');
            // the path is the first column only; trimmed like the paths read by getGroupFiles()
            final String path = (tab == -1 ? line : line.substring(0, tab)).trim();
            if (path.isEmpty()) {
                continue;
            }
            final File f = new File(path);
            if (!(f.exists() && f.isFile() && f.canRead())) {
                // the reader is closed by the finally block
                throw new FileNotFoundException("Error for " + f);
            }
            // The index name must be derived from the VCF path, not from the
            // whole line: the line may also carry the tab-separated description.
            final File tbi = new File(f.getPath() + ".tbi");
            if (!(tbi.exists() && tbi.isFile() && tbi.canRead())) {
                continue;
            }
            final VcfFile vcf = new VcfFile(f);
            if (tab > 0) {
                vcf._desc = line.substring(tab + 1).trim();
            }
            vcfs.add(vcf);
        }
        return vcfs;
    } catch (IOException err) {
        this.lastException = err;
        return Collections.emptyList();
    } finally {
        CloserUtil.close(in);
    }
}
/**
 * Prints the welcome page on standard output: the MIME header followed by
 * an HTML list with one link per group returned by getGroupFiles(), then
 * the last recorded error and the footer. Errors raised while writing are
 * logged. Standard output is closed when the page is done.
 */
private void welcomePane() {
    setMimeHeaderPrinted(true);
    System.out.print("Content-type: text/html;charset=utf-8\n");
    System.out.println();
    System.out.flush();

    XMLStreamWriter html = null;
    try {
        final XMLOutputFactory factory = XMLOutputFactory.newFactory();
        html = factory.createXMLStreamWriter(System.out, "UTF-8");
        html.writeStartElement("html");
        html.writeStartElement("body");

        html.writeStartElement("h1");
        html.writeCharacters("VCFRegistry");
        html.writeEndElement();//h1

        html.writeStartElement("ul");
        for (final GroupFile group : getGroupFiles()) {
            final String link = "?" + GROUPID_PARAM + "=" + group.index;
            html.writeStartElement("li");
            html.writeStartElement("a");
            html.writeAttribute("title", group.file.getPath());
            html.writeAttribute("href", link);
            html.writeCharacters(group.getDesc());
            html.writeEndElement();//a
            html.writeEndElement();//li
        }
        html.writeEndElement();//ul

        writeHTMLException(html, this.lastException);
        writeHtmlFooter(html);
        html.writeEndElement();//body
        html.writeEndElement();//html
    } catch (Exception err) {
        LOG.error(err);
    } finally {
        if (html != null) {
            try {
                html.flush();
            } catch (XMLStreamException err) {
                // nothing more can be written at this point
            }
            CloserUtil.close(html);
        }
        CloserUtil.close(System.out);
    }
}
/**
 * Writes the query form of a group: a hidden field carrying the group
 * index, a text field for the position (pre-filled with the current value
 * of the RGN_PARAM parameter, if any) and a submit button.
 *
 * @param w  the writer receiving the HTML
 * @param gf the group the form applies to
 * @throws XMLStreamException if the writer fails
 */
private void printForm(
    XMLStreamWriter w,
    final GroupFile gf
    ) throws XMLStreamException {
    w.writeStartElement("form");

    // hidden field: keeps the selected group across submissions
    w.writeEmptyElement("input");
    w.writeAttribute("type", "hidden");
    w.writeAttribute("name", GROUPID_PARAM);
    w.writeAttribute("value", String.valueOf(gf.index));

    w.writeStartElement("div");

    w.writeStartElement("label");
    w.writeAttribute("for", "position");
    w.writeCharacters("Position:");
    w.writeEndElement();//label

    // text field: the position to look up
    w.writeEmptyElement("input");
    w.writeAttribute("id", "position");
    w.writeAttribute("type", "text");
    w.writeAttribute("name", RGN_PARAM);
    w.writeAttribute("placeholder", "chrom:position");
    final String current = getString(RGN_PARAM);
    if (current == null) {
        w.writeAttribute("value", "");
    } else {
        w.writeAttribute("value", current);
    }

    w.writeEmptyElement("input");
    w.writeAttribute("type", "submit");
    w.writeAttribute("value", "Submit");

    w.writeEndElement();//div
    w.writeEndElement();//form
}
/**
 * Writes the result of the position query for one group.
 * Nothing is written when the request holds no valid position. Otherwise
 * the output is a "div" holding: a title with the position, a table with
 * one row per variant and per sample whose genotype is called and
 * available, and the sorted list of the samples declared in the VCF
 * headers for which no such genotype was found.
 * Errors raised while reading a VCF are written as an XML comment and kept
 * in lastException; the remaining files are still processed.
 *
 * @param w the writer receiving the HTML
 * @param gf the group whose VCF files are scanned
 * @throws XMLStreamException if the writer fails
 */
private void doWork( XMLStreamWriter w,final GroupFile gf)
throws XMLStreamException
{
Position pos=parsePosition();
if(pos==null) return ;
w.writeStartElement("div");
w.writeStartElement("h2");
w.writeCharacters(pos.chrom+":"+pos.pos);
w.writeEndElement();
w.writeStartElement("table");
w.writeStartElement("thead");
w.writeStartElement("tr");
for(String header:new String[]{"CHROM","POS","ID","REF","QUAL","Sample","Alleles",
"DP","GQ","File"})
{
w.writeStartElement("th");
w.writeCharacters(header);
w.writeEndElement();//th
}
w.writeEndElement();//tr
w.writeEndElement();//thead
w.writeStartElement("tbody");
/* samples for which at least one row was written */
Set<String> samplesWithGenotypes=new HashSet<String>();
/* every sample declared in the header of a scanned VCF */
Set<String> allSamples=new HashSet<String>();
for(VcfFile f:getVcfFiles(gf))
{
TabixReader tabixReader=null;
TabixReader.Iterator iter=null;
BlockCompressedInputStream bgzin=null;
VCFHeader header=null;
AbstractVCFCodec vcfCodec = VCFUtils.createDefaultVCFCodec();
LineIterator lineIterator=null;
/* two passes over the same file: pass 0 reads the VCF header, pass 1 runs the tabix query */
for(int i=0;i< 2;i++)
{
try
{
if(i==0)
{
/* pass 0: read the header through a BlockCompressedInputStream and collect the sample names */
bgzin=new BlockCompressedInputStream(f.file);
lineIterator=new LineIteratorImpl(new SynchronousLineReader(bgzin));
header=(VCFHeader) vcfCodec.readActualHeader(lineIterator);
allSamples.addAll(header.getGenotypeSamples());
}
else
{
/* pass 1: query the single-base interval chrom:pos-pos */
tabixReader=new TabixReader(f.file.getPath());
String line;
int[] x = tabixReader.parseReg(pos.chrom+":"+pos.pos+"-"+(pos.pos));
if(x!=null && x.length>2 && x[0]!=-1)
{
iter=tabixReader.query(x[0], x[1], x[2]);
}
else
{
}
/* iter stays null when the region could not be resolved: no row for this file */
while(iter!=null && (line=iter.next())!=null)
{
VariantContext var=vcfCodec.decode(line);
/* one table row per sample having a called, available genotype */
for(String sample:header.getSampleNamesInOrder())
{
final Genotype genotype=var.getGenotype(sample);
if(genotype==null || genotype.isNoCall()) continue;
if(!genotype.isAvailable()) continue;
samplesWithGenotypes.add(sample);
w.writeStartElement("tr");
/* CHROM */
w.writeStartElement("td");
w.writeCharacters(var.getContig());
w.writeEndElement();
/* POS */
w.writeStartElement("td");
w.writeCharacters(String.valueOf(var.getStart()));
w.writeEndElement();
/* ID: identifiers matching rs[0-9]+ are linked to the NCBI SNP page */
if(var.hasID())
{
w.writeStartElement("td");
if( var.getID().matches("rs[0-9]+"))
{
w.writeStartElement("a");
w.writeAttribute("href",
"http://www.ncbi.nlm.nih.gov/snp/"+
var.getID().substring(2)
);
w.writeCharacters(var.getID());
w.writeEndElement();//a
}
else
{
w.writeCharacters(var.getID());
}
w.writeEndElement();//td
}
else
{
w.writeEmptyElement("td");
}
/* REF */
if(var.getReference()!=null)
{
w.writeStartElement("td");
w.writeCharacters(var.getReference().getBaseString());
w.writeEndElement();
}
else
{
w.writeEmptyElement("td");
}
/* QUAL, truncated to an integer */
if(var.hasLog10PError())
{
w.writeStartElement("td");
w.writeCharacters(String.valueOf((int)var.getPhredScaledQual()));
w.writeEndElement();
}
else
{
w.writeEmptyElement("td");
}
/* Sample */
w.writeStartElement("td");
w.writeCharacters(sample);
w.writeEndElement();
/* Alleles: green for hom-ref, red for hom-var, blue for het; joined with '|' when phased, '/' otherwise */
List<Allele> alleles=genotype.getAlleles();
w.writeStartElement("td");
w.writeStartElement("span");
if(genotype.isHomRef())
{
w.writeAttribute("style", "color:green;");
}
else if(genotype.isHomVar())
{
w.writeAttribute("style", "color:red;");
}
else if(genotype.isHet())
{
w.writeAttribute("style", "color:blue;");
}
for(int j=0;j< alleles.size();++j)
{
if(j>0) w.writeCharacters(genotype.isPhased()?"|":"/");
w.writeCharacters(alleles.get(j).getBaseString());
}
w.writeEndElement();//span
w.writeEndElement();
/* DP */
if(genotype.hasDP())
{
w.writeStartElement("td");
w.writeCharacters(String.valueOf(genotype.getDP()));
w.writeEndElement();
}
else
{
w.writeEmptyElement("td");
}
/* GQ */
if(genotype.hasGQ())
{
w.writeStartElement("td");
w.writeCharacters(String.valueOf(genotype.getGQ()));
w.writeEndElement();
}
else
{
w.writeEmptyElement("td");
}
/* File */
w.writeStartElement("td");
w.writeCharacters(f.file.getName());
w.writeEndElement();
w.writeEndElement();//tr
w.flush();
}
}
}
}
catch(Exception err)
{
/* report the failure in the page source and remember it; resetting the header makes the check below skip pass 1 */
w.writeComment("BOUM "+err);
header=null;
lastException=err;
}
finally
{
/* runs after each pass: releases whatever that pass opened */
CloserUtil.close(lineIterator);
CloserUtil.close(bgzin);
CloserUtil.close(tabixReader);
CloserUtil.close(iter);
}
/* no header: the query pass cannot decode anything, go to the next file */
if(i==0 && header==null) break;
}
w.flush();
}
w.writeEndElement();//tbody
w.writeEndElement();//table
/* what remains in allSamples are the samples without any row in the table */
allSamples.removeAll(samplesWithGenotypes);
if(!allSamples.isEmpty())
{
w.writeStartElement("h3");
w.writeCharacters("Samples not found");
w.writeEndElement();
w.writeStartElement("ol");
/* TreeSet: list the names in sorted order */
for(String sample:new TreeSet<String>(allSamples))
{
w.writeStartElement("li");
w.writeCharacters(sample);
w.writeEndElement();
}
w.writeEndElement();
}
writeHTMLException(w, this.lastException);
w.writeEndElement();//div
}
/**
 * Prints the page of one group on standard output: the MIME header, the
 * group title and the query form, followed either by the query results
 * (when the request holds a valid position) or by the table of the VCF
 * files of the group. The last recorded error and the footer close the
 * page. Errors raised while writing are logged. Standard output is closed
 * when the page is done.
 *
 * @param gf the group to display
 */
private void handleGroup(final GroupFile gf) {
    setMimeHeaderPrinted(true);
    System.out.print("Content-type: text/html;charset=utf-8\n");
    System.out.println();
    System.out.flush();
    Position rgn = parsePosition(getString(RGN_PARAM));
    XMLStreamWriter w = null;
    try {
        XMLOutputFactory xof = XMLOutputFactory.newFactory();
        w = xof.createXMLStreamWriter(System.out, "UTF-8");
        w.writeStartElement("html");
        w.writeStartElement("body");
        w.writeStartElement("h1");
        w.writeCharacters(gf.getDesc());
        w.writeEndElement();//h1
        printForm(w, gf);
        if (rgn != null) {
            doWork(w, gf);
        } else {
            w.writeStartElement("h2");
            w.writeCharacters("Files");
            w.writeEndElement();//h2
            w.writeStartElement("table");
            // header row: both cells are header cells
            w.writeStartElement("tr");
            w.writeStartElement("th");
            w.writeCharacters("File");
            w.writeEndElement();//th
            w.writeStartElement("th");
            w.writeCharacters("Description");
            w.writeEndElement();//th
            w.writeEndElement();//tr
            List<VcfFile> vcffiles = getVcfFiles(gf);
            for (VcfFile f : vcffiles) {
                w.writeStartElement("tr");
                w.writeStartElement("td");
                w.writeCharacters(f.file.getPath());
                w.writeEndElement();//td
                w.writeStartElement("td");
                w.writeCharacters(f.getDesc());
                w.writeEndElement();//td
                w.writeEndElement();//tr
            }
            w.writeEndElement();//table
        }
        writeHTMLException(w, this.lastException);
        writeHtmlFooter(w);
        w.writeEndElement();//body
        w.writeEndElement();//html
    } catch (Exception err) {
        LOG.error(err);
    } finally {
        if (w != null) {
            try {
                w.flush();
            } catch (XMLStreamException err) {
                // nothing more can be written at this point
            }
            CloserUtil.close(w);
        }
        CloserUtil.close(System.out);
    }
}
/**
 * Dispatches the request: when the GROUPID_PARAM parameter is a valid
 * index in the list returned by getGroupFiles(), the page of that group
 * is printed; in every other case the welcome page is printed.
 */
@Override
protected void doCGI() {
    final String requestedGroup = getString(GROUPID_PARAM);
    GroupFile selected = null;
    if (requestedGroup != null) {
        try {
            final int groupIndex = Integer.parseInt(requestedGroup);
            final List<GroupFile> groups = getGroupFiles();
            if (groupIndex >= 0 && groupIndex < groups.size()) {
                selected = groups.get(groupIndex);
            }
        } catch (Exception ignored) {
            // an unusable group id is not an error: the welcome page is shown instead
        }
    }
    if (selected == null) {
        welcomePane();
        return;
    }
    handleGroup(selected);
}
/**
 * Program entry point: creates the CGI and hands the arguments to
 * instanceMainWithExit.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    final VcfRegistryCGI application = new VcfRegistryCGI();
    application.instanceMainWithExit(args);
}
}