package com.github.lindenb.jvarkit.util.vcf;

import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

import com.github.lindenb.jvarkit.io.IOUtils;

import htsjdk.samtools.util.CloserUtil;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFContigHeaderLine;
import htsjdk.variant.vcf.VCFFilterHeaderLine;
import htsjdk.variant.vcf.VCFFormatHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFHeaderLineCount;
import htsjdk.variant.vcf.VCFInfoHeaderLine;

/**
 * XML VCF writer.
 *
 * Factory producing {@link VariantContextWriter} instances that emit the VCF
 * header and each variant as XML elements. The output goes to the file given
 * to {@link #setOutputFile(File)}, or to {@code System.out} when no file was set.
 *
 * @author lindenb
 */
public class XMLVcfWriterFactory {

    /**
     * {@link VariantContextWriter} that serializes a header and its variants
     * through a {@link XMLStreamWriter}.
     */
    private static class XMLVcfWriter implements VariantContextWriter {
        /** XML sink; set by the factory, reset to {@code null} once closed. */
        private XMLStreamWriter writer = null;
        /** Byte stream underneath {@link #writer}. */
        private OutputStream delegateOut = null;
        /**
         * {@code true} when {@link #delegateOut} was opened by the factory for
         * this writer and must therefore be closed by it. It stays {@code false}
         * for {@code System.out}, which is only flushed.
         */
        private boolean ownsDelegateOut = false;
        /** Header given to {@link #writeHeader(VCFHeader)}; {@code null} until then. */
        private VCFHeader header = null;
        /** INFO key to the handler serializing that attribute. */
        private final Map<String, XMLInfoHandler> info2handler =
                new HashMap<String, XMLInfoHandler>();
        /** FORMAT key to the handler serializing that genotype attribute. */
        private final Map<String, XMLFormatHandler> format2handler =
                new HashMap<String, XMLFormatHandler>();
        // should we write genotypes or just sites?
        private boolean doNotWriteGenotypes = false;

        /** Registers the dedicated handlers for the {@code DP4} and {@code PV4} INFO keys. */
        private XMLVcfWriter() {
            info2handler.put("DP4", new DP4Handler());
            info2handler.put("PV4", new PV4Handler());
        }

        /**
         * @return the error flag of the underlying stream when it is a
         *         {@link java.io.PrintStream}; {@code false} otherwise
         */
        @Override
        public boolean checkError() {
            // 'instanceof' is false for null, so this also covers a missing stream.
            if (!(delegateOut instanceof java.io.PrintStream)) {
                return false;
            }
            return ((java.io.PrintStream) delegateOut).checkError();
        }

        /**
         * Registers (or replaces) the handler used for the INFO key returned by
         * {@code handler.getKey()}.
         *
         * @param handler the handler to register
         */
        @SuppressWarnings("unused")
        public void putInfoHandler(XMLInfoHandler handler) {
            this.info2handler.put(handler.getKey(), handler);
        }

        /** Opens the element {@code tag}. */
        protected void start(String tag) throws XMLStreamException {
            this.writer.writeStartElement(tag);
        }

        /** Writes the attribute {@code tag} with the string form of {@code o}. */
        protected void attribute(String tag, Object o) throws XMLStreamException {
            this.writer.writeAttribute(tag, String.valueOf(o));
        }

        /** Closes the most recently opened element. */
        protected void end() throws XMLStreamException {
            this.writer.writeEndElement();
        }

        /**
         * Writes {@code <tag>content</tag>}, or an empty element when
         * {@code content} is {@code null}.
         */
        protected void element(String tag, Object content) throws XMLStreamException {
            if (content == null) {
                this.writer.writeEmptyElement(tag);
                return;
            }
            start(tag);
            characters(content);
            end();
        }

        /** Writes the string form of {@code content} as text; does nothing for {@code null}. */
        protected void characters(Object content) throws XMLStreamException {
            if (content == null) {
                return;
            }
            this.writer.writeCharacters(String.valueOf(content));
        }

        /**
         * Returns the elements of {@code array} as an {@code Object[]}.
         * Primitive arrays are boxed element by element, because casting them
         * to {@code Object[]} would throw a {@link ClassCastException}.
         *
         * @param array a value whose class is an array class
         */
        private static Object[] toObjectArray(Object array) {
            if (array instanceof Object[]) {
                return (Object[]) array;
            }
            final int length = java.lang.reflect.Array.getLength(array);
            final Object[] boxed = new Object[length];
            for (int i = 0; i < length; i++) {
                boxed[i] = java.lang.reflect.Array.get(array, i);
            }
            return boxed;
        }

        /**
         * Writes {@code <key>} containing one child element per value, named
         * after the label at the same position. Nothing is written when the
         * number of values differs from the number of labels.
         */
        private static void writeLabelledValues(
                XMLStreamWriter w,
                String key,
                String[] labels,
                Collection<?> values
                ) throws XMLStreamException {
            if (values.size() != labels.length) {
                return;
            }
            w.writeStartElement(key);
            int i = 0;
            for (Object value : values) {
                w.writeStartElement(labels[i]);
                w.writeCharacters(String.valueOf(value));
                w.writeEndElement();
                i++;
            }
            w.writeEndElement();
        }

        /**
         * Opens the {@code <vcf>} document, writes the {@code <header>} section
         * and opens {@code <variations>}. A default handler is registered for
         * every INFO and FORMAT line that has none yet.
         *
         * @param header the header to serialize
         * @throws RuntimeException if a header was already written, or if the
         *         XML writer fails (the {@link XMLStreamException} is the cause)
         */
        @Override
        public void writeHeader(VCFHeader header) {
            if (this.header != null) {
                throw new RuntimeException("Header was already written");
            }
            this.header = header;
            try {
                start("vcf");
                start("header");
                writeInfoLines(header);
                writeFormatLines(header);
                writeFilterLines(header);
                writeContigLines(header);
                writeSampleNames(header);
                writeMetaLines(header);
                end(); // header
                start("variations");
                characters("\n");
            } catch (XMLStreamException e) {
                // The cause is attached to the rethrown exception; no extra stack trace is printed.
                throw new RuntimeException(String.valueOf(e.getMessage()), e);
            }
        }

        /** Writes {@code <infos>} and registers a default handler for each unhandled INFO key. */
        private void writeInfoLines(VCFHeader header) throws XMLStreamException {
            if (header.getInfoHeaderLines() == null) {
                return;
            }
            start("infos");
            for (VCFInfoHeaderLine h : header.getInfoHeaderLines()) {
                start("info");
                attribute("key", h.getID());
                attribute("countType", h.getCountType());
                if (h.getCountType() == VCFHeaderLineCount.INTEGER) {
                    attribute("count", h.getCount());
                }
                if (h.getValue() != null && !h.getValue().isEmpty()) {
                    attribute("value", h.getValue());
                }
                characters(h.getDescription());
                end();
                if (!info2handler.containsKey(h.getID())) {
                    info2handler.put(h.getID(), new DefaultXMLInfoHandler(h));
                }
            }
            end();
        }

        /** Writes {@code <formats>} and registers a default handler for each unhandled FORMAT key. */
        private void writeFormatLines(VCFHeader header) throws XMLStreamException {
            if (header.getFormatHeaderLines() == null) {
                return;
            }
            start("formats");
            for (VCFFormatHeaderLine h : header.getFormatHeaderLines()) {
                start("format");
                attribute("key", h.getID());
                attribute("countType", h.getCountType());
                if (h.getCountType() == VCFHeaderLineCount.INTEGER) {
                    attribute("count", h.getCount());
                }
                if (h.getValue() != null && !h.getValue().isEmpty()) {
                    attribute("value", h.getValue());
                }
                characters(h.getDescription());
                end();
                if (!format2handler.containsKey(h.getID())) {
                    format2handler.put(h.getID(), new DefaultXMLFormatHandler(h));
                }
            }
            end();
        }

        /** Writes {@code <filters>} with one {@code <filter>} per FILTER header line. */
        private void writeFilterLines(VCFHeader header) throws XMLStreamException {
            if (header.getFilterLines() == null) {
                return;
            }
            start("filters");
            for (VCFFilterHeaderLine h : header.getFilterLines()) {
                start("filter");
                element("key", h.getID());
                characters(h.getValue());
                end();
            }
            end();
        }

        /** Writes {@code <contigs>} with one {@code <contig>} per contig header line. */
        private void writeContigLines(VCFHeader header) throws XMLStreamException {
            if (header.getContigLines() == null) {
                return;
            }
            start("contigs");
            for (VCFContigHeaderLine h : header.getContigLines()) {
                start("contig");
                attribute("tid", h.getContigIndex());
                element("key", h.getID());
                characters(h.getValue());
                end();
            }
            end();
        }

        /** Writes {@code <samples>}; each {@code <sample>} carries its offset as {@code index}. */
        private void writeSampleNames(VCFHeader header) throws XMLStreamException {
            if (header.getSampleNamesInOrder() == null) {
                return;
            }
            start("samples");
            for (String name : header.getSampleNamesInOrder()) {
                start("sample");
                attribute("index", header.getSampleNameToOffset().get(name));
                characters(name);
                end();
            }
            end();
        }

        /**
         * Writes {@code <metas>} with the header lines whose key is none of
         * INFO, FORMAT, contig, FILTER or fileformat.
         */
        private void writeMetaLines(VCFHeader header) throws XMLStreamException {
            if (header.getMetaDataInInputOrder() == null) {
                return;
            }
            start("metas");
            for (VCFHeaderLine meta : header.getMetaDataInInputOrder()) {
                final String key = meta.getKey();
                if (key.equals("INFO")
                        || key.equals("FORMAT")
                        || key.equals("contig")
                        || key.equals("FILTER")
                        || key.equals("fileformat")) {
                    continue;
                }
                start("meta");
                attribute("key", key);
                characters(meta.getValue());
                end();
            }
            end();
        }

        /**
         * Writes one {@code <variation>} element for {@code variant}.
         *
         * @param variant the variant to serialize
         * @throws RuntimeException if no header was written yet, or if the XML
         *         writer fails (the {@link XMLStreamException} is the cause)
         */
        @Override
        public void add(VariantContext variant) {
            if (this.header == null) {
                throw new RuntimeException("No header was written.");
            }
            try {
                if (doNotWriteGenotypes) {
                    variant = new VariantContextBuilder(variant).noGenotypes().make();
                }
                start("variation");
                element("chrom", variant.getContig());
                element("start", variant.getStart());
                element("end", variant.getEnd());
                if (variant.hasID()) {
                    element("id", variant.getID());
                }
                element("ref", variant.getReference().getDisplayString());
                if (variant.isVariant()) {
                    for (Allele alt : variant.getAlternateAlleles()) {
                        element("alt", alt.getDisplayString());
                    }
                }
                if (variant.hasLog10PError()) {
                    element("qual", variant.getPhredScaledQual());
                }
                writeVariantFilters(variant);
                writeVariantInfos(variant);
                if (variant.hasGenotypes()) {
                    start("genotypes");
                    for (String sample : variant.getSampleNames()) {
                        final Genotype g = variant.getGenotype(sample);
                        if (g == null) {
                            continue;
                        }
                        writeGenotype(variant, g);
                    }
                    end();
                }
                end(); // variation
                characters("\n");
            } catch (XMLStreamException e) {
                throw new RuntimeException(e);
            }
        }

        /**
         * Writes {@code <filters>} when the variant is filtered or filters were
         * applied: the failing filters, or the PASS constant when none failed.
         */
        private void writeVariantFilters(VariantContext variant) throws XMLStreamException {
            if (!variant.isFiltered() && !variant.filtersWereApplied()) {
                return;
            }
            start("filters");
            if (variant.isFiltered()) {
                for (String filter : variant.getFilters()) {
                    element("filter", filter);
                }
            } else {
                // Not filtered, so the guard above implies filters were applied.
                element("filter", VCFConstants.PASSES_FILTERS_v4);
            }
            end();
        }

        /** Writes {@code <infos>}, delegating each attribute key to its registered handler. */
        private void writeVariantInfos(VariantContext variant) throws XMLStreamException {
            final Map<String, Object> infos = variant.getAttributes();
            if (infos == null) {
                return;
            }
            start("infos");
            for (String key : infos.keySet()) {
                final XMLInfoHandler infoHandler = this.info2handler.get(key);
                if (infoHandler == null) {
                    continue; // key not registered: nothing is written for it
                }
                infoHandler.handle(this.header, this.writer, variant);
            }
            end();
        }

        /** Writes one {@code <genotype>} element for {@code g}. */
        private void writeGenotype(VariantContext variant, Genotype g) throws XMLStreamException {
            start("genotype");
            attribute("available", g.isAvailable());
            attribute("called", g.isCalled());
            attribute("het", g.isHet());
            attribute("hom", g.isHom());
            attribute("homRef", g.isHomRef());
            attribute("homVar", g.isHomVar());
            attribute("mixed", g.isMixed());
            attribute("noCall", g.isNoCall());
            attribute("nonInformative", g.isNonInformative());
            attribute("filtered", g.isFiltered());
            attribute("phased", g.isPhased());
            attribute("sample", g.getSampleName());
            if (g.hasAD()) {
                start("AD");
                for (int ad : g.getAD()) {
                    element("value", ad);
                }
                end();
            }
            if (g.hasDP()) {
                element("DP", g.getDP());
            }
            if (g.hasGQ()) {
                element("GQ", g.getGQ());
            }
            if (g.hasPL()) {
                start("PL");
                int index = 0;
                for (int pl : g.getPL()) {
                    start("value");
                    attribute("index", ++index); // the first value gets index 1
                    characters(pl);
                    end();
                }
                end();
            }
            start("alleles");
            for (Allele a : g.getAlleles()) {
                if (a.isNoCall()) {
                    continue;
                }
                final String bases = a.getBaseString();
                if (bases.isEmpty() || bases.equals(".")) {
                    continue;
                }
                start("allele");
                if (a.isReference()) {
                    attribute("ref", a.isReference());
                }
                if (a.isSymbolic()) {
                    attribute("symbolic", true);
                }
                characters(bases);
                end();
            }
            end();
            final Map<String, Object> extended = g.getExtendedAttributes();
            if (extended != null) {
                for (String key : extended.keySet()) {
                    final XMLFormatHandler fmtHandler = this.format2handler.get(key);
                    if (fmtHandler == null) {
                        continue;
                    }
                    fmtHandler.handle(this.writer, variant, g);
                }
            }
            end();
        }

        /**
         * Closes {@code <variations>} and {@code <vcf>}, closes the XML writer
         * and releases the underlying stream. A stream opened for this writer
         * is closed even when finishing the document fails; {@code System.out}
         * is flushed but never closed. Calling this method again is a no-op.
         *
         * @throws RuntimeException if no header was written, or if closing fails
         */
        @Override
        public void close() {
            if (this.writer == null) {
                return;
            }
            if (this.header == null) {
                // Release the stream before reporting the misuse, so that it does not leak.
                this.writer = null;
                if (this.ownsDelegateOut) {
                    CloserUtil.close(this.delegateOut);
                }
                throw new RuntimeException("No header was written.");
            }
            try {
                try {
                    end(); // variations
                    end(); // vcf
                    this.writer.close();
                    if (this.delegateOut != null) {
                        this.delegateOut.flush();
                    }
                } finally {
                    this.writer = null;
                    if (this.ownsDelegateOut && this.delegateOut != null) {
                        this.delegateOut.close();
                    }
                }
            } catch (Exception e) {
                throw new RuntimeException("close failed", e);
            }
        }

        /** Serializes one FORMAT attribute of a genotype. */
        public static interface XMLFormatHandler {
            /** @return the FORMAT key this handler serializes */
            public String getKey();

            /** Writes the attribute {@link #getKey()} of {@code g} to {@code w}. */
            public void handle(
                    XMLStreamWriter w,
                    final VariantContext ctx,
                    final Genotype g
                    ) throws XMLStreamException;
        }

        /** Serializes one INFO attribute of a variant. */
        public static interface XMLInfoHandler {
            /** @return the INFO key this handler serializes */
            public String getKey();

            /** Writes the attribute {@link #getKey()} of {@code ctx} to {@code w}. */
            public void handle(
                    VCFHeader header,
                    XMLStreamWriter w,
                    final VariantContext ctx
                    ) throws XMLStreamException;
        }

        /**
         * Base INFO handler: writes one element named after the key per value.
         * Values coming from an array or a collection carry a 1-based
         * {@code index} attribute.
         */
        public static abstract class AbstractXMLInfoHandler implements XMLInfoHandler {
            /**
             * Writes a single value as {@code <key>value</key>}.
             *
             * @param index position written as the {@code index} attribute;
             *        a negative value omits the attribute
             */
            protected void handleObject(VCFHeader header, XMLStreamWriter w, int index, Object o)
                    throws XMLStreamException {
                w.writeStartElement(getKey());
                if (index >= 0) {
                    w.writeAttribute("index", String.valueOf(index));
                }
                w.writeCharacters(String.valueOf(o));
                w.writeEndElement();
            }

            /** Writes every item through {@code handleObject}, numbering them from 1. */
            @SuppressWarnings("rawtypes")
            protected void handleArray(VCFHeader header, XMLStreamWriter w, Collection array)
                    throws XMLStreamException {
                int index = 0;
                for (Object item : array) {
                    handleObject(header, w, ++index, item);
                }
            }

            /**
             * Reads the attribute from {@code ctx} and dispatches it: arrays
             * (object or primitive) and collections go to {@code handleArray},
             * anything else to {@code handleObject} with index {@code -1}.
             * Missing or empty values write nothing.
             */
            @SuppressWarnings("rawtypes")
            @Override
            public void handle(VCFHeader header, XMLStreamWriter w, final VariantContext ctx)
                    throws XMLStreamException {
                final Object value = ctx.getAttribute(this.getKey());
                if (value == null) {
                    return;
                }
                if (value.getClass().isArray()) {
                    final Object[] items = toObjectArray(value);
                    if (items.length == 0) {
                        return;
                    }
                    handleArray(header, w, Arrays.asList(items));
                } else if (value instanceof Collection) {
                    final Collection items = (Collection) value;
                    if (items.isEmpty()) {
                        return;
                    }
                    handleArray(header, w, items);
                } else {
                    handleObject(header, w, -1, value);
                }
            }
        }

        /** INFO handler whose key is the ID of a header line. */
        public static class DefaultXMLInfoHandler extends AbstractXMLInfoHandler {
            private VCFInfoHeaderLine vihl;

            public DefaultXMLInfoHandler(VCFInfoHeaderLine vihl) {
                this.vihl = vihl;
            }

            @Override
            public String getKey() {
                return vihl.getID();
            }
        }

        /** Base FORMAT handler: writes one element named after the key per value. */
        public static abstract class AbstractXMLFormatHandler implements XMLFormatHandler {
            /** Writes a single value as {@code <key>value</key>}. */
            protected void handleObject(XMLStreamWriter w, Object o) throws XMLStreamException {
                w.writeStartElement(getKey());
                w.writeCharacters(String.valueOf(o));
                w.writeEndElement();
            }

            /**
             * Reads the extended attribute from {@code g} and writes each of
             * its values: arrays (object or primitive) and collections are
             * expanded, anything else is written once. Missing or empty values
             * write nothing.
             */
            @SuppressWarnings("rawtypes")
            @Override
            public void handle(XMLStreamWriter w, final VariantContext ctx, final Genotype g)
                    throws XMLStreamException {
                final Object value = g.getExtendedAttribute(this.getKey());
                if (value == null) {
                    return;
                }
                if (value.getClass().isArray()) {
                    for (Object item : toObjectArray(value)) {
                        handleObject(w, item);
                    }
                } else if (value instanceof Collection) {
                    for (Object item : (Collection) value) {
                        handleObject(w, item);
                    }
                } else {
                    handleObject(w, value);
                }
            }
        }

        /** FORMAT handler whose key is the ID of a header line. */
        public static class DefaultXMLFormatHandler extends AbstractXMLFormatHandler {
            private VCFFormatHeaderLine vfhl;

            public DefaultXMLFormatHandler(VCFFormatHeaderLine vfhl) {
                this.vfhl = vfhl;
            }

            @Override
            public String getKey() {
                return vfhl.getID();
            }
        }

        /**
         * Handler for the {@code DP4} INFO key: a four-value array or
         * collection is written as four named child elements; any other value
         * writes nothing.
         */
        private static class DP4Handler extends AbstractXMLInfoHandler {
            private static final String[] LABELS = {
                "ref-forward", "ref-reverse", "alt-forward", "alt-reverse"
            };

            /** Intentionally empty: a scalar value is not written. */
            @Override
            protected void handleObject(VCFHeader header, XMLStreamWriter w, int index, Object o)
                    throws XMLStreamException {
            }

            @SuppressWarnings("rawtypes")
            @Override
            protected void handleArray(VCFHeader header, XMLStreamWriter w, Collection array)
                    throws XMLStreamException {
                writeLabelledValues(w, getKey(), LABELS, array);
            }

            @Override
            public String getKey() {
                return "DP4";
            }
        }

        /**
         * Handler for the {@code PV4} INFO key: a four-value array or
         * collection is written as four named child elements; any other value
         * writes nothing.
         */
        private static class PV4Handler extends AbstractXMLInfoHandler {
            private static final String[] LABELS = {
                "strand-bias", "baseQ-bias", "mapQ-bias", "tail-distance-bias"
            };

            /** Intentionally empty: a scalar value is not written. */
            @Override
            protected void handleObject(VCFHeader header, XMLStreamWriter w, int index, Object o)
                    throws XMLStreamException {
            }

            @Override
            protected void handleArray(VCFHeader header, XMLStreamWriter w,
                    @SuppressWarnings("rawtypes") Collection array)
                    throws XMLStreamException {
                writeLabelledValues(w, getKey(), LABELS, array);
            }

            @Override
            public String getKey() {
                return "PV4";
            }
        }
    }

    /** Destination file; {@code null} means {@code System.out}. */
    private File outputFile = null;

    private XMLVcfWriterFactory() {
    }

    /**
     * Sets the file that writers created afterwards will write to.
     *
     * @param out the destination file, or {@code null} for {@code System.out}
     */
    public void setOutputFile(File out) {
        this.outputFile = out;
    }

    /** @return a new factory with no output file set */
    public static XMLVcfWriterFactory newInstance() {
        return new XMLVcfWriterFactory();
    }

    /**
     * Creates a writer on the configured output file, or on {@code System.out}
     * when no file was set.
     *
     * @return a new XML VCF writer
     * @throws IOException if the output cannot be opened, or if the XML stream
     *         writer cannot be created (the {@link XMLStreamException} is the cause)
     */
    public VariantContextWriter createVariantContextWriter() throws IOException {
        final XMLVcfWriter w = new XMLVcfWriter();
        try {
            final XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
            if (this.outputFile != null) {
                w.delegateOut = IOUtils.openFileForWriting(this.outputFile);
                w.ownsDelegateOut = true;
            } else {
                w.delegateOut = System.out;
            }
            w.writer = xmlfactory.createXMLStreamWriter(w.delegateOut);
        } catch (XMLStreamException e) {
            CloserUtil.close(w.writer);
            // Only close a stream opened here; System.out must stay usable.
            if (w.ownsDelegateOut) {
                CloserUtil.close(w.delegateOut);
            }
            throw new IOException(e);
        }
        return w;
    }
}