/* The MIT License (MIT) Copyright (c) 2015 Pierre Lindenbaum Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. BEGIN_DOC The 4th column of the BED file indexed with TABIX is an XML string. It will be processed with the XSLT stylesheet, which should produce an XML result of the form <properties><entry key='key1'>value1</entry><entry key='key2'>value2</entry></properties>; each entry whose key is declared as an INFO header is added as an INFO field. Carriage returns will be removed. Parameters passed to the stylesheet: vcfchrom (string), vcfpos (int), vcfref (string), vcfalt (string). 
END_DOC */
package com.github.lindenb.jvarkit.tools.vcftabixml;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlValue;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import com.github.lindenb.jvarkit.util.jcommander.Launcher;
import com.github.lindenb.jvarkit.util.jcommander.Program;
import com.github.lindenb.jvarkit.util.log.Logger;
import com.github.lindenb.jvarkit.util.vcf.VcfIterator;

import htsjdk.tribble.readers.TabixReader;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderVersion;
import htsjdk.variant.vcf.VCFInfoHeaderLine;

import com.beust.jcommander.Parameter;
import com.github.lindenb.jvarkit.io.IOUtils;

/**
 * Annotates the variants of a VCF with values extracted from a tabix-indexed
 * BED file whose 4th column is an XML string.
 *
 * <p>For every variant, the BED records starting at the variant position are
 * fetched, their XML column is run through the user supplied XSLT stylesheet,
 * and the resulting {@code <properties><entry key="...">value</entry></properties>}
 * document is turned into INFO attributes. Only keys declared in the extra
 * INFO header file ({@code -F}) are written; at most one value per key is kept.
 */
@Program(name="vcftabixml",description=" annotate a value from a vcf+xml file")
public class VCFTabixml extends Launcher
	{
	private static final Logger LOG=Logger.build(VCFTabixml.class).make();

	/** Column delimiter of the BED records; compiled once instead of on every call. */
	private static final Pattern TAB=Pattern.compile("[\t]");

	@Parameter(names={"-o","--output"},description="Output file. Optional . Default: stdout")
	private File fileout = null;

	@Parameter(names="-B",description=" BED file indexed with tabix. The 4th column *is* a XML string.)",required=true)
	private String BEDFILE=null;

	@Parameter(names="-xsl",description="x xslt-stylesheet. REQUIRED. Should produce a valid set of INFO field.",required=true)
	private File STYLESHEET=null;

	@Parameter(names="-F",description="file containing extra INFO headers line to add version: 4.1",required=true)
	public File TAGS=null;

	/** Compiled form of {@link #STYLESHEET}; set at the beginning of {@link #doVcfToVcf}. */
	private Templates stylesheet=null;

	/** One {@code <entry key="...">value</entry>} element of the stylesheet output. */
	@XmlRootElement(name="entry")
	@XmlAccessorType(XmlAccessType.FIELD)
	public static class Property
		{
		/** Value of the {@code key} attribute; {@code null} when the attribute is absent. */
		@XmlAttribute(name="key")
		public String key;

		/** Text content of the element. */
		@XmlValue
		public String value;

		@Override
		public String toString()
			{
			return ""+key+"="+value+";";
			}
		}

	/** Root {@code <properties>} element of the stylesheet output: a list of {@code <entry>} children. */
	@XmlRootElement(name="properties")
	@XmlAccessorType(XmlAccessType.PROPERTY)
	public static class Properties
		{
		private List<Property> property=new ArrayList<Property>();

		@XmlElement(name="entry")
		public List<Property> getProperty()
			{
			return property;
			}

		public void setProperty(List<Property> property)
			{
			this.property = property;
			}

		@Override
		public String toString()
			{
			return property.toString();
			}
		}

	/**
	 * Parses one line of the extra INFO header file.
	 *
	 * @param line a non-blank line, expected to start with {@code ##INFO=}
	 * @return the parsed header line
	 * @throws RuntimeException if the line is not an INFO meta-data line
	 * @throws IllegalArgumentException if the header is not {@code Number=1,Type=String}
	 */
	private static VCFInfoHeaderLine parseInfoHeaderLine(final String line)
		{
		if(!line.startsWith(VCFHeader.METADATA_INDICATOR))
			{
			throw new RuntimeException("should start with "+ VCFHeader.METADATA_INDICATOR +":"+line);
			}
		if(!line.startsWith(VCFConstants.INFO_HEADER_START))
			{
			throw new RuntimeException("should start with "+ VCFConstants.INFO_HEADER_START +":"+line);
			}
		// the VCFInfoHeaderLine constructor takes the text following the "##INFO=" prefix
		final VCFInfoHeaderLine hi=new VCFInfoHeaderLine(
				line.substring(VCFConstants.INFO_HEADER_START.length()),
				VCFHeaderVersion.VCF4_1
				);
		if(hi.getCount()!=1)
			{
			throw new IllegalArgumentException("VCFHeaderLineCount not supported : "+hi);
			}
		switch(hi.getType())
			{
			case String:break;
			default: throw new IllegalArgumentException("VCFHeaderLineTyoe not supported : "+hi);
			}
		return hi;
		}

	/**
	 * Transforms the XML column of one BED record with the stylesheet and collects
	 * the resulting key/value pairs.
	 *
	 * @param xml          content of the 4th BED column
	 * @param ctx          current variant, exposed to the stylesheet through the
	 *                     vcfchrom / vcfpos / vcfref / vcfalt parameters
	 * @param transformer  transformer built from the stylesheet
	 * @param unmarshaller JAXB unmarshaller for {@link Properties}
	 * @param h2           output header; keys without an INFO declaration in it are ignored
	 * @param insert       accumulator: key to the distinct values seen, in encounter order
	 * @throws RuntimeException wrapping any transformation or unmarshalling failure
	 */
	private void collectProperties(
			final String xml,
			final VariantContext ctx,
			final Transformer transformer,
			final Unmarshaller unmarshaller,
			final VCFHeader h2,
			final Map<String,Set<String>> insert
			)
		{
		transformer.setParameter("vcfchrom",ctx.getContig());
		transformer.setParameter("vcfpos",ctx.getStart());
		transformer.setParameter("vcfref",ctx.getReference().getBaseString());
		transformer.setParameter("vcfalt",ctx.getAltAlleleWithHighestAlleleCount().getBaseString());
		try
			{
			final StringWriter sw=new StringWriter();
			transformer.transform(new StreamSource(new StringReader(xml)), new StreamResult(sw));
			final Properties props=unmarshaller.unmarshal(
					new StreamSource(new StringReader(sw.toString())),
					Properties.class
					).getValue();
			for(final Property p:props.getProperty())
				{
				// an <entry> without a key attribute is unmarshalled with a null key
				if(p.key==null || p.key.isEmpty()) continue;
				if(h2.getInfoHeaderLine(p.key)==null)
					{
					LOG.info("ignoring key "+p.key+" you could set it to:\n"+
							"##INFO=<ID="+p.key+",Number=1,Type=String,Description=\""+p.key+" from "+BEDFILE+"\">"
							);
					continue;
					}
				Set<String> x=insert.get(p.key);
				if(x==null)
					{
					x=new LinkedHashSet<String>();
					insert.put(p.key,x);
					}
				x.add(p.value);
				}
			}
		catch (Exception e)
			{
			// the stack trace (cause included) is printed once, by the caller's handler
			throw new RuntimeException("error",e);
			}
		}

	/**
	 * Copies the input VCF to the writer, adding the INFO attributes produced by
	 * the stylesheet for the BED records located at each variant position.
	 *
	 * @param inputName name of the input (unused here)
	 * @param r         source of variants
	 * @param w         destination; the header is written before the first variant
	 * @return 0 on success, -1 on any error (the error is printed)
	 */
	@Override
	protected int doVcfToVcf(String inputName, VcfIterator r, VariantContextWriter w)
		{
		TabixReader tabixReader=null;
		try
			{
			LOG.info("opening BED"+BEDFILE);
			tabixReader=new TabixReader(this.BEDFILE);

			LOG.info("loading xslt "+STYLESHEET);
			this.stylesheet=TransformerFactory.newInstance().newTemplates(new StreamSource(STYLESHEET));
			final Transformer transformer=this.stylesheet.newTransformer();
			transformer.setOutputProperty(OutputKeys.METHOD,"xml");

			final VCFHeader header=r.getHeader();
			final VCFHeader h2=new VCFHeader(header);

			LOG.info("reading Tags "+TAGS);
			// try-with-resources: the reader is closed even when a header line is rejected
			try(BufferedReader rT=IOUtils.openFileForBufferedReading(TAGS))
				{
				String line;
				while((line=rT.readLine())!=null)
					{
					// tolerate blank lines (e.g. a trailing newline) in the tags file
					if(line.trim().isEmpty()) continue;
					final VCFInfoHeaderLine hi=parseInfoHeaderLine(line);
					LOG.info(hi.toString());
					h2.addMetaDataLine(hi);
					}
				}

			LOG.info("writing header");
			w.writeHeader(h2);

			final JAXBContext jaxbCtx=JAXBContext.newInstance(Properties.class,Property.class);
			final Unmarshaller unmarshaller=jaxbCtx.createUnmarshaller();

			while(r.hasNext())
				{
				final VariantContext ctx=r.next();
				final Map<String,Set<String>> insert=new LinkedHashMap<String,Set<String>>();

				final String region=ctx.getContig()+":"+(ctx.getStart())+"-"+(ctx.getEnd()+1);
				final int[] array=tabixReader.parseReg(region);
				TabixReader.Iterator iter=null;
				if(array!=null && array.length==3 && array[0]!=-1 && array[1]>=0 && array[2]>=0)
					{
					iter=tabixReader.query(array[0],array[1],array[2]);
					}
				else
					{
					LOG.info("Cannot get "+region);
					}

				String line2=null;
				while(iter!=null && (line2=iter.next())!=null)
					{
					final String[] tokens2=TAB.split(line2,5);
					if(tokens2.length<4)
						{
						LOG.error("[VCFTabixml] VCF. Error not enough columns in tabix.line "+line2);
						return -1;
						}
					final int chromStart=Integer.parseInt(tokens2[1]);
					final int chromEnd=Integer.parseInt(tokens2[2]);
					// only single-base BED records are supported
					if(chromStart+1!=chromEnd)
						{
						LOG.error("Error in "+this.BEDFILE+" extected start+1=end int "+tokens2[0]+":"+tokens2[1]+"-"+tokens2[2]);
						continue;
						}
					// BED start is 0-based, VCF position is 1-based: keep records starting at the variant
					if(ctx.getStart()-1!=chromStart) continue;

					collectProperties(tokens2[3],ctx,transformer,unmarshaller,h2,insert);
					}

				if(insert.isEmpty())
					{
					w.add(ctx);
					continue;
					}
				final VariantContextBuilder b=new VariantContextBuilder(ctx);
				for(final Map.Entry<String,Set<String>> kv:insert.entrySet())
					{
					// limit 1: headers are Number=1, so only the first value seen is written.
					// A set is only ever stored together with its first element, so it is never empty.
					b.attribute(kv.getKey(),kv.getValue().iterator().next());
					}
				w.add(b.make());
				}
			return 0;
			}
		catch (Throwable err)
			{
			err.printStackTrace();
			return -1;
			}
		finally
			{
			// release the tabix file handle on every exit path
			if(tabixReader!=null) tabixReader.close();
			}
		}

	/**
	 * Delegates to the {@code doVcfToVcf(args, output)} overload inherited from the launcher.
	 *
	 * @param args remaining command line arguments
	 * @return the status returned by the delegate
	 */
	@Override
	public int doWork(List<String> args)
		{
		return doVcfToVcf(args, this.fileout);
		}

	/** Command line entry point. */
	public static void main(String[] args) throws Exception
		{
		new VCFTabixml().instanceMainWithExit(args);
		}
	}