package com.github.lindenb.jvarkit.tools.ga4gh; import java.io.File; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URLEncoder; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.util.HashSet; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.stream.Collectors; import javax.net.ssl.SSLContext; import org.apache.http.client.methods.HttpGet; import org.apache.http.entity.ContentType; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import com.beust.jcommander.Parameter; import com.github.lindenb.jvarkit.util.jcommander.Launcher; import com.github.lindenb.jvarkit.util.jcommander.Program; import com.github.lindenb.jvarkit.util.log.Logger; import com.github.lindenb.jvarkit.util.vcf.VcfIterator; import com.github.lindenb.semontology.Term; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.google.gson.JsonParser; import com.sleepycat.bind.tuple.StringBinding; import com.sleepycat.bind.tuple.TupleBinding; import com.sleepycat.bind.tuple.TupleInput; import com.sleepycat.bind.tuple.TupleOutput; import com.sleepycat.je.Database; import com.sleepycat.je.DatabaseConfig; import com.sleepycat.je.DatabaseEntry; import com.sleepycat.je.Environment; import com.sleepycat.je.EnvironmentConfig; import com.sleepycat.je.LockMode; import com.sleepycat.je.OperationStatus; import com.sleepycat.je.Transaction; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLineCount; import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFInfoHeaderLine; /* BEGIN_DOC END_DOC */ @Program( name="vcfannotwithbeacon", description="Annotate a VCF with ga4gh beacon", keywords={"ga4gh","beacon","vcf","annotation"}, terms={Term.ID_0000017} ) public class VcfAnnotWithBeacon extends Launcher { private static final Logger LOG=Logger.build(VcfAnnotWithBeacon.class).make(); @Parameter(names={"-o","--out"},description="output file . Default:stdout") private File outputFile = null; @Parameter(names={"-B","--bdb"},description="Optional BerkeleyDB directory to store result. Avoid to make the same calls to beacon") private File bdbDir = null; @Parameter(names={"--build"},description="genome build") private String genomeBuild = "HG19"; @Parameter(names={"--tag","-T"},description="INFO TAG") private String infoTag = "BEACON"; @Parameter(names={"--noupdate"},description="Don't query the variant already having the tag / do not update the existing annotation") private boolean dontUpdateIfInfoIsPresent = false; @Parameter(names={"--stopOnError"},description="Stop on network error.") private boolean stopOnNetworkError = false; @Parameter(names={"--baseurl"},description="Beacon Base URL API") private String baseurl="https://beacon-network.org/api"; @Parameter(names={"--cert"},description="ignore SSL certification errors") private boolean ignoreCertErrors = false; /** BerkeleyDB Environment to store results */ private Environment bdbEnv=null; /** BerkeleyDB beacon buffer */ private Database beaconDatabase=null; /** BerkeleyDB transaction */ private Transaction txn=null; private static class StoredResponse { long timeStamp; Set<String> foundIn=new HashSet<>(); } private static class StoredResponseBinding extends TupleBinding<StoredResponse> { @Override public StoredResponse entryToObject(TupleInput in) { StoredResponse st=new StoredResponse(); st.timeStamp = in.readLong(); int n= in.readInt(); for(int i=0;i< n;++i) st.foundIn.add(in.readString()); return st; } @Override public void objectToEntry(StoredResponse st, TupleOutput out) { out.writeLong(st.timeStamp); out.writeInt(st.foundIn.size()); for(final String sw:st.foundIn) out.writeString(sw.toString()); } } @Override protected int doVcfToVcf(String inputName,final VcfIterator iter,final VariantContextWriter out) { CloseableHttpClient httpClient=null; InputStream contentInputStream = null; try { final org.apache.http.impl.client.HttpClientBuilder hb=HttpClients.custom(); if (this.ignoreCertErrors) { // http://stackoverflow.com/questions/24720013/apache-http-client-ssl-certificate-error System.setProperty("jsse.enableSNIExtension", "false"); final SSLContext sslContext = org.apache.http.conn.ssl.SSLContexts.custom() .loadTrustMaterial(null, new org.apache.http.conn.ssl.TrustStrategy() { @Override public boolean isTrusted(final X509Certificate[] chain, final String authType) throws CertificateException { return true; } }).useTLS().build(); final org.apache.http.conn.ssl.SSLConnectionSocketFactory connectionFactory = new org.apache.http.conn.ssl.SSLConnectionSocketFactory( sslContext, new org.apache.http.conn.ssl.AllowAllHostnameVerifier()); hb.setSSLSocketFactory(connectionFactory); } httpClient = hb.build(); HttpGet httpGetRequest = null; final Set<String> available_chromosomes = new HashSet<>(); try { httpGetRequest = new HttpGet(baseurl+"/chromosomes"); httpGetRequest.setHeader("Accept", ContentType.APPLICATION_JSON.getMimeType()); contentInputStream = httpClient.execute(httpGetRequest).getEntity().getContent(); JsonParser jsonparser = new JsonParser(); final JsonElement root = jsonparser.parse(new InputStreamReader(contentInputStream)); Iterator<JsonElement> jsr = root.getAsJsonArray().iterator(); while (jsr.hasNext()) { final String ctg = jsr.next().getAsString(); available_chromosomes.add(ctg); } LOG.debug(available_chromosomes); } catch (final Exception err) { LOG.error(err); return -1; } finally { CloserUtil.close(contentInputStream); } final Set<String> available_alleles = new HashSet<>(); try { httpGetRequest = new HttpGet(baseurl+"/alleles"); httpGetRequest.setHeader("Accept", ContentType.APPLICATION_JSON.getMimeType()); contentInputStream = httpClient.execute(httpGetRequest).getEntity().getContent(); JsonParser jsonparser = new JsonParser(); final JsonElement root = jsonparser.parse(new InputStreamReader(contentInputStream)); Iterator<JsonElement> jsr = root.getAsJsonArray().iterator(); while (jsr.hasNext()) { final String allele = jsr.next().getAsString(); available_alleles.add(allele); } LOG.debug(available_alleles); } catch (final Exception err) { LOG.error(err); return -1; } finally { CloserUtil.close(contentInputStream); } final StoredResponseBinding storedResponseBinding = new StoredResponseBinding(); final VCFHeader header = new VCFHeader(iter.getHeader()); final VCFInfoHeaderLine infoHeaderLine = new VCFInfoHeaderLine(this.infoTag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Tag inserted with " + getProgramName()); header.addMetaDataLine(infoHeaderLine); DatabaseEntry key = new DatabaseEntry(); DatabaseEntry data = new DatabaseEntry(); out.writeHeader(header); while (iter.hasNext()) { final VariantContext ctx = iter.next(); if (!ctx.isVariant() || ctx.getReference().isSymbolic()) { out.add(ctx); continue; } if (ctx.hasAttribute(infoHeaderLine.getID()) && this.dontUpdateIfInfoIsPresent) { out.add(ctx); continue; } String beaconContig = ctx.getContig(); if (!available_chromosomes.contains(beaconContig)) { if (beaconContig.startsWith("chr")) { beaconContig = beaconContig.substring(3); } if (!available_chromosomes.contains(beaconContig)) { out.add(ctx); continue; } } final List<Allele> altAlleles = ctx.getAlternateAlleles(); if (altAlleles.isEmpty()) { out.add(ctx); continue; } final Set<String> newInfo = new HashSet<>(); for (final Allele alt : altAlleles) { if (alt.isSymbolic() || alt.isNoCall()) continue; final StringBuilder buildUrl = new StringBuilder(); buildUrl.append("chrom="); buildUrl.append(URLEncoder.encode(beaconContig, "UTF-8")); buildUrl.append("&pos="); /* * "Coordinate within a chromosome. Position is a number and is 0-based" * . */ buildUrl.append(ctx.getStart() - 1); buildUrl.append("&allele="); final String allele; if (ctx.getReference().length() > alt.length()) { allele = "D";// del } else if (ctx.getReference().length() > alt.length()) { allele = "I";// ins } else { allele = alt.getDisplayString(); } if (!available_alleles.contains(allele)) continue; buildUrl.append(allele); buildUrl.append("&ref="); buildUrl.append(URLEncoder.encode(this.genomeBuild, "UTF-8")); final String queryUrl = buildUrl.toString(); boolean foundInBdb = false; Set<String> foundIn = null; if (this.beaconDatabase != null) { StringBinding.stringToEntry(queryUrl, key); if (this.beaconDatabase.get(this.txn, key, data, LockMode.DEFAULT) == OperationStatus.SUCCESS) { StoredResponse response = storedResponseBinding.entryToObject(data); if (response.timeStamp < 0) // TODO check how old is // that data { response = null; this.beaconDatabase.delete(this.txn, key); } if (response != null) { foundInBdb = true; foundIn = response.foundIn; } } } if (foundIn == null) { foundIn = new HashSet<>(); try { httpGetRequest = new HttpGet(baseurl+"/responses?" + queryUrl); httpGetRequest.setHeader("Accept", ContentType.APPLICATION_JSON.getMimeType()); LOG.debug(httpGetRequest.getURI()); contentInputStream = httpClient.execute(httpGetRequest).getEntity().getContent(); JsonParser jsonparser = new JsonParser(); final JsonElement root = jsonparser.parse(new InputStreamReader(contentInputStream)); Iterator<JsonElement> jsr = root.getAsJsonArray().iterator(); while (jsr.hasNext()) { final JsonObject b = jsr.next().getAsJsonObject(); if (!(b.has("beacon") && b.has("response"))) continue; final String beacon_id = b.get("beacon").getAsJsonObject().get("id").getAsString(); final JsonElement response_prim = b.get("response"); if (response_prim.isJsonPrimitive() && response_prim.getAsBoolean()) { foundIn.add(beacon_id); } } } catch (final Exception err) { LOG.error(err); if (stopOnNetworkError) { throw new RuntimeIOException(err); } } finally { CloserUtil.close(contentInputStream); } } if (this.beaconDatabase != null && !foundInBdb) { StoredResponse response = new StoredResponse(); response.timeStamp = System.currentTimeMillis(); response.foundIn = foundIn; } // 17&pos=41244981&=G&ref=GRCh37") newInfo.addAll( foundIn.stream().map(S -> alt.getDisplayString() + "|" + S).collect(Collectors.toSet())); } if (newInfo.isEmpty()) { out.add(ctx); continue; } final VariantContextBuilder vcb = new VariantContextBuilder(ctx); vcb.attribute(infoHeaderLine.getID(), new ArrayList<String>(newInfo)); out.add(vcb.make()); } return 0; } catch(final Exception err) { LOG.error(err); return -1; } finally { CloserUtil.close(httpClient); } } @Override public int doWork(final List<String> args) { try { if(this.bdbDir!=null) { LOG.info("open BDB "+this.bdbDir); IOUtil.assertDirectoryIsWritable(this.bdbDir); final EnvironmentConfig envCfg=new EnvironmentConfig(); envCfg.setAllowCreate(true); envCfg.setReadOnly(false); this.bdbEnv = new Environment(this.bdbDir, envCfg); final DatabaseConfig cfg=new DatabaseConfig(); cfg.setAllowCreate(true); cfg.setReadOnly(false); this.beaconDatabase = this.bdbEnv.openDatabase(this.txn,"ga4ghBeaconBuffer",cfg); } return doVcfToVcf(args, outputFile); } catch(final Exception err) { LOG.error(err); return -1; } finally { CloserUtil.close(beaconDatabase); CloserUtil.close(bdbEnv); } } public static void main(String[] args) { new VcfAnnotWithBeacon().instanceMainWithExit(args); } }