/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.app.cli.main.executors.analysis; import com.google.protobuf.util.JsonFormat; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.writer.Options; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFHeader; import io.grpc.ManagedChannel; import io.grpc.ManagedChannelBuilder; import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.formats.variant.vcf4.VcfUtils; import org.opencb.biodata.models.common.protobuf.service.ServiceTypesModel; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.protobuf.VariantProto; import org.opencb.biodata.tools.variant.converters.VariantContextToAvroVariantConverter; import org.opencb.commons.datastore.core.*; import org.opencb.opencga.app.cli.analysis.options.VariantCommandOptions; import org.opencb.opencga.app.cli.main.executors.OpencgaCommandExecutor; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.core.results.VariantQueryResult; import org.opencb.opencga.server.grpc.AdminServiceGrpc; import org.opencb.opencga.server.grpc.GenericServiceModel; import org.opencb.opencga.server.grpc.VariantServiceGrpc; import org.opencb.opencga.storage.core.manager.variant.VariantCatalogQueryUtils; import org.opencb.opencga.storage.core.manager.variant.VariantStorageManager; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import java.io.IOException; import java.io.PrintStream; import java.util.*; import java.util.concurrent.TimeUnit; /** * Created by pfurio on 15/08/16. */ public class VariantCommandExecutor extends OpencgaCommandExecutor { private VariantCommandOptions variantCommandOptions; private ManagedChannel channel = null; public VariantCommandExecutor(VariantCommandOptions variantCommandOptions) { super(variantCommandOptions.commonCommandOptions); this.variantCommandOptions = variantCommandOptions; } @Override public void execute() throws Exception { logger.debug("Executing variant command line"); String subCommandString = getParsedSubCommand(variantCommandOptions.jCommander); QueryResponse queryResponse = null; switch (subCommandString) { case "index": queryResponse = index(); break; case "query": queryResponse = query(); break; default: logger.error("Subcommand not valid"); break; } // ObjectMapper objectMapper = new ObjectMapper(); // System.out.println(objectMapper.writeValueAsString(queryResponse.getResponse())); createOutput(queryResponse); } private QueryResponse index() throws CatalogException, IOException { logger.debug("Indexing variant(s)"); String fileIds = variantCommandOptions.indexVariantCommandOptions.fileId; ObjectMap o = new ObjectMap(); o.putIfNotNull(VariantStorageEngine.Options.STUDY_ID.key(), variantCommandOptions.indexVariantCommandOptions.study); o.putIfNotNull("outDir", variantCommandOptions.indexVariantCommandOptions.outdir); o.putIfNotNull("transform", variantCommandOptions.indexVariantCommandOptions.genericVariantIndexOptions.transform); o.putIfNotNull("load", variantCommandOptions.indexVariantCommandOptions.genericVariantIndexOptions.load); o.putIfNotNull(VariantStorageEngine.Options.EXCLUDE_GENOTYPES.key(), variantCommandOptions.indexVariantCommandOptions.genericVariantIndexOptions.excludeGenotype); o.putIfNotNull("includeExtraFields", variantCommandOptions.indexVariantCommandOptions.genericVariantIndexOptions.extraFields); o.putIfNotNull("aggregated", variantCommandOptions.indexVariantCommandOptions.genericVariantIndexOptions.aggregated); o.putIfNotNull(VariantStorageEngine.Options.CALCULATE_STATS.key(), variantCommandOptions.indexVariantCommandOptions.genericVariantIndexOptions.calculateStats); o.putIfNotNull(VariantStorageEngine.Options.ANNOTATE.key(), variantCommandOptions.indexVariantCommandOptions.genericVariantIndexOptions.annotate); o.putIfNotNull(VariantStorageEngine.Options.RESUME.key(), variantCommandOptions.indexVariantCommandOptions.genericVariantIndexOptions.resume); // o.putIfNotNull("overwrite", variantCommandOptions.indexCommandOptions.overwriteAnnotations); o.putAll(variantCommandOptions.commonCommandOptions.params); // return openCGAClient.getFileClient().index(fileIds, o); return openCGAClient.getVariantClient().index(fileIds, o); } private QueryResponse query() throws CatalogException, IOException, InterruptedException { logger.debug("Listing variants of a study."); VariantCommandOptions.VariantQueryCommandOptions queryCommandOptions = variantCommandOptions.queryVariantCommandOptions; String study = resolveStudy(queryCommandOptions.study); ObjectMap params = new ObjectMap(); params.putIfNotNull(VariantDBAdaptor.VariantQueryParams.ID.key(), queryCommandOptions.genericVariantQueryOptions.id); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.REGION.key(), queryCommandOptions.genericVariantQueryOptions.region); // params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.CHROMOSOME.key(), // queryCommandOptions.queryVariantsOptions.chromosome); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.GENE.key(), queryCommandOptions.genericVariantQueryOptions.gene); params.putIfNotNull(VariantDBAdaptor.VariantQueryParams.TYPE.key(), queryCommandOptions.genericVariantQueryOptions.type); // params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.REFERENCE.key(), queryCommandOptions.queryVariantsOptions.reference); // params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ALTERNATE.key(), queryCommandOptions.queryVariantsOptions.alternate); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.RETURNED_STUDIES.key(), queryCommandOptions.genericVariantQueryOptions.returnStudy); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.RETURNED_SAMPLES.key(), queryCommandOptions.genericVariantQueryOptions.returnSample); // params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.RETURNED_FILES.key(), queryCommandOptions.queryVariantsOptions.returnFile); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.STUDIES.key(), study); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.FILES.key(), queryCommandOptions.genericVariantQueryOptions.file); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.RETURNED_FILES.key(), queryCommandOptions.genericVariantQueryOptions.returnFile); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.FILTER.key(), queryCommandOptions.genericVariantQueryOptions.filter); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.STATS_MAF.key(), queryCommandOptions.genericVariantQueryOptions.maf); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.STATS_MGF.key(), queryCommandOptions.genericVariantQueryOptions.mgf); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.MISSING_ALLELES.key(), queryCommandOptions.genericVariantQueryOptions.missingAlleleCount); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.MISSING_GENOTYPES.key(), queryCommandOptions.genericVariantQueryOptions.missingGenotypeCount); // queryOptions.put(VariantDBAdaptor.VariantQueryParams.ANNOTATION_EXISTS.key(), // queryCommandOptions.annotationExists); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.GENOTYPE.key(), queryCommandOptions.genericVariantQueryOptions.sampleGenotype); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.SAMPLES.key(), queryCommandOptions.genericVariantQueryOptions.samples); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_CONSEQUENCE_TYPE.key(), queryCommandOptions.genericVariantQueryOptions.consequenceType); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_XREF.key(), queryCommandOptions.genericVariantQueryOptions.annotXref); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_BIOTYPE.key(), queryCommandOptions.genericVariantQueryOptions.geneBiotype); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_PROTEIN_SUBSTITUTION.key(), queryCommandOptions.genericVariantQueryOptions.proteinSubstitution); // params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_POLYPHEN.key(), queryCommandOptions.queryVariantsOptions.polyphen); // params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_SIFT.key(), queryCommandOptions.queryVariantsOptions.sift); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_CONSERVATION.key(), queryCommandOptions.genericVariantQueryOptions.conservation); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY.key(), queryCommandOptions.genericVariantQueryOptions.populationMaf); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), queryCommandOptions.genericVariantQueryOptions.populationFreqs); // params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_POPULATION_REFERENCE_FREQUENCY.key(), // queryCommandOptions.reference_frequency); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_TRANSCRIPTION_FLAGS.key(), queryCommandOptions.genericVariantQueryOptions.flags); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_GENE_TRAITS_ID.key(), queryCommandOptions.genericVariantQueryOptions.geneTraitId); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_GENE_TRAITS_NAME.key(), queryCommandOptions.genericVariantQueryOptions.geneTraitName); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_HPO.key(), queryCommandOptions.genericVariantQueryOptions.hpo); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_GO.key(), queryCommandOptions.genericVariantQueryOptions.go); // params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_EXPRESSION.key(), queryCommandOptions.genericVariantQueryOptions.expression); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_PROTEIN_KEYWORDS.key(), queryCommandOptions.genericVariantQueryOptions.proteinKeywords); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_DRUG.key(), queryCommandOptions.genericVariantQueryOptions.drugs); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.ANNOT_FUNCTIONAL_SCORE.key(), queryCommandOptions.genericVariantQueryOptions.functionalScore); params.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.UNKNOWN_GENOTYPE.key(), queryCommandOptions.genericVariantQueryOptions.unknownGenotype); // params.put(QueryOptions.SORT, queryCommandOptions.sort); // queryOptions.putIfNotEmpty("merge", queryCommandOptions.merge); params.putIfNotEmpty(VariantCatalogQueryUtils.SAMPLE_FILTER.key(), queryCommandOptions.sampleFilter); QueryOptions options = new QueryOptions(); options.putIfNotEmpty(QueryOptions.INCLUDE, queryCommandOptions.dataModelOptions.include); options.putIfNotEmpty(QueryOptions.EXCLUDE, queryCommandOptions.dataModelOptions.exclude); options.put(QueryOptions.LIMIT, queryCommandOptions.numericOptions.limit); options.put(QueryOptions.SKIP, queryCommandOptions.numericOptions.skip); options.put(QueryOptions.COUNT, queryCommandOptions.numericOptions.count); options.putAll(variantCommandOptions.commonCommandOptions.params); params.put("samplesMetadata", queryCommandOptions.genericVariantQueryOptions.samplesMetadata); params.putIfNotEmpty("groupBy", queryCommandOptions.genericVariantQueryOptions.groupBy); params.put("histogram", queryCommandOptions.genericVariantQueryOptions.histogram); params.put("interval", queryCommandOptions.genericVariantQueryOptions.interval); boolean grpc = usingGrpcMode(queryCommandOptions.mode); if (!grpc) { if (queryCommandOptions.numericOptions.count) { return openCGAClient.getVariantClient().count(params, options); } else if (queryCommandOptions.genericVariantQueryOptions.samplesMetadata || StringUtils.isNoneEmpty(queryCommandOptions.genericVariantQueryOptions.groupBy) || queryCommandOptions.genericVariantQueryOptions.histogram) { return openCGAClient.getVariantClient().genericQuery(params, options); } else { options.put(QueryOptions.SKIP_COUNT, true); params.put(VariantDBAdaptor.VariantQueryParams.SAMPLES_METADATA.key(), true); if (queryCommandOptions.commonOptions.outputFormat.equalsIgnoreCase("vcf") || queryCommandOptions.commonOptions.outputFormat.equalsIgnoreCase("text")) { VariantQueryResult<Variant> variantQueryResult = openCGAClient.getVariantClient().query2(params, options); List<String> annotations = queryCommandOptions.genericVariantQueryOptions.annotations == null ? Collections.singletonList("gene") : Arrays.asList(queryCommandOptions.genericVariantQueryOptions.annotations.split(",")); printVcf(variantQueryResult, queryCommandOptions.study, annotations, System.out); return null; } else { return openCGAClient.getVariantClient().query(params, options); } } } else { ManagedChannel channel = getManagedChannel(); // We use a blocking stub to execute the query to gRPC VariantServiceGrpc.VariantServiceBlockingStub variantServiceBlockingStub = VariantServiceGrpc.newBlockingStub(channel); params.putAll(options); Query query = VariantStorageManager.getVariantQuery(params); Map<String, String> queryMap = new HashMap<>(); Map<String, String> queryOptionsMap = new HashMap<>(); for (String key : params.keySet()) { if (query.containsKey(key)) { queryMap.put(key, query.getString(key)); } else { queryOptionsMap.put(key, params.getString(key)); } } // We create the OpenCGA gRPC request object with the query, queryOptions and sessionId GenericServiceModel.Request request = GenericServiceModel.Request.newBuilder() .putAllQuery(queryMap) .putAllOptions(queryOptionsMap) .setSessionId(sessionId == null ? "" : sessionId) .build(); QueryResponse queryResponse; if (queryCommandOptions.numericOptions.count) { ServiceTypesModel.LongResponse countResponse = variantServiceBlockingStub.count(request); ServiceTypesModel.Response response = countResponse.getResponse(); queryResponse = new QueryResponse<>("", 0, response.getWarning(), response.getError(), new QueryOptions(params), Collections.singletonList( new QueryResult<>(response.getId(), 0, 1, 1, "", "", Collections.singletonList(countResponse.getValue())))); return queryResponse; } else if (queryCommandOptions.genericVariantQueryOptions.samplesMetadata || StringUtils.isNoneEmpty(queryCommandOptions.genericVariantQueryOptions.groupBy) || queryCommandOptions.genericVariantQueryOptions.histogram) { queryResponse = openCGAClient.getVariantClient().genericQuery(params, options); } else { Iterator<VariantProto.Variant> variantIterator = variantServiceBlockingStub.get(request); JsonFormat.Printer printer = JsonFormat.printer(); try (PrintStream printStream = new PrintStream(System.out)) { while (variantIterator.hasNext()) { VariantProto.Variant next = variantIterator.next(); printStream.println(printer.print(next)); } } queryResponse = null; } channel.shutdown().awaitTermination(2, TimeUnit.SECONDS); return queryResponse; } } private boolean usingGrpcMode(String mode) { boolean grpc; switch (mode.toUpperCase()) { case "AUTO": grpc = isGrpcAvailable() == null; if (grpc) { logger.debug("Using GRPC mode"); } else { logger.debug("Using REST mode"); } break; case "GRPC": RuntimeException exception = isGrpcAvailable(); if (exception != null) { throw exception; } grpc = true; break; case "REST": grpc = false; break; default: throw new IllegalArgumentException("Unknown mode " + mode); } return grpc; } protected synchronized ManagedChannel getManagedChannel() { if (channel == null) { // Connecting to the server host and port String grpcServerHost = clientConfiguration.getGrpc().getHost(); logger.debug("Connecting to gRPC server at '{}'", grpcServerHost); // We create the gRPC channel to the specified server host and port channel = ManagedChannelBuilder.forTarget(grpcServerHost) .usePlaintext(true) .build(); } return channel; } protected RuntimeException isGrpcAvailable() { // Connecting to the server host and port try { ManagedChannel channel = getManagedChannel(); AdminServiceGrpc.AdminServiceBlockingStub stub = AdminServiceGrpc.newBlockingStub(channel); ServiceTypesModel.MapResponse status = stub.status(GenericServiceModel.Request.getDefaultInstance()); return null; } catch (RuntimeException e) { return e; } } private void printVcf(VariantQueryResult<Variant> variantQueryResult, String study, List<String> annotations, PrintStream outputStream) { logger.debug("Samples from variantQueryResult: {}", variantQueryResult.getSamples()); Map<String, List<String>> samplePerStudy = new HashMap<>(); // Aggregated studies do not contain samples if (variantQueryResult.getSamples() != null) { // We have to remove the user and project from the Study name variantQueryResult.getSamples().forEach((st, sampleList) -> { String study1 = st.split(":")[1]; samplePerStudy.put(study1, sampleList); }); } // Prepare samples for the VCF header List<String> samples = null; if (StringUtils.isEmpty(study)) { if (samplePerStudy.size() == 1) { study = samplePerStudy.keySet().iterator().next(); samples = samplePerStudy.get(study); } } else { if (study.contains(":")) { study = study.split(":")[1]; } else { if (clientConfiguration.getAlias().get(study) != null) { study = clientConfiguration.getAlias().get(study); if (study.contains(":")) { study = study.split(":")[1]; } } } samples = samplePerStudy.get(study); } // TODO move this to biodata if (samples == null) { samples = new ArrayList<>(); } // Prepare other VCF fields List<String> cohorts = Arrays.asList("ALL", "MXL"); List<String> formats = new ArrayList<>(); List<String> formatTypes = new ArrayList<>(); List<Integer> formatArities = new ArrayList<>(); List<String> formatDescriptions = new ArrayList<>(); if (clientConfiguration.getVariant() != null && clientConfiguration.getVariant().getIncludeFormats() != null) { String studyConfigAlias = null; if (clientConfiguration.getVariant().getIncludeFormats().get(study) != null) { studyConfigAlias = study; } else { // Search for the study alias if (clientConfiguration.getAlias() != null) { for (Map.Entry<String, String> stringStringEntry : clientConfiguration.getAlias().entrySet()) { if (stringStringEntry.getValue().contains(study)) { studyConfigAlias = stringStringEntry.getKey(); logger.debug("Updating study name by alias (key) when including formats: from " + study + " to " + studyConfigAlias); break; } } } } // create format arrays (names, types, arities, descriptions) String formatFields = clientConfiguration.getVariant().getIncludeFormats().get(studyConfigAlias); if (formatFields != null) { String[] fields = formatFields.split(","); for (String field : fields) { String[] subfields = field.split(":"); if (subfields.length == 4) { formats.add(subfields[0]); formatTypes.add(subfields[1]); if (StringUtils.isEmpty(subfields[2]) || !StringUtils.isNumeric(subfields[2])) { formatArities.add(1); logger.debug("Invalid arity for format " + subfields[0] + ", updating arity to 1"); } else { formatArities.add(Integer.parseInt(subfields[2])); } formatDescriptions.add(subfields[3]); } else { // We do not need the extra information fields for "GT", "AD", "DP", "GQ", "PL". formats.add(subfields[0]); formatTypes.add(""); formatArities.add(0); formatDescriptions.add(""); } } } else { logger.debug("No formats found for: {}, setting default format: {}", study, VcfUtils.DEFAULT_SAMPLE_FORMAT); formats = VcfUtils.DEFAULT_SAMPLE_FORMAT; } } else { logger.debug("No formats found for: {}, setting default format: {}", study, VcfUtils.DEFAULT_SAMPLE_FORMAT); formats = VcfUtils.DEFAULT_SAMPLE_FORMAT; } // TODO: modify VcfUtils in biodata project to take into account the formatArities VCFHeader vcfHeader = VcfUtils.createVCFHeader(cohorts, annotations, formats, formatTypes, formatDescriptions, samples, null); VariantContextWriter variantContextWriter = VcfUtils.createVariantContextWriter(outputStream, vcfHeader.getSequenceDictionary(), null); VariantContextToAvroVariantConverter variantContextToAvroVariantConverter = new VariantContextToAvroVariantConverter(study, samples, annotations); variantContextWriter.writeHeader(vcfHeader); for (Variant variant : variantQueryResult.getResult()) { VariantContext variantContext = variantContextToAvroVariantConverter.from(variant); variantContextWriter.add(variantContext); } variantContextWriter.close(); outputStream.close(); } }