/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.server.rest.analysis;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.swagger.annotations.*;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResponse;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.opencga.catalog.db.api.SampleDBAdaptor;
import org.opencb.opencga.catalog.models.Job;
import org.opencb.opencga.catalog.models.Sample;
import org.opencb.opencga.core.exception.VersionException;
import org.opencb.opencga.storage.core.manager.variant.VariantCatalogQueryUtils;
import org.opencb.opencga.storage.core.manager.variant.VariantStorageManager;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;
import org.opencb.opencga.storage.core.variant.analysis.VariantSampleFilter;
import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager;
import javax.servlet.http.HttpServletRequest;
import javax.ws.rs.*;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.UriInfo;
import java.io.IOException;
import java.util.*;
import static org.opencb.opencga.storage.core.variant.VariantStorageEngine.Options.*;
import static org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor.*;
/**
* Created by imedina on 17/08/16.
*/
@Path("/{version}/analysis/variant")
@Produces(MediaType.APPLICATION_JSON)
@Api(value = "Analysis - Variant", position = 4, description = "Methods for working with 'files' endpoint")
public class VariantAnalysisWSService extends AnalysisWSService {
/**
 * Builds the service from the JAX-RS injected request context.
 *
 * @param uriInfo            URI information of the current request, handed to the parent constructor.
 * @param httpServletRequest servlet request of the current call, handed to the parent constructor.
 * @throws IOException      propagated from the parent constructor.
 * @throws VersionException propagated from the parent constructor.
 */
public VariantAnalysisWSService(
        @Context UriInfo uriInfo,
        @Context HttpServletRequest httpServletRequest) throws IOException, VersionException {
    super(uriInfo, httpServletRequest);
}
/**
 * Builds the service with an explicitly supplied version value in addition to the request context.
 *
 * @param version            version value passed unchanged to the parent constructor.
 * @param uriInfo            URI information of the current request, handed to the parent constructor.
 * @param httpServletRequest servlet request of the current call, handed to the parent constructor.
 * @throws IOException      propagated from the parent constructor.
 * @throws VersionException propagated from the parent constructor.
 */
public VariantAnalysisWSService(
        String version,
        @Context UriInfo uriInfo,
        @Context HttpServletRequest httpServletRequest) throws IOException, VersionException {
    super(version, uriInfo, httpServletRequest);
}
/**
 * Requests the indexing of one or more variant files through the catalog file manager.
 * <p>
 * The explicitly declared options are copied into a parameter map; any other entry of the inherited
 * {@code query} object whose key is not in the known-parameter set is forwarded as well (as a string).
 * When a deprecated alias ({@code fileId}, {@code studyId}) is non-empty it replaces the value of the
 * current parameter ({@code file}, {@code study}).
 *
 * @return the OK response wrapping the result of the index call, or an error response if any exception is thrown.
 */
@GET
@Path("/index")
@ApiOperation(value = "Index variant files", position = 14, response = QueryResponse.class)
public Response index(@ApiParam(value = "(DEPRECATED) Comma separated list of file ids (files or directories)", hidden = true)
        @QueryParam(value = "fileId") String fileIdStrOld,
        @ApiParam(value = "Comma separated list of file ids (files or directories)", required = true)
        @QueryParam(value = "file") String fileIdStr,
        // Study id is not ingested by the analysis index command line. No longer needed.
        @ApiParam(value = "(DEPRECATED) Study id", hidden = true) @QueryParam("studyId") String studyStrOld,
        @ApiParam(value = "Study [[user@]project:]study where study and project can be either the id or alias")
        @QueryParam("study") String studyStr,
        @ApiParam("Output directory id") @QueryParam("outDir") String outDirStr,
        @ApiParam("Boolean indicating that only the transform step will be run") @DefaultValue("false") @QueryParam("transform") boolean transform,
        @ApiParam("Boolean indicating that only the load step will be run") @DefaultValue("false") @QueryParam("load") boolean load,
        @ApiParam("Comma separated list of fields to be include in the index") @QueryParam("includeExtraFields") String includeExtraFields,
        @ApiParam("Type of aggregated VCF file: none, basic, EVS or ExAC") @DefaultValue("none") @QueryParam("aggregated") String aggregated,
        @ApiParam("Calculate indexed variants statistics after the load step") @DefaultValue("false") @QueryParam("calculateStats") boolean calculateStats,
        @ApiParam("Annotate indexed variants after the load step") @DefaultValue("false") @QueryParam("annotate") boolean annotate,
        @ApiParam("Overwrite annotations already present in variants") @DefaultValue("false") @QueryParam("overwrite") boolean overwriteAnnotations) {

    // A non-empty deprecated alias takes precedence over the current parameter name.
    final String effectiveFiles = StringUtils.isNotEmpty(fileIdStrOld) ? fileIdStrOld : fileIdStr;
    final String effectiveStudy = StringUtils.isNotEmpty(studyStrOld) ? studyStrOld : studyStr;

    // Insertion-ordered map of the options handed to the index call.
    Map<String, String> params = new LinkedHashMap<>();
    addParamIfNotNull(params, "study", effectiveStudy);
    addParamIfNotNull(params, "outdir", outDirStr);
    addParamIfTrue(params, "transform", transform);
    addParamIfTrue(params, "load", load);
    addParamIfNotNull(params, EXTRA_GENOTYPE_FIELDS.key(), includeExtraFields);
    addParamIfNotNull(params, AGGREGATED_TYPE.key(), aggregated);
    addParamIfTrue(params, CALCULATE_STATS.key(), calculateStats);
    addParamIfTrue(params, ANNOTATE.key(), annotate);
    addParamIfTrue(params, VariantAnnotationManager.OVERWRITE_ANNOTATIONS, overwriteAnnotations);

    // Keys already handled above (or reserved); they must not be forwarded a second time.
    // NOTE(review): "file" and "fileId" are not listed, so if the inherited query object holds them
    // they are also copied into params by the loop below - confirm this is intended.
    Set<String> knownParams = new HashSet<>(Arrays.asList(
            "study", "studyId", "outDir", "transform", "load", "includeExtraFields", "aggregated",
            "calculateStats", "annotate", "overwrite", "sid", "include", "exclude"));

    // Forward every remaining non-null entry as a plain string.
    query.forEach((key, value) -> {
        if (value != null && !knownParams.contains(key)) {
            params.put(key, value.toString());
        }
    });
    logger.info("ObjectMap: {}", params);

    try {
        QueryResult indexResult = catalogManager.getFileManager().index(effectiveFiles, effectiveStudy, "VCF", params, sessionId);
        return createOkResponse(indexResult);
    } catch (Exception e) {
        return createErrorResponse(e);
    }
}
/**
 * Fetches variants using filters supplied as URI query parameters.
 * <p>
 * Only {@code groupBy}, {@code histogram}, {@code interval} and {@code summary} are read from the method
 * arguments; every other filter is taken from the URI query parameters when the query options are built
 * (the remaining arguments are presumably declared for the generated API documentation - confirm).
 * Exactly one operation runs, chosen in this order: count, histogram, group-by, plain fetch.
 *
 * @return the OK response wrapping a one-element list with the result, or an error response on any exception.
 */
@GET
@Path("/query")
@ApiOperation(value = "Fetch variants from a VCF/gVCF file", position = 15, response = Variant[].class)
@ApiImplicitParams({
        @ApiImplicitParam(name = QueryOptions.INCLUDE, value = "Fields included in the response, whole JSON path must be provided", example = "name,attributes", dataType = "string", paramType = "query"),
        @ApiImplicitParam(name = QueryOptions.EXCLUDE, value = "Fields excluded in the response, whole JSON path must be provided", example = "id,status", dataType = "string", paramType = "query"),
        @ApiImplicitParam(name = QueryOptions.LIMIT, value = "Number of results to be returned in the queries", dataType = "integer", paramType = "query"),
        @ApiImplicitParam(name = QueryOptions.SKIP, value = "Number of results to skip in the queries", dataType = "integer", paramType = "query"),
        @ApiImplicitParam(name = QueryOptions.COUNT, value = "Total number of results", dataType = "boolean", paramType = "query")
})
public Response getVariants(@ApiParam(value = "List of variant ids") @QueryParam("ids") String ids,
        @ApiParam(value = "List of regions: {chr}:{start}-{end}") @QueryParam("region") String region,
        @ApiParam(value = "List of chromosomes") @QueryParam("chromosome") String chromosome,
        @ApiParam(value = "List of genes") @QueryParam("gene") String gene,
        @ApiParam(value = "Variant type: [SNV, MNV, INDEL, SV, CNV]") @QueryParam("type") String type,
        @ApiParam(value = "Reference allele") @QueryParam("reference") String reference,
        @ApiParam(value = "Main alternate allele") @QueryParam("alternate") String alternate,
        @ApiParam(value = "", required = true) @QueryParam("studies") String studies,
        @ApiParam(value = "List of studies to be returned") @QueryParam("returnedStudies") String returnedStudies,
        @ApiParam(value = "List of samples to be returned") @QueryParam("returnedSamples") String returnedSamples,
        @ApiParam(value = "List of files to be returned.") @QueryParam("returnedFiles") String returnedFiles,
        @ApiParam(value = "Variants in specific files") @QueryParam("files") String files,
        @ApiParam(value = VariantDBAdaptor.FILTER_DESCR) @QueryParam("filter") String filter,
        @ApiParam(value = "Minor Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}") @QueryParam("maf") String maf,
        @ApiParam(value = "Minor Genotype Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}") @QueryParam("mgf") String mgf,
        @ApiParam(value = "Number of missing alleles: [{study:}]{cohort}[<|>|<=|>=]{number}") @QueryParam("missingAlleles") String missingAlleles,
        @ApiParam(value = "Number of missing genotypes: [{study:}]{cohort}[<|>|<=|>=]{number}") @QueryParam("missingGenotypes") String missingGenotypes,
        @ApiParam(value = "Specify if the variant annotation must exists.") @QueryParam("annotationExists") boolean annotationExists,
        @ApiParam(value = "Samples with a specific genotype: {samp_1}:{gt_1}(,{gt_n})*(;{samp_n}:{gt_1}(,{gt_n})*)* e.g. HG0097:0/0;HG0098:0/1,1/1") @QueryParam("genotype") String genotype,
        @ApiParam(value = VariantDBAdaptor.SAMPLES_DESCR) @QueryParam("samples") String samples,
        @ApiParam(value = VariantCatalogQueryUtils.SAMPLE_FILTER_DESC) @QueryParam("sampleFilter") String sampleFilter,
        @ApiParam(value = "Consequence type SO term list. e.g. missense_variant,stop_lost or SO:0001583,SO:0001578") @QueryParam("annot-ct") String annot_ct,
        @ApiParam(value = "XRef") @QueryParam("annot-xref") String annot_xref,
        @ApiParam(value = "Biotype") @QueryParam("annot-biotype") String annot_biotype,
        @ApiParam(value = "Polyphen, protein substitution score. [<|>|<=|>=]{number} or [~=|=|]{description} e.g. <=0.9 , =benign") @QueryParam("polyphen") String polyphen,
        @ApiParam(value = "Sift, protein substitution score. [<|>|<=|>=]{number} or [~=|=|]{description} e.g. >0.1 , ~=tolerant") @QueryParam("sift") String sift,
        @ApiParam(value = "Conservation score: {conservation_score}[<|>|<=|>=]{number} e.g. phastCons>0.5,phylop<0.1,gerp>0.1") @QueryParam("conservation") String conservation,
        @ApiParam(value = "Population minor allele frequency: {study}:{population}[<|>|<=|>=]{number}") @QueryParam("annot-population-maf") String annotPopulationMaf,
        @ApiParam(value = "Alternate Population Frequency: {study}:{population}[<|>|<=|>=]{number}") @QueryParam("alternate_frequency") String alternate_frequency,
        @ApiParam(value = "Reference Population Frequency: {study}:{population}[<|>|<=|>=]{number}") @QueryParam("reference_frequency") String reference_frequency,
        @ApiParam(value = "List of transcript annotation flags. e.g. CCDS, basic, cds_end_NF, mRNA_end_NF, cds_start_NF, mRNA_start_NF, seleno") @QueryParam("annot-transcription-flags") String transcriptionFlags,
        @ApiParam(value = "List of gene trait association id. e.g. \"umls:C0007222\" , \"OMIM:269600\"") @QueryParam("annot-gene-trait-id") String geneTraitId,
        @ApiParam(value = "List of gene trait association names. e.g. \"Cardiovascular Diseases\"") @QueryParam("annot-gene-trait-name") String geneTraitName,
        @ApiParam(value = "List of HPO terms. e.g. \"HP:0000545\"") @QueryParam("annot-hpo") String hpo,
        @ApiParam(value = "List of GO (Genome Ontology) terms. e.g. \"GO:0002020\"") @QueryParam("annot-go") String go,
        @ApiParam(value = "List of tissues of interest. e.g. \"tongue\"") @QueryParam("annot-expression") String expression,
        @ApiParam(value = "List of protein variant annotation keywords") @QueryParam("annot-protein-keywords") String proteinKeyword,
        @ApiParam(value = "List of drug names") @QueryParam("annot-drug") String drug,
        @ApiParam(value = "Perform a full-text search on a list of traits") @QueryParam("traits") String traits,
        @ApiParam(value = "Functional score: {functional_score}[<|>|<=|>=]{number} e.g. cadd_scaled>5.2 , cadd_raw<=0.3") @QueryParam("annot-functional-score") String functional,
        @ApiParam(value = "Returned genotype for unknown genotypes. Common values: [0/0, 0|0, ./.]") @QueryParam("unknownGenotype") String unknownGenotype,
        // "limit" and "skip" are not method arguments; they are declared as implicit params above.
        @ApiParam(value = "Returns the samples metadata group by study. Sample names will appear in the same order as their corresponding genotypes.", required = false) @QueryParam("samplesMetadata") boolean samplesMetadata,
        @ApiParam(value = "Sort the results", required = false) @QueryParam("sort") boolean sort,
        @ApiParam(value = "Group variants by: [ct, gene, ensemblGene]", required = false) @DefaultValue("") @QueryParam("groupBy") String groupBy,
        @ApiParam(value = "Calculate histogram. Requires one region.", required = false) @DefaultValue("false") @QueryParam("histogram") boolean histogram,
        @ApiParam(value = "Histogram interval size", required = false) @DefaultValue("2000") @QueryParam("interval") int interval,
        @ApiParam(value = "Fetch summary data from Solr", required = false) @QueryParam("summary") boolean summary,
        @ApiParam(value = "Merge results", required = false) @DefaultValue("false") @QueryParam("merge") boolean merge) {
    try {
        // All URI query parameters become query options; "summary" is always written explicitly.
        QueryOptions queryOptions = new QueryOptions(uriInfo.getQueryParameters(), true);
        queryOptions.put("summary", summary);
        Query variantQuery = VariantStorageManager.getVariantQuery(queryOptions);

        // Pick exactly one operation; the first matching mode wins.
        final QueryResult result;
        if (count) {
            result = variantManager.count(variantQuery, sessionId);
        } else if (histogram) {
            result = variantManager.getFrequency(variantQuery, interval, sessionId);
        } else if (StringUtils.isNotEmpty(groupBy)) {
            result = variantManager.groupBy(groupBy, variantQuery, queryOptions, sessionId);
        } else {
            result = variantManager.get(variantQuery, queryOptions, sessionId);
        }

        // The response body is a list holding the single result.
        List<QueryResult> results = new LinkedList<>();
        results.add(result);
        return createOkResponse(results);
    } catch (Exception e) {
        return createErrorResponse(e);
    }
}
/**
 * Request-body model of the POST "/query" endpoint: one public field per supported variant filter or option.
 * <p>
 * Most fields are reference types (String / Boolean) and have no initializer, so they are null unless set.
 * Note that samplesMetadata, sort, histogram, interval and merge are primitives with explicit defaults
 * and therefore always carry a value.
 * <p>
 * Fields annotated with @JsonProperty use the dashed name given in the annotation as their JSON property name.
 */
private static class VariantQueryParams {
public String ids;
public String region;
public String chromosome;
public String gene;
public String type;
public String reference;
public String alternate;
public String studies;
public String returnedStudies;
public String returnedSamples;
public String returnedFiles;
public String files;
public String filter;
public String maf;
public String mgf;
public String missingAlleles;
public String missingGenotypes;
// Boxed on purpose: stays null when not provided.
public Boolean annotationExists;
public String genotype;
@JsonProperty("annot-ct")
// @ApiModelProperty(name = "annot-ct")
public String annot_ct;
@JsonProperty("annot-xref")
public String annot_xref;
@JsonProperty("annot-biotype")
public String annot_biotype;
public String polyphen;
public String sift;
// public String protein_substitution;
public String conservation;
@JsonProperty("annot-population-maf")
public String annotPopulationMaf;
public String alternate_frequency;
public String reference_frequency;
@JsonProperty("annot-transcription-flags")
public String transcriptionFlags;
@JsonProperty("annot-gene-trait-id")
public String geneTraitId;
@JsonProperty("annot-gene-trait-name")
public String geneTraitName;
@JsonProperty("annot-hpo")
public String hpo;
@JsonProperty("annot-go")
public String go;
@JsonProperty("annot-expression")
public String expression;
@JsonProperty("annot-protein-keywords")
public String proteinKeyword;
@JsonProperty("annot-drug")
public String drug;
@JsonProperty("annot-functional-score")
public String functional;
public String unknownGenotype;
// The remaining options are primitives with explicit defaults (never null).
public boolean samplesMetadata = false;
public boolean sort = false;
public String groupBy;
public boolean histogram = false;
public int interval = 2000;
public boolean merge = false;
}
/**
 * Fetches variants using filters supplied in the JSON request body.
 * <p>
 * The variant query is built from the body ({@code params}), while the query options are built from the
 * URI query parameters. Exactly one operation runs, chosen in this order: count, histogram, group-by,
 * plain fetch.
 * <p>
 * A missing body is rejected up front with an explicit error instead of surfacing later as an
 * uninformative NullPointerException.
 *
 * @param params filters and options read from the request body; must not be null.
 * @return the OK response wrapping a one-element list with the result, or an error response if the body
 *         is missing or any exception is thrown.
 */
@POST
@Path("/query")
@ApiOperation(value = "Fetch variants from a VCF/gVCF file", position = 15, response = Variant[].class)
@ApiImplicitParams({
        @ApiImplicitParam(name = "include", value = "Fields included in the response, whole JSON path must be provided",
                example = "name,attributes", dataType = "string", paramType = "query"),
        @ApiImplicitParam(name = "exclude", value = "Fields excluded in the response, whole JSON path must be provided",
                example = "id,status", dataType = "string", paramType = "query"),
        @ApiImplicitParam(name = "limit", value = "Number of results to be returned in the queries", dataType = "integer",
                paramType = "query"),
        @ApiImplicitParam(name = "skip", value = "Number of results to skip in the queries", dataType = "integer", paramType = "query"),
        @ApiImplicitParam(name = "count", value = "Total number of results", dataType = "boolean", paramType = "query")
})
public Response getVariants(@ApiParam(name = "params", value = "Query parameters", required = true) VariantQueryParams params) {
    logger.info("count {} , limit {} , skip {}", count, limit, skip);

    // The body is declared as required; fail with a clear message when it is absent.
    if (params == null) {
        return createErrorResponse(new IllegalArgumentException("Missing request body: query parameters are required"));
    }

    try {
        // Filters come from the body, options (include, exclude, limit, ...) from the URI.
        QueryOptions postParams = new QueryOptions(jsonObjectMapper.writeValueAsString(params));
        QueryOptions queryOptions = new QueryOptions(uriInfo.getQueryParameters(), true);
        Query variantQuery = VariantStorageManager.getVariantQuery(postParams);

        logger.info("query {}", variantQuery.toJson());
        logger.info("postParams {}", postParams.toJson());
        logger.info("queryOptions {}", queryOptions.toJson());

        // Pick exactly one operation; the first matching mode wins.
        final QueryResult queryResult;
        if (count) {
            queryResult = variantManager.count(variantQuery, sessionId);
        } else if (params.histogram) {
            queryResult = variantManager.getFrequency(variantQuery, params.interval, sessionId);
        } else if (StringUtils.isNotEmpty(params.groupBy)) {
            queryResult = variantManager.groupBy(params.groupBy, variantQuery, queryOptions, sessionId);
        } else {
            queryResult = variantManager.get(variantQuery, queryOptions, sessionId);
        }

        List<QueryResult> queryResults = new LinkedList<>();
        queryResults.add(queryResult);
        return createOkResponse(queryResults);
    } catch (Exception e) {
        return createErrorResponse(e);
    }
}
/**
 * Returns the catalog samples that carry one of the requested genotypes in the variants matched by the query.
 * <p>
 * The variant query is built from the URI query parameters. When {@code samples} is given, it is set as
 * the returned-samples list and the samples filter is removed from the query. Depending on {@code all},
 * the selected samples are those present in ALL matched variants or in ANY of them. The resulting names
 * are then looked up in the catalog for the given study.
 *
 * @return the OK response wrapping the sample lookup result, or an error response on any exception.
 */
@GET
@Path("/samples")
@ApiOperation(value = "Get samples given a set of variants", position = 14, response = Sample.class)
@ApiImplicitParams({
        @ApiImplicitParam(name = "ids", value = ID_DESCR, dataType = "string", paramType = "query"),
        @ApiImplicitParam(name = "region", value = REGION_DESCR, dataType = "string", paramType = "query"),
        @ApiImplicitParam(name = "chromosome", value = CHROMOSOME_DESCR, dataType = "string", paramType = "query"),
        @ApiImplicitParam(name = "gene", value = GENE_DESCR, dataType = "string", paramType = "query"),
        @ApiImplicitParam(name = "type", value = TYPE_DESCR, dataType = "string", paramType = "query"),
        @ApiImplicitParam(name = QueryOptions.INCLUDE, value = "Fields included in the response, whole JSON path must be provided", example = "name,attributes", dataType = "string", paramType = "query"),
        @ApiImplicitParam(name = QueryOptions.EXCLUDE, value = "Fields excluded in the response, whole JSON path must be provided", example = "id,status", dataType = "string", paramType = "query")
})
public Response samples(
        @ApiParam(value = "Study where all the samples belong to") @QueryParam("study") String study,
        @ApiParam(value = "List of samples to check. By default, all samples") @QueryParam("samples") String samples,
        @ApiParam(value = "Genotypes that the sample must have to be selected") @QueryParam("genotypes") @DefaultValue("0/1,1/1") String genotypesStr,
        @ApiParam(value = "Samples must be present in ALL variants or in ANY variant.") @QueryParam("all") @DefaultValue("false") boolean all
) {
    try {
        VariantSampleFilter filterBySample = new VariantSampleFilter(variantManager.iterable(sessionId));
        List<String> genotypes = Arrays.asList(genotypesStr.split(","));

        QueryOptions queryOptions = new QueryOptions(uriInfo.getQueryParameters(), true);
        Query variantQuery = VariantStorageManager.getVariantQuery(queryOptions);

        // An explicit sample list restricts the returned samples and replaces the samples filter.
        if (StringUtils.isNotEmpty(samples)) {
            List<String> requestedSamples = Arrays.asList(samples.split(","));
            variantQuery.append(VariantDBAdaptor.VariantQueryParams.RETURNED_SAMPLES.key(), requestedSamples);
            variantQuery.remove(VariantDBAdaptor.VariantQueryParams.SAMPLES.key());
        }
        if (StringUtils.isNotEmpty(study)) {
            variantQuery.append(VariantDBAdaptor.VariantQueryParams.STUDIES.key(), study);
        }

        long studyId = catalogManager.getStudyId(study, sessionId);

        // ALL: samples found in every matched variant; ANY: samples found in at least one.
        Collection<String> sampleNames = all
                ? filterBySample.getSamplesInAllVariants(variantQuery, genotypes)
                : filterBySample.getSamplesInAnyVariants(variantQuery, genotypes).keySet();

        // Resolve the selected names against the catalog.
        String joinedNames = String.join(",", sampleNames);
        Query sampleQuery = new Query(SampleDBAdaptor.QueryParams.NAME.key(), joinedNames);
        QueryResult<Sample> matchedSamples = catalogManager.getAllSamples(studyId, sampleQuery, queryOptions, sessionId);
        return createOkResponse(matchedSamples);
    } catch (Exception e) {
        return createErrorResponse(e);
    }
}
/**
 * Placeholder endpoint: it currently only answers with the literal "[PENDING]".
 *
 * @return the OK response wrapping "[PENDING]", or an error response if building it throws.
 */
@GET
@Path("/ibs")
@ApiOperation(value = "[PENDING]", position = 15, response = Job.class)
@ApiImplicitParams({
        @ApiImplicitParam(name = "limit", value = "[TO BE IMPLEMENTED] Number of results to be returned in the queries", dataType = "integer", paramType = "query")
})
public Response ibs() {
    Response response;
    try {
        response = createOkResponse("[PENDING]");
    } catch (Exception e) {
        response = createErrorResponse(e);
    }
    return response;
}
/**
 * Placeholder endpoint: variant stats calculation is not implemented yet.
 *
 * @return an error response built from a NotImplementedException with the message "Pending".
 */
@GET
@Path("/stats")
@ApiOperation(value = "Calculate variant stats [PENDING]", position = 2)
public Response stats() {
    NotImplementedException pending = new NotImplementedException("Pending");
    return createErrorResponse(pending);
}
}