/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.core.variant.adaptors;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.core.Gene;
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.annotation.ConsequenceTypeMappings;
import org.opencb.cellbase.core.api.GeneDBAdaptor;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryParam;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.metadata.StudyConfigurationManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.*;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import static org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor.VariantQueryParams.*;
/**
* Created on 29/01/16 .
*
* @author Jacobo Coll <jacobo167@gmail.com>
*/
public class VariantDBAdaptorUtils {
/** Splits a filter expression into three groups: key, operator and filter value. See {@link #splitOperator(String)}. */
public static final Pattern OPERATION_PATTERN = Pattern.compile("^([^=<>~!]*)(<=?|>=?|!=?|!?=?~|==?)([^=<>~!]+.*)$");
/** Matches one {@code sample:gt1,gt2} element of a genotype filter, followed by its separator (named groups: sample, gts, op). */
private static final Pattern GENOTYPE_FILTER_PATTERN = Pattern.compile("(?<sample>[^,;]+):(?<gts>([^:;,]+,?)+)(?<op>[;,.])");

/** Separator of values combined with a logical OR. */
public static final String OR = ",";
/** Separator of values combined with a logical AND. */
public static final String AND = ";";
public static final String IS = ":";
public static final String STUDY_POP_FREQ_SEPARATOR = ":";

/** Special value checked by the "returned" parameters to select nothing. */
public static final String NONE = "none";
/** Special value checked by the "returned" parameters to select everything. */
public static final String ALL = "all";

/** Number of positions added on each side of a gene when building its region. See {@link #getGeneRegion(String)}. */
private static final int GENE_EXTRA_REGION = 5000;

// Never reassigned: declared final.
private static final Logger logger = LoggerFactory.getLogger(VariantDBAdaptorUtils.class);

// Assigned once in the constructor: declared final.
private final VariantDBAdaptor adaptor;
/**
 * Logical operation used to combine the values of a filter.
 * Each operation carries the separator string that delimits its values.
 */
public enum QueryOperation {
    AND(VariantDBAdaptorUtils.AND),
    OR(VariantDBAdaptorUtils.OR);

    private final String separator;

    QueryOperation(String delimiter) {
        separator = delimiter;
    }

    /**
     * @return The string that separates the values joined by this operation
     */
    public String separator() {
        return this.separator;
    }
}
/**
 * Creates the utils bound to the given adaptor.
 *
 * @param variantDBAdaptor Adaptor used by the instance methods of this class
 */
public VariantDBAdaptorUtils(VariantDBAdaptor variantDBAdaptor) {
    this.adaptor = variantDBAdaptor;
}
/**
 * Checks that the query holds a usable value for the given param: the value must not be null and,
 * when it is a String or a Collection, it must not be empty.
 *
 * <pre>
 * isValidParam(new Query(), PARAM) == false
 * isValidParam(new Query(PARAM.key(), null), PARAM) == false
 * isValidParam(new Query(PARAM.key(), ""), PARAM) == false
 * isValidParam(new Query(PARAM.key(), Collections.emptyList()), PARAM) == false
 * isValidParam(new Query(PARAM.key(), 5), PARAM) == true
 * isValidParam(new Query(PARAM.key(), "sdfas"), PARAM) == true
 * </pre>
 *
 * @param query Query to inspect
 * @param param QueryParam to look for
 * @return true if the param has a non-null, non-empty value
 */
public static boolean isValidParam(Query query, QueryParam param) {
    Object value = query.getOrDefault(param.key(), null);
    if (value == null) {
        return false;
    }
    if (value instanceof String) {
        return !((String) value).isEmpty();
    }
    if (value instanceof Collection) {
        return !((Collection<?>) value).isEmpty();
    }
    // Any other non-null value (numbers, booleans, ...) is considered valid.
    return true;
}
/**
 * Tells whether a filter value is negated, i.e. its first character is '!'.
 *
 * @param value Value to check
 * @return true if the value starts with '!'
 */
public static boolean isNegated(String value) {
    return !value.isEmpty() && value.charAt(0) == '!';
}
/**
 * Tells whether the value is one of the special values {@link #NONE} or {@link #ALL}.
 *
 * @param value Value to check
 * @return true if the value equals "none" or "all"
 */
public static boolean isNoneOrAll(String value) {
    if (value.equals(NONE)) {
        return true;
    }
    return value.equals(ALL);
}
/**
 * Tells whether the value looks like a known variant accession.
 * Values starting with 'rs' or 'VAR_' are accepted.
 *
 * @param value Value to check
 * @return true if the value has one of the known prefixes
 */
public static boolean isVariantAccession(String value) {
    if (value.startsWith("rs")) {
        return true;
    }
    return value.startsWith("VAR_");
}
/**
 * Tells whether the value looks like a known clinical accession.
 *
 * ClinVar accessions start with 'RCV'.
 * COSMIC mutation ids start with 'COSM'.
 *
 * @param value Value to check
 * @return true if the value has one of the known prefixes
 */
public static boolean isClinicalAccession(String value) {
    if (value.startsWith("RCV")) {
        return true;
    }
    return value.startsWith("COSM");
}
/**
 * Tells whether the value looks like a known gene accession.
 *
 * Human Phenotype Ontology (HPO) terms start with 'HP:'.
 * Online Mendelian Inheritance in Man (OMIM) terms start with 'OMIM:'.
 *
 * @param value Value to check
 * @return true if the value has one of the known prefixes
 */
public static boolean isGeneAccession(String value) {
    if (value.startsWith("HP:")) {
        return true;
    }
    return value.startsWith("OMIM:");
}
/**
 * Tells whether the value has the shape of a variant id, i.e. it contains exactly three ':' separators.
 *
 * chr:pos:ref:alt
 *
 * @param value Value to check
 * @return true if the value contains exactly three ':' characters
 */
public static boolean isVariantId(String value) {
    return StringUtils.countMatches(value, ':') == 3;
}
/**
 * Parses the given value into a {@link Variant}, if it has the shape of a variant id.
 *
 * chr:pos:ref:alt
 *
 * @param value Value to parse
 * @return The parsed variant, or null if the value is not a variant id (see {@link #isVariantId(String)})
 *         or the Variant constructor rejects it with an IllegalArgumentException
 */
public static Variant toVariant(String value) {
    // A variant id contains exactly three ':' characters, so no further ':' check is needed.
    if (!isVariantId(value)) {
        return null;
    }
    try {
        return new Variant(value);
    } catch (IllegalArgumentException e) {
        // TODO: Should this throw an exception?
        logger.info("Wrong variant " + value, e);
        return null;
    }
}
/**
 * @return The StudyConfigurationManager provided by the underlying adaptor
 */
public StudyConfigurationManager getStudyConfigurationManager() {
    return this.adaptor.getStudyConfigurationManager();
}
/**
 * Gets the study ids as reported by the StudyConfigurationManager.
 *
 * @param options Options forwarded to the StudyConfigurationManager
 * @return List of study ids
 */
public List<Integer> getStudyIds(QueryOptions options) {
    StudyConfigurationManager manager = getStudyConfigurationManager();
    return manager.getStudyIds(options);
}
/**
 * Translates a list of study references into study ids.
 * Study names are replaced by their ids.
 * References starting with '!' are excluded.
 *
 * @param studiesNames List of study names or study ids
 * @param options Options used to fetch the available studies
 * @return List of study ids
 */
public List<Integer> getStudyIds(List studiesNames, QueryOptions options) {
    Map<String, Integer> availableStudies = getStudyConfigurationManager().getStudies(options);
    return getStudyIds(studiesNames, availableStudies);
}
/**
 * Translates a list of study references into study ids.
 * Study names are replaced by their ids.
 * References starting with '!' are excluded.
 *
 * @param studiesNames List of study names or study ids. May be null
 * @param studies Map of available studies. See {@link StudyConfigurationManager#getStudies}
 * @return List of study ids. Empty if the given list is null
 */
public List<Integer> getStudyIds(List studiesNames, Map<String, Integer> studies) {
    if (studiesNames == null) {
        return Collections.emptyList();
    }
    List<Integer> resolvedIds = new ArrayList<>(studiesNames.size());
    for (Object studyReference : studiesNames) {
        // Negated references resolve to null and are left out of the result.
        Integer resolved = getStudyId(studyReference, true, studies);
        if (resolved == null) {
            continue;
        }
        resolvedIds.add(resolved);
    }
    return resolvedIds;
}
/**
 * Resolves a study reference into its study id, skipping negated references.
 *
 * @param studyObj Study reference (name or id)
 * @param options Options used to fetch the available studies
 * @return The study id, or null if the reference is negated
 */
public Integer getStudyId(Object studyObj, QueryOptions options) {
    final boolean skipNegated = true;
    return getStudyId(studyObj, options, skipNegated);
}
/**
 * Resolves a study reference into its study id.
 * Integers and numeric strings are returned directly, without looking at the available studies.
 * Any other reference is resolved against the studies obtained from the StudyConfigurationManager.
 *
 * @param studyObj Study reference (name or id)
 * @param options Options used to fetch the available studies
 * @param skipNegated If true, negated references resolve to null. Otherwise the leading '!' is dropped
 * @return The study id, or null if the reference is negated and skipNegated is true
 */
public Integer getStudyId(Object studyObj, QueryOptions options, boolean skipNegated) {
    if (studyObj instanceof Integer) {
        return (Integer) studyObj;
    }
    if (studyObj instanceof String && StringUtils.isNumeric((String) studyObj)) {
        return Integer.parseInt((String) studyObj);
    }
    return getStudyId(studyObj, skipNegated, getStudyConfigurationManager().getStudies(options));
}
/**
 * Resolves a study reference into its study id, validating it against the available studies.
 *
 * @param studyObj Study reference (name or id)
 * @param skipNegated If true, negated references resolve to null. Otherwise the leading '!' is dropped
 * @param studies Map of available studies, from study name to study id
 * @return The study id, or null if the reference is negated and skipNegated is true
 * @throws VariantQueryException if the study is not among the available studies
 */
public Integer getStudyId(Object studyObj, boolean skipNegated, Map<String, Integer> studies) {
    final Integer resolvedId;
    if (studyObj instanceof Integer) {
        resolvedId = (Integer) studyObj;
    } else {
        String reference = studyObj.toString();
        if (isNegated(reference)) {
            if (skipNegated) {
                // Negated studies are skipped
                return null;
            }
            reference = reference.substring(1);
        }
        if (StringUtils.isNumeric(reference)) {
            resolvedId = Integer.parseInt(reference);
        } else {
            Integer idByName = studies.get(reference);
            if (idByName == null) {
                throw VariantQueryException.studyNotFound(reference, studies.keySet());
            }
            resolvedId = idByName;
        }
    }
    // Ids given directly still have to belong to a known study.
    if (!studies.containsValue(resolvedId)) {
        throw VariantQueryException.studyNotFound(resolvedId, studies.keySet());
    }
    return resolvedId;
}
/**
 * Gets the default StudyConfiguration for a query.
 *
 * If the query defines the STUDIES param, the default study is the only non negated study in it.
 * Otherwise, the default study is the only study known by the StudyConfigurationManager.
 * In both cases, there is no default study unless exactly one candidate exists.
 *
 * @param query Query with the QueryParams
 * @param options Options used to resolve the study ids
 * @return The default StudyConfiguration, or null if it can not be determined
 */
public StudyConfiguration getDefaultStudyConfiguration(Query query, QueryOptions options) {
    if (isValidParam(query, VariantDBAdaptor.VariantQueryParams.STUDIES)) {
        String studiesValue = query.getString(VariantDBAdaptor.VariantQueryParams.STUDIES.key());
        // Resolving the ids also checks that the studies exist
        List<String> requestedStudies = splitValue(studiesValue, checkOperator(studiesValue));
        List<Integer> nonNegatedIds = getStudyIds(requestedStudies, options);
        if (nonNegatedIds.size() != 1) {
            return null;
        }
        return getStudyConfigurationManager().getStudyConfiguration(nonNegatedIds.get(0), null).first();
    }

    List<String> knownStudies = getStudyConfigurationManager().getStudyNames(null);
    if (knownStudies == null || knownStudies.size() != 1) {
        return null;
    }
    return getStudyConfigurationManager().getStudyConfiguration(knownStudies.get(0), null).first();
}
/**
 * Given a study reference (name or id) and a default study, returns the associated StudyConfiguration.
 *
 * An empty reference resolves to the default study. The default study is also reused, without
 * asking the StudyConfigurationManager, when the reference matches its id or its name.
 *
 * @param study Study reference (name or id)
 * @param defaultStudyConfiguration Default studyConfiguration. May be null
 * @return Associated StudyConfiguration
 * @throws VariantQueryException if the study does not exist
 */
public StudyConfiguration getStudyConfiguration(String study, StudyConfiguration defaultStudyConfiguration)
        throws VariantQueryException {
    if (StringUtils.isEmpty(study)) {
        if (defaultStudyConfiguration == null) {
            throw VariantQueryException.studyNotFound(study, getStudyConfigurationManager().getStudyNames(null));
        }
        return defaultStudyConfiguration;
    }

    if (StringUtils.isNumeric(study)) {
        int studyInt = Integer.parseInt(study);
        StudyConfiguration byId = defaultStudyConfiguration != null && studyInt == defaultStudyConfiguration.getStudyId()
                ? defaultStudyConfiguration
                : getStudyConfigurationManager().getStudyConfiguration(studyInt, null).first();
        if (byId == null) {
            throw VariantQueryException.studyNotFound(studyInt, getStudyConfigurationManager().getStudyNames(null));
        }
        return byId;
    }

    StudyConfiguration byName = defaultStudyConfiguration != null && defaultStudyConfiguration.getStudyName().equals(study)
            ? defaultStudyConfiguration
            : getStudyConfigurationManager().getStudyConfiguration(study, null).first();
    if (byName == null) {
        throw VariantQueryException.studyNotFound(study, getStudyConfigurationManager().getStudyNames(null));
    }
    return byName;
}
/**
 * Translates a list of file references into file ids.
 * References that resolve to null (see {@link #getFileId}) are left out of the result.
 *
 * @param files List of file references. May be null
 * @param skipNegated If true, negated references are excluded. Otherwise the leading '!' is dropped
 * @param defaultStudyConfiguration Study used to resolve file names without an explicit study. May be null
 * @return List of file ids. Empty if the given list is null or empty
 */
public List<Integer> getFileIds(List files, boolean skipNegated, StudyConfiguration defaultStudyConfiguration) {
    if (files == null || files.isEmpty()) {
        return Collections.emptyList();
    }
    List<Integer> resolvedIds = new ArrayList<>(files.size());
    for (Object fileReference : files) {
        Integer resolved = getFileId(fileReference, skipNegated, defaultStudyConfiguration);
        if (resolved == null) {
            continue;
        }
        resolvedIds.add(resolved);
    }
    return resolvedIds;
}
/**
 * Resolves a file reference into its file id.
 *
 * Accepted references:
 *  - A Number: its int value is returned.
 *  - {@code <study>:<file>}: the file name is looked up in the given study.
 *  - A numeric string: parsed as the file id.
 *  - A file name: looked up in the default study.
 *
 * @param fileObj File reference. May be null
 * @param skipNegated If true, negated references resolve to null. Otherwise the leading '!' is dropped
 * @param defaultStudyConfiguration Study used to resolve file names without an explicit study. May be null
 * @return The file id. Null if the reference is null, if it is negated and skipNegated is true,
 *         or if the file name is not registered in the study file ids
 * @throws VariantQueryException if the study does not exist, or if a file name is given without
 *         a study and there is no default study
 */
public Integer getFileId(Object fileObj, boolean skipNegated, StudyConfiguration defaultStudyConfiguration) {
    if (fileObj == null) {
        return null;
    }
    if (fileObj instanceof Number) {
        return ((Number) fileObj).intValue();
    }

    String fileReference = String.valueOf(fileObj);
    if (isNegated(fileReference)) {
        if (skipNegated) {
            // Negated files are skipped
            return null;
        }
        fileReference = fileReference.substring(1);
    }

    if (fileReference.contains(":")) {
        // Expected to be as <study>:<file>
        String[] studyAndFile = fileReference.split(":");
        QueryResult<StudyConfiguration> studyResult
                = getStudyConfigurationManager().getStudyConfiguration(studyAndFile[0], null);
        if (studyResult.getResult().isEmpty()) {
            throw VariantQueryException.studyNotFound(studyAndFile[0]);
        }
        return studyResult.first().getFileIds().get(studyAndFile[1]);
    }

    try {
        return Integer.parseInt(fileReference);
    } catch (NumberFormatException e) {
        // Not a file id: treat the reference as a file name of the default study.
        if (defaultStudyConfiguration == null) {
            List<String> studyNames = getStudyConfigurationManager().getStudyNames(null);
            throw new VariantQueryException("Unknown file \"" + fileReference + "\". "
                    + "Please, specify the study belonging."
                    + (studyNames == null ? "" : " Available studies: " + studyNames));
        }
        return defaultStudyConfiguration.getFileIds().get(fileReference);
    }
}
/**
 * Resolves a sample reference into its sample id.
 *
 * Accepted references:
 *  - A Number: its int value is returned.
 *  - A numeric string: parsed as the sample id.
 *  - {@code <study>:<sample>}: the sample name is looked up in the given study.
 *  - A sample name: looked up in the default study.
 *
 * The sample name is always checked against the resolved study, including the case where the
 * explicit study is the default study, so an unknown sample is reported with a
 * VariantQueryException instead of failing when unboxing a missing id.
 *
 * @param sampleObj Sample reference
 * @param defaultStudyConfiguration Study used to resolve sample names without an explicit study. May be null
 * @return The sample id
 * @throws VariantQueryException if the study or the sample does not exist, or if a sample name is
 *         given without a study and there is no default study
 */
public int getSampleId(Object sampleObj, StudyConfiguration defaultStudyConfiguration) {
    if (sampleObj instanceof Number) {
        return ((Number) sampleObj).intValue();
    }
    String sampleStr = sampleObj.toString();
    if (StringUtils.isNumeric(sampleStr)) {
        return Integer.parseInt(sampleStr);
    }

    final StudyConfiguration sc;
    final String studyName;
    final String sampleName;
    if (sampleStr.contains(":")) { //Expect to be as <study>:<sample>
        String[] split = sampleStr.split(":");
        studyName = split[0];
        sampleName = split[1];
        if (defaultStudyConfiguration != null && studyName.equals(defaultStudyConfiguration.getStudyName())) {
            sc = defaultStudyConfiguration;
        } else {
            QueryResult<StudyConfiguration> queryResult
                    = getStudyConfigurationManager().getStudyConfiguration(studyName, null);
            if (queryResult.getResult().isEmpty()) {
                throw VariantQueryException.studyNotFound(studyName);
            }
            sc = queryResult.first();
        }
    } else if (defaultStudyConfiguration != null) {
        sc = defaultStudyConfiguration;
        studyName = defaultStudyConfiguration.getStudyName();
        sampleName = sampleStr;
    } else {
        //Unable to identify that sample!
        List<String> studyNames = getStudyConfigurationManager().getStudyNames(null);
        throw VariantQueryException.missingStudyForSample(sampleStr, studyNames);
    }

    // Single existence check shared by every branch that resolves a sample name.
    if (!sc.getSampleIds().containsKey(sampleName)) {
        throw VariantQueryException.sampleNotFound(sampleName, studyName);
    }
    return sc.getSampleIds().get(sampleName);
}
/**
 * Gets the list of study ids to return.
 *
 * Nothing is returned if the STUDIES field is not among the returned fields.
 * Uses RETURNED_STUDIES if defined, accepting the special values {@link #NONE} and {@link #ALL}.
 * If missing, uses the non negated studies from STUDIES, or all the studies if none is left.
 * If missing, returns all the studies.
 *
 * @param query Query with the QueryParams
 * @param options Query options
 * @return List of study ids to return
 */
public List<Integer> getReturnedStudies(Query query, QueryOptions options) {
    if (!VariantField.getReturnedFields(options).contains(VariantField.STUDIES)) {
        return Collections.emptyList();
    }

    if (isValidParam(query, RETURNED_STUDIES)) {
        String returnedStudies = query.getString(VariantDBAdaptor.VariantQueryParams.RETURNED_STUDIES.key());
        if (NONE.equals(returnedStudies)) {
            return Collections.emptyList();
        }
        if (ALL.equals(returnedStudies)) {
            return getStudyConfigurationManager().getStudyIds(options);
        }
        return getStudyIds(query.getAsList(VariantDBAdaptor.VariantQueryParams.RETURNED_STUDIES.key()), options);
    }

    if (isValidParam(query, STUDIES)) {
        String studies = query.getString(VariantDBAdaptor.VariantQueryParams.STUDIES.key());
        List<Integer> nonNegatedIds = getStudyIds(splitValue(studies, checkOperator(studies)), options);
        // if empty, all the studies
        return nonNegatedIds.isEmpty() ? getStudyConfigurationManager().getStudyIds(options) : nonNegatedIds;
    }

    return getStudyConfigurationManager().getStudyIds(options);
}
/**
 * Gets the list of file ids to return.
 *
 * Nothing is returned if the STUDIES_FILES field is not among the given fields.
 * Uses {@link VariantDBAdaptor.VariantQueryParams#RETURNED_FILES} if defined.
 * If missing, gets the non negated values from {@link VariantDBAdaptor.VariantQueryParams#FILES}.
 * If missing, gets the files containing the samples at {@link VariantDBAdaptor.VariantQueryParams#SAMPLES}.
 *
 * A null result means undefined returned files, i.e. return ALL files.
 * An empty list means return NONE.
 *
 * @param query Query with the QueryParams
 * @param options Query options
 * @param fields Returned fields
 * @return List of fileIds to return. May be null
 */
public List<Integer> getReturnedFiles(Query query, QueryOptions options, Set<VariantField> fields) {
    if (!fields.contains(VariantField.STUDIES_FILES)) {
        return Collections.emptyList();
    }

    if (query.containsKey(RETURNED_FILES.key())) {
        String returnedFilesValue = query.getString(RETURNED_FILES.key());
        if (returnedFilesValue.equals(ALL)) {
            return null;
        }
        if (returnedFilesValue.equals(NONE)) {
            return Collections.emptyList();
        }
        return query.getAsIntegerList(RETURNED_FILES.key());
    }

    if (query.containsKey(FILES.key())) {
        String filesValue = query.getString(FILES.key());
        List<Integer> nonNegatedFiles = new ArrayList<>();
        for (String file : splitValue(filesValue, checkOperator(filesValue))) {
            // Discard negated
            if (!isNegated(file)) {
                nonNegatedFiles.add(Integer.parseInt(file));
            }
        }
        return nonNegatedFiles.isEmpty() ? null : nonNegatedFiles;
    }

    // No files defined: collect the files that contain any of the queried samples.
    List<String> sampleNames = query.getAsStringList(VariantDBAdaptor.VariantQueryParams.SAMPLES.key());
    StudyConfiguration studyConfiguration = getDefaultStudyConfiguration(query, options);
    Set<Integer> filesWithSamples = new LinkedHashSet<>();
    for (String sample : sampleNames) {
        Integer sampleId = getSampleId(sample, studyConfiguration);
        studyConfiguration.getSamplesInFiles().forEach((fileId, samplesInFile) -> {
            if (samplesInFile.contains(sampleId)) {
                filesWithSamples.add(fileId);
            }
        });
    }
    if (filesWithSamples.isEmpty()) {
        return null;
    }
    return new ArrayList<>(filesWithSamples);
}
/**
 * Tells whether the query restricts the samples to return, either through the list of
 * returned samples or through at least one non negated file in the FILES param.
 *
 * @param query Query with the QueryParams
 * @param returnedFields Returned fields
 * @return true if the returned samples are defined by the query
 */
public static boolean isReturnedSamplesDefined(Query query, Set<VariantField> returnedFields) {
    if (getReturnedSamplesList(query, returnedFields) != null) {
        return true;
    }
    if (!isValidParam(query, FILES)) {
        return false;
    }
    String filesValue = query.getString(FILES.key());
    for (String file : splitValue(filesValue, checkOperator(filesValue))) {
        // Negated files are discarded
        if (!isNegated(file)) {
            return true;
        }
    }
    return false;
}
/**
 * Builds the map from study name to the list of returned sample names, using null options.
 * Studies are fetched from the StudyConfigurationManager.
 *
 * @param query Query with the QueryParams
 * @return Map from study name to sample names
 */
public Map<String, List<String>> getSamplesMetadata(Query query) {
    List<Integer> returnedStudies = getReturnedStudies(query, null);
    Function<Integer, StudyConfiguration> studyProvider = studyId -> {
        return getStudyConfigurationManager().getStudyConfiguration(studyId, null).first();
    };
    return getReturnedSamples(query, null, returnedStudies, studyProvider,
            (sc, sampleName) -> sampleName, sc -> sc.getStudyName());
}
/**
 * Builds the map from study name to the list of returned sample names for a single, given study.
 *
 * @param query Query with the QueryParams
 * @param studyConfiguration The only study to take into account
 * @return Map from study name to sample names
 */
public static Map<String, List<String>> getSamplesMetadata(Query query, StudyConfiguration studyConfiguration) {
    List<Integer> onlyStudy = Collections.singletonList(studyConfiguration.getStudyId());
    // Whatever the id, the provider always answers with the given study.
    Function<Integer, StudyConfiguration> studyProvider = ignoredId -> studyConfiguration;
    return getReturnedSamples(query, null, onlyStudy, studyProvider,
            (sc, sampleName) -> sampleName, sc -> sc.getStudyName());
}
/**
 * Builds the map from study name to the list of returned sample names, if requested by the query.
 *
 * @param query Query with the QueryParams
 * @param options Query options
 * @return Null if the SAMPLES_METADATA param is not enabled. An empty map if the STUDIES field is
 *         not among the returned fields. Otherwise, the map from study name to sample names
 */
public Map<String, List<String>> getSamplesMetadata(Query query, QueryOptions options) {
    if (!query.getBoolean(SAMPLES_METADATA.key(), false)) {
        return null;
    }
    if (!VariantField.getReturnedFields(options).contains(VariantField.STUDIES)) {
        return Collections.emptyMap();
    }
    List<Integer> returnedStudies = getReturnedStudies(query, options);
    Function<Integer, StudyConfiguration> studyProvider = studyId -> {
        return getStudyConfigurationManager().getStudyConfiguration(studyId, options).first();
    };
    return getReturnedSamples(query, options, returnedStudies, studyProvider,
            (sc, sampleName) -> sampleName, sc -> sc.getStudyName());
}
/**
 * Gets the returned sample ids for each returned study.
 * Studies are fetched from the StudyConfigurationManager.
 *
 * @param query Query with the QueryParams
 * @param options Query options
 * @return Map from study id to the list of returned sample ids
 */
public Map<Integer, List<Integer>> getReturnedSamples(Query query, QueryOptions options) {
    List<Integer> returnedStudies = getReturnedStudies(query, options);
    Function<Integer, StudyConfiguration> studyProvider = studyId -> {
        return getStudyConfigurationManager().getStudyConfiguration(studyId, options).first();
    };
    return getReturnedSamples(query, options, returnedStudies, studyProvider);
}
/**
 * Gets the returned sample ids for each of the given studies.
 *
 * @param query Query with the QueryParams
 * @param options Query options
 * @param studies Studies to take into account
 * @return Map from study id to the list of returned sample ids
 */
public static Map<Integer, List<Integer>> getReturnedSamples(Query query, QueryOptions options,
                                                             Collection<StudyConfiguration> studies) {
    // Index the studies by id, so they can be provided on demand.
    Map<Integer, StudyConfiguration> studiesById = studies.stream()
            .collect(Collectors.toMap(sc -> sc.getStudyId(), sc -> sc));
    return getReturnedSamples(query, options, studiesById.keySet(), studiesById::get);
}
/**
 * Gets the returned sample ids for each of the given studies.
 *
 * @param query Query with the QueryParams
 * @param options Query options
 * @param studyIds Ids of the studies to take into account
 * @param studyProvider Provides the StudyConfiguration of a study id
 * @return Map from study id to the list of returned sample ids
 */
public static Map<Integer, List<Integer>> getReturnedSamples(Query query, QueryOptions options, Collection<Integer> studyIds,
                                                             Function<Integer, StudyConfiguration> studyProvider) {
    BiFunction<StudyConfiguration, String, Integer> sampleNameToId = (sc, sampleName) -> sc.getSampleIds().get(sampleName);
    Function<StudyConfiguration, Integer> studyToId = sc -> sc.getStudyId();
    return getReturnedSamples(query, options, studyIds, studyProvider, sampleNameToId, studyToId);
}
/**
 * Gets the returned samples for each of the given studies.
 *
 * The samples of a study are taken from the returned samples position of the study when the
 * returned samples are defined, when all the samples are requested, or when no files are
 * defined. Otherwise, they are the samples contained in the non negated files of the FILES param.
 *
 * @param query Query with the QueryParams
 * @param options Query options
 * @param studyIds Ids of the studies to take into account
 * @param studyProvider Provides the StudyConfiguration of a study id. Studies resolved to null are skipped
 * @param getSample Converts a sample name of a study into the returned sample representation
 * @param getStudyId Converts a study into the key of the returned map
 * @param <T> Type used to represent both studies and samples in the result
 * @return Map from study to the list of its returned samples
 */
private static <T> Map<T, List<T>> getReturnedSamples(
        Query query, QueryOptions options, Collection<Integer> studyIds,
        Function<Integer, StudyConfiguration> studyProvider,
        BiFunction<StudyConfiguration, String, T> getSample, Function<StudyConfiguration, T> getStudyId) {
    List<Integer> fileIds = null;
    if (isValidParam(query, FILES)) {
        String filesValue = query.getString(FILES.key());
        fileIds = new ArrayList<>();
        for (String file : splitValue(filesValue, checkOperator(filesValue))) {
            // Discard negated
            if (!isNegated(file)) {
                fileIds.add(Integer.parseInt(file));
            }
        }
    }

    List<String> requestedSamples = getReturnedSamplesList(query, options);
    LinkedHashSet<String> requestedSamplesSet = null;
    if (requestedSamples != null) {
        requestedSamplesSet = new LinkedHashSet<>(requestedSamples);
    }
    boolean returnAllSamples = query.getString(VariantDBAdaptor.VariantQueryParams.RETURNED_SAMPLES.key()).equals(ALL);

    Map<T, List<T>> samplesByStudy = new HashMap<>(studyIds.size());
    for (Integer studyId : studyIds) {
        StudyConfiguration sc = studyProvider.apply(studyId);
        if (sc == null) {
            continue;
        }

        List<T> studySamples;
        if (requestedSamplesSet != null || returnAllSamples || fileIds == null) {
            LinkedHashMap<String, Integer> samplesPosition
                    = StudyConfiguration.getReturnedSamplesPosition(sc, requestedSamplesSet);
            @SuppressWarnings("unchecked")
            T[] slots = (T[]) new Object[samplesPosition.size()];
            // Fixed-size list: each sample is stored at its own position.
            studySamples = Arrays.asList(slots);
            for (Map.Entry<String, Integer> samplePosition : samplesPosition.entrySet()) {
                studySamples.set(samplePosition.getValue(), getSample.apply(sc, samplePosition.getKey()));
            }
        } else {
            Set<T> samplesInFiles = new LinkedHashSet<>();
            for (Integer fileId : fileIds) {
                LinkedHashSet<Integer> sampleIds = sc.getSamplesInFiles().get(fileId);
                if (sampleIds == null) {
                    continue;
                }
                for (Integer sampleId : sampleIds) {
                    samplesInFiles.add(getSample.apply(sc, sc.getSampleIds().inverse().get(sampleId)));
                }
            }
            studySamples = new ArrayList<T>(samplesInFiles);
        }
        samplesByStudy.put(getStudyId.apply(sc), studySamples);
    }
    return samplesByStudy;
}
/**
 * Gets the list of returned samples, taking the returned fields from the given options.
 *
 * @param query Query with the QueryParams
 * @param options Query options defining the returned fields
 * @return List of samples to return. May be null
 */
public static List<String> getReturnedSamplesList(Query query, QueryOptions options) {
    Set<VariantField> returnedFields = VariantField.getReturnedFields(options);
    return getReturnedSamplesList(query, returnedFields);
}
/**
 * Gets the list of returned samples.
 * No samples are returned if the STUDIES_SAMPLES_DATA field is not among the returned fields.
 *
 * @param query Query with the QueryParams
 * @param returnedFields Returned fields
 * @return List of samples to return. May be null
 */
public static List<String> getReturnedSamplesList(Query query, Set<VariantField> returnedFields) {
    if (!returnedFields.contains(VariantField.STUDIES_SAMPLES_DATA)) {
        return Collections.emptyList();
    }
    return getReturnedSamplesList(query);
}
/**
 * Gets the list of returned samples.
 *
 * Uses RETURNED_SAMPLES if defined, accepting the special values {@link #ALL} and {@link #NONE}.
 * If missing, uses the samples from SAMPLES.
 * Samples given as {@code <study>:<sample>} are returned without the study name.
 *
 * A null result means undefined returned samples, i.e. return ALL samples.
 * An empty list means return NONE.
 *
 * @param query Query with the QueryParams
 * @return List of samples to return. May be null
 */
public static List<String> getReturnedSamplesList(Query query) {
    List<String> samples;
    if (isValidParam(query, RETURNED_SAMPLES)) {
        String samplesString = query.getString(VariantDBAdaptor.VariantQueryParams.RETURNED_SAMPLES.key());
        if (samplesString.equals(ALL)) {
            samples = null; // Undefined. All by default
        } else if (samplesString.equals(NONE)) {
            samples = Collections.emptyList();
        } else {
            samples = query.getAsStringList(VariantDBAdaptor.VariantQueryParams.RETURNED_SAMPLES.key());
        }
    } else if (isValidParam(query, SAMPLES)) {
        samples = query.getAsStringList(VariantDBAdaptor.VariantQueryParams.SAMPLES.key());
    } else {
        samples = null;
    }
    if (samples != null) {
        // Remove the studyName, if any. Streams do not modify their source, so the
        // collected list has to replace the original one.
        samples = samples.stream()
                .map(s -> s.contains(":") ? s.split(":")[1] : s)
                .collect(Collectors.toList());
    }
    return samples;
}
/**
 * Parses the genotype filter.
 *
 * Each element of the filter has the form {@code sample:gt1,gt2}. Elements are separated
 * either by AND (;) or by OR (,), but both separators can not be mixed.
 *
 * @param sampleGenotypes Genotypes filter value
 * @param map Initialized map to be filled with the sample to list of genotypes
 * @return QueryOperation between samples. Null if no operation is found
 */
public static QueryOperation parseGenotypeFilter(String sampleGenotypes, Map<Object, List<String>> map) {
    // The trailing '.' closes the last element of the filter.
    Matcher matcher = GENOTYPE_FILTER_PATTERN.matcher(sampleGenotypes + '.');
    QueryOperation operation = null;
    while (matcher.find()) {
        String genotypes = matcher.group("gts");
        String sampleName = matcher.group("sample");
        String separator = matcher.group("op");
        map.put(sampleName, Arrays.asList(genotypes.split(",")));

        final QueryOperation found;
        if (AND.equals(separator)) {
            found = QueryOperation.AND;
        } else if (OR.equals(separator)) {
            found = QueryOperation.OR;
        } else {
            found = null;
        }
        if (found == null) {
            continue;
        }
        if (operation != null && operation != found) {
            throw VariantQueryException.malformedParam(GENOTYPE, sampleGenotypes,
                    "Unable to mix AND (" + AND + ") and OR (" + OR + ") in the same query.");
        }
        operation = found;
    }
    return operation;
}
/**
 * Finds the cohortId from a cohort reference, checking that it exists in the study.
 *
 * @param cohort Cohort reference (name or id)
 * @param studyConfiguration Study where the cohort is looked up
 * @return Cohort id
 * @throws VariantQueryException if the cohort does not exist
 */
public int getCohortId(String cohort, StudyConfiguration studyConfiguration) throws VariantQueryException {
    if (StringUtils.isNumeric(cohort)) {
        int idFromReference = Integer.parseInt(cohort);
        if (!studyConfiguration.getCohortIds().containsValue(idFromReference)) {
            throw VariantQueryException.cohortNotFound(idFromReference, studyConfiguration.getStudyId(),
                    studyConfiguration.getCohortIds().keySet());
        }
        return idFromReference;
    }

    Integer idFromName = studyConfiguration.getCohortIds().get(cohort);
    if (idFromName == null) {
        throw VariantQueryException.cohortNotFound(cohort, studyConfiguration.getStudyId(),
                studyConfiguration.getCohortIds().keySet());
    }
    return idFromName;
}
/**
 * Gets the region of a gene, as reported by the CellBase gene client, extended by
 * GENE_EXTRA_REGION positions on each side. The start never goes below 0.
 *
 * @param geneStr Gene to look for
 * @return The extended region of the gene, or null if the gene is not found
 * @throws UncheckedIOException if the CellBase client fails with an IOException
 */
public Region getGeneRegion(String geneStr) {
    QueryOptions params = new QueryOptions(QueryOptions.INCLUDE, "name,chromosome,start,end");
    final Gene gene;
    try {
        gene = adaptor.getCellBaseClient().getGeneClient().get(Collections.singletonList(geneStr), params).firstResult();
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    if (gene == null) {
        return null;
    }
    int extendedStart = Math.max(0, gene.getStart() - GENE_EXTRA_REGION);
    int extendedEnd = gene.getEnd() + GENE_EXTRA_REGION;
    return new Region(gene.getChromosome(), extendedStart, extendedEnd);
}
/**
 * Gets the names of the genes returned by the CellBase gene client for the given values.
 *
 * @param goValues Values to query in the CellBase gene client
 * @return Set of gene names
 * @throws UncheckedIOException if the CellBase client fails with an IOException
 */
public Set<String> getGenesByGo(List<String> goValues) {
    Set<String> geneNames = new HashSet<>();
    QueryOptions params = new QueryOptions(QueryOptions.INCLUDE, "name,chromosome,start,end");
    final List<QueryResult<Gene>> responses;
    try {
        responses = adaptor.getCellBaseClient().getGeneClient().get(goValues, params)
                .getResponse();
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    for (QueryResult<Gene> response : responses) {
        for (Gene gene : response.getResult()) {
            geneNames.add(gene.getName());
        }
    }
    return geneNames;
}
/**
 * Gets the names of the genes found by the CellBase gene client when searching by expression tissue.
 * Only the part of each value before the first ':' is used in the search.
 *
 * @param expressionValues Expression values to search
 * @return Set of gene names
 * @throws UncheckedIOException if the CellBase client fails with an IOException
 */
public Set<String> getGenesByExpression(List<String> expressionValues) {
    Set<String> geneNames = new HashSet<>();
    QueryOptions params = new QueryOptions(QueryOptions.INCLUDE, "name,chromosome,start,end");

    // The number of results for each expression value may be huge. Query one by one
    for (String expressionValue : expressionValues) {
        String tissue = expressionValue.split(":")[0];
        // TODO: Add expression value {UP, DOWN}. See https://github.com/opencb/cellbase/issues/245
        Query cellbaseQuery = new Query(GeneDBAdaptor.QueryParams.ANNOTATION_EXPRESSION_TISSUE.key(), tissue);
        final List<QueryResult<Gene>> responses;
        try {
            responses = adaptor.getCellBaseClient().getGeneClient().search(cellbaseQuery, params)
                    .getResponse();
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
        for (QueryResult<Gene> response : responses) {
            for (Gene gene : response.getResult()) {
                geneNames.add(gene.getName());
            }
        }
    }
    return geneNames;
}
/**
 * Parses a consequence type into its SO accession number.
 *
 * Accepted values:
 *  - An SO accession, with the 'SO:' prefix (case insensitive) or as a plain number.
 *  - An SO term name.
 *
 * @param so Consequence type to parse
 * @return The SO accession number
 * @throws VariantQueryException if the value is neither a known SO accession nor a known SO term
 */
public static int parseConsequenceType(String so) {
    boolean hasSoPrefix = so.toUpperCase().startsWith("SO:");

    if (!hasSoPrefix && !StringUtils.isNumeric(so)) {
        // Not an accession: it has to be a known term name.
        if (!ConsequenceTypeMappings.termToAccession.containsKey(so)) {
            throw VariantQueryException.malformedParam(VariantDBAdaptor.VariantQueryParams.ANNOT_CONSEQUENCE_TYPE, so,
                    "Not a valid Accession term");
        }
        return ConsequenceTypeMappings.termToAccession.get(so);
    }

    String accessionNumber = hasSoPrefix ? so.substring("SO:".length()) : so;
    final int soAccession;
    try {
        soAccession = Integer.parseInt(accessionNumber);
    } catch (NumberFormatException e) {
        throw VariantQueryException.malformedParam(VariantDBAdaptor.VariantQueryParams.ANNOT_CONSEQUENCE_TYPE, so,
                "Not a valid SO number");
    }
    if (!ConsequenceTypeMappings.accessionToTerm.containsKey(soAccession)) {
        throw VariantQueryException.malformedParam(VariantDBAdaptor.VariantQueryParams.ANNOT_CONSEQUENCE_TYPE, so,
                "Not a valid SO number");
    }
    return soAccession;
}
/**
 * Checks that the filter value contains only one type of operator.
 *
 * @param value Filter value to check
 * @return The used operator. Null if no operator is used
 * @throws VariantQueryException if the value contains both the AND and the OR operators
 */
public static QueryOperation checkOperator(String value) throws VariantQueryException {
    boolean hasAnd = value.contains(AND);
    boolean hasOr = value.contains(OR);
    if (hasAnd && hasOr) {
        throw new VariantQueryException("Can't merge in the same query filter, AND and OR operators");
    }
    if (hasAnd) {
        return QueryOperation.AND;
    }
    return hasOr ? QueryOperation.OR : null;
}
/**
 * Splits the string with the separator of the specified operation.
 *
 * @param value Value to split. May be null
 * @param operation Operation that defines the split delimiter. If null, the value is not split
 * @return List of values, without the delimiter. Empty if the value is null or empty
 */
public static List<String> splitValue(String value, QueryOperation operation) {
    if (value == null || value.isEmpty()) {
        return Collections.emptyList();
    }
    if (operation == null) {
        return Collections.singletonList(value);
    }
    // Each operation knows its own separator.
    return Arrays.asList(value.split(operation.separator()));
}
/**
 * Splits a filter value into its key, operator and filter, using {@link #OPERATION_PATTERN}.
 *
 * @param value Filter value to split
 * @return Array with the trimmed key, operator and filter, in that order. If the value does
 *         not match the pattern, the array contains a null key, the "=" operator and the value as is
 */
public static String[] splitOperator(String value) {
    Matcher matcher = OPERATION_PATTERN.matcher(value);
    if (!matcher.find()) {
        return new String[]{null, "=", value};
    }
    String key = matcher.group(1).trim();
    String operator = matcher.group(2).trim();
    String filter = matcher.group(3).trim();
    return new String[]{key, operator, filter};
}
}