/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.hadoop.variant.index.phoenix;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.phoenix.parse.HintNode;
import org.apache.phoenix.util.SchemaUtil;
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.VariantType;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptorUtils;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptorUtils.*;
import org.opencb.opencga.storage.core.variant.adaptors.VariantField;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException;
import org.opencb.opencga.storage.hadoop.variant.GenomeHelper;
import org.opencb.opencga.storage.hadoop.variant.index.VariantTableStudyRow;
import org.opencb.opencga.storage.hadoop.variant.index.phoenix.VariantPhoenixHelper.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
import static org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor.VariantQueryParams;
import static org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor.VariantQueryParams.*;
import static org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptorUtils.*;
import static org.opencb.opencga.storage.hadoop.variant.index.VariantTableStudyRow.*;
import static org.opencb.opencga.storage.hadoop.variant.index.phoenix.PhoenixHelper.Column;
import static org.opencb.opencga.storage.hadoop.variant.index.phoenix.VariantPhoenixHelper.*;
/**
* Created on 16/12/15.
*
* @author Jacobo Coll <jacobo167@gmail.com>
*/
public class VariantSqlQueryParser {
public static final String COUNT = "count";
private final GenomeHelper genomeHelper;
private final String variantTable;
private final Logger logger = LoggerFactory.getLogger(VariantSqlQueryParser.class);
private final VariantDBAdaptorUtils utils;
private final boolean clientSideSkip;
private static final Map<String, String> SQL_OPERATOR;
static {
SQL_OPERATOR = new HashMap<>();
SQL_OPERATOR.put("==", "=");
SQL_OPERATOR.put("=~", "LIKE");
SQL_OPERATOR.put("~", "LIKE");
SQL_OPERATOR.put("!", "!=");
}
public VariantSqlQueryParser(GenomeHelper genomeHelper, String variantTable, VariantDBAdaptorUtils utils, boolean clientSideSkip) {
this.genomeHelper = genomeHelper;
this.variantTable = variantTable;
this.utils = utils;
this.clientSideSkip = clientSideSkip;
}
public String parse(Query query, QueryOptions options) {
StringBuilder sb = new StringBuilder("SELECT ");
try {
Set<Column> dynamicColumns = new HashSet<>();
List<String> regionFilters = getRegionFilters(query);
List<String> filters = getOtherFilters(query, options, dynamicColumns);
if (filters.isEmpty()) {
// Only region filters. Hint no index usage
sb.append("/*+ ").append(HintNode.Hint.NO_INDEX.toString()).append(" */ ");
}
appendProjectedColumns(sb, query, options);
appendFromStatement(sb, dynamicColumns);
appendWhereStatement(sb, regionFilters, filters);
} catch (VariantQueryException e) {
e.setQuery(query);
throw e;
}
if (options.getBoolean(QueryOptions.SORT)) {
sb.append(" ORDER BY ").append(VariantColumn.CHROMOSOME.column()).append(",").append(VariantColumn.POSITION.column());
String order = options.getString(QueryOptions.ORDER, QueryOptions.ASCENDING);
if (order.equalsIgnoreCase(QueryOptions.ASCENDING) || order.equalsIgnoreCase("ASC")) {
sb.append(" ASC ");
} else {
sb.append(" DESC ");
}
}
if (clientSideSkip) {
int skip = Math.max(0, options.getInt(QueryOptions.SKIP));
if (options.getInt(QueryOptions.LIMIT) > 0) {
sb.append(" LIMIT ").append(skip + options.getInt(QueryOptions.LIMIT));
}
} else {
if (options.getInt(QueryOptions.LIMIT) > 0) {
sb.append(" LIMIT ").append(options.getInt(QueryOptions.LIMIT));
}
if (options.getInt(QueryOptions.SKIP) > 0) {
sb.append(" OFFSET ").append(options.getInt(QueryOptions.SKIP));
}
}
return sb.toString();
}
public VariantDBAdaptorUtils getUtils() {
return utils;
}
/**
* Select only the required columns.
*
* Uses the params:
* {@link VariantQueryParams#RETURNED_STUDIES}
* {@link VariantQueryParams#RETURNED_SAMPLES}
* {@link VariantQueryParams#RETURNED_FILES}
* {@link VariantQueryParams#UNKNOWN_GENOTYPE}
*
* @param sb SQLStringBuilder
* @param query Query to parse
* @param options other options
* @return String builder
*/
protected StringBuilder appendProjectedColumns(StringBuilder sb, Query query, QueryOptions options) {
if (options.getBoolean(COUNT)) {
return sb.append(" COUNT(*) ");
} else {
Set<VariantField> returnedFields = VariantField.getReturnedFields(options);
List<Integer> studyIds = utils.getStudyIds(options.getAsList(RETURNED_STUDIES.key()), options);
if (studyIds == null || studyIds.isEmpty()) {
studyIds = utils.getStudyIds(options);
}
sb.append(VariantColumn.CHROMOSOME).append(',')
.append(VariantColumn.POSITION).append(',')
.append(VariantColumn.REFERENCE).append(',')
.append(VariantColumn.ALTERNATE).append(',')
.append(VariantColumn.TYPE);
if (returnedFields.contains(VariantField.STUDIES)) {
for (Integer studyId : studyIds) {
List<String> studyColumns = STUDY_COLUMNS;
// if (returnedFields.contains(SAMPLES_FIELD)) {
// studyColumns = STUDY_COLUMNS;
// } else {
// // If samples are not required, do not fetch all the fields
// studyColumns = Collections.singletonList(HOM_REF);
// }
for (String studyColumn : studyColumns) {
sb.append(",\"").append(buildColumnKey(studyId, studyColumn)).append('"');
}
if (returnedFields.contains(VariantField.STUDIES_STATS)) {
StudyConfiguration studyConfiguration = utils.getStudyConfigurationManager()
.getStudyConfiguration(studyId, null).first();
for (Integer cohortId : studyConfiguration.getCalculatedStats()) {
Column statsColumn = getStatsColumn(studyId, cohortId);
sb.append(",\"").append(statsColumn.column()).append('"');
}
}
}
}
if (returnedFields.contains(VariantField.ANNOTATION)) {
sb.append(',').append(VariantColumn.FULL_ANNOTATION);
}
return sb;
}
}
protected void appendFromStatement(StringBuilder sb, Set<Column> dynamicColumns) {
sb.append(" FROM ").append(SchemaUtil.getEscapedFullTableName(variantTable));
if (!dynamicColumns.isEmpty()) {
sb.append(dynamicColumns.stream()
.map(column -> SchemaUtil.getEscapedFullColumnName(column.column()) + " " + column.sqlType())
.collect(Collectors.joining(",", " ( ", " ) "))
);
}
}
protected StringBuilder appendWhereStatement(StringBuilder sb, List<String> regionFilters, List<String> filters) {
if (!regionFilters.isEmpty() || !filters.isEmpty()) {
sb.append(" WHERE");
}
appendFilters(sb, regionFilters, "OR");
if (!filters.isEmpty() && !regionFilters.isEmpty()) {
sb.append(" AND");
}
appendFilters(sb, filters, "AND");
return sb;
}
protected String appendFilters(List<String> filters, String delimiter) {
return appendFilters(new StringBuilder(), filters, delimiter).toString();
}
protected StringBuilder appendFilters(StringBuilder sb, List<String> filters, String delimiter) {
delimiter = " " + delimiter + " ";
if (!filters.isEmpty()) {
sb.append(filters.stream().collect(Collectors.joining(delimiter, " ( ", " )")));
}
return sb;
}
/**
* Transform QueryParams that are inclusive.
*
* A variant will pass this filters if matches with ANY of this filters.
*
* {@link VariantQueryParams#REGION}
* {@link VariantQueryParams#CHROMOSOME}
*
* Using annotation:
* {@link VariantQueryParams#ID}
* {@link VariantQueryParams#GENE}
* {@link VariantQueryParams#ANNOT_XREF}
*
* @param query Query to parse
* @return List of region filters
*/
protected List<String> getRegionFilters(Query query) {
List<String> regionFilters = new LinkedList<>();
if (isValidParam(query, REGION)) {
List<Region> regions = Region.parseRegions(query.getString(REGION.key()), true);
for (Region region : regions) {
regionFilters.add(getRegionFilter(region));
}
}
addQueryFilter(query, CHROMOSOME, VariantColumn.CHROMOSOME, regionFilters, Region::normalizeChromosome);
// addQueryFilter(query, ID, VariantColumn.XREFS, regionFilters);
List<Variant> variants = new ArrayList<>();
if (isValidParam(query, ID)) {
for (String id : query.getAsStringList(ID.key())) {
Variant variant = toVariant(id);
if (variant == null) {
regionFilters.add(buildFilter(VariantColumn.XREFS, "=", id));
} else {
variants.add(variant);
// List<String> subFilters = new ArrayList<>(4);
// subFilters.add(buildFilter(VariantColumn.CHROMOSOME, "=", variant.getChromosome()));
// subFilters.add(buildFilter(VariantColumn.POSITION, "=", variant.getStart().toString()));
// subFilters.add(buildFilter(VariantColumn.REFERENCE, "=", variant.getReference()));
// subFilters.add(buildFilter(VariantColumn.ALTERNATE, "=", varian t.getAlternate()));
// regionFilters.add(appendFilters(subFilters, QueryOperation.AND.toString()));
}
}
}
// TODO: Ask cellbase for gene region?
// addQueryFilter(query, GENE, VariantColumn.GENES, regionFilters);
List<String> genes = new ArrayList<>();
if (isValidParam(query, GENE)) {
for (String gene : query.getAsStringList(GENE.key())) {
genes.add(gene);
Region region = utils.getGeneRegion(gene);
if (region != null) {
regionFilters.add(getRegionFilter(region));
} else {
regionFilters.add(getVoidFilter());
}
}
}
// addQueryFilter(query, ANNOT_XREF, VariantColumn.XREFS, regionFilters);
if (isValidParam(query, ANNOT_XREF)) {
List<String> xrefs = query.getAsStringList(VariantQueryParams.ANNOT_XREF.key());
List<String> otherXrefs = new ArrayList<>();
for (String value : xrefs) {
Variant variant = toVariant(value);
if (variant != null) {
variants.add(variant);
} else {
if (isVariantAccession(value) || isClinicalAccession(value) || isGeneAccession(value)) {
otherXrefs.add(value);
} else {
genes.add(value);
}
regionFilters.add(buildFilter(VariantColumn.XREFS, "=", value));
}
}
}
if (!variants.isEmpty()) {
regionFilters.add(getVariantFilter(variants));
}
if (regionFilters.isEmpty()) {
// chromosome != _METADATA
regionFilters.add(VariantColumn.CHROMOSOME + " != '" + genomeHelper.getMetaRowKeyString() + "'");
}
return regionFilters;
}
private String getVariantFilter(List<Variant> variants) {
StringBuilder sb = new StringBuilder().append("(")
.append(VariantColumn.CHROMOSOME).append(", ")
.append(VariantColumn.POSITION).append(", ")
.append(VariantColumn.REFERENCE).append(", ")
.append(VariantColumn.ALTERNATE).append(") IN (");
Iterator<Variant> iterator = variants.iterator();
while (iterator.hasNext()) {
Variant variant = iterator.next();
sb.append("('").append(variant.getChromosome()).append("', ")
.append(variant.getStart()).append(", ")
.append("'").append(variant.getReference()).append("', ")
.append("'").append(variant.getAlternate()).append("') ");
if (iterator.hasNext()) {
sb.append(",");
}
}
sb.append(")");
return sb.toString();
}
// private String getRegionFilter(Region region) {
// if (region.getStart() == region.getEnd()) {
// return String.format("(%s,%s) = ('%s',%s)",
// VariantColumn.CHROMOSOME,
// VariantColumn.POSITION,
// region.getChromosome(), region.getStart());
// } else {
// return String.format("(%s,%s) BETWEEN ('%s',%s) AND ('%s',%s)",
// VariantColumn.CHROMOSOME,
// VariantColumn.POSITION,
// region.getChromosome(), region.getStart(),
// region.getChromosome(), region.getEnd());
// }
// }
private String getRegionFilter(Region region) {
List<String> subFilters = new ArrayList<>(3);
subFilters.add(buildFilter(VariantColumn.CHROMOSOME, "=", region.getChromosome()));
if (region.getStart() > 1) {
subFilters.add(buildFilter(VariantColumn.POSITION, ">=", region.getStart()));
}
if (region.getEnd() < Integer.MAX_VALUE) {
subFilters.add(buildFilter(VariantColumn.POSITION, "<=", region.getEnd()));
}
return appendFilters(subFilters, QueryOperation.AND.toString());
}
/**
* Transform QueryParams that are exclusive.
*
* A variant will pass this filters if matches with ALL of this filters.
*
* Variant filters:
* {@link VariantQueryParams#REFERENCE}
* {@link VariantQueryParams#ALTERNATE}
* {@link VariantQueryParams#TYPE}
* {@link VariantQueryParams#STUDIES}
* {@link VariantQueryParams#FILES}
* {@link VariantQueryParams#COHORTS}
* {@link VariantQueryParams#GENOTYPE}
*
* Annotation filters:
* {@link VariantQueryParams#ANNOTATION_EXISTS}
* {@link VariantQueryParams#ANNOT_CONSEQUENCE_TYPE}
* {@link VariantQueryParams#ANNOT_BIOTYPE}
* {@link VariantQueryParams#ANNOT_POLYPHEN}
* {@link VariantQueryParams#ANNOT_SIFT}
* {@link VariantQueryParams#ANNOT_CONSERVATION}
* {@link VariantQueryParams#ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY}
* {@link VariantQueryParams#ANNOT_POPULATION_ALTERNATE_FREQUENCY}
* {@link VariantQueryParams#ANNOT_POPULATION_REFERENCE_FREQUENCY}
* {@link VariantQueryParams#ANNOT_TRANSCRIPTION_FLAGS}
* {@link VariantQueryParams#ANNOT_GENE_TRAITS_ID}
* {@link VariantQueryParams#ANNOT_GENE_TRAITS_NAME}
* {@link VariantQueryParams#ANNOT_HPO}
* {@link VariantQueryParams#ANNOT_GO}
* {@link VariantQueryParams#ANNOT_EXPRESSION}
* {@link VariantQueryParams#ANNOT_PROTEIN_KEYWORDS}
* {@link VariantQueryParams#ANNOT_DRUG}
* {@link VariantQueryParams#ANNOT_FUNCTIONAL_SCORE}
*
* Stats filters:
* {@link VariantQueryParams#STATS_MAF}
* {@link VariantQueryParams#STATS_MGF}
* {@link VariantQueryParams#MISSING_ALLELES}
* {@link VariantQueryParams#MISSING_GENOTYPES}
*
* @param query Query to parse
* @param options Options
* @param dynamicColumns Initialized empty set to be filled with dynamic columns required by the queries
* @return List of sql filters
*/
protected List<String> getOtherFilters(Query query, QueryOptions options, final Set<Column> dynamicColumns) {
List<String> filters = new LinkedList<>();
// Variant filters:
StudyConfiguration defaultStudyConfiguration = addVariantFilters(query, options, filters);
// Annotation filters:
addAnnotFilters(query, dynamicColumns, filters);
// Stats filters:
addStatsFilters(query, defaultStudyConfiguration, filters);
return filters;
}
protected StudyConfiguration addVariantFilters(Query query, QueryOptions options, List<String> filters) {
addQueryFilter(query, REFERENCE, VariantColumn.REFERENCE, filters);
addQueryFilter(query, ALTERNATE, VariantColumn.ALTERNATE, filters);
addQueryFilter(query, TYPE, VariantColumn.TYPE, filters, s -> {
VariantType type = VariantType.valueOf(s);
Set<VariantType> subTypes = Variant.subTypes(type);
ArrayList<VariantType> types = new ArrayList<>(subTypes.size() + 1);
types.add(type);
types.addAll(subTypes);
return types;
});
final StudyConfiguration defaultStudyConfiguration;
if (isValidParam(query, STUDIES)) {
String value = query.getString(STUDIES.key());
QueryOperation operation = checkOperator(value);
List<String> values = splitValue(value, operation);
StringBuilder sb = new StringBuilder();
Iterator<String> iterator = values.iterator();
Map<String, Integer> studies = utils.getStudyConfigurationManager().getStudies(options);
Set<Integer> notNullStudies = new HashSet<>();
while (iterator.hasNext()) {
String study = iterator.next();
Integer studyId = utils.getStudyId(study, false, studies);
if (study.startsWith("!")) {
sb.append("\"").append(buildColumnKey(studyId, VariantTableStudyRow.HOM_REF)).append("\" IS NULL ");
} else {
notNullStudies.add(studyId);
sb.append("\"").append(buildColumnKey(studyId, VariantTableStudyRow.HOM_REF)).append("\" IS NOT NULL ");
}
if (iterator.hasNext()) {
if (operation == null || operation.equals(QueryOperation.AND)) {
sb.append(" AND ");
} else {
sb.append(" OR ");
}
}
}
// Skip this filter if contains all the existing studies.
if (studies.values().size() != notNullStudies.size() || !notNullStudies.containsAll(studies.values())) {
filters.add(sb.toString());
}
List<Integer> studyIds = utils.getStudyIds(values, options);
if (studyIds.size() == 1) {
defaultStudyConfiguration = utils.getStudyConfigurationManager().getStudyConfiguration(studyIds.get(0), options).first();
} else {
defaultStudyConfiguration = null;
}
} else {
List<Integer> studyIds = utils.getStudyConfigurationManager().getStudyIds(options);
if (studyIds.size() == 1) {
defaultStudyConfiguration = utils.getStudyConfigurationManager().getStudyConfiguration(studyIds.get(0), options).first();
} else {
defaultStudyConfiguration = null;
}
// StringBuilder sb = new StringBuilder();
// for (Iterator<Integer> iterator = studyIds.iterator(); iterator.hasNext();) {
// Integer studyId = iterator.next();
// sb.append('"').append(buildColumnKey(studyId, HOM_REF)).append("\" IS NOT NULL");
// if (iterator.hasNext()) {
// sb.append(" OR ");
// }
// }
// filters.add(sb.toString());
}
unsupportedFilter(query, FILES);
if (isValidParam(query, COHORTS)) {
for (String cohort : query.getAsStringList(COHORTS.key())) {
boolean negated = false;
if (cohort.startsWith("!")) {
cohort = cohort.substring(1);
negated = true;
}
String[] studyCohort = cohort.split(":");
StudyConfiguration studyConfiguration;
if (studyCohort.length == 2) {
studyConfiguration = utils.getStudyConfiguration(studyCohort[0], defaultStudyConfiguration);
cohort = studyCohort[1];
} else if (studyCohort.length == 1) {
studyConfiguration = defaultStudyConfiguration;
} else {
throw VariantQueryException.malformedParam(COHORTS, query.getString((COHORTS.key())), "Expected {study}:{cohort}");
}
int cohortId = utils.getCohortId(cohort, studyConfiguration);
Column column = VariantPhoenixHelper.getStatsColumn(studyConfiguration.getStudyId(), cohortId);
if (negated) {
filters.add(column + " IS NULL");
} else {
filters.add(column + " IS NOT NULL");
}
}
}
Map<Object, List<String>> genotypesMap = new HashMap<>();
if (isValidParam(query, GENOTYPE)) {
// NA12877_01 : 0/0 ; NA12878_01 : 0/1 , 1/1
parseGenotypeFilter(query.getString(GENOTYPE.key()), genotypesMap);
}
if (isValidParam(query, SAMPLES)) {
String value = query.getString(SAMPLES.key());
QueryOperation op = checkOperator(value);
List<String> samples = splitValue(value, op);
for (String sample : samples) {
genotypesMap.put(sample, Arrays.asList(HET_REF, HOM_VAR, OTHER));
}
}
if (!genotypesMap.isEmpty()) {
for (Map.Entry<Object, List<String>> entry : genotypesMap.entrySet()) {
if (defaultStudyConfiguration == null) {
List<String> studyNames = utils.getStudyConfigurationManager().getStudyNames(null);
throw VariantQueryException.missingStudyForSample(entry.getKey().toString(), studyNames);
}
int studyId = defaultStudyConfiguration.getStudyId();
int sampleId = utils.getSampleId(entry.getKey(), defaultStudyConfiguration);
List<String> genotypes = entry.getValue();
List<String> gts = new ArrayList<>(genotypes.size());
for (String genotype : genotypes) {
boolean negated = false;
if (isNegated(genotype)) {
genotype = genotype.substring(1);
negated = true;
}
switch (genotype) {
case HET_REF:
case HOM_VAR:
case NOCALL:
// 0 = any("1_.")
gts.add((negated ? " NOT " : " ") + sampleId + " = ANY(\"" + buildColumnKey(studyId, genotype) + "\") ");
break;
case HOM_REF:
if (negated) {
gts.add(" ( " + sampleId + " = ANY(\"" + buildColumnKey(studyId, HET_REF) + "\") "
+ " OR " + sampleId + " = ANY(\"" + buildColumnKey(studyId, HOM_VAR) + "\") "
+ " OR " + sampleId + " = ANY(\"" + buildColumnKey(studyId, NOCALL) + "\") "
+ " OR " + sampleId + " = ANY(\"" + buildColumnKey(studyId, OTHER) + "\") "
+ " ) "
);
} else {
gts.add(" NOT " + sampleId + " = ANY(\"" + buildColumnKey(studyId, HET_REF) + "\") "
+ "AND NOT " + sampleId + " = ANY(\"" + buildColumnKey(studyId, HOM_VAR) + "\") "
+ "AND NOT " + sampleId + " = ANY(\"" + buildColumnKey(studyId, NOCALL) + "\") "
+ "AND NOT " + sampleId + " = ANY(\"" + buildColumnKey(studyId, OTHER) + "\") "
);
}
break;
default: //OTHER
gts.add((negated ? " NOT " : " ") + sampleId + " = ANY(\"" + buildColumnKey(studyId, OTHER) + "\") ");
break;
}
}
filters.add(gts.stream().collect(Collectors.joining(" OR ", " ( ", " ) ")));
}
}
return defaultStudyConfiguration;
}
private void unsupportedFilter(Query query, VariantQueryParams param) {
if (isValidParam(query, param)) {
String warn = "Unsupported filter \"" + param + "\"";
// warnings.add(warn);
logger.warn(warn);
}
}
protected void addAnnotFilters(Query query, Set<Column> dynamicColumns, List<String> filters) {
if (isValidParam(query, ANNOTATION_EXISTS)) {
if (query.getBoolean(ANNOTATION_EXISTS.key())) {
filters.add(VariantColumn.FULL_ANNOTATION + " IS NOT NULL");
} else {
filters.add(VariantColumn.FULL_ANNOTATION + " IS NULL");
}
}
addQueryFilter(query, ANNOT_CONSEQUENCE_TYPE, VariantColumn.SO, filters, VariantDBAdaptorUtils::parseConsequenceType);
addQueryFilter(query, ANNOT_BIOTYPE, VariantColumn.BIOTYPE, filters);
addQueryFilter(query, ANNOT_SIFT, (keyOpValue, rawValue) -> {
if (StringUtils.isNotEmpty(keyOpValue[0])) {
throw VariantQueryException.malformedParam(ANNOT_SIFT, Arrays.toString(keyOpValue));
}
if (NumberUtils.isParsable(keyOpValue[2])) {
return VariantColumn.SIFT;
} else {
return VariantColumn.SIFT_DESC;
}
}, null, null, null, filters, op -> op.contains(">") ? 2 : op.contains("<") ? 1 : -1);
addQueryFilter(query, ANNOT_POLYPHEN, (keyOpValue, rawValue) -> {
if (StringUtils.isNotEmpty(keyOpValue[0])) {
throw VariantQueryException.malformedParam(ANNOT_POLYPHEN, Arrays.toString(keyOpValue));
}
if (NumberUtils.isParsable(keyOpValue[2])) {
return VariantColumn.POLYPHEN;
} else {
return VariantColumn.POLYPHEN_DESC;
}
}, null, null, null, filters, op -> op.contains(">") ? 2 : op.contains("<") ? 1 : -1);
addQueryFilter(query, ANNOT_CONSERVATION,
(keyOpValue, rawValue) -> getConservationScoreColumn(keyOpValue[0], rawValue, true), null, filters);
/*
* maf < 0.3 --> PF < 0.3 OR PF >= 0.7
* maf > 0.3 --> PF > 0.3 AND PF <= 0.7
*/
addQueryFilter(query, ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY,
(keyOpValue, s) -> {
Column column = getPopulationFrequencyColumn(keyOpValue[0]);
// dynamicColumns.add(column);
return column;
}, null, null,
keyOpValue -> {
String op = keyOpValue[1];
double value = Double.parseDouble(keyOpValue[2]);
Column column = getPopulationFrequencyColumn(keyOpValue[0]);
if (op.startsWith("<")) {
// If asking "less than", add "OR FIELD IS NULL" to read NULL values as 0, so accept the filter
return " OR \"" + column.column() + "\"[2] " + op + " " + value
+ " OR \"" + column.column() + "\"[2] IS NULL";
} else if (op.startsWith(">")) {
return " AND \"" + column.column() + "\"[2] " + op + " " + value;
} else {
throw VariantQueryException.malformedParam(ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY, Arrays.toString(keyOpValue),
"Unable to use operator " + op + " with this query.");
}
}, filters, 1);
addQueryFilter(query, ANNOT_POPULATION_ALTERNATE_FREQUENCY,
(keyOpValue, s) -> {
Column column = getPopulationFrequencyColumn(keyOpValue[0]);
// dynamicColumns.add(column);
return column;
}, null, null,
keyOpValue -> {
// If asking "less than", add "OR FIELD IS NULL" to read NULL values as 0, so accept the filter
if (keyOpValue[1].startsWith("<")) {
return " OR \"" + getPopulationFrequencyColumn(keyOpValue[0]).column() + "\"[2] IS NULL";
}
return "";
}, filters, 2);
addQueryFilter(query, ANNOT_POPULATION_REFERENCE_FREQUENCY,
(keyOpValue, s) -> {
Column column = getPopulationFrequencyColumn(keyOpValue[0]);
// dynamicColumns.add(column);
return column;
}, null, null,
keyOpValue -> {
// If asking "less than", add "OR FIELD IS NULL" to read NULL values as 0, so accept the filter
if (keyOpValue[1].startsWith("<")) {
return " OR \"" + getPopulationFrequencyColumn(keyOpValue[0]).column() + "\"[1] IS NULL";
}
return "";
}, filters, 1);
addQueryFilter(query, ANNOT_TRANSCRIPTION_FLAGS, VariantColumn.TRANSCRIPTION_FLAGS, filters);
addQueryFilter(query, ANNOT_GENE_TRAITS_ID, VariantColumn.GENE_TRAITS_ID, filters);
addQueryFilter(query, ANNOT_GENE_TRAITS_NAME, VariantColumn.GENE_TRAITS_NAME, filters);
addQueryFilter(query, ANNOT_HPO, VariantColumn.XREFS, filters);
if (isValidParam(query, ANNOT_GO)) {
String value = query.getString(ANNOT_GO.key());
if (checkOperator(value) == QueryOperation.AND) {
throw VariantQueryException.malformedParam(VariantQueryParams.ANNOT_GO, value, "Unimplemented AND operator");
}
List<String> goValues = splitValue(value, QueryOperation.OR);
Set<String> genesByGo = utils.getGenesByGo(goValues);
if (genesByGo.isEmpty()) {
// If any gene was found, the query will return no results.
// FIXME: Find another way of returning empty results
filters.add(getVoidFilter());
} else {
addQueryFilter(new Query(ANNOT_GO.key(), genesByGo), ANNOT_GO, VariantColumn.GENES, filters);
}
}
if (isValidParam(query, ANNOT_EXPRESSION)) {
String value = query.getString(ANNOT_EXPRESSION.key());
if (checkOperator(value) == QueryOperation.AND) {
throw VariantQueryException.malformedParam(VariantQueryParams.ANNOT_EXPRESSION, value, "Unimplemented AND operator");
}
List<String> expressionValues = splitValue(value, QueryOperation.OR);
Set<String> genesByExpression = utils.getGenesByExpression(expressionValues);
if (genesByExpression.isEmpty()) {
// If any gene was found, the query will return no results.
// FIXME: Find another way of returning empty results
filters.add(getVoidFilter());
} else {
addQueryFilter(new Query(ANNOT_EXPRESSION.key(), genesByExpression), ANNOT_EXPRESSION, VariantColumn.GENES, filters);
}
}
addQueryFilter(query, ANNOT_PROTEIN_KEYWORDS, VariantColumn.PROTEIN_KEYWORDS, filters);
addQueryFilter(query, ANNOT_DRUG, VariantColumn.DRUG, filters);
addQueryFilter(query, ANNOT_FUNCTIONAL_SCORE,
(keyOpValue, rawValue) -> getFunctionalScoreColumn(keyOpValue[0], rawValue), null, filters);
}
/**
* @return a filter which does not match with any chromosome.
*/
private String getVoidFilter() {
return buildFilter(VariantColumn.CHROMOSOME, "=", "_VOID");
}
protected void addStatsFilters(Query query, StudyConfiguration defaultStudyConfiguration, List<String> filters) {
addQueryFilter(query, STATS_MAF, getStatsColumnParser(defaultStudyConfiguration, VariantPhoenixHelper::getMafColumn),
null, filters);
addQueryFilter(query, STATS_MGF, getStatsColumnParser(defaultStudyConfiguration, VariantPhoenixHelper::getMgfColumn),
null, filters);
unsupportedFilter(query, MISSING_ALLELES);
unsupportedFilter(query, MISSING_GENOTYPES);
}
private BiFunction<String[], String, Column> getStatsColumnParser(StudyConfiguration defaultStudyConfiguration,
BiFunction<Integer, Integer, Column> columnBuilder) {
return (keyOpValue, v) -> {
String key = keyOpValue[0];
int indexOf = key.lastIndexOf(":");
String cohort;
final StudyConfiguration sc;
if (indexOf > 0) {
String study = key.substring(0, indexOf);
cohort = key.substring(indexOf + 1);
sc = utils.getStudyConfiguration(study, defaultStudyConfiguration);
} else {
cohort = key;
sc = defaultStudyConfiguration;
}
int cohortId = utils.getCohortId(cohort, sc);
return columnBuilder.apply(sc.getStudyId(), cohortId);
};
}
private void addQueryFilter(Query query, VariantQueryParams param, Column column, List<String> filters) {
addQueryFilter(query, param, column, filters, null);
}
private void addQueryFilter(Query query, VariantQueryParams param, Column column, List<String> filters,
Function<String, Object> valueParser) {
addQueryFilter(query, param, (a, s) -> column, null, valueParser, null, filters);
}
private void addQueryFilter(Query query, VariantQueryParams param, BiFunction<String[], String, Column> columnParser,
Function<String, Object> valueParser, List<String> filters) {
addQueryFilter(query, param, columnParser, null, valueParser, null, filters);
}
/**
* Transforms a Key-Value from a query into a valid SQL filter.
*
* @param query Query with the values
* @param param Param to read from the query
* @param columnParser Column parser. Given the [key, op, value] and the original value, returns a {@link Column}
* @param operatorParser Operator parser. Given the [key, op, value], returns a valid SQL operator
* @param valueParser Value parser. Given the [key, op, value], transforms the value to make the query.
* If the returned value is a Collection, uses each value for the query.
* @param extraFilters Provides extra filters to be concatenated to the filter.
* @param filters List of filters to be modified.
*/
private void addQueryFilter(Query query, VariantQueryParams param,
BiFunction<String[], String, Column> columnParser,
Function<String, String> operatorParser,
Function<String, Object> valueParser, Function<String[], String> extraFilters, List<String> filters) {
addQueryFilter(query, param, columnParser, operatorParser, valueParser, extraFilters, filters, -1);
}
/**
* Transforms a Key-Value from a query into a valid SQL filter.
*
* @param query Query with the values
* @param param Param to read from the query
* @param columnParser Column parser. Given the [key, op, value] and the original value, returns a {@link Column}
* @param operatorParser Operator parser. Given the [key, op, value], returns a valid SQL operator
* @param valueParser Value parser. Given the [key, op, value], transforms the value to make the query.
* If the returned value is a Collection, uses each value for the query.
* @param extraFilters Provides extra filters to be concatenated to the filter.
* @param filters List of filters to be modified.
* @param arrayIdx Array accessor index in base-1.
*/
private void addQueryFilter(Query query, VariantQueryParams param,
BiFunction<String[], String, Column> columnParser,
Function<String, String> operatorParser,
Function<String, Object> valueParser,
Function<String[], String> extraFilters, List<String> filters, int arrayIdx) {
addQueryFilter(query, param, columnParser, operatorParser, valueParser, extraFilters, filters, (o) -> arrayIdx);
}
/**
* Transforms a Key-Value from a query into a valid SQL filter.
*
* @param query Query with the values
* @param param Param to read from the query
* @param columnParser Column parser. Given the [key, op, value] and the original value, returns a {@link Column}
* @param operatorParser Operator parser. Given the [key, op, value], returns a valid SQL operator
* @param valueParser Value parser. Given the [key, op, value], transforms the value to make the query.
* If the returned value is a Collection, uses each value for the query.
* @param extraFilters Provides extra filters to be concatenated to the filter.
* @param filters List of filters to be modified.
* @param arrayIdxParser Array accessor index in base-1.
*/
private void addQueryFilter(Query query, VariantQueryParams param,
BiFunction<String[], String, Column> columnParser,
Function<String, String> operatorParser,
Function<String, Object> valueParser,
Function<String[], String> extraFilters, List<String> filters, Function<String, Integer> arrayIdxParser) {
if (isValidParam(query, param)) {
List<String> subFilters = new LinkedList<>();
QueryOperation logicOperation = checkOperator(query.getString(param.key()));
if (logicOperation == null) {
logicOperation = QueryOperation.AND;
}
for (String rawValue : query.getAsStringList(param.key(), logicOperation.separator())) {
String[] keyOpValue = splitOperator(rawValue);
Column column = columnParser.apply(keyOpValue, rawValue);
String op = parseOperator(keyOpValue[1]);
if (operatorParser != null) {
op = operatorParser.apply(op);
}
int arrayIdx = arrayIdxParser.apply(op);
if (!column.getPDataType().isArrayType() && arrayIdx >= 0) {
throw new VariantQueryException("Unable to use array indexes with non array columns. "
+ column + " " + column.sqlType());
}
final String negatedStr;
boolean negated = false;
if (op.startsWith("!")) {
op = inverseOperator(op);
negated = true;
negatedStr = "NOT ";
} else {
negatedStr = "";
}
String extra = "";
if (extraFilters != null) {
extra = extraFilters.apply(keyOpValue);
}
if (valueParser != null) {
Object value = valueParser.apply(keyOpValue[2]);
if (value instanceof Collection) {
List<String> subSubFilters = new ArrayList<>(((Collection) value).size());
for (Object o : ((Collection) value)) {
subSubFilters.add(buildFilter(column, op, o.toString(), "", extra, arrayIdx, param, rawValue));
}
subFilters.add(negatedStr + appendFilters(subSubFilters, QueryOperation.OR.toString()));
} else {
subFilters.add(buildFilter(column, op, value.toString(), negatedStr, extra, arrayIdx, param, rawValue));
}
} else {
subFilters.add(buildFilter(column, op, keyOpValue[2], negatedStr, extra, arrayIdx, param, rawValue));
}
}
filters.add(appendFilters(subFilters, logicOperation.toString()));
// filters.add(subFilters.stream().collect(Collectors.joining(" ) " + operation.name() + " ( ", " ( ", " ) ")));
}
}
private String buildFilter(Column column, String op, Object value) {
return buildFilter(column, op, value, "", "", 0, null, null);
}
private String buildFilter(Column column, String op, Object value, boolean negated) {
return buildFilter(column, op, value, negated ? "NOT " : "", "", 0, null, null);
}
private String buildFilter(Column column, String op, Object value, String negated, String extra, int idx,
VariantQueryParams param, String rawValue) {
Object parsedValue;
StringBuilder sb = new StringBuilder();
String arrayPosition = "";
if (StringUtils.isNotEmpty(extra)) {
sb.append("( ");
}
String sqlType = column.sqlType();
if (idx > 0) {
sqlType = sqlType.replace(" ARRAY", "");
arrayPosition = "[" + idx + "]";
}
switch (sqlType) {
case "VARCHAR":
parsedValue = value;
checkStringValue((String) parsedValue);
sb.append(negated)
.append('"').append(column).append('"').append(arrayPosition).append(' ');
if (((String) parsedValue).isEmpty()) {
sb.append("IS NULL");
} else {
sb.append(parseOperator(op))
.append(" '").append(parsedValue).append('\'');
}
break;
case "VARCHAR ARRAY":
parsedValue = value;
checkStringValue((String) parsedValue);
sb.append(negated)
.append('\'').append(parsedValue).append("' ")
.append(parseOperator(op))
.append(" ANY(\"").append(column).append("\")");
break;
case "INTEGER ARRAY":
parsedValue = parseInteger(value, param, rawValue);
String operator = flipOperator(parseNumericOperator(op));
sb.append(negated)
.append(parsedValue).append(' ')
.append(operator)
.append(" ANY(\"").append(column).append("\")");
break;
case "INTEGER":
case "UNSIGNED_INT":
parsedValue = parseInteger(value, param, rawValue);
sb.append(negated)
.append('"').append(column).append('"').append(arrayPosition).append(' ')
.append(parseNumericOperator(op))
.append(' ').append(parsedValue);
break;
case "FLOAT ARRAY":
case "DOUBLE ARRAY":
parsedValue = parseDouble(value, param, rawValue);
String flipOperator = flipOperator(parseNumericOperator(op));
sb.append(negated)
.append(parsedValue).append(' ')
.append(flipOperator)
.append(" ANY(\"").append(column).append("\")");
break;
case "FLOAT":
case "DOUBLE":
parsedValue = parseDouble(value, param, rawValue);
sb.append(negated)
.append('"').append(column).append('"').append(arrayPosition).append(' ')
.append(parseNumericOperator(op))
.append(' ').append(parsedValue);
break;
default:
throw new VariantQueryException("Unsupported column type " + column.getPDataType().getSqlTypeName()
+ " for column " + column);
}
if (StringUtils.isNotEmpty(extra)) {
sb.append(' ').append(extra).append(" )");
}
return sb.toString();
}
private double parseDouble(Object value, VariantQueryParams param, String rawValue) {
if (value instanceof Number) {
return ((Number) value).doubleValue();
} else {
try {
return Double.parseDouble(value.toString());
} catch (NumberFormatException e) {
if (param != null) {
throw VariantQueryException.malformedParam(param, rawValue);
} else {
throw new VariantQueryException("Error parsing decimal value '" + value + '\'', e);
}
}
}
}
private int parseInteger(Object value, VariantQueryParams param, String rawValue) {
if (value instanceof Number) {
return ((Number) value).intValue();
} else {
try {
return Integer.parseInt(value.toString());
} catch (NumberFormatException e) {
if (param != null) {
throw VariantQueryException.malformedParam(param, rawValue);
} else {
throw new VariantQueryException("Error parsing integer value '" + value + '\'', e);
}
}
}
}
private void checkStringValue(String parsedValue) {
if (parsedValue.contains("'")) {
throw new VariantQueryException("Unable to query text field using \"'\"");
}
}
/**
* Flip the operator to flip the order of the operands.
*
* ">" --> "<"
* "<" --> ">"
* ">=" --> "<="
* "<=" --> ">="
*
* @param op Operation to flip
* @return Operation flipped
*/
public static String flipOperator(String op) {
StringBuilder sb = new StringBuilder(op.length());
for (int i = 0; i < op.length(); i++) {
char c = op.charAt(i);
if (c == '>') {
c = '<';
} else if (c == '<') {
c = '>';
}
sb.append(c);
}
return sb.toString();
// return op.replace(">", "G").replace("<", ">").replace("G", "<");
}
/**
* Inverse the operator obtaining the opposite operator.
*
* ">" --> "<="
* "<" --> ">="
* ">=" --> "<"
* "<=" --> ">"
*
* @param op Operation to inverse
* @return Operation inverted
*/
public static String inverseOperator(String op) {
switch (op) {
case ">":
return "<=";
case ">=":
return "<";
case "<":
return ">=";
case "<=":
return ">";
case "":
case "=":
case "==":
return "!=";
case "!":
case "!=":
return "=";
default:
throw new VariantQueryException("Unknown operator " + op);
}
}
public static String parseOperator(String op) {
return SQL_OPERATOR.getOrDefault(op, op);
}
public static String parseNumericOperator(String op) {
String parsedOp = parseOperator(op);
if (parsedOp.equals("LIKE")) {
throw new VariantQueryException("Unable to use REGEX operator (" + op + ") with numerical fields");
}
return parsedOp;
}
}