/*
 * Copyright 2015 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.hpg.bigdata.core.lib;

import org.apache.commons.lang3.StringUtils;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Translates a variant {@link Query} into a SQL string.
 *
 * Each query key is a dot-separated path (for instance {@code annotation.consequenceTypes.geneName});
 * the first path element selects how the value is turned into filters and, for array fields,
 * into {@code LATERAL VIEW explode(...)} clauses.
 *
 * NOTE(review): {@code filters}, {@code explodes}, {@code sqlQueryString}, {@code processFilter},
 * {@code processRegionQuery} and {@code buildQueryString} are not declared in this file; they are
 * presumably inherited from {@link ParseQuery} - confirm their contracts there.
 *
 * Created by imedina on 09/08/16.
 */
public class VariantParseQuery extends ParseQuery {

    /** Matches {@code study:population<operator>value}, e.g. {@code 1kG:ALL<0.01}. */
    private static final Pattern POPULATION_PATTERN
            = Pattern.compile("^([^=<>:]+.*):([^=<>:]+.*)(<=?|>=?|!=|!?=?~|==?)([^=<>:]+.*)$");

    /** Matches {@code source<operator>value}, e.g. {@code phylop>0.5}. */
    private static final Pattern CONSERVATION_PATTERN
            = Pattern.compile("^([^=<>]+.*)(<=?|>=?|!=|!?=?~|==?)([^=<>]+.*)$");

    /** Column alias used for the exploded {@code annotation.conservation} array. */
    private static final String CONSERVATION_VIEW = "cons";

    public VariantParseQuery() {
        super();
    }

    /**
     * Processes every key of the query and builds the resulting SQL string.
     *
     * @param query        query whose keys are dot-separated field paths; values are cast to String
     * @param queryOptions options forwarded to {@code buildQueryString}
     * @param viewName     view name forwarded to {@code buildQueryString}
     * @return the content of {@code sqlQueryString} after {@code buildQueryString}, or {@code null}
     *         if a key splits into no fields (a key made only of dots)
     * @throws IllegalArgumentException if a population frequency or conservation value is malformed
     */
    public String parse(Query query, QueryOptions queryOptions, String viewName) {
        Set<String> keySet = query.keySet();
        for (String key : keySet) {
            String[] fields = key.split("\\.");

            // First we check if there is any field to process
            if (fields.length == 0) {
                return null;
            }

            String value = (String) query.get(key);

            switch (fields[0]) {
                case "id":
                case "chromosome":
                case "type":
                    filters.add(processFilter(fields[0], value, true, false));
                    break;
                case "start":
                case "end":
                case "length":
                    filters.add(processFilter(fields[0], value, false, false));
                    break;
                case "region":
                    processRegionQuery(value, "chromosome", "start", "end");
                    break;
                case "names":
                    filters.add(processFilter("hgvs", value, true, true));
                    break;
                case "studies":
                    processStudyQuery(fields, value);
                    break;
                case "annotation":
                    processAnnotationQuery(fields, value);
                    break;
                default:
                    break;
            }
        }

        // Build the SQL string from the processed query using explodes and filters
        buildQueryString(viewName, queryOptions);

        return sqlQueryString.toString();
    }

    /**
     * Handles keys starting with {@code studies}.
     *
     * @param fields the key split by dots; {@code fields[0]} is expected to be {@code studies}
     * @param value  the query value, cast to String where a filter is built
     */
    private void processStudyQuery(String[] fields, Object value) {
        // sanity check, if there is any field to process
        if (fields == null || fields.length == 0) {
            return;
        }

        String path = StringUtils.join(fields, ".", 1, fields.length - 1);
        String field = fields[fields.length - 1];

        // For two-element keys such as "studies.studyId" the joined range is empty, so the last
        // field is the path itself. Without this fallback "studyId", "format" and "samplesData"
        // could never match below (processAnnotationQuery applies the same rule).
        if (StringUtils.isEmpty(path)) {
            path = field;
        }

        switch (path) {
            case "studyId":
                filters.add(processFilter("studies." + path, (String) value, true, false));
                break;
            case "files":
                explodes.add("LATERAL VIEW explode(studies.files) act as file");

                switch (field) {
                    case "fileId":
                    case "call":
                        filters.add(processFilter("file." + field, (String) value, true, false));
                        break;
                    default:
                        // error!!
                        break;
                }
                break;
            case "format":
                filters.add(processFilter("studies.format", (String) value, true, true));
                break;
            case "samplesData":
                explodes.add("LATERAL VIEW explode(studies.samplesData) act as sd");
                // investigate how to query GT
                break;
            default:
                break;
        }
    }

    /**
     * Handles keys starting with {@code annotation}.
     *
     * @param fields the key split by dots; {@code fields[0]} is expected to be {@code annotation}
     * @param value  the query value
     * @throws IllegalArgumentException if a population frequency or conservation value is empty,
     *                                  mixes ',' and ';', or contains an expression that does not
     *                                  match the expected pattern
     */
    private void processAnnotationQuery(String[] fields, String value) {
        // sanity check, if there is any field to process
        if (fields == null || fields.length == 0) {
            return;
        }

        String field = fields[fields.length - 1];
        String path = StringUtils.join(fields, ".", 1, fields.length - 1);
        if (StringUtils.isEmpty(path)) {
            path = field;
        }

        switch (path) {
            case "id":
            case "ancestralAllele":
            case "displayConsequenceType":
                filters.add(processFilter("annotation." + path, value, true, false));
                break;
            case "xrefs":
                explodes.add("LATERAL VIEW explode(annotation.xrefs) act as xref");
                filters.add(processFilter("xref." + field, value, true, false));
                break;
            case "hgvs":
                // this is equivalent to case 'names' in parse function !!
                filters.add(processFilter("annotation.hgvs", value, true, true));
                break;

            // consequenceTypes is an array and therefore we have to use explode function
            case "consequenceTypes":
                explodes.add("LATERAL VIEW explode(annotation.consequenceTypes) act as ct");

                // we process most important fields inside consequenceTypes
                switch (field) {
                    case "geneName":
                    case "ensemblGeneId":
                    case "ensemblTranscriptId":
                    case "biotype":
                        filters.add(processFilter("ct." + field, value, true, false));
                        break;
                    case "transcriptAnnotationFlags":
                        filters.add(processFilter("ct.transcriptAnnotationFlags", value, true, true));
                        break;
                    default:
                        // error!!
                        break;
                }
                break;

            // sequenceOntologyTerms is also an array and therefore we have to use explode function
            case "consequenceTypes.sequenceOntologyTerms":
                // we add both explode (order is kept) to the set (no repetitions allowed)
                explodes.add("LATERAL VIEW explode(annotation.consequenceTypes) act as ct");
                explodes.add("LATERAL VIEW explode(ct.sequenceOntologyTerms) ctso as so");

                switch (field) {
                    case "accession":
                    case "name":
                        filters.add(processFilter("so." + field, value, true, false));
                        break;
                    default:
                        // error!!
                        break;
                }
                break;

            case "populationFrequencies":
                // The filter is built (and validated) first, so a malformed value registers
                // neither the filter nor the explode.
                filters.add(buildPopulationFilter(field, value));
                explodes.add("LATERAL VIEW explode(annotation.populationFrequencies) apf as popfreq");
                break;

            case "conservation":
                filters.add(buildConservationFilter(value));
                explodes.add("LATERAL VIEW explode(annotation.conservation) acons as " + CONSERVATION_VIEW);
                break;

            case "variantTraitAssociation":
                // NOTE(review): both lateral views below use the same table alias "avtac";
                // confirm this is accepted when clinvar and cosmic are queried together.
                switch (field) {
                    case "clinvar":
                        explodes.add("LATERAL VIEW explode(annotation.variantTraitAssociation.clinvar) avtac as clinvar");
                        filters.add(processFilter("clinvar.accession", value, true, false));
                        // Without this break a clinvar query also added the cosmic explode and filter
                        break;
                    case "cosmic":
                        explodes.add("LATERAL VIEW explode(annotation.variantTraitAssociation.cosmic) avtac as cosmic");
                        filters.add(processFilter("cosmic.mutationId", value, true, false));
                        break;
                    default:
                        break;
                }
                break;

            default:
                break;
        }
    }

    /**
     * Validates a multi-expression value and returns the SQL connector for its expressions.
     *
     * @param value       the raw query value; expressions are separated by ',' (OR) or ';' (AND)
     * @param description name of the field, used in the error message
     * @return {@code " OR "} if the value contains a comma, {@code " AND "} otherwise
     * @throws IllegalArgumentException if the value is null/empty or mixes ',' and ';'
     */
    private static String getLogicalComparator(String value, String description) {
        if (StringUtils.isEmpty(value)) {
            throw new IllegalArgumentException("value is null or empty for " + description);
        }

        boolean or = value.contains(",");
        boolean and = value.contains(";");
        if (or && and) {
            throw new IllegalArgumentException("Comma and semi-colon cannot be mixed: " + value);
        }
        return or ? " OR " : " AND ";
    }

    /**
     * Splits a multi-expression value into its expressions.
     *
     * @param value       the raw, non-empty query value
     * @param description name of the field, used in the error message
     * @return the expressions; never empty
     * @throws IllegalArgumentException if the value holds only separators (split yields nothing)
     */
    private static String[] splitExpressions(String value, String description) {
        // String.split never returns null, but it drops trailing empty strings, so a value such
        // as "," or ";" produces an empty array.
        String[] values = value.split("[,;]");
        if (values.length == 0) {
            throw new IllegalArgumentException("No expression found for " + description + ": " + value);
        }
        return values;
    }

    /**
     * Builds the parenthesised WHERE clause for a population frequency value.
     *
     * @param field column of the exploded population frequency compared against the value
     * @param value one or more {@code study:population<operator>value} expressions
     * @return the clause, e.g. {@code ((popfreq.study = 's' AND popfreq.population = 'p' AND popfreq.f<0.1))}
     * @throws IllegalArgumentException if the value is empty, mixes separators or has an
     *                                  expression not matching {@link #POPULATION_PATTERN}
     */
    private String buildPopulationFilter(String field, String value) {
        String logicalComparator = getLogicalComparator(value, "population frequencies");
        String[] values = splitExpressions(value, "population frequencies");

        StringBuilder where = new StringBuilder("(");
        for (int i = 0; i < values.length; i++) {
            Matcher matcher = POPULATION_PATTERN.matcher(values[i]);
            if (!matcher.find()) {
                // Dropping the expression silently would widen the result set, so fail instead
                throw new IllegalArgumentException("Invalid expression for population frequencies: "
                        + values[i]);
            }
            if (i > 0) {
                where.append(logicalComparator);
            }
            updatePopWhereString(field, "popfreq", matcher, where);
        }
        return where.append(")").toString();
    }

    /**
     * Builds the parenthesised WHERE clause for a conservation value.
     *
     * @param value one or more {@code source<operator>value} expressions
     * @return the clause, e.g. {@code ((cons.source = 'phylop' AND cons.score>0.5))}
     * @throws IllegalArgumentException if the value is empty, mixes separators or has an
     *                                  expression not matching {@link #CONSERVATION_PATTERN}
     */
    private String buildConservationFilter(String value) {
        String logicalComparator = getLogicalComparator(value, "conservation");
        String[] values = splitExpressions(value, "conservation");

        StringBuilder where = new StringBuilder("(");
        for (int i = 0; i < values.length; i++) {
            Matcher matcher = CONSERVATION_PATTERN.matcher(values[i]);
            if (!matcher.find()) {
                throw new IllegalArgumentException("Invalid expression for conservation: " + values[i]);
            }
            if (i > 0) {
                where.append(logicalComparator);
            }
            updateConsWhereString(matcher, where);
        }
        return where.append(")").toString();
    }

    /**
     * Appends one population frequency condition to the WHERE clause.
     *
     * NOTE(review): the matched groups are concatenated into the SQL text without escaping;
     * if query values can come from untrusted users this allows SQL injection - confirm and
     * escape or validate upstream.
     *
     * @param field    column of the view compared against the value
     * @param viewName alias of the exploded population frequency column
     * @param matcher  successful {@link #POPULATION_PATTERN} match (study, population, operator, value)
     * @param where    builder receiving the condition
     */
    private void updatePopWhereString(String field, String viewName, Matcher matcher, StringBuilder where) {
        where.append("(").append(viewName).append(".study = '").append(matcher.group(1).trim())
                .append("' AND ").append(viewName).append(".population = '").append(matcher.group(2).trim())
                .append("' AND ").append(viewName).append(".").append(field).append(matcher.group(3).trim())
                .append(matcher.group(4).trim()).append(")");
    }

    /**
     * Appends one conservation condition to the WHERE clause.
     *
     * NOTE(review): as above, the matched groups are not escaped before being placed in the SQL.
     *
     * @param matcher successful {@link #CONSERVATION_PATTERN} match (source, operator, value)
     * @param where   builder receiving the condition
     */
    private void updateConsWhereString(Matcher matcher, StringBuilder where) {
        where.append("(").append(CONSERVATION_VIEW).append(".source = '").append(matcher.group(1).trim())
                .append("' AND ").append(CONSERVATION_VIEW).append(".score")
                .append(matcher.group(2).trim()).append(matcher.group(3).trim()).append(")");
    }
}