/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.storage.variant.sqlite; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.sun.jersey.api.client.Client; import com.sun.jersey.api.client.WebResource; import org.apache.commons.lang.StringUtils; import org.opencb.commons.bioformats.variant.json.VariantAnalysisInfo; import org.opencb.commons.bioformats.variant.json.VariantControl; import org.opencb.commons.bioformats.variant.json.VariantInfo; import org.opencb.commons.containers.QueryResult; import org.opencb.commons.containers.map.ObjectMap; import org.opencb.commons.containers.map.QueryOptions; import org.opencb.opencga.core.auth.SqliteCredentials; import org.opencb.opencga.core.common.XObject; import org.opencb.opencga.storage.indices.SqliteManager; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.sql.*; import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.opencb.biodata.models.feature.Region; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.effect.VariantEffect; import org.opencb.biodata.models.variant.stats.VariantStats; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; /** * @author Alejandro Aleman Ramos <aaleman@cipf.es> * @author Cristina Yenyxe Gonzalez Garcia <cgonzalez@cipf.es> */ public class VariantSqliteDBAdaptor implements VariantDBAdaptor { private SqliteCredentials sqliteCredentials; private SqliteManager sqliteManager; public VariantSqliteDBAdaptor() { System.out.println("Variant Query Maker"); } public VariantSqliteDBAdaptor(SqliteCredentials sqliteCredentials) throws SQLException, ClassNotFoundException { System.out.println("Variant Query Maker"); this.sqliteCredentials = sqliteCredentials; this.sqliteManager = new SqliteManager(); this.sqliteManager.connect(this.sqliteCredentials.getPath(), true); System.out.println("DB: " + this.sqliteCredentials.getPath()); } /** * @param region The region where variants must be searched * @param studyName The name of the study where variants are filed * @param options Optional arguments * @return */ @Override public QueryResult getAllVariantsByRegionAndStudy(Region region, String studyName, QueryOptions options) { Long start, end, dbStart, dbEnd; start = System.currentTimeMillis(); boolean includeSamples, includeStats, includeEffects; int pos, id = Integer.MIN_VALUE; String sql, chr, ref, alt; String columns = "variant.id_variant, variant.chromosome, variant.position, variant.ref, variant.alt,variant.id "; String joins = ""; Variant variant = null; XObject elem; QueryResult<Variant> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<Variant> results = new LinkedList<>(); List<String> whereClauses = new ArrayList<>(10); if (!options.containsKey("samples") && !options.containsKey("stats") && !options.containsKey("effects")) { includeSamples = true; includeStats = true; includeEffects = true; } else { includeSamples = options.containsKey("samples") && options.getBoolean("samples"); includeStats = options.containsKey("stats") && options.getBoolean("stats"); includeEffects = options.containsKey("effects") && options.getBoolean("effects"); } if (includeSamples) { columns += ",variant_info.key, variant_info.value "; joins += " left join variant_info on variant.id_variant=variant_info.id_variant "; } if (includeStats) { columns += ",variant_stats.maf, variant_stats.mgf, variant_stats.allele_maf , variant_stats.genotype_maf , variant_stats.miss_allele , variant_stats.miss_gt , variant_stats.mendel_err, variant_stats.is_indel , variant_stats.cases_percent_dominant , variant_stats.controls_percent_dominant , variant_stats.cases_percent_recessive , variant_stats.controls_percent_recessive "; joins += " inner join variant_stats on variant_stats.chromosome=variant.chromosome AND variant_stats.position=variant.position AND variant_stats.allele_ref=variant.ref AND variant_stats.allele_alt=variant.alt "; } if (includeEffects) { // columns += ""; // joins += " inner join variant_effect on variant_effect.chromosome=variant.chromosome AND variant_effect.position=variant.position AND variant_effect.reference_allele=variant.ref AND variant_effect.alternative_allele = variant.alt "; } try { StringBuilder regionClauses = new StringBuilder(); regionClauses.append("( variant.chromosome='").append(region.getChromosome()).append("' AND "); regionClauses.append("variant.position>=").append(String.valueOf(region.getStart())).append(" AND "); regionClauses.append("variant.position<=").append(String.valueOf(region.getEnd())); regionClauses.append(" ) "); whereClauses.add(regionClauses.toString()); sql = "SELECT " + columns + " from variant " + joins; if (whereClauses.size() > 0) { StringBuilder where = new StringBuilder(" WHERE "); for (int i = 0; i < whereClauses.size(); i++) { where.append(whereClauses.get(i)); if (i < whereClauses.size() - 1) { where.append(" AND "); } } sql += where.toString() + " ORDER BY variant.id_variant "; } sql += ";"; dbStart = System.currentTimeMillis(); Iterator<XObject> it = sqliteManager.queryIterator(sql); dbEnd = System.currentTimeMillis(); queryResult.setDbTime(dbEnd - dbStart); while (it.hasNext()) { elem = it.next(); if (id != elem.getInt("id_variant")) { // new Elem id = elem.getInt("id_variant"); chr = elem.getString("chromosome"); pos = elem.getInt("position"); ref = elem.getString("ref"); alt = elem.getString("alt"); variant = new Variant(chr, pos, pos, ref, alt); variant.setId(elem.getString("id")); variant.setFormat(elem.getString("format")); if (includeStats) { variant.setStats(new VariantStats( chr, pos, ref, alt, elem.getDouble("maf"), elem.getDouble("mgf"), elem.getString("allele_maf"), elem.getString("genotype_maf"), elem.getInt("miss_allele"), elem.getInt("miss_gt"), elem.getInt("mendel_err"), elem.getInt("is_indel") == 1, elem.getDouble("cases_percent_dominant"), elem.getDouble("controls_percent_dominant"), elem.getDouble("cases_percent_recessive"), elem.getDouble("controls_percent_recessive") )); } if (includeEffects) { VariantEffect ve = new VariantEffect(); } if (variant != null) { results.add(variant); } } if (elem.getString("key") != null && elem.getString("value") != null) { variant.addAttribute(elem.getString("key"), elem.getString("value")); } } sqliteManager.disconnect(false); } catch (SQLException e) { System.err.println("getAllVariantsByRegion: " + e.getClass().getName() + ": " + e.getMessage()); } // System.out.println("Results"); // for (Variant v : results) { // System.out.println(v); // } queryResult.setResult(results); queryResult.setNumResults(results.size()); end = System.currentTimeMillis(); queryResult.setTime(end - start); return queryResult; } @Override public List<QueryResult> getAllVariantsByRegionList(List<Region> region, String studyName, QueryOptions options) { return null; // TODO aaleman: Implementation needed } @Override public QueryResult<ObjectMap> getVariantsHistogramByRegion(Region region, String studyName, boolean histogramLogarithm, int histogramMax) { QueryResult<ObjectMap> queryResult = new QueryResult<>(String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); // TODO Fill metadata List<ObjectMap> data = new ArrayList<>(); long startTime = System.currentTimeMillis(); Path metaDir = getMetaDir(sqliteCredentials.getPath()); String fileName = sqliteCredentials.getPath().getFileName().toString(); try { long startDbTime = System.currentTimeMillis(); sqliteManager.connect(metaDir.resolve(Paths.get(fileName)), true); System.out.println("SQLite path: " + metaDir.resolve(Paths.get(fileName)).toString()); String queryString = "SELECT * FROM chunk WHERE chromosome='" + region.getChromosome() + "' AND start <= " + region.getEnd() + " AND end >= " + region.getStart(); List<XObject> queryResults = sqliteManager.query(queryString); sqliteManager.disconnect(true); queryResult.setDbTime(System.currentTimeMillis() - startDbTime); int resultSize = queryResults.size(); if (resultSize > histogramMax) { // Need to group results to fit maximum size of the histogram int sumChunkSize = resultSize / histogramMax; int i = 0, j = 0; int featuresCount = 0; ObjectMap item = null; for (XObject result : queryResults) { featuresCount += result.getInt("features_count"); if (i == 0) { item = new ObjectMap("chromosome", result.getString("chromosome")); item.put("chunkId", result.getInt("chunk_id")); item.put("start", result.getInt("start")); } else if (i == sumChunkSize - 1 || j == resultSize - 1) { if (histogramLogarithm) { item.put("featuresCount", (featuresCount > 0) ? Math.log(featuresCount) : 0); } else { item.put("featuresCount", featuresCount); } item.put("end", result.getInt("end")); data.add(item); i = -1; featuresCount = 0; } j++; i++; } } else { for (XObject result : queryResults) { ObjectMap item = new ObjectMap("chromosome", result.getString("chromosome")); item.put("chunkId", result.getInt("chunk_id")); item.put("start", result.getInt("start")); if (histogramLogarithm) { int features_count = result.getInt("features_count"); result.put("featuresCount", (features_count > 0) ? Math.log(features_count) : 0); } else { item.put("featuresCount", result.getInt("features_count")); } item.put("end", result.getInt("end")); data.add(item); } } } catch (ClassNotFoundException | SQLException ex) { Logger.getLogger(VariantSqliteDBAdaptor.class.getName()).log(Level.SEVERE, null, ex); queryResult.setErrorMsg(ex.getMessage()); } queryResult.setResult(data); queryResult.setNumResults(data.size()); queryResult.setTime(System.currentTimeMillis() - startTime); return queryResult; } @Override public QueryResult getStatsByVariant(Variant variant, QueryOptions options) { return null; // TODO aaleman: Implementation needed } @Override public QueryResult getSimpleStatsByVariant(Variant variant, QueryOptions options) { return null; // TODO aaleman: Implementation needed } @Override public QueryResult getEffectsByVariant(Variant variant, QueryOptions options) { return null; // TODO aaleman: Implementation needed } @Override public List<VariantInfo> getRecords(Map<String, String> options) { Connection con; Statement stmt; List<VariantInfo> list = new ArrayList<>(100); String dbName = options.get("db_name"); showDb(dbName); try { Class.forName("org.sqlite.JDBC"); con = DriverManager.getConnection("jdbc:sqlite:" + dbName); List<String> whereClauses = new ArrayList<>(10); Map<String, List<String>> sampleGenotypes; Map<String, String> controlsMAFs = new LinkedHashMap<>(); sampleGenotypes = processSamplesGT(options); if (options.containsKey("region_list") && !options.get("region_list").equals("")) { StringBuilder regionClauses = new StringBuilder("("); String[] regions = options.get("region_list").split(","); Pattern patternReg = Pattern.compile("(\\w+):(\\d+)-(\\d+)"); Matcher matcherReg, matcherChr; for (int i = 0; i < regions.length; i++) { String region = regions[i]; matcherReg = patternReg.matcher(region); if (matcherReg.find()) { String chr = matcherReg.group(1); int start = Integer.valueOf(matcherReg.group(2)); int end = Integer.valueOf(matcherReg.group(3)); regionClauses.append("( variant_stats.chromosome='").append(chr).append("' AND "); regionClauses.append("variant_stats.position>=").append(start).append(" AND "); regionClauses.append("variant_stats.position<=").append(end).append(" )"); if (i < (regions.length - 1)) { regionClauses.append(" OR "); } } else { Pattern patternChr = Pattern.compile("(\\w+)"); matcherChr = patternChr.matcher(region); if (matcherChr.find()) { String chr = matcherChr.group(); regionClauses.append("( variant_stats.chromosome='").append(chr).append("')"); if (i < (regions.length - 1)) { regionClauses.append(" OR "); } } else { System.err.println("ERROR: Region (" + region + ")"); } } } regionClauses.append(" ) "); whereClauses.add(regionClauses.toString()); } if (options.containsKey("chr_pos") && !options.get("chr_pos").equals("")) { whereClauses.add("variant_stats.chromosome='" + options.get("chr_pos") + "'"); if (options.containsKey("start_pos") && !options.get("start_pos").equals("")) { whereClauses.add("variant_stats.position>=" + options.get("start_pos")); } if (options.containsKey("end_pos") && !options.get("end_pos").equals("")) { whereClauses.add("variant_stats.position<=" + options.get("end_pos")); } } if (options.containsKey("mend_error") && !options.get("mend_error").equals("")) { String val = options.get("mend_error"); String opt = options.get("option_mend_error"); whereClauses.add("variant_stats.mendel_err " + opt + " " + val); } if (options.containsKey("is_indel") && options.get("is_indel").equalsIgnoreCase("on")) { whereClauses.add("variant_stats.is_indel=1"); } if (options.containsKey("maf") && !options.get("maf").equals("")) { String val = options.get("maf"); String opt = options.get("option_maf"); whereClauses.add("variant_stats.maf " + opt + " " + val); } if (options.containsKey("mgf") && !options.get("mgf").equals("")) { String val = options.get("mgf"); String opt = options.get("option_mgf"); whereClauses.add("variant_stats.mgf " + opt + " " + val); } if (options.containsKey("miss_allele") && !options.get("miss_allele").equals("")) { String val = options.get("miss_allele"); String opt = options.get("option_miss_allele"); whereClauses.add("variant_stats.miss_allele " + opt + " " + val); } if (options.containsKey("miss_gt") && !options.get("miss_gt").equals("")) { String val = options.get("miss_gt"); String opt = options.get("option_miss_gt"); whereClauses.add("variant_stats.miss_gt " + opt + " " + val); } if (options.containsKey("cases_percent_dominant") && !options.get("cases_percent_dominant").equals("")) { String val = options.get("cases_percent_dominant"); String opt = options.get("option_cases_dom"); whereClauses.add("variant_stats.cases_percent_dominant " + opt + " " + val); } if (options.containsKey("controls_percent_dominant") && !options.get("controls_percent_dominant").equals("")) { String val = options.get("controls_percent_dominant"); String opt = options.get("option_controls_dom"); whereClauses.add("variant_stats.controls_percent_dominant " + opt + " " + val); } if (options.containsKey("cases_percent_recessive") && !options.get("cases_percent_recessive").equals("")) { String val = options.get("cases_percent_recessive"); String opt = options.get("option_cases_rec"); whereClauses.add("variant_stats.cases_percent_recessive " + opt + " " + val); } if (options.containsKey("controls_percent_recessive") && !options.get("controls_percent_recessive").equals("")) { String val = options.get("controls_percent_recessive"); String opt = options.get("option_controls_rec"); whereClauses.add("variant_stats.controls_percent_recessive " + opt + " " + val); } if (options.containsKey("biotype") && !options.get("biotype").equals("")) { String[] biotypes = options.get("biotype").split(","); StringBuilder biotypesClauses = new StringBuilder(" ( "); for (int i = 0; i < biotypes.length; i++) { biotypesClauses.append("variant_effect.feature_biotype LIKE '%").append(biotypes[i]).append("%'"); if (i < (biotypes.length - 1)) { biotypesClauses.append(" OR "); } } biotypesClauses.append(" ) "); whereClauses.add(biotypesClauses.toString()); } if (options.containsKey("exc_1000g_controls") && options.get("exc_1000g_controls").equalsIgnoreCase("on")) { whereClauses.add("(key NOT LIKE '1000G%' OR key is null)"); } else if (options.containsKey("maf_1000g_controls") && !options.get("maf_1000g_controls").equals("")) { controlsMAFs.put("1000G", options.get("maf_1000g_controls")); } if (options.containsKey("exc_bier_controls") && options.get("exc_bier_controls").equalsIgnoreCase("on")) { whereClauses.add("(key NOT LIKE 'BIER%' OR key is null)"); } else if (options.containsKey("maf_bier_controls") && !options.get("maf_bier_controls").equals("")) { controlsMAFs.put("BIER", options.get("maf_bier_controls")); } if (options.containsKey("exc_evs_controls") && options.get("exc_evs_controls").equalsIgnoreCase("on")) { whereClauses.add("(key NOT LIKE 'EVS%' OR key is null)"); } else if (options.containsKey("maf_evs_controls") && !options.get("maf_evs_controls").equals("")) { controlsMAFs.put("BIER", options.get("maf_evs_controls")); } if (options.containsKey("conseq_type[]") && !options.get("conseq_type[]").equals("")) { whereClauses.add(processConseqType(options.get("conseq_type[]"))); } if (options.containsKey("genes") && !options.get("genes").equals("")) { whereClauses.add(processGeneList(options.get("genes"))); // processGeneList(options.get("genes")); } if (sampleGenotypes.size() > 0) { StringBuilder sg = new StringBuilder(); int csg = 0; sg.append("("); for (Map.Entry<String, List<String>> entry : sampleGenotypes.entrySet()) { sg.append("("); sg.append("sample_name='").append(entry.getKey()).append("' AND ("); for (int i = 0; i < entry.getValue().size(); i++) { String[] aux = entry.getValue().get(i).split("/"); sg.append("("); sg.append("allele_1=").append(aux[0]).append(" AND allele_2=").append(aux[1]); sg.append(")"); if (i + 1 < entry.getValue().size()) { sg.append(" OR "); } } sg.append(")"); sg.append(" OR sample_name<>'").append(entry.getKey()).append("'"); sg.append(")"); if (csg + 1 < sampleGenotypes.entrySet().size()) { sg.append(" AND "); } csg++; } sg.append(")"); System.out.println(sg); whereClauses.add(sg.toString()); } String sql = "SELECT count(*) as count FROM sample ;"; stmt = con.createStatement(); ResultSet rs = stmt.executeQuery(sql); int numSamples = 0; while (rs.next()) { numSamples = rs.getInt("count"); } stmt.close(); System.out.println("controlsMAFs = " + controlsMAFs); System.out.println("sampleGenotypes = " + sampleGenotypes); String innerJoinVariantSQL = " left join variant_info on variant.id_variant=variant_info.id_variant "; // String innerJoinEffectSQL = " inner join variant_effect on variant_effect.chromosome=variant.chromosome AND variant_effect.position=variant.position AND variant_effect.reference_allele=variant.ref AND variant_effect.alternative_allele = variant.alt "; sql = "SELECT distinct variant.genes,variant.consequence_types, variant.id_variant, variant_info.key, variant_info.value, sample_info.sample_name, sample_info.allele_1, sample_info.allele_2, variant_stats.chromosome ," + "variant_stats.position , variant_stats.allele_ref , variant_stats.allele_alt , variant_stats.id , variant_stats.maf , variant_stats.mgf, " + "variant_stats.allele_maf , variant_stats.genotype_maf , variant_stats.miss_allele , variant_stats.miss_gt , variant_stats.mendel_err ," + "variant_stats.is_indel , variant_stats.cases_percent_dominant , variant_stats.controls_percent_dominant , variant_stats.cases_percent_recessive , variant_stats.controls_percent_recessive, " + "variant.polyphen_score, variant.polyphen_effect, variant.sift_score, variant.sift_effect " + " FROM variant_stats " + "inner join variant on variant_stats.chromosome=variant.chromosome AND variant_stats.position=variant.position AND variant_stats.allele_ref=variant.ref AND variant_stats.allele_alt=variant.alt " + "inner join sample_info on variant.id_variant=sample_info.id_variant " + innerJoinVariantSQL; if (whereClauses.size() > 0) { StringBuilder where = new StringBuilder(" where "); for (int i = 0; i < whereClauses.size(); i++) { where.append(whereClauses.get(i)); if (i < whereClauses.size() - 1) { where.append(" AND "); } } sql += where.toString() + " ORDER BY variant_stats.chromosome , variant_stats.position , variant_stats.allele_ref , variant_stats.allele_alt ;"; } System.out.println(sql); System.out.println("Start SQL"); long start = System.currentTimeMillis(); stmt = con.createStatement(); rs = stmt.executeQuery(sql); VariantStats vs; VariantInfo vi = null; String chr = ""; int pos = 0; String ref = "", alt = ""; System.out.println("End SQL: " + ((System.currentTimeMillis() - start) / 1000.0) + " s."); System.out.println("Processing"); while (rs.next()) { if (!rs.getString("chromosome").equals(chr) || rs.getInt("position") != pos || !rs.getString("allele_ref").equals(ref) || !rs.getString("allele_alt").equals(alt)) { chr = rs.getString("chromosome"); pos = rs.getInt("position"); ref = rs.getString("allele_ref"); alt = rs.getString("allele_alt"); if (vi != null && filterGenotypes(vi, numSamples) && filterControls(vi, controlsMAFs)) { list.add(vi); } vi = new VariantInfo(chr, pos, ref, alt); vs = new VariantStats(chr, pos, ref, alt, rs.getDouble("maf"), rs.getDouble("mgf"), rs.getString("allele_maf"), rs.getString("genotype_maf"), rs.getInt("miss_allele"), rs.getInt("miss_gt"), rs.getInt("mendel_err"), rs.getInt("is_indel") == 1, rs.getDouble("cases_percent_dominant"), rs.getDouble("controls_percent_dominant"), rs.getDouble("cases_percent_recessive"), rs.getDouble("controls_percent_recessive")); vs.setId(rs.getString("id")); // vi.addGenotypes(rs.getString("genotypes")); vi.addStats(vs); vi.addGenes(rs.getString("genes")); vi.addConsequenceTypes(rs.getString("consequence_types")); vi.setPolyphen_score(rs.getDouble("polyphen_score")); vi.setSift_score(rs.getDouble("sift_score")); vi.setPolyphen_effect(rs.getInt("polyphen_effect")); vi.setSift_effect(rs.getInt("sift_effect")); } if (rs.getString("key") != null && rs.getString("value") != null) { vi.addControl(rs.getString("key"), rs.getString("value")); } String sample = rs.getString("sample_name"); String gt = rs.getInt("allele_1") + "/" + rs.getInt("allele_2"); vi.addSammpleGenotype(sample, gt); // vi.addGeneAndConsequenceType(rs.getString("gene_name"), rs.getString("consequence_type_obo")); } if (vi != null && filterGenotypes(vi, numSamples) && filterControls(vi, controlsMAFs)) { list.add(vi); } stmt.close(); System.out.println("Total: (" + list.size() + ")"); System.out.println("End processing: " + ((System.currentTimeMillis() - start) / 1000.0) + " s."); con.close(); } catch (ClassNotFoundException | SQLException e) { System.err.println("STATS: " + e.getClass().getName() + ": " + e.getMessage()); } return list; } private void showDb(String dbName) { System.out.println("DB: " + dbName); } @Override public List<VariantStats> getRecordsStats(Map<String, String> options) { Connection con; Statement stmt; List<VariantStats> list = new ArrayList<>(100); String dbName = options.get("db_name"); try { Class.forName("org.sqlite.JDBC"); con = DriverManager.getConnection("jdbc:sqlite:" + dbName); List<String> whereClauses = new ArrayList<>(10); if (options.containsKey("region_list") && !options.get("region_list").equals("")) { StringBuilder regionClauses = new StringBuilder("("); String[] regions = options.get("region_list").split(","); Pattern pattern = Pattern.compile("(\\w+):(\\d+)-(\\d+)"); Matcher matcher; for (int i = 0; i < regions.length; i++) { String region = regions[i]; matcher = pattern.matcher(region); if (matcher.find()) { String chr = matcher.group(1); int start = Integer.valueOf(matcher.group(2)); int end = Integer.valueOf(matcher.group(3)); regionClauses.append("( variant_stats.chromosome='").append(chr).append("' AND "); regionClauses.append("variant_stats.position>=").append(start).append(" AND "); regionClauses.append("variant_stats.position<=").append(end).append(" )"); if (i < (regions.length - 1)) { regionClauses.append(" OR "); } } } regionClauses.append(" ) "); whereClauses.add(regionClauses.toString()); } if (options.containsKey("mend_error") && !options.get("mend_error").equals("")) { String val = options.get("mend_error"); String opt = options.get("option_mend_error"); whereClauses.add("variant_stats.mendel_err " + opt + " " + val); } if (options.containsKey("is_indel") && options.get("is_indel").equalsIgnoreCase("on")) { whereClauses.add("variant_stats.is_indel=1"); } if (options.containsKey("maf") && !options.get("maf").equals("")) { String val = options.get("maf"); String opt = options.get("option_maf"); whereClauses.add("variant_stats.maf " + opt + " " + val); } if (options.containsKey("mgf") && !options.get("mgf").equals("")) { String val = options.get("mgf"); String opt = options.get("option_mgf"); whereClauses.add("variant_stats.mgf " + opt + " " + val); } if (options.containsKey("miss_allele") && !options.get("miss_allele").equals("")) { String val = options.get("miss_allele"); String opt = options.get("option_miss_allele"); whereClauses.add("variant_stats.miss_allele " + opt + " " + val); } if (options.containsKey("miss_gt") && !options.get("miss_gt").equals("")) { String val = options.get("miss_gt"); String opt = options.get("option_miss_gt"); whereClauses.add("variant_stats.miss_gt " + opt + " " + val); } if (options.containsKey("cases_percent_dominant") && !options.get("cases_percent_dominant").equals("")) { String val = options.get("cases_percent_dominant"); String opt = options.get("option_cases_dom"); whereClauses.add("variant_stats.cases_percent_dominant " + opt + " " + val); } if (options.containsKey("controls_percent_dominant") && !options.get("controls_percent_dominant").equals("")) { String val = options.get("controls_percent_dominant"); String opt = options.get("option_controls_dom"); whereClauses.add("variant_stats.controls_percent_dominant " + opt + " " + val); } if (options.containsKey("cases_percent_recessive") && !options.get("cases_percent_recessive").equals("")) { String val = options.get("cases_percent_recessive"); String opt = options.get("option_cases_rec"); whereClauses.add("variant_stats.cases_percent_recessive " + opt + " " + val); } if (options.containsKey("controls_percent_recessive") && !options.get("controls_percent_recessive").equals("")) { String val = options.get("controls_percent_recessive"); String opt = options.get("option_controls_rec"); whereClauses.add("variant_stats.controls_percent_recessive " + opt + " " + val); } if (options.containsKey("genes") && !options.get("genes").equals("")) { whereClauses.add(processGeneList(options.get("genes"))); } String sql = "SELECT distinct variant_stats.chromosome ," + "variant_stats.position , variant_stats.allele_ref , variant_stats.allele_alt, variant_stats.maf , variant_stats.mgf, " + "variant_stats.allele_maf , variant_stats.genotype_maf , variant_stats.miss_allele , variant_stats.miss_gt , variant_stats.mendel_err ," + "variant_stats.is_indel , variant_stats.cases_percent_dominant , variant_stats.controls_percent_dominant , variant_stats.cases_percent_recessive , variant_stats.controls_percent_recessive" + " FROM variant_stats "; if (whereClauses.size() > 0) { StringBuilder where = new StringBuilder(" where "); for (int i = 0; i < whereClauses.size(); i++) { where.append(whereClauses.get(i)); if (i < whereClauses.size() - 1) { where.append(" AND "); } } sql += where.toString() + " ORDER BY variant_stats.chromosome , variant_stats.position , variant_stats.allele_ref ;"; } System.out.println(sql); System.out.println("Start SQL"); long start = System.currentTimeMillis(); stmt = con.createStatement(); stmt = con.createStatement(); ResultSet rs = stmt.executeQuery(sql); VariantStats vs; VariantInfo vi = null; String chr = ""; int pos = 0; String ref = "", alt = ""; System.out.println("End SQL: " + ((System.currentTimeMillis() - start) / 1000.0) + " s."); System.out.println("Processing"); while (rs.next()) { chr = rs.getString("chromosome"); pos = rs.getInt("position"); ref = rs.getString("allele_ref"); alt = rs.getString("allele_alt"); vs = new VariantStats(chr, pos, ref, alt, rs.getDouble("maf"), rs.getDouble("mgf"), rs.getString("allele_maf"), rs.getString("genotype_maf"), rs.getInt("miss_allele"), rs.getInt("miss_gt"), rs.getInt("mendel_err"), rs.getInt("is_indel") == 1, rs.getDouble("cases_percent_dominant"), rs.getDouble("controls_percent_dominant"), rs.getDouble("cases_percent_recessive"), rs.getDouble("controls_percent_recessive")); list.add(vs); } System.out.println("Total: (" + list.size() + ")"); System.out.println("End processing: " + ((System.currentTimeMillis() - start) / 1000.0) + " s."); stmt.close(); con.close(); } catch (ClassNotFoundException | SQLException e) { System.err.println("STATS: " + e.getClass().getName() + ": " + e.getMessage()); } return list; } @Override public List<VariantEffect> getEffect(Map<String, String> options) { Statement stmt; Connection con; List<VariantEffect> list = new ArrayList<>(100); String dbName = options.get("db_name"); try { Class.forName("org.sqlite.JDBC"); con = DriverManager.getConnection("jdbc:sqlite:" + dbName); String chr = options.get("chr"); int pos = Integer.valueOf(options.get("pos")); String ref = options.get("ref"); String alt = options.get("alt"); String sql = "SELECT * FROM variant_effect WHERE chromosome='" + chr + "' AND position=" + pos + " AND reference_allele='" + ref + "' AND alternative_allele='" + alt + "';"; System.out.println(sql); stmt = con.createStatement(); ResultSet rs = stmt.executeQuery(sql); VariantEffect ve; while (rs.next()) { ve = new VariantEffect(rs.getString("chromosome"), rs.getInt("position"), rs.getString("reference_allele"), rs.getString("alternative_allele"), rs.getString("feature_id"), rs.getString("feature_name"), rs.getString("feature_type"), rs.getString("feature_biotype"), rs.getString("feature_chromosome"), rs.getInt("feature_start"), rs.getInt("feature_end"), rs.getString("feature_strand"), rs.getString("snp_id"), rs.getString("ancestral"), rs.getString("alternative"), rs.getString("gene_id"), rs.getString("transcript_id"), rs.getString("gene_name"), rs.getString("consequence_type"), rs.getString("consequence_type_obo"), rs.getString("consequence_type_desc"), rs.getString("consequence_type_type"), rs.getInt("aa_position"), rs.getString("aminoacid_change"), rs.getString("codon_change")); ve.setPolyphenEffect(rs.getInt("polyphen_effect")); ve.setSiftEffect(rs.getInt("sift_effect")); ve.setPolyphenScore(rs.getDouble("polyphen_score")); ve.setSiftScore(rs.getDouble("sift_score")); list.add(ve); } stmt.close(); con.close(); } catch (ClassNotFoundException | SQLException e) { System.err.println("EFFECT: " + e.getClass().getName() + ": " + e.getMessage()); } return list; } @Override public VariantAnalysisInfo getAnalysisInfo(Map<String, String> options) { Statement stmt; Connection con; VariantAnalysisInfo vi = new VariantAnalysisInfo(); String dbName = options.get("db_name"); showDb(dbName); try { Class.forName("org.sqlite.JDBC"); con = DriverManager.getConnection("jdbc:sqlite:" + dbName); String sql = "SELECT * FROM sample ;"; stmt = con.createStatement(); ResultSet rs = stmt.executeQuery(sql); while (rs.next()) { vi.addSample(rs.getString("name")); } stmt.close(); sql = "select * from consequence_type_count"; stmt = con.createStatement(); rs = stmt.executeQuery(sql); while (rs.next()) { vi.addConsequenceType(rs.getString("consequence_type_obo"), rs.getInt("count")); } stmt.close(); sql = "select * from biotype_count;"; stmt = con.createStatement(); rs = stmt.executeQuery(sql); while (rs.next()) { vi.addBiotype(rs.getString("feature_biotype"), rs.getInt("count")); } stmt.close(); sql = "select * from global_stats"; stmt = con.createStatement(); rs = stmt.executeQuery(sql); while (rs.next()) { vi.addGlobalStats(rs.getString("name").toLowerCase(), rs.getDouble("value")); } stmt.close(); sql = "select count(*) as count, chromosome from variant group by chromosome"; stmt = con.createStatement(); rs = stmt.executeQuery(sql); while (rs.next()) { vi.addChromosome(rs.getString("chromosome"), rs.getInt("count")); } stmt.close(); con.close(); } catch (ClassNotFoundException | SQLException e) { System.err.println("ANALYSIS INFO: " + e.getClass().getName() + ": " + e.getMessage()); } return vi; } @Override public boolean close() { return true; } private String processGeneList(String genes) { System.out.println("genes = " + genes); List<String> list = new ArrayList<>(); // Client client = ClientBuilder.newClient(); // WebTarget webTarget = client.target("http://ws.bioinfo.cipf.es/cellbase/rest/latest/hsa/feature/gene/"); Client client = Client.create(); WebResource webResource = client.resource("http://ws.bioinfo.cipf.es/cellbase/rest/latest/hsa/feature/gene/"); ObjectMapper mapper = new ObjectMapper(); // Response response = webTarget.path(genes).path("info").queryParam("of", "json").request().get(); String response = webResource.path(genes).path("info").queryParam("of", "json").get(String.class); String data = response.toString(); System.out.println("response = " + response); try { JsonNode actualObj = mapper.readTree(data); Iterator<JsonNode> it = actualObj.iterator(); Iterator<JsonNode> aux; StringBuilder sb; while (it.hasNext()) { JsonNode node = it.next(); if (node.isArray()) { aux = node.iterator(); while (aux.hasNext()) { JsonNode auxNode = aux.next(); sb = new StringBuilder("("); System.out.println("auxNode.get(\"chromosome\").asText() = " + auxNode.get("chromosome").asText()); sb.append("variant_stats.chromosome='").append(auxNode.get("chromosome").asText()).append("' AND "); sb.append("variant_stats.position>=").append(auxNode.get("start")).append(" AND "); sb.append("variant_stats.position<=").append(auxNode.get("end")).append(" )"); list.add(sb.toString()); } } } } catch (IOException e) { e.printStackTrace(); } String res = "(" + StringUtils.join(list, " OR ") + ")"; return res; } private boolean filterControls(VariantInfo vi, Map<String, String> controlsMAFs) { boolean res = true; String key; VariantControl vc; float controlMAF; for (Map.Entry<String, VariantControl> entry : vi.getControls().entrySet()) { key = entry.getKey(); vc = entry.getValue(); if (controlsMAFs.containsKey(key)) { controlMAF = Float.valueOf(controlsMAFs.get(key)); if (vc.getMaf() > controlMAF) { return false; } } } return res; } private String processConseqType(String conseqType) { List<String> clauses = new ArrayList<>(10); String[] cts = conseqType.split(","); for (String ct : cts) { clauses.add("(variant.consequence_types LIKE '%" + ct + "%' )"); } String res = ""; if (clauses.size() > 0) { res = "(" + StringUtils.join(clauses, " OR ") + ")"; } return res; } private boolean filterGenotypes(VariantInfo variantInfo, int numSamples) { // if (variantInfo.getSampleGenotypes().size() != numSamples) { // return false; // } else { // return true; // } return variantInfo.getSampleGenotypes().size() == numSamples; } private Map<String, List<String>> processSamplesGT(Map<String, String> options) { Map<String, List<String>> samplesGenotypes = new LinkedHashMap<>(10); List<String> genotypesList; String key, val; for (Map.Entry<String, String> entry : options.entrySet()) { key = entry.getKey(); val = entry.getValue(); if (key.startsWith("sampleGT_")) { String sampleName = key.replace("sampleGT_", "").replace("[]", ""); String[] genotypes = val.split(","); if (samplesGenotypes.containsKey(sampleName)) { genotypesList = samplesGenotypes.get(sampleName); } else { genotypesList = new ArrayList<>(); samplesGenotypes.put(sampleName, genotypesList); } for (int i = 0; i < genotypes.length; i++) { genotypesList.add(genotypes[i]); } } } return samplesGenotypes; } private void processSamplesGT(Map<String, String> options, List<String> whereClauses) { String key, val; List<String> auxClauses = new ArrayList<>(); for (Map.Entry<String, String> entry : options.entrySet()) { key = entry.getKey(); val = entry.getValue(); if (key.startsWith("sampleGT_")) { String sampleName = key.replace("sampleGT_", "").replace("[]", ""); String[] genotypes = val.split(","); StringBuilder sb = new StringBuilder("("); for (int i = 0; i < genotypes.length; i++) { String[] gt = genotypes[i].split("_"); sb.append("("); sb.append("sample_info.sample_name='" + sampleName + "'"); sb.append(" AND sample_info.allele_1=" + gt[0]); sb.append(" AND sample_info.allele_2=" + gt[1]); sb.append(")"); if (i < genotypes.length - 1) { sb.append(" OR "); } } sb.append(")"); auxClauses.add(sb.toString()); } } if (auxClauses.size() > 0) { String finalSampleWhere = StringUtils.join(auxClauses, " AND "); whereClauses.add(finalSampleWhere); } } /* ****************************************** * Path and index checking * * ******************************************/ private Path getMetaDir(Path file) { String inputName = file.getFileName().toString(); return file.getParent().resolve(".meta_" + inputName); } }