/* * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/ */ package org.phenotips.variantstore.db.solr; import org.phenotips.variantstore.db.AbstractDatabaseController; import org.phenotips.variantstore.db.DatabaseException; import org.phenotips.variantstore.db.solr.tasks.AddIndividualTask; import org.phenotips.variantstore.db.solr.tasks.RemoveIndividualTask; import org.phenotips.variantstore.input.VariantIterator; import org.phenotips.variantstore.shared.ResourceManager; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; import java.util.function.Function; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; import org.apache.solr.client.solrj.response.Group; import org.apache.solr.client.solrj.response.GroupCommand; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.util.ClientUtils; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.params.GroupParams; import org.apache.solr.core.CoreContainer; import org.ga4gh.GAVariant; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; /** * Manages an embedded instance of solr. * * @version $Id: 24ad52f78fada1233c894bbc129b6258523152f4 $ */ public class SolrController extends AbstractDatabaseController { /** * the field name of the EXAC allele frequency value in the allele frequency query map. **/ public static final String EXAC_FREQUENCY_FIELD = "EXAC"; /** * the field name of the internal (db) allele frequency value in the allele frequency query map. */ public static final String DB_FREQUENCY_FIELD = "PhenomeCentral"; private Logger logger = LoggerFactory.getLogger(getClass()); // ensure that insertion and deletions are done synchronously, one task at a time private ExecutorService executor = Executors.newFixedThreadPool(1); private CoreContainer cores; private SolrClient server; /** * Create a SolrController, that will store it's files and configuration in a directory inside of rootPath. */ public SolrController() { super(); } @Override protected Path getStoragePathSuffix() { return Paths.get("solr/"); } @Override public void init(Path path) throws DatabaseException { super.init(path); ResourceManager.copyResourcesToPath(this.getStoragePathSuffix(), this.path); // Spin Solr up logger.debug(String.valueOf(this.path)); cores = new CoreContainer(this.path.toString()); cores.load(); server = new EmbeddedSolrServer(cores, "variants"); } @Override public void stop() { executor.shutdownNow(); cores.shutdown(); } @Override public Future addIndividual(final VariantIterator iterator) { FutureTask task = new FutureTask<>(new AddIndividualTask(server, iterator)); executor.submit(task); return task; } @Override public Future removeIndividual(String id) throws DatabaseException { Iterator<SolrDocument> iterator = getAllVariantsDocumentsForIndividual(id).iterator(); FutureTask task = new FutureTask<>(new RemoveIndividualTask(server, iterator, id)); executor.submit(task); return task; } @Override public List<GAVariant> getTopHarmfullVariants(String id, int n) { List<GAVariant> list = new ArrayList<>(); if (id == null || "".equals(id) || n == 0) { return list; } logger.debug(String.format("Searching for id:%s n:%s", id, n)); String queryString = String.format("%s:PASS", VariantsSchema.getCallsetsFieldName(id, VariantsSchema.FILTER)); logger.debug("Query string: " + queryString); SolrQuery q = new SolrQuery() .setQuery(queryString) .setRows(n) .setSort(VariantsSchema.getCallsetsFieldName(id, VariantsSchema.EXOMISER_VARIANT_SCORE), SolrQuery.ORDER.desc); QueryResponse resp; try { resp = server.query(q); } catch (SolrServerException | IOException e) { logger.error("Error getting individuals ", e); return list; } // Filter the variants further, to pull out each individual's variant. List<Map<String, GAVariant>> mapList = SolrVariantUtils.documentListToMapList(resp.getResults()); for (Map<String, GAVariant> map : mapList) { if (map.containsKey(id)) { list.add(map.get(id)); } } return list; } /** * GA4GH Beacon implementation. Return the allele count for this specific variant in the database. * * @param chr chr * @param pos pos * @param allele allele * * @return the allele count for this specific variant in the db. */ @Override public int beacon(String chr, long pos, String allele) { checkNotNull(chr); checkArgument(!"".equals(chr)); checkArgument(pos > 0); checkNotNull(allele); String queryString = String.format("%s:%s AND %s:%s AND %s:%S", VariantsSchema.CHROM, chr, VariantsSchema.START, pos - 1, VariantsSchema.ALT, allele); SolrQuery q = new SolrQuery() .setQuery(queryString) .setRows(1); QueryResponse resp; try { resp = server.query(q); } catch (SolrServerException | IOException e) { logger.error("Beacon Solr Exception", e); return 0; } SolrDocumentList results = resp.getResults(); if (results.size() > 0) { return (int) results.get(0).get(VariantsSchema.AC_TOT); } return 0; } @Override public long getTotNumVariants() { String queryString = "*:*"; SolrQuery q = new SolrQuery() .setQuery(queryString) .setRows(0); QueryResponse resp; try { resp = server.query(q); } catch (SolrServerException | IOException e) { logger.error("TotNumVariants Solr Exception", e); return 0; } return resp.getResults().getNumFound(); } /** * Given an individual id, return all the genes stored for that individual. * * @param id the individual's id * * @return the set of genes. */ @Override public Set<String> getAllGenesForIndividual(String id) { checkArgument(!id.isEmpty()); logger.debug("getAllGenesForIndividual(" + id + ")"); final Set<String> set = new HashSet<>(); String queryString = String.format("%s:%s ", VariantsSchema.CALLSET_IDS, id); SolrQuery q = new SolrQuery().setQuery(queryString); // sort on unique try { SolrUtils.processAllDocs(server, q, VariantsSchema.ID, new Function<Collection<SolrDocument>, Boolean>() { @Override public Boolean apply(Collection<SolrDocument> solrDocuments) { for (SolrDocument doc : solrDocuments) { set.add((String) doc.get(VariantsSchema.GENE)); } return false; } }); } catch (SolrServerException | IOException e) { logger.error("AllGenesForIndividual Solr Exception", e); return set; } return set; } @Override public Double getGeneScore(String id, String gene) { logger.debug(String.format("getGeneScore(%s, %s)", id, gene)); String queryString = String.format("%s:%s AND %s:%s", VariantsSchema.CALLSET_IDS, id, VariantsSchema.GENE, gene); SolrQuery q = new SolrQuery() .setQuery(queryString) .setRows(1); QueryResponse resp; try { resp = server.query(q); } catch (SolrServerException | IOException e) { logger.error("GeneScore Solr Exception", e); return 0D; } SolrDocumentList results = resp.getResults(); if (results.size() != 1) { return 0D; } Float result = (Float) results.get(0).get(VariantsSchema.getCallsetsFieldName( id, VariantsSchema.EXOMISER_GENE_COMBINED_SCORE)); if (result != null) { return result.doubleValue(); } else { return 0D; } } @Override public List<String> getTopGenesForIndividual(String id, Integer k) { logger.debug(String.format("getTopGenesForIndividual(%s, %d)", id, k)); final List<String> list = new LinkedList<>(); String queryString = String.format("%s:%s", VariantsSchema.CALLSET_IDS, id); SolrQuery q = new SolrQuery() .setQuery(queryString) .setRows(k) .setSort( VariantsSchema.getCallsetsFieldName(id, VariantsSchema.EXOMISER_GENE_COMBINED_SCORE), SolrQuery.ORDER.desc) .setParam(GroupParams.GROUP, true) .setParam(GroupParams.GROUP_FIELD, VariantsSchema.GENE); QueryResponse resp; try { resp = server.query(q); } catch (SolrServerException | IOException e) { logger.error("Solr Exception", e); return list; } for (GroupCommand command : resp.getGroupResponse().getValues()) { for (Group group : command.getValues()) { list.add(group.getGroupValue()); } } return list; } @Override public List<GAVariant> getTopHarmfullVariantsForGene(String id, String gene, Integer k) { logger.debug(String.format("getTopHarmfullVariantsForGene(%s, %s, %d)", id, gene, k)); final List<GAVariant> list = new LinkedList<>(); String queryString = String.format("%s:%s AND %s:%s", VariantsSchema.CALLSET_IDS, id, VariantsSchema.GENE, gene); SolrQuery q = new SolrQuery() .setQuery(queryString) .setRows(k) .setSort( VariantsSchema.getCallsetsFieldName(id, VariantsSchema.EXOMISER_VARIANT_SCORE), SolrQuery.ORDER.desc); QueryResponse resp; try { resp = server.query(q); } catch (SolrServerException | IOException e) { logger.error("Caught Solr Exception", e); return list; } for (SolrDocument doc : resp.getResults()) { list.add(SolrVariantUtils.docToVariant(doc, id)); } return list; } @Override public List<GAVariant> getTopHarmfulWithGene(String id, int n, String gene, List<String> variantEffects, Map<String, Double> alleleFrequencies) { List<GAVariant> list = new ArrayList<>(); checkNotNull(id); checkArgument(!"".equals(id)); checkArgument(n != 0); checkNotNull(gene); checkArgument(!"".equals(gene)); checkNotNull(variantEffects); checkArgument(variantEffects.size() != 0); checkNotNull(alleleFrequencies); checkArgument(alleleFrequencies.size() != 0); /** Build Query String **/ String effectQuery = ""; for (String effect : variantEffects) { effectQuery += String.format("%s:%s OR ", VariantsSchema.GENE_EFFECT, ClientUtils.escapeQueryChars(effect)); } // Strip final ' OR ' effectQuery = effectQuery.substring(0, effectQuery.length() - 4); // Find ExAC AF under the specified frequency, or where ExAC is null. String exacQuery = String.format("(-%s:[* TO *] AND *:*) OR %s:[0 TO %s] ", VariantsSchema.EXAC_AF, VariantsSchema.EXAC_AF, ClientUtils.escapeQueryChars(String.valueOf(alleleFrequencies.get(EXAC_FREQUENCY_FIELD))) ); String queryString = String.format("s:%s AND %s:%s AND (%s) AND (%s)", VariantsSchema.CALLSET_IDS, ClientUtils.escapeQueryChars(id), VariantsSchema.GENE, ClientUtils.escapeQueryChars(gene), effectQuery, exacQuery ); SolrQuery q = new SolrQuery() .setRows(n) .setQuery(queryString); QueryResponse resp = null; try { resp = server.query(q); } catch (SolrServerException | IOException e) { logger.error("Error getting individuals with variants", e); return list; } Map<String, List<GAVariant>> map = SolrVariantUtils.variantListToCallsetMap( SolrVariantUtils.documentListToMapList(resp.getResults())); if (map.containsKey(id)) { list = map.get(id); } return list; } /** * beacon. * * @return the allele count */ public int beacon() { return 0; } @Override public Map<String, List<GAVariant>> getIndividualsWithGene(String gene, List<String> variantEffects, Map<String, Double> alleleFrequencies, int n, int totIndividuals) { Map<String, List<GAVariant>> map = new HashMap<>(); checkArgument(n != 0, "n cannot be zero"); checkNotNull(gene, "gene cannot be null"); checkArgument(!"".equals(gene), "gene cannot be empty"); checkNotNull(variantEffects, "effects cannot be null"); checkArgument(variantEffects.size() != 0, "effects cannot be empty"); checkNotNull(alleleFrequencies, "allele frequencies cannot be null"); checkArgument(alleleFrequencies.size() != 0, "allele frequencies cannot be empty"); /** Build Query String **/ // alleleFreq = copiesSum / 2*totIndividuals int copiesSum = (int) (alleleFrequencies.get(DB_FREQUENCY_FIELD) * totIndividuals * 2); StringBuilder builder = new StringBuilder(); for (String effect : variantEffects) { builder.append(VariantsSchema.GENE_EFFECT) .append(":") .append(ClientUtils.escapeQueryChars(effect)) .append(" OR "); } // Strip final ' OR ' String effectQuery = builder.toString(); effectQuery = effectQuery.substring(0, effectQuery.length() - 4); // Find ExAC AF under the specified frequency, or where ExAC is null. String exacQuery = String.format("(-%s:[* TO *] AND *:*) OR %s:[0 TO %s]", VariantsSchema.EXAC_AF, VariantsSchema.EXAC_AF, ClientUtils.escapeQueryChars(String.valueOf(alleleFrequencies.get(EXAC_FREQUENCY_FIELD))) ); String queryString = String.format("%s:[* TO %s] AND %s:%s AND (%s) AND (%s)", VariantsSchema.AC_TOT, copiesSum, VariantsSchema.GENE, ClientUtils.escapeQueryChars(gene), effectQuery, exacQuery ); SolrQuery q = new SolrQuery() .setQuery(queryString) .setFilterQueries(queryString); q.setRows(300); QueryResponse resp; try { resp = server.query(q); map = SolrVariantUtils.variantListToCallsetMap(SolrVariantUtils.documentListToMapList(resp.getResults())); } catch (SolrServerException | IOException e) { logger.error("Error getting individals with variant", e); } return map; } @Override public Map<String, List<GAVariant>> getIndividualsWithVariant(String chr, int pos, String ref, String alt) { //TODO: NEWINIESE Map<String, List<GAVariant>> map = new HashMap<>(); return map; } @Override public List<String> getAllIndividuals() { List<String> ids = new ArrayList<String>(); try { SolrDocument metaDoc = SolrVariantUtils.getMetaDocument(server); List<Object> values = new ArrayList<>(metaDoc.getFieldValues(VariantsSchema.CALLSET_IDS)); for (Object item : values) { ids.add((String) item); } } catch (SolrServerException | IOException e) { logger.error("Error getting all individals stored in the variant store", e); } return ids; } @Override public List<GAVariant> getAllVariantsForIndividual(String id) { List<GAVariant> variants = new ArrayList<GAVariant>(); SolrDocumentList list = getAllVariantsDocumentsForIndividual(id); if (list != null) { variants = SolrVariantUtils.documentListToGAVarintList(list, id); } return variants; } private SolrDocumentList getAllVariantsDocumentsForIndividual(String id) { checkArgument(!id.isEmpty()); String queryString = String.format("%s:%s ", VariantsSchema.CALLSET_IDS, id); SolrQuery q = new SolrQuery().setQuery(queryString); QueryResponse resp = null; try { resp = server.query(q); } catch (SolrServerException | IOException e) { logger.error("Error getting variants for individual with id " + id, e); return null; } return resp.getResults(); } }