/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.storage.core.variant.adaptors; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.HashMultiset; import com.google.common.collect.Multiset; import htsjdk.variant.variantcontext.VariantContext; import org.apache.commons.lang3.StringUtils; import org.hamcrest.CoreMatchers; import org.hamcrest.Matcher; import org.hamcrest.core.IsAnything; import org.junit.*; import org.opencb.biodata.models.core.Region; import org.opencb.biodata.models.variant.StudyEntry; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.VariantSource; import org.opencb.biodata.models.variant.VariantStudy; import org.opencb.biodata.models.variant.annotation.ConsequenceTypeMappings; import org.opencb.biodata.models.variant.avro.*; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.core.QueryResult; import org.opencb.opencga.core.results.VariantQueryResult; import org.opencb.opencga.storage.core.StoragePipelineResult; import org.opencb.opencga.storage.core.metadata.StudyConfiguration; import org.opencb.opencga.storage.core.variant.VariantStorageBaseTest; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import 
org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager; import org.opencb.opencga.storage.core.variant.annotation.annotators.CellBaseRestVariantAnnotator; import org.opencb.opencga.storage.core.variant.stats.DefaultVariantStatisticsManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.file.Paths; import java.util.*; import java.util.function.Function; import java.util.function.Predicate; import java.util.stream.Collectors; import static org.hamcrest.CoreMatchers.*; import static org.junit.Assert.*; import static org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor.VariantQueryParams.*; import static org.opencb.opencga.storage.core.variant.adaptors.VariantMatchers.*; /** * Tests that all the VariantDBAdaptor filters and methods work correctly. * * Do not check that all the values are loaded correctly * Do not check that variant annotation is correct * * @author Jacobo Coll <jacobo167@gmail.com> */ @Ignore public abstract class VariantDBAdaptorTest extends VariantStorageBaseTest { private static final int QUERIES_LIM = 50; // private static final String GENOMES_PHASE_3 = "1000GENOMES_phase_3"; // private static final String ESP_6500 = "ESP_6500"; private static final String GENOMES_PHASE_3 = "1kG_phase3"; private static final String ESP_6500 = "ESP6500"; protected static int NUM_VARIANTS = 998; protected static Set<String> FORMAT; protected static boolean fileIndexed; protected static VariantSource source; protected static StudyConfiguration studyConfiguration; protected VariantDBAdaptor dbAdaptor; protected QueryOptions options; protected QueryResult<Variant> queryResult; protected QueryResult<Variant> allVariants; private static Logger logger = LoggerFactory.getLogger(VariantDBAdaptorTest.class); @BeforeClass public static void beforeClass() throws IOException { fileIndexed = false; } @Override @Before public void before() throws Exception { dbAdaptor = 
getVariantStorageEngine().getDBAdaptor(DB_NAME); if (!fileIndexed) { studyConfiguration = newStudyConfiguration(); // variantSource = new VariantSource(smallInputUri.getPath(), "testAlias", "testStudy", "Study for testing purposes"); clearDB(DB_NAME); ObjectMap params = new ObjectMap(VariantStorageEngine.Options.STUDY_TYPE.key(), VariantStudy.StudyType.FAMILY) .append(VariantStorageEngine.Options.ANNOTATE.key(), true) .append(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), "DS,GL") .append(VariantAnnotationManager.VARIANT_ANNOTATOR_CLASSNAME, CellBaseRestVariantAnnotator.class.getName()) .append(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "json") .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), true); params.putAll(getOtherParams()); FORMAT = new HashSet<>(); if (!params.getBoolean(VariantStorageEngine.Options.EXCLUDE_GENOTYPES.key(), VariantStorageEngine.Options.EXCLUDE_GENOTYPES.defaultValue())) { FORMAT.add("GT"); } FORMAT.addAll(params.getAsStringList(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key())); StoragePipelineResult etlResult = runDefaultETL(smallInputUri, getVariantStorageEngine(), studyConfiguration, params); source = variantStorageManager.getVariantReaderUtils().readVariantSource(Paths.get(etlResult.getTransformResult().getPath()).toUri()); NUM_VARIANTS = getExpectedNumLoadedVariants(source); fileIndexed = true; Integer indexedFileId = studyConfiguration.getIndexedFiles().iterator().next(); //Calculate stats if (getOtherParams().getBoolean(VariantStorageEngine.Options.CALCULATE_STATS.key(), true)) { QueryOptions options = new QueryOptions(VariantStorageEngine.Options.STUDY_ID.key(), STUDY_ID) .append(VariantStorageEngine.Options.LOAD_BATCH_SIZE.key(), 100) .append(DefaultVariantStatisticsManager.OUTPUT, outputUri) .append(DefaultVariantStatisticsManager.OUTPUT_FILE_NAME, "cohort1.cohort2.stats"); Iterator<Integer> iterator = studyConfiguration.getSamplesInFiles().get(indexedFileId).iterator(); /** Create cohorts 
**/ HashSet<Integer> cohort1 = new HashSet<>(); cohort1.add(iterator.next()); cohort1.add(iterator.next()); HashSet<Integer> cohort2 = new HashSet<>(); cohort2.add(iterator.next()); cohort2.add(iterator.next()); Map<String, Integer> cohortIds = new HashMap<>(); cohortIds.put("cohort1", 10); cohortIds.put("cohort2", 11); studyConfiguration.getCohortIds().putAll(cohortIds); studyConfiguration.getCohorts().put(10, cohort1); studyConfiguration.getCohorts().put(11, cohort2); dbAdaptor.getStudyConfigurationManager().updateStudyConfiguration(studyConfiguration, QueryOptions.empty()); variantStorageManager.calculateStats(studyConfiguration.getStudyName(), new ArrayList<>(cohortIds.keySet()), DB_NAME, options); } if (params.getBoolean(VariantStorageEngine.Options.ANNOTATE.key())) { for (int i = 0; i < 30 ; i++) { allVariants = dbAdaptor.get(new Query(), new QueryOptions(QueryOptions.SORT, true)); Long annotated = dbAdaptor.count(new Query(ANNOTATION_EXISTS.key(), true)).first(); Long all = dbAdaptor.count(new Query()).first(); System.out.println("count annotated = " + annotated); System.out.println("count = " + all); System.out.println("get = " + allVariants.getNumResults()); List<Variant> nonAnnotatedVariants = allVariants.getResult() .stream() .filter(variant -> variant.getAnnotation() == null) .collect(Collectors.toList()); if (!nonAnnotatedVariants.isEmpty()) { System.out.println(nonAnnotatedVariants.size() + " variants not annotated:"); System.out.println("Variants not annotated: " + nonAnnotatedVariants); } if (Objects.equals(annotated, all)) { break; } } assertEquals(dbAdaptor.count(new Query(ANNOTATION_EXISTS.key(), true)).first(), dbAdaptor.count(new Query()).first()); } } allVariants = dbAdaptor.get(new Query(), new QueryOptions(QueryOptions.SORT, true)); options = new QueryOptions(); } @After public void after() throws IOException { dbAdaptor.close(); } protected ObjectMap getOtherParams() { return new ObjectMap(); } @Test public void testGetAllVariants() { long 
numResults = dbAdaptor.count(null).first(); assertEquals(NUM_VARIANTS, numResults); } @Test public void testGetAllVariants_limit_skip() { limitSkip(new Query(), new QueryOptions()); } @Test public void testGetAllVariants_limit_skip_sorted() { limitSkip(new Query(), new QueryOptions(QueryOptions.SORT, true)); } @Test public void testGetAllVariants_limit_skip_filters() { limitSkip(new Query(ANNOT_POLYPHEN.key(), "<0.5"), new QueryOptions()); } @Test public void testGetAllVariants_limit_skip_sorted_filters() { limitSkip(new Query(ANNOT_POLYPHEN.key(), "<0.5"), new QueryOptions(QueryOptions.SORT, true)); } public void limitSkip(Query query, QueryOptions options) { VariantQueryResult<Variant> expected = dbAdaptor.get(query, options); int numVariants = expected.getNumResults(); // expected.getResult().forEach(v -> logger.info("expected variant: == " + v)); for (int batchSize : new int[]{50, 100, 1000}) { List<Variant> variants = new ArrayList<>(); Set<String> variantStr = new HashSet<>(); for (int i = 0; i < numVariants / batchSize + 1; i++) { QueryResult<Variant> result = dbAdaptor.get(query, new QueryOptions(options) .append(QueryOptions.LIMIT, batchSize) .append(QueryOptions.SKIP, i * batchSize)); logger.info("Got " + result.getNumResults() + " results"); variants.addAll(result.getResult()); for (Variant variant : result.getResult()) { boolean repeated = !variantStr.add(variant.toString()); assertFalse("Repeated variant! 
: " + variant.toString(), repeated); } } assertEquals(numVariants, variants.size()); assertEquals(numVariants, variantStr.size()); assertEquals(expected.getResult().stream().map(Object::toString).collect(Collectors.toSet()), variantStr); } } @Test public void testGetVariantsByType() { Set<Variant> snv = new HashSet<>(dbAdaptor.get(new Query(VariantDBAdaptor.VariantQueryParams.TYPE.key(), VariantType.SNV), new QueryOptions()).getResult()); System.out.println("SNV = " + snv.size()); snv.forEach(variant -> assertThat(EnumSet.of(VariantType.SNV, VariantType.SNP), hasItem(variant.getType()))); Set<Variant> not_snv = new HashSet<>(dbAdaptor.get(new Query(VariantDBAdaptor.VariantQueryParams.TYPE.key(), "!" + VariantType.SNV), new QueryOptions()).getResult()); System.out.println("!SNV = " + not_snv.size()); not_snv.forEach(variant -> assertFalse(EnumSet.of(VariantType.SNV, VariantType.SNP).contains(variant.getType()))); Set<Variant> snv_snp = new HashSet<>(dbAdaptor.get(new Query(VariantDBAdaptor.VariantQueryParams.TYPE.key(), VariantType.SNV + "," + VariantContext.Type.SNP), new QueryOptions()).getResult()); System.out.println("SNV_SNP = " + snv_snp.size()); assertEquals(snv_snp, snv); Set<Variant> snp = new HashSet<>(dbAdaptor.get(new Query(VariantDBAdaptor.VariantQueryParams.TYPE.key(), VariantType.SNP), new QueryOptions()).getResult()); snp.forEach(variant -> assertEquals(VariantType.SNP, variant.getType())); snp.forEach(variant -> assertThat(snv, hasItem(variant))); System.out.println("SNP = " + snp.size()); Set<Variant> indels = new HashSet<>(dbAdaptor.get(new Query(VariantDBAdaptor.VariantQueryParams.TYPE.key(), VariantType.INDEL), new QueryOptions()).getResult()); indels.forEach(variant -> assertEquals(VariantType.INDEL, variant.getType())); System.out.println("INDEL = " + indels.size()); Set<Variant> indels_snp = new HashSet<>(dbAdaptor.get(new Query(VariantDBAdaptor.VariantQueryParams.TYPE.key(), VariantType.INDEL + "," + VariantType.SNP), new 
QueryOptions()).getResult()); indels_snp.forEach(variant -> assertThat(EnumSet.of(VariantType.INDEL, VariantType.SNP), hasItem(variant.getType()))); indels_snp.forEach(variant -> assertTrue(indels.contains(variant) || snp.contains(variant))); System.out.println("INDEL_SNP = " + indels_snp.size()); Set<Variant> indels_snv = new HashSet<>(dbAdaptor.get(new Query(VariantDBAdaptor.VariantQueryParams.TYPE.key(), VariantType.INDEL + "," + VariantType.SNV), new QueryOptions()).getResult()); indels_snv.forEach(variant -> assertThat(EnumSet.of(VariantType.INDEL, VariantType.SNP, VariantType.SNV), hasItem(variant.getType()))); indels_snv.forEach(variant -> assertTrue(indels.contains(variant) || snv.contains(variant))); System.out.println("INDEL_SNV = " + indels_snv.size()); } @Test public void testGetAllVariants_populationFrequencyRef() { final PopulationFrequency defaultPopulation = new PopulationFrequency(null, null, null, null, 0F, 0F, 0F, 0F, 0F); Query query; query = new Query() .append(ANNOT_POPULATION_REFERENCE_FREQUENCY.key(), GENOMES_PHASE_3 + ":AFR<=0.05001"); queryResult = dbAdaptor.get(query, options); assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasPopRefFreq(GENOMES_PHASE_3, "AFR", lte(0.05001))))); } @Test public void testGetAllVariants_populationFrequency() { final PopulationFrequency defaultPopulation = new PopulationFrequency(null, null, null, null, 0F, 0F, 0F, 0F, 0F); Query query; query = new Query() .append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), ESP_6500 + ":AA>0.05001"); queryResult = dbAdaptor.get(query, options); assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasPopAltFreq(ESP_6500, "AA", gt(0.05001))))); // filterPopulation(map -> (map.containsKey(ESP_6500 + ":AA") && map.get(ESP_6500 + ":AA").getAltAlleleFreq() > 0.05001), filter); query = new Query() .append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), GENOMES_PHASE_3 + ":AFR<=0.05001"); queryResult = dbAdaptor.get(query, options); assertThat(queryResult, 
everyResult(allVariants, hasAnnotation(hasPopAltFreq(GENOMES_PHASE_3, "AFR", lte(0.05001))))); // filterPopulation(map -> (!map.containsKey(GENOMES_PHASE_3 + ":AFR") || map.get(GENOMES_PHASE_3 + ":AFR").getAltAlleleFreq() <= 0.05001), filter); query = new Query() .append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), ESP_6500 + ":AA>0.05001;" + GENOMES_PHASE_3 + ":AFR<=0.05001"); queryResult = dbAdaptor.get(query, options); assertThat(queryResult, everyResult(allVariants, hasAnnotation(allOf( hasPopAltFreq(ESP_6500, "AA", gt(0.05001)), hasPopAltFreq(GENOMES_PHASE_3, "AFR", lte(0.05001)))))); // filterPopulation(map -> (map.containsKey(ESP_6500 + ":AA") && map.get(ESP_6500 + ":AA").getAltAlleleFreq() > 0.05001 // && (!map.containsKey(GENOMES_PHASE_3 + ":AFR") || map.get(GENOMES_PHASE_3 + ":AFR").getAltAlleleFreq() <= 0.05001)), filter); query = new Query() .append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), ESP_6500 + ":AA>0.05001," + GENOMES_PHASE_3 + ":AFR<=0.05001"); queryResult = dbAdaptor.get(query, options); assertThat(queryResult, everyResult(allVariants, hasAnnotation(anyOf( hasPopAltFreq(ESP_6500, "AA", gt(0.05001)), hasPopAltFreq(GENOMES_PHASE_3, "AFR", lte(0.05001)))))); // filterPopulation(map -> (map.containsKey(ESP_6500 + ":AA") && map.get(ESP_6500 + ":AA").getAltAlleleFreq() > 0.05001 // || (!map.containsKey(GENOMES_PHASE_3 + ":AFR") || map.get(GENOMES_PHASE_3 + ":AFR").getAltAlleleFreq() <= 0.05001)), filter); } @Test public void testGetAllVariants_population_maf() { final PopulationFrequency defaultPopulation = new PopulationFrequency(null, null, null, null, 0F, 0F, 0F, 0F, 0F); Query baseQuery = new Query(); Query query = new Query(baseQuery) .append(ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY.key(), GENOMES_PHASE_3 + ":AFR<=0.0501"); queryResult = dbAdaptor.get(query, options); filterPopulation(map -> (Math.min(map.getOrDefault(GENOMES_PHASE_3 + ":AFR", defaultPopulation).getRefAlleleFreq(), map.getOrDefault(GENOMES_PHASE_3 + ":AFR", 
defaultPopulation).getAltAlleleFreq()) <= 0.0501)); query = new Query(baseQuery) .append(ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY.key(), ESP_6500 + ":AA>0.0501"); queryResult = dbAdaptor.get(query, options); filterPopulation(map -> (map.containsKey(ESP_6500 + ":AA") && Math.min(map.get(ESP_6500 + ":AA").getRefAlleleFreq(), map.get(ESP_6500 + ":AA").getAltAlleleFreq()) > 0.0501)); query = new Query(baseQuery) .append(ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY.key(),GENOMES_PHASE_3 + ":AFR<=0.0501"); queryResult = dbAdaptor.get(query, options); filterPopulation(map -> (Math.min(map.getOrDefault(GENOMES_PHASE_3 + ":AFR", defaultPopulation).getRefAlleleFreq(), map.getOrDefault(GENOMES_PHASE_3 + ":AFR", defaultPopulation).getAltAlleleFreq()) <= 0.0501)); query = new Query(baseQuery) .append(ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY.key(),GENOMES_PHASE_3 + ":ALL<=0.0501"); queryResult = dbAdaptor.get(query, options); filterPopulation(map -> (Math.min(map.getOrDefault(GENOMES_PHASE_3 + ":ALL", defaultPopulation).getRefAlleleFreq(), map.getOrDefault(GENOMES_PHASE_3 + ":ALL", defaultPopulation).getAltAlleleFreq()) <= 0.0501)); query = new Query(baseQuery) .append(ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY.key(), ESP_6500 + ":AA>0.0501;" + GENOMES_PHASE_3 + ":AFR<=0.0501"); queryResult = dbAdaptor.get(query, options); filterPopulation(map -> (map.containsKey(ESP_6500 + ":AA") && Math.min(map.get(ESP_6500 + ":AA").getRefAlleleFreq(), map.get(ESP_6500 + ":AA").getAltAlleleFreq()) > 0.0501 && Math.min(map.getOrDefault(GENOMES_PHASE_3 + ":AFR", defaultPopulation).getRefAlleleFreq(), map.getOrDefault(GENOMES_PHASE_3 + ":AFR", defaultPopulation).getAltAlleleFreq()) <= 0.0501)); query = new Query(baseQuery) .append(ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY.key(), ESP_6500 + ":AA>0.0501," + GENOMES_PHASE_3 + ":AFR<=0.0501"); queryResult = dbAdaptor.get(query, options); filterPopulation(map -> (map.containsKey(ESP_6500 + ":AA") && Math.min(map.get(ESP_6500 + ":AA").getRefAlleleFreq(), 
map.get(ESP_6500 + ":AA").getAltAlleleFreq()) > 0.0501 || Math.min(map.getOrDefault(GENOMES_PHASE_3 + ":AFR", defaultPopulation).getRefAlleleFreq(), map.getOrDefault(GENOMES_PHASE_3 + ":AFR", defaultPopulation).getAltAlleleFreq()) <= 0.0501)); } public long filterPopulation(Predicate<Map<String, PopulationFrequency>> predicate) { return filterPopulation(queryResult, v -> true, predicate); } public long filterPopulation(QueryResult<Variant> queryResult, Predicate<Variant> filterVariants, Predicate<Map<String, PopulationFrequency>> predicate) { queryResult.getResult().forEach(variant -> { assertNotNull(variant); assertNotNull("In " + variant, variant.getAnnotation()); // assertNotNull("In " + variant, variant.getAnnotation().getPopulationFrequencies()); }); Set<String> expectedVariants = allVariants.getResult() .stream() .filter(filterVariants.and(variant -> variant.getAnnotation() != null)) .filter(variant -> { Map<String, PopulationFrequency> map; if (variant.getAnnotation().getPopulationFrequencies() == null) { map = Collections.emptyMap(); } else { map = new HashMap<>(); for (PopulationFrequency p : variant.getAnnotation().getPopulationFrequencies()) { map.put(p.getStudy() + ":" + p.getPopulation(), p); } } return predicate.test(map); }) .map(Variant::toString) .collect(Collectors.toSet()); assertTrue("Expect to get at least one result", expectedVariants.size() > 0); for (String variant : expectedVariants) { Set<String> result = queryResult.getResult().stream().map(Variant::toString).collect(Collectors.toSet()); if (!result.contains(variant)) { System.out.println("variant missing = " + variant); } } for (Variant variant : queryResult.getResult()) { if (!expectedVariants.contains(variant.toString())) { System.out.println("variant not suppose to be = " + variant); } } assertEquals(expectedVariants.size(), queryResult.getNumResults()); long count = queryResult.getResult().stream() .map(variant -> { Map<String, PopulationFrequency> map; if 
(variant.getAnnotation().getPopulationFrequencies() == null) { map = Collections.emptyMap(); } else { map = new HashMap<>(); for (PopulationFrequency p : variant.getAnnotation().getPopulationFrequencies()) { map.put(p.getStudy() + ":" + p.getPopulation(), p); } } return map; }) .filter(predicate.negate()) .count(); assertEquals(0, count); return count; } @Test public void testGetAllVariants_id() { testGetAllVariants_rs(ID.key()); } @Test public void testGetAllVariants_variantId() { int i = 0; List<Variant> variants = new ArrayList<>(); for (Variant variant : allVariants.getResult()) { if (i++ % 10 == 0) { variants.add(variant); } } List<Variant> result = dbAdaptor.get(new Query(ID.key(), variants), new QueryOptions()).getResult(); assertTrue(variants.size() > 0); List<String> expectedList = variants.stream().map(Object::toString).sorted().collect(Collectors.toList()); List<String> actualList = result.stream().map(Object::toString).sorted().collect(Collectors.toList()); for (String expected : expectedList) { if (!actualList.contains(expected)) { System.out.println("missing expected = " + expected); } } for (String actual : actualList) { if (!expectedList.contains(actual)) { System.out.println("extra actual = " + actual); } } assertEquals(expectedList, actualList); } @Test public void testGetAllVariants_xref() { Query query = new Query(ANNOT_XREF.key(), "3:108634973:C:A,rs2032582,HP:0001250,VAR_048225,Q9BY64,ENSG00000250026,TMPRSS11B,COSM1421316"); queryResult = dbAdaptor.get(query, null); assertThat(queryResult, everyResult(allVariants, anyOf( hasAnnotation(at("3:108634973:C:A")), with("id", Variant::getId, is("rs2032582")), hasAnnotation(with("GeneTraitAssociation", VariantAnnotation::getGeneTraitAssociation, hasItem(with("HPO", GeneTraitAssociation::getHpo, is("HP:0001250"))))), hasAnnotation(with("ConsequenceType", VariantAnnotation::getConsequenceTypes, hasItem(with("ProteinVariantAnnotation", ConsequenceType::getProteinVariantAnnotation, 
with("UniprotVariantId", ProteinVariantAnnotation::getUniprotVariantId, is("VAR_048225")))))),
                hasAnnotation(with("ConsequenceType", VariantAnnotation::getConsequenceTypes, hasItem(with("ProteinVariantAnnotation", ConsequenceType::getProteinVariantAnnotation, with("UniprotName", ProteinVariantAnnotation::getUniprotAccession, is("Q9BY64")))))),
                hasAnnotation(with("ConsequenceType", VariantAnnotation::getConsequenceTypes, hasItem(with("EnsemblGene", ConsequenceType::getEnsemblGeneId, is("ENSG00000250026"))))),
                hasAnnotation(with("ConsequenceType", VariantAnnotation::getConsequenceTypes, hasItem(with("GeneName", ConsequenceType::getGeneName, is("TMPRSS11B"))))),
                hasAnnotation(with("VariantTraitAssociation", VariantAnnotation::getVariantTraitAssociation, with("Cosmic", VariantTraitAssociation::getCosmic, hasItem(with("MutationId", Cosmic::getMutationId, is("COSM1421316"))))))
        )));
    }

    @Test
    public void testGetAllVariants_xref_rs() {
        testGetAllVariants_rs(ANNOT_XREF.key());
    }

    /**
     * Queries by rs ID through the given query parameter: first a single ID, then two
     * comma-separated IDs.
     *
     * @param key name of the query parameter that receives the ID list
     */
    public void testGetAllVariants_rs(String key) {
        // This test queries a single ID with no more options
        Query query = new Query(key, "rs1137005");
        queryResult = dbAdaptor.get(query, null);
        // Check the count before looking at the first element, so an empty result is reported as such.
        assertEquals(1, queryResult.getNumResults());
        Variant variant = queryResult.first();
        // Expected value first, actual value second.
        assertEquals(Integer.valueOf(1650807), variant.getStart());
        assertThat(variant.getIds(), hasItem("rs1137005"));

        query = new Query(key, "rs1137005,rs150535390");
        queryResult = dbAdaptor.get(query, options);
        assertEquals(2, queryResult.getNumResults());
        queryResult.getResult().forEach(v -> assertThat(v.getIds(), anyOf(hasItem("rs1137005"), hasItem("rs150535390"))));
    }

    /**
     * Checks the consequence-type filter using SO accessions, bare numbers and term names,
     * both as comma-separated lists (checked with {@code anyOf}) and as a semicolon-separated
     * list (checked with {@code allOf}).
     */
    @Test
    public void testGetAllVariants_ct() {
        Query query;

        query = new Query(ANNOT_CONSEQUENCE_TYPE.key(), "SO:0001566");
        queryResult = dbAdaptor.get(query, null);
        assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasSO(hasItem("SO:0001566")))));
        assertThat(queryResult, numResults(gt(0)));
//        assertEquals(911, queryResult.getNumResults());

        query = new Query(ANNOT_CONSEQUENCE_TYPE.key(), "1566");
        queryResult = dbAdaptor.get(query, null);
        assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasSO(hasItem("SO:0001566")))));
        assertThat(queryResult, numResults(gt(0)));
//        assertEquals(911, queryResult.getNumResults());

        query = new Query(ANNOT_CONSEQUENCE_TYPE.key(), "SO:0001566,SO:0001583");
        queryResult = dbAdaptor.get(query, options);
        assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasSO(anyOf(hasItem("SO:0001566"), hasItem("SO:0001583"))))));
        assertThat(queryResult, numResults(gt(0)));
//        assertEquals(947, queryResult.getNumResults());

        query = new Query(ANNOT_CONSEQUENCE_TYPE.key(), ConsequenceTypeMappings.accessionToTerm.get(1566) + ",SO:0001583");
        queryResult = dbAdaptor.get(query, options);
        assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasSO(anyOf(hasItem("SO:0001566"), hasItem("SO:0001583"))))));
        assertThat(queryResult, numResults(gt(0)));

        query = new Query(ANNOT_CONSEQUENCE_TYPE.key(), "1566,SO:0001583");
        queryResult = dbAdaptor.get(query, options);
        assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasSO(anyOf(hasItem("SO:0001566"), hasItem("SO:0001583"))))));
        assertThat(queryResult, numResults(gt(0)));
//        assertEquals(947, queryResult.getNumResults());

        query = new Query(ANNOT_CONSEQUENCE_TYPE.key(), "SO:0001566;SO:0001583");
        queryResult = dbAdaptor.get(query, options);
        assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasSO(allOf(hasItem("SO:0001566"), hasItem("SO:0001583"))))));
        assertThat(queryResult, numResults(gt(0)));
//        assertEquals(396, queryResult.getNumResults());
    }

    /** Checks the consequence-type filter combined with gene and xref filters. */
    @Test
    public void testGetAllVariants_ct_gene() {
        queryGeneCT("BIRC6", "SO:0001566");  // Should return 0 results
        queryGeneCT("BIRC6", "SO:0001583");
        queryGeneCT("DNAJC6", "SO:0001819");
        queryGeneCT("SH2D5", "SO:0001632");
        queryGeneCT("ERMAP,SH2D5", "SO:0001632");

        queryGeneCT("ERMAP,SH2D5", "SO:0001632", new Query()
                .append(ANNOT_XREF.key(), "ERMAP,SH2D5,4:42895308:G:A")
                .append(ANNOT_CONSEQUENCE_TYPE.key(), "SO:0001632"), at("4:42895308:G:A"));

        queryGeneCT("ERMAP,SH2D5", "SO:0001632", new Query()
                .append(GENE.key(), "ERMAP")
                .append(ANNOT_XREF.key(), "SH2D5,rs12345")
                .append(ANNOT_CONSEQUENCE_TYPE.key(), "SO:0001632"), with("id", VariantAnnotation::getId, is("rs1171830")));

        queryGeneCT("ERMAP,SH2D5", "SO:0001632", new Query()
                .append(ANNOT_XREF.key(), "ERMAP,rs1171830,SH2D5,RCV000036856,4:42895308:G:A,COSM3760638")
                .append(ANNOT_CONSEQUENCE_TYPE.key(), "SO:0001632"), anyOf(
                with("id", VariantAnnotation::getId, is("rs1171830")),
                at("4:42895308:G:A")));

        assertThat(dbAdaptor.get(new Query(ANNOT_XREF.key(), "rs1171830").append(ANNOT_CONSEQUENCE_TYPE.key(), "SO:0001566"), null),
                everyResult(allVariants, allOf(
                        with("id", Variant::getId, is("rs1171830")),
                        hasAnnotation(hasSO(hasItem(is("SO:0001566")))))));
    }

    /**
     * Queries by gene plus consequence type. The region matcher passed on never matches,
     * so only the gene branch of the check can accept a variant.
     *
     * @param gene gene name, or comma-separated gene names
     * @param so   SO accession
     */
    private void queryGeneCT(String gene, String so) {
        queryGeneCT(gene, so, new Query().append(ANNOT_CONSEQUENCE_TYPE.key(), so).append(GENE.key(), gene), not(new IsAnything<>()));
    }

    /**
     * Runs {@code query} and checks every result against {@code allVariants}. A variant is
     * accepted when one of its consequence types has both one of the genes and the SO
     * accession, or when its annotation satisfies {@code regionMatcher} and contains the
     * SO accession.
     *
     * @param gene          gene name, or comma-separated gene names
     * @param so            SO accession
     * @param query         query to execute
     * @param regionMatcher alternative condition for variants matched by something other than the gene
     */
    private void queryGeneCT(String gene, String so, Query query, Matcher<VariantAnnotation> regionMatcher) {
        logger.info(query.toJson());
        queryResult = dbAdaptor.get(query, null);
        logger.info(" -> numResults " + queryResult.getNumResults());

        Matcher<String> geneMatcher;
        List<String> genes = Arrays.asList(gene.split(","));
        if (gene.contains(",")) {
            geneMatcher = anyOf(genes.stream().map(CoreMatchers::is).collect(Collectors.toList()));
        } else {
            geneMatcher = is(gene);
        }
        assertThat(queryResult, everyResult(allVariants, hasAnnotation(
                anyOf(
                        allOf(
                                hasAnyGeneOf(genes),
                                withAny("consequence type", VariantAnnotation::getConsequenceTypes,
                                        allOf(
                                                with("gene", ConsequenceType::getGeneName, geneMatcher),
                                                withAny("SO", ConsequenceType::getSequenceOntologyTerms,
                                                        with("accession", SequenceOntologyTerm::getAccession, is(so))))))
                        ,
                        allOf(
                                regionMatcher,
//                                not(hasAnyGeneOf(genes)),
                                hasSO(hasItem(so))
                        )))));
    }

    @Test
    public void
testGetAllVariants_transcriptionAnnotationFlags() { //ANNOT_TRANSCRIPTION_FLAGS Query query; Multiset<String> flags = HashMultiset.create(); Set<String> flagsInVariant = new HashSet<>(); for (Variant variant : allVariants.getResult()) { if (variant.getAnnotation().getConsequenceTypes() != null) { for (ConsequenceType consequenceType : variant.getAnnotation().getConsequenceTypes()) { if (consequenceType.getTranscriptAnnotationFlags() != null) { flagsInVariant.addAll(consequenceType.getTranscriptAnnotationFlags()); } } } flags.addAll(flagsInVariant); flagsInVariant.clear(); } System.out.println(flags); assertThat(flags, hasItem("basic")); assertThat(flags, hasItem("CCDS")); assertThat(flags, hasItem("mRNA_start_NF")); assertThat(flags, hasItem("mRNA_end_NF")); assertThat(flags, hasItem("cds_start_NF")); assertThat(flags, hasItem("cds_end_NF")); for (String flag : flags.elementSet()) { System.out.println(flag + ", " + flags.count(flag)); query = new Query(ANNOT_TRANSCRIPTION_FLAGS.key(), flag); queryResult = dbAdaptor.get(query, null); assertEquals(flags.count(flag), queryResult.getNumResults()); } } @Test public void testGetAllVariants_geneTraits() { //ANNOT_GENE_TRAITS_ID //ANNOT_GENE_TRAITS_NAME Query query; Map<String, Integer> idsMap = new HashMap<>(); Map<String, Integer> namesMap = new HashMap<>(); Map<String, Integer> hposMap = new HashMap<>(); for (Variant variant : allVariants.getResult()) { Set<String> ids = new HashSet<>(); Set<String> names = new HashSet<>(); Set<String> hpos = new HashSet<>(); if (variant.getAnnotation().getGeneTraitAssociation() != null) { for (GeneTraitAssociation geneTrait : variant.getAnnotation().getGeneTraitAssociation()) { ids.add(geneTrait.getId()); names.add(geneTrait.getName()); if (StringUtils.isNotEmpty(geneTrait.getHpo())) { hpos.add(geneTrait.getHpo()); } } } for (String id : ids) { idsMap.put(id, idsMap.getOrDefault(id, 0) + 1); } for (String name : names) { namesMap.put(name, namesMap.getOrDefault(name, 0) + 1); } for 
(String hpo : hpos) { hposMap.put(hpo, hposMap.getOrDefault(hpo, 0) + 1); } } System.out.println(idsMap.size()); System.out.println(namesMap.size()); System.out.println(hposMap.size()); // for (Map.Entry<String, Integer> entry : namesMap.entrySet()) { // query = new Query(VariantDBAdaptor.VariantQueryParams.ANNOT_GENE_TRAITS_NAME.key(), "~="+entry.getKey()); // queryResult = dbAdaptor.get(query, null); // assertEquals(entry.getKey(), entry.getValue().intValue(), queryResult.getNumResults()); // } int i = 0; for (Map.Entry<String, Integer> entry : idsMap.entrySet()) { query = new Query(ANNOT_GENE_TRAITS_ID.key(), entry.getKey()); queryResult = dbAdaptor.get(query, null); assertEquals(entry.getValue().intValue(), queryResult.getNumResults()); if (i++ == QUERIES_LIM) { break; } } i = 0; for (Map.Entry<String, Integer> entry : hposMap.entrySet()) { query = new Query(ANNOT_HPO.key(), entry.getKey()); queryResult = dbAdaptor.get(query, null); assertEquals(entry.getKey(), entry.getValue().intValue(), queryResult.getNumResults()); if (i++ == QUERIES_LIM) { break; } } } @Test public void testGoQuery() { // MMP26 -> GO:0004222,GO:0005578,GO:0006508 // CEBPA -> GO:0000050 int totalResults = 0; Collection<String> genes; Query query; QueryResult<Variant> result; query = new Query(ANNOT_GO.key(), "GO:XXXXXXX"); result = dbAdaptor.get(query, null); assertEquals(0, result.getNumResults()); query = new Query(ANNOT_GO.key(), "GO:0006508"); result = dbAdaptor.get(query, null); System.out.println("numResults: " + result.getNumResults()); for (Variant variant : result.getResult()) { System.out.println(variant); } assertNotEquals(0, result.getNumResults()); genes = dbAdaptor.getDBAdaptorUtils().getGenesByGo(query.getAsStringList(ANNOT_GO.key())); assertThat(result, everyResult(hasAnnotation(hasAnyGeneOf(genes)))); totalResults = result.getNumResults(); query = new Query(ANNOT_GO.key(), "GO:0000050"); result = dbAdaptor.get(query, null); System.out.println("numResults: " + 
result.getNumResults()); for (Variant variant : result.getResult()) { System.out.println(variant); } genes = dbAdaptor.getDBAdaptorUtils().getGenesByGo(query.getAsStringList(ANNOT_GO.key())); assertThat(result, everyResult(hasAnnotation(hasAnyGeneOf(genes)))); assertNotEquals(0, result.getNumResults()); totalResults += result.getNumResults(); query = new Query(ANNOT_GO.key(), "GO:0006508,GO:0000050"); result = dbAdaptor.get(query, null); System.out.println("numResults: " + result.getNumResults()); for (Variant variant : result.getResult()) { System.out.println(variant); } genes = dbAdaptor.getDBAdaptorUtils().getGenesByGo(query.getAsStringList(ANNOT_GO.key())); assertThat(result, everyResult(hasAnnotation(hasAnyGeneOf(genes)))); assertNotEquals(0, result.getNumResults()); assertEquals(result.getNumResults(), totalResults); } @Test public void testExpressionQuery() { Collection<String> genes; Query query = new Query(ANNOT_EXPRESSION.key(), "non_existing_tissue"); QueryResult<Variant> result = dbAdaptor.get(query, null); assertEquals(0, result.getNumResults()); for (String tissue : Arrays.asList("skin", "brain")) { query = new Query(ANNOT_EXPRESSION.key(), tissue); result = dbAdaptor.get(query, null); System.out.println("result.getNumResults() = " + result.getNumResults()); assertNotEquals(0, result.getNumResults()); assertNotEquals(allVariants.getNumResults(), result.getNumResults()); genes = dbAdaptor.getDBAdaptorUtils().getGenesByExpression(query.getAsStringList(ANNOT_EXPRESSION.key())); assertThat(result, everyResult(hasAnnotation(hasAnyGeneOf(genes)))); } } @Test public void testGetAllVariants_proteinKeywords() { //ANNOT_PROTEIN_KEYWORDS Query query; Map<String, Integer> keywords = new HashMap<>(); int combinedKeywordsOr = 0; int combinedKeywordsAnd = 0; int combinedKeywordsAndNot = 0; for (Variant variant : allVariants.getResult()) { Set<String> keywordsInVariant = new HashSet<>(); if (variant.getAnnotation().getConsequenceTypes() != null) { for 
(ConsequenceType consequenceType : variant.getAnnotation().getConsequenceTypes()) { if (consequenceType.getProteinVariantAnnotation() != null && consequenceType.getProteinVariantAnnotation().getKeywords() != null) { keywordsInVariant.addAll(consequenceType.getProteinVariantAnnotation().getKeywords()); } } } for (String flag : keywordsInVariant) { keywords.put(flag, keywords.getOrDefault(flag, 0) + 1); } if (keywordsInVariant.contains("Complete proteome") || keywordsInVariant.contains("Transmembrane helix")) { combinedKeywordsOr++; } if (keywordsInVariant.contains("Complete proteome") && keywordsInVariant.contains("Transmembrane helix")) { combinedKeywordsAnd++; } if (keywordsInVariant.contains("Complete proteome") && !keywordsInVariant.contains("Transmembrane helix")) { combinedKeywordsAndNot++; } } assertTrue(combinedKeywordsOr > 0); assertTrue(combinedKeywordsAnd > 0); assertTrue(combinedKeywordsAndNot > 0); query = new Query(ANNOT_PROTEIN_KEYWORDS.key(), "Complete proteome,Transmembrane helix"); assertEquals(combinedKeywordsOr, dbAdaptor.count(query).first().intValue()); query = new Query(ANNOT_PROTEIN_KEYWORDS.key(), "Complete proteome;Transmembrane helix"); assertEquals(combinedKeywordsAnd, dbAdaptor.count(query).first().intValue()); query = new Query(ANNOT_PROTEIN_KEYWORDS.key(), "Complete proteome;!Transmembrane helix"); assertEquals(combinedKeywordsAndNot, dbAdaptor.count(query).first().intValue()); int i = 0; for (Map.Entry<String, Integer> entry : keywords.entrySet()) { System.out.println(entry); query = new Query(ANNOT_PROTEIN_KEYWORDS.key(), entry.getKey()); queryResult = dbAdaptor.get(query, null); assertEquals(entry.getValue().intValue(), queryResult.getNumResults()); if (++i > QUERIES_LIM) { break; } } } @Test public void testGetAllVariants_drugs() { //ANNOT_DRUG Query query; Map<String, Integer> drugs = new HashMap<>(); for (Variant variant : allVariants.getResult()) { Set<String> drugsInVariant = new HashSet<>(); for (GeneDrugInteraction 
drugInteraction : variant.getAnnotation().getGeneDrugInteraction()) { drugsInVariant.add(drugInteraction.getDrugName()); } for (String flag : drugsInVariant) { drugs.put(flag, drugs.getOrDefault(flag, 0) + 1); } } int i = 0; for (Map.Entry<String, Integer> entry : drugs.entrySet()) { if (entry.getKey().contains(",")) { continue; } query = new Query(ANNOT_DRUG.key(), entry.getKey()); queryResult = dbAdaptor.get(query, null); assertEquals(entry.getKey(), entry.getValue().intValue(), queryResult.getNumResults()); if (++i > QUERIES_LIM) { break; } } } @Test public void testGetAllVariants_polyphenSift() { //POLYPHEN //SIFT Map<String, Matcher<Double>> queries = new HashMap<>(); queries.put("<0.101", lt(0.101)); queries.put("<0.201", lt(0.201)); queries.put("<0.501", lt(0.501)); queries.put("<0.901", lt(0.901)); queries.put(">0.101", gt(0.101)); queries.put(">0.201", gt(0.201)); queries.put(">0.501", gt(0.501)); queries.put(">0.901", gt(0.901)); for (Map.Entry<String, Matcher<Double>> entry : queries.entrySet()) { String q = entry.getKey(); Matcher<Double> m = entry.getValue(); System.out.println("q = " + q + " -> " + m); queryResult = dbAdaptor.get(new Query(ANNOT_SIFT.key(), q), null); assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasAnySift(m)))); queryResult = dbAdaptor.get(new Query(ANNOT_POLYPHEN.key(), q), null); assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasAnyPolyphen(m)))); } Query query = new Query(ANNOT_POLYPHEN.key(), "sift>0.5"); thrown.expect(VariantQueryException.class); dbAdaptor.get(query, null); // for (Map.Entry<Double, Integer> entry : polyphen.entrySet()) { // query = new Query(VariantDBAdaptor.VariantQueryParams.SIFT.key(), entry.getKey()); // queryResult = dbAdaptor.get(query, null); // assertEquals(entry.getKey(), entry.getValue(), queryResult.getNumResults()); // } } @Test public void testGetAlVariants_polyphenSiftDescription() { for (String p : Arrays.asList("benign", "possibly damaging", "probably 
damaging", "unknown")) { queryResult = dbAdaptor.get(new Query(ANNOT_POLYPHEN.key(), p), null); assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasAnyPolyphenDesc(equalTo(p))))); } for (String s : Arrays.asList("deleterious", "tolerated")) { queryResult = dbAdaptor.get(new Query(ANNOT_SIFT.key(), s), null); assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasAnySiftDesc(equalTo(s))))); } } @Test public void testGetAllVariants_functionalScore() { //ANNOT_FUNCTIONAL_SCORE assertTrue(countFunctionalScore("cadd_scaled", allVariants, s -> s > 5.0) > 0); System.out.println("countFunctionalScore(\"cadd_scaled\", allVariants, s -> s > 5.0) = " + countFunctionalScore("cadd_scaled", allVariants, s -> s > 5.0)); checkFunctionalScore(new Query(ANNOT_FUNCTIONAL_SCORE.key(), "cadd_scaled>5"), s -> s > 5.0, "cadd_scaled"); checkFunctionalScore(new Query(ANNOT_FUNCTIONAL_SCORE.key(), "cadd_raw<0.5"), s1 -> s1 < 0.5, "cadd_raw"); checkFunctionalScore(new Query(ANNOT_FUNCTIONAL_SCORE.key(), "cadd_scaled<=0.5"), s -> s <= 0.5, "cadd_scaled"); } @Test public void testGetAllVariants_functionalScore_wrongSource() { String value = "cad<=0.5"; VariantQueryException expected = VariantQueryException.malformedParam(ANNOT_FUNCTIONAL_SCORE, value); thrown.expect(expected.getClass()); thrown.expectMessage(expected.getMessage()); dbAdaptor.get(new Query(ANNOT_FUNCTIONAL_SCORE.key(), value), null); } @Test public void testGetAllVariants_functionalScore_wrongValue() { String value = "cadd_scaled<=A"; VariantQueryException expected = VariantQueryException.malformedParam(ANNOT_FUNCTIONAL_SCORE, value); thrown.expect(expected.getClass()); thrown.expectMessage(expected.getMessage()); dbAdaptor.get(new Query(ANNOT_FUNCTIONAL_SCORE.key(), value), null); } @Test public void testGetAllVariants_conservationScore() { //ANNOT_CONSERVATION long phastCons = countConservationScore("phastCons", allVariants, s -> s > 0.5); assertTrue(phastCons > 0); checkConservationScore(new 
Query(ANNOT_CONSERVATION.key(), "phylop>0.5"), s -> s > 0.5, "phylop"); checkConservationScore(new Query(ANNOT_CONSERVATION.key(), "phastCons<0.5"), s1 -> s1 < 0.5, "phastCons"); checkConservationScore(new Query(ANNOT_CONSERVATION.key(), "gerp<=0.5"), s -> s <= 0.5, "gerp"); checkScore(new Query(ANNOT_CONSERVATION.key(), "gerp<=0.5,phastCons<0.5"), ((Predicate<List<Score>>) scores -> scores.stream().anyMatch(s -> s.getSource().equalsIgnoreCase("gerp") && s.getScore() <= 0.5)) .or(scores -> scores.stream().anyMatch(s -> s.getSource().equalsIgnoreCase("phastCons") && s.getScore() < 0.5)), VariantAnnotation::getConservation); checkScore(new Query(ANNOT_CONSERVATION.key(), "gerp<=0.5;phastCons<0.5"), ((Predicate<List<Score>>) scores -> scores.stream().anyMatch(s -> s.getSource().equalsIgnoreCase("gerp") && s.getScore() <= 0.5)) .and(scores -> scores.stream().anyMatch(s -> s.getSource().equalsIgnoreCase("phastCons") && s.getScore() < 0.5)), VariantAnnotation::getConservation); } @Test public void testGetAllVariants_conservationScoreWrongSource() { VariantQueryException e = VariantQueryException.malformedParam(ANNOT_CONSERVATION, "phast<0.5"); thrown.expect(e.getClass()); thrown.expectMessage(e.getMessage()); dbAdaptor.get(new Query(ANNOT_CONSERVATION.key(), "phast<0.5"), null); } @Test public void testGetAllVariants_conservationScoreWrongValue() { VariantQueryException e = VariantQueryException.malformedParam(ANNOT_CONSERVATION, "phastCons<a"); thrown.expect(e.getClass()); thrown.expectMessage(e.getMessage()); dbAdaptor.get(new Query(ANNOT_CONSERVATION.key(), "phastCons<a"), null); } public void checkConservationScore(Query query, Predicate<Double> doublePredicate, String source) { checkScore(query, doublePredicate, source, VariantAnnotation::getConservation); } public void checkFunctionalScore(Query query, Predicate<Double> doublePredicate, String source) { checkScore(query, doublePredicate, source, VariantAnnotation::getFunctionalScore); } public void checkScore(Query 
query, Predicate<Double> doublePredicate, String source, Function<VariantAnnotation, List<Score>> mapper) { checkScore(query, scores -> scores.stream().anyMatch(score -> score.getSource().equalsIgnoreCase(source) && doublePredicate.test(score.getScore())), mapper); } public void checkScore(Query query, Predicate<List<Score>> scorePredicate, Function<VariantAnnotation, List<Score>> mapper) { QueryResult<Variant> result = dbAdaptor.get(query, null); long expected = countScore(allVariants, scorePredicate, mapper); long actual = countScore(result, scorePredicate, mapper); assertTrue("Expecting a query returning some value.", expected > 0); assertEquals(expected, result.getNumResults()); assertEquals(expected, actual); } private long countConservationScore(String source, QueryResult<Variant> variantQueryResult, Predicate<Double> doublePredicate) { return countScore(source, variantQueryResult, doublePredicate, VariantAnnotation::getConservation); } private long countFunctionalScore(String source, QueryResult<Variant> variantQueryResult, Predicate<Double> doublePredicate) { return countScore(source, variantQueryResult, doublePredicate, VariantAnnotation::getFunctionalScore); } private long countScore(String source, QueryResult<Variant> variantQueryResult, Predicate<Double> doublePredicate, Function<VariantAnnotation, List<Score>> mapper) { return countScore(variantQueryResult, scores -> scores.stream().anyMatch(score -> score.getSource().equalsIgnoreCase(source) && doublePredicate.test(score.getScore())), mapper); } private long countScore(QueryResult<Variant> variantQueryResult, Predicate<List<Score>> predicate, Function<VariantAnnotation, List<Score>> mapper) { long c = 0; for (Variant variant : variantQueryResult.getResult()) { List<Score> list = mapper.apply(variant.getAnnotation()); if (list != null) { if (predicate.test(list)) { c++; } } } return c; } @Test public void testGetSortedVariantsDefault() { QueryOptions options = new QueryOptions(QueryOptions.SORT, true); 
VariantDBIterator iterator = dbAdaptor.iterator(null, options); Variant next, prev; prev = iterator.next(); while (iterator.hasNext()) { next = iterator.next(); if (next.getChromosome().equals(prev.getChromosome())) { assertTrue(prev + " <= " + next, prev.getStart() <= next.getStart()); } prev = next; } } @Test public void testGetSortedVariantsAscending() { QueryOptions options = new QueryOptions(QueryOptions.SORT, true).append(QueryOptions.ORDER, QueryOptions.ASCENDING); VariantDBIterator iterator = dbAdaptor.iterator(null, options); Variant next, prev; prev = iterator.next(); while (iterator.hasNext()) { next = iterator.next(); if (next.getChromosome().equals(prev.getChromosome())) { assertTrue(prev + " <= " + next, prev.getStart() <= next.getStart()); } prev = next; } } @Test public void testGetSortedVariantsReverse() { QueryOptions options = new QueryOptions(QueryOptions.SORT, true).append(QueryOptions.ORDER, QueryOptions.DESCENDING); VariantDBIterator iterator = dbAdaptor.iterator(null, options); Variant next, prev; prev = iterator.next(); while (iterator.hasNext()) { next = iterator.next(); if (next.getChromosome().equals(prev.getChromosome())) { assertTrue(prev + " >= " + next, prev.getStart() >= next.getStart()); } prev = next; } } @Test public void testGetAllVariants_region() { Query query = new Query(REGION.key(), "1:13910417-13910417,1:165389129-165389129"); queryResult = dbAdaptor.get(query, options); assertEquals(2, queryResult.getNumResults()); query = new Query(REGION.key(), Arrays.asList("1:13910417-13910417", "1:165389129-165389129")); queryResult = dbAdaptor.get(query, options); assertEquals(2, queryResult.getNumResults()); query = new Query(REGION.key(), Arrays.asList(Region.parseRegion("1:13910417-13910417"), Region.parseRegion("1:165389129-165389129"))); queryResult = dbAdaptor.get(query, options); assertEquals(2, queryResult.getNumResults()); options.put(QueryOptions.SORT, true); query = new Query(REGION.key(), "1:14000000-160000000"); 
queryResult = dbAdaptor.get(query, options); assertThat(queryResult, everyResult(allVariants, overlaps(new Region("1:14000000-160000000")))); int lastStart = 0; for (Variant variant : queryResult.getResult()) { assertEquals("1", variant.getChromosome()); assertTrue(lastStart <= variant.getStart()); lastStart = variant.getStart(); } // Basic queries checkRegion(new Region("1:1000000-2000000")); checkRegion(new Region("1:10000000-20000000")); checkRegion(new Region("1:14000000-160000000")); checkRegion(new Region("1"), new Region("1")); checkRegion(new Region("ch1"), new Region("1")); checkRegion(new Region("chr1"), new Region("1")); checkRegion(new Region("chrm1"), new Region("1")); checkRegion(new Region("chrom1"), new Region("1")); checkRegion(new Region("2")); checkRegion(new Region("X")); checkRegion(new Region("30")); checkRegion(new Region("3:1-200000000")); checkRegion(new Region("X:1-200000000")); // Exactly in the limits checkRegion(new Region("20:238441-7980390")); // Just inside the limits checkRegion(new Region("20:238440-7980391")); // Just outside the limits checkRegion(new Region("20:238441-7980389")); checkRegion(new Region("20:238442-7980390")); checkRegion(new Region("20:238442-7980389")); query = new Query(CHROMOSOME.key(), "chr2"); queryResult = dbAdaptor.get(query, options); assertThat(queryResult, everyResult(allVariants, overlaps(new Region("2")))); } public void checkRegion(Region region) { checkRegion(region, region); } public void checkRegion(Region queryRegion, Region overlappingRegion) { queryResult = dbAdaptor.get(new Query(REGION.key(), queryRegion), null); assertThat(queryResult, everyResult(allVariants, overlaps(overlappingRegion))); } @Test public void testGetAllVariants_genes() { Query query = new Query(GENE.key(), "FLG-AS1"); QueryResult<Variant> result = dbAdaptor.get(query, new QueryOptions()); assertThat(result, everyResult(allVariants, hasAnnotation(hasGenes(Collections.singletonList("FLG-AS1"))))); for (Variant variant : 
result.getResult()) { System.out.println("variant = " + variant); } query = new Query(GENE.key(), "WRONG_GENE"); result = dbAdaptor.get(query, new QueryOptions()); assertThat(result, everyResult(allVariants, hasAnnotation(hasGenes(Collections.singletonList("WRONG_GENE"))))); assertThat(result, numResults(is(0))); for (Variant variant : result.getResult()) { System.out.println("variant = " + variant); } } @Test public void testGetAllVariants_studies() { Query query = new Query(STUDIES.key(), studyConfiguration.getStudyName()); long numResults = dbAdaptor.count(query).first(); assertEquals(allVariants.getNumResults(), numResults); query = new Query(STUDIES.key(), studyConfiguration.getStudyId()); numResults = dbAdaptor.count(query).first(); assertEquals(allVariants.getNumResults(), numResults); query = new Query(STUDIES.key(), "!" + studyConfiguration.getStudyId()); numResults = dbAdaptor.count(query).first(); assertEquals(0, numResults); query = new Query(STUDIES.key(), "!" + studyConfiguration.getStudyName()); numResults = dbAdaptor.count(query).first(); assertEquals(0, numResults); } @Test public void testGetAllVariants_files() { Query query = new Query(FILES.key(), 6); long numResults = dbAdaptor.count(query).first(); assertEquals(NUM_VARIANTS, numResults); query = new Query(FILES.key(), 6).append(STUDIES.key(), studyConfiguration.getStudyId()); numResults = dbAdaptor.count(query).first(); assertEquals(NUM_VARIANTS, numResults); query = new Query().append(STUDIES.key(), studyConfiguration.getStudyId()); numResults = dbAdaptor.count(query).first(); assertEquals(NUM_VARIANTS, numResults); query = new Query(FILES.key(), -1); numResults = dbAdaptor.count(query).first(); assertEquals("There is no file with ID -1", 0, numResults); } @Test public void testGetAllVariants_filter() { // FILTER Query query = new Query(FILTER.key(), "PASS"); long numResults = dbAdaptor.count(query).first(); assertEquals(NUM_VARIANTS, numResults); query.append(FILTER.key(), "NO_PASS"); 
assertEquals(0, dbAdaptor.count(query).first().longValue()); // FILTER+FILE query = new Query(FILES.key(), 6).append(FILTER.key(), "PASS"); numResults = dbAdaptor.count(query).first(); assertEquals(NUM_VARIANTS, numResults); query.append(FILTER.key(), "NO_PASS"); assertEquals(0, dbAdaptor.count(query).first().longValue()); // FILTER+STUDY query = new Query(STUDIES.key(), studyConfiguration.getStudyId()).append(FILTER.key(), "PASS"); numResults = dbAdaptor.count(query).first(); assertEquals(NUM_VARIANTS, numResults); query.append(FILTER.key(), "NO_PASS"); assertEquals(0, dbAdaptor.count(query).first().longValue()); // FILTER+FILE+STUDY query = new Query(FILES.key(), 6).append(STUDIES.key(), studyConfiguration.getStudyId()).append(FILTER.key(), "PASS"); numResults = dbAdaptor.count(query).first(); assertEquals(NUM_VARIANTS, numResults); query.append(FILTER.key(), "NO_PASS"); assertEquals(0, dbAdaptor.count(query).first().longValue()); } @Test public void testGetAllVariants_returned_samples() { checkSamplesData("NA19600"); checkSamplesData("NA19660"); checkSamplesData("NA19661"); checkSamplesData("NA19685"); checkSamplesData("NA19600,NA19685"); checkSamplesData("NA19685,NA19600"); checkSamplesData("NA19660,NA19661,NA19600"); checkSamplesData(null); checkSamplesData(VariantDBAdaptorUtils.ALL); checkSamplesData(VariantDBAdaptorUtils.NONE); } public void checkSamplesData(String returnedSamples) { Query query = new Query(SAMPLES_METADATA.key(), true); QueryOptions options = new QueryOptions(QueryOptions.SORT, true); //no limit; query.put(RETURNED_SAMPLES.key(), returnedSamples); VariantQueryResult<Variant> queryResult = dbAdaptor.get(query, options); List<String> samplesName; if (returnedSamples == null || returnedSamples.equals(VariantDBAdaptorUtils.ALL)) { samplesName = new ArrayList<>(StudyConfiguration.getSortedIndexedSamplesPosition(studyConfiguration).keySet()); } else if (returnedSamples.equals(VariantDBAdaptorUtils.NONE)) { samplesName = Collections.emptyList(); } 
else { samplesName = query.getAsStringList(VariantDBAdaptor.VariantQueryParams.RETURNED_SAMPLES.key()); } Map<String, List<String>> expectedSamples = Collections.singletonMap(studyConfiguration.getStudyName(), samplesName); Iterator<Variant> it_1 = allVariants.getResult().iterator(); Iterator<Variant> it_2 = queryResult.getResult().iterator(); assertEquals(allVariants.getResult().size(), queryResult.getResult().size()); LinkedHashMap<String, Integer> samplesPosition1 = null; LinkedHashMap<String, Integer> samplesPosition2 = null; for (int i = 0; i < queryResult.getNumResults(); i++) { Variant variant1 = it_1.next(); Variant variant2 = it_2.next(); assertEquals(variant1.toString(), variant2.toString()); assertEquals(expectedSamples, queryResult.getSamples()); if (samplesPosition1 == null) { samplesPosition1 = variant1.getStudy(studyConfiguration.getStudyName()).getSamplesPosition(); } if (samplesPosition2 == null) { samplesPosition2 = variant2.getStudy(studyConfiguration.getStudyName()).getSamplesPosition(); assertEquals(samplesName, new ArrayList<>(samplesPosition2.keySet())); } assertSame(samplesPosition1, variant1.getStudy(studyConfiguration.getStudyName()).getSamplesPosition()); assertSame(samplesPosition2, variant2.getStudy(studyConfiguration.getStudyName()).getSamplesPosition()); for (String sampleName : samplesName) { String gt1 = variant1.getStudy(studyConfiguration.getStudyName()).getSampleData(sampleName, "GT"); String gt2 = variant2.getStudy(studyConfiguration.getStudyName()).getSampleData(sampleName, "GT"); assertEquals(sampleName + " " + variant1.getChromosome() + ":" + variant1.getStart(), gt1, gt2); } } } @Test public void testIterator() { int numVariants = 0; Query query = new Query(RETURNED_FILES.key(), 6); for (VariantDBIterator iterator = dbAdaptor.iterator(query, new QueryOptions()); iterator.hasNext(); ) { Variant variant = iterator.next(); numVariants++; StudyEntry entry = variant.getStudiesMap().entrySet().iterator().next().getValue(); // 
assertEquals("6", entry.getFileId()); assertEquals(studyConfiguration.getStudyName(), entry.getStudyId()); assertEquals(studyConfiguration.getSampleIds().keySet(), entry.getSamplesName()); } assertEquals(NUM_VARIANTS, numVariants); } @Test public void testGetAllVariants_genotypes() { Integer na19600 = studyConfiguration.getSampleIds().get("NA19600"); Integer na19685 = studyConfiguration.getSampleIds().get("NA19685"); Query query = new Query(GENOTYPE.key(), na19600 + ":1|1"); queryResult = dbAdaptor.get(query, new QueryOptions()); assertEquals(282, queryResult.getNumTotalResults()); queryResult.getResult().forEach(v -> v.getStudiesMap().forEach((s, vse) -> assertEquals("1|1", vse.getSampleData("NA19600", "GT") ))); query = new Query(GENOTYPE.key(), STUDY_NAME + ":NA19600:1|1"); queryResult = dbAdaptor.get(query, new QueryOptions()); assertEquals(282, queryResult.getNumTotalResults()); queryResult.getResult().forEach(v -> v.getStudiesMap().forEach((s, vse) -> assertEquals("1|1", vse.getSampleData("NA19600", "GT") ))); query = new Query(GENOTYPE.key(), "NA19600:1|1") .append(STUDIES.key(), STUDY_NAME); queryResult = dbAdaptor.get(query, new QueryOptions()); assertEquals(282, queryResult.getNumTotalResults()); queryResult.getResult().forEach(v -> v.getStudiesMap().forEach((s, vse) -> assertEquals("1|1", vse.getSampleData("NA19600", "GT") ))); query = new Query(GENOTYPE.key(), "NA19600:1|1"); queryResult = dbAdaptor.get(query, new QueryOptions()); assertEquals(282, queryResult.getNumTotalResults()); queryResult.getResult().forEach(v -> v.getStudiesMap().forEach((s, vse) -> assertEquals("1|1", vse.getSampleData("NA19600", "GT") ))); //get for each genotype. 
Should return all variants query = new Query(GENOTYPE.key(), na19600 + ":0|0,0|1,1|0,1|1,./."); long numResults = dbAdaptor.count(null).first(); assertEquals(NUM_VARIANTS, numResults); //Get all missing genotypes for sample na19600 query = new Query(GENOTYPE.key(), na19600 + ":./."); queryResult = dbAdaptor.get(query, new QueryOptions()); assertEquals(9, queryResult.getNumTotalResults()); queryResult.getResult().forEach(v -> v.getStudiesMap().forEach((s, vse) -> assertEquals("./.", vse.getSampleData("NA19600", "GT") ))); // //This works, but is incorrect. Better use "./." // query = new Query(GENOTYPE.key(), na19600 + ":-1/-1"); // queryResult = dbAdaptor.get(query, new QueryOptions()); // assertEquals(9, queryResult.getNumTotalResults()); // queryResult.getResult().forEach(v -> v.getStudiesMap().forEach((s, vse) -> assertEquals("./.", vse.getSampleData("NA19600", "GT") // ))); //Get all variants with 1|1 for na19600 and 0|0 or 1|0 for na19685 query = new Query(GENOTYPE.key(), na19600 + ":1|1" + ";" + na19685 + ":0|0,1|0"); queryResult = dbAdaptor.get(query, new QueryOptions()); assertEquals(14, queryResult.getNumTotalResults()); queryResult.getResult().forEach(v -> v.getStudiesMap().forEach((s, vse) -> { assertEquals("1|1", vse.getSampleData("NA19600", "GT")); assertTrue(Arrays.asList("0|0", "1|0").contains(vse.getSampleData("NA19685", "GT"))); })); } @Test public void testGetAllVariants_negatedGenotypes() { Query query; Integer na19600 = studyConfiguration.getSampleIds().get("NA19600"); Integer na19685 = studyConfiguration.getSampleIds().get("NA19685"); //Get all variants with not 1|1 for na19600 query = new Query(GENOTYPE.key(), na19600 + ":!1|1"); queryResult = dbAdaptor.get(query, new QueryOptions()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withSampleData("NA19600", "GT", not(is("1|1")))))); //Get all variants with not 0/0 for na19600 query = new Query(GENOTYPE.key(), na19600 + ":!0/0"); queryResult = dbAdaptor.get(query, new 
QueryOptions()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withSampleData("NA19600", "GT", not(is("0/0")))))); //Get all variants with not 0/0 or 0|1 for na19600 query = new Query(GENOTYPE.key(), na19600 + ":!0/0,!0|1"); queryResult = dbAdaptor.get(query, new QueryOptions()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withSampleData("NA19600", "GT", allOf(not(is("0/0")), not(is("0|1"))))))); query = new Query(GENOTYPE.key(), na19600 + ":!0/0,0|1"); queryResult = dbAdaptor.get(query, new QueryOptions()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withSampleData("NA19600", "GT", is("0|1"))))); //Get all variants with 1|1 for na19600 and 0|0 or 1|0 for na19685 query = new Query(GENOTYPE.key(), na19600 + ":1|1" + ';' + na19685 + ":!0|0,!1|0"); queryResult = dbAdaptor.get(query, new QueryOptions()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, allOf( withSampleData("NA19600", "GT", is("1|1")), withSampleData("NA19685", "GT", allOf(not(is("0/0")), not(is("1|0")))))))); } @Test public void testGetAllVariants_samples() { Query query; QueryResult<Variant> allVariants = dbAdaptor.get(new Query(RETURNED_SAMPLES.key(), "NA19600"), new QueryOptions()); query = new Query(SAMPLES.key(), "NA19600"); queryResult = dbAdaptor.get(query, new QueryOptions()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withSampleData("NA19600", "GT", containsString("1"))))); allVariants = dbAdaptor.get(new Query(RETURNED_SAMPLES.key(), "NA19685"), new QueryOptions()); query = new Query(SAMPLES.key(), "NA19685"); queryResult = dbAdaptor.get(query, new QueryOptions()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withSampleData("NA19685", "GT", containsString("1"))))); query = new Query(STUDIES.key(), studyConfiguration.getStudyName()).append(SAMPLES.key(), "NA19685"); queryResult = dbAdaptor.get(query, new QueryOptions()); 
assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withSampleData("NA19685", "GT", containsString("1"))))); allVariants = dbAdaptor.get(new Query(RETURNED_SAMPLES.key(), "NA19600,NA19685"), new QueryOptions()); query = new Query(SAMPLES.key(), "NA19600,NA19685"); queryResult = dbAdaptor.get(query, new QueryOptions()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, allOf( withSampleData("NA19600", "GT", containsString("1")), withSampleData("NA19685", "GT", containsString("1")))))); query = new Query(SAMPLES.key(), "NA19600").append(GENOTYPE.key(), "NA19685:0|0").append(RETURNED_SAMPLES.key(), "NA19600,NA19685"); queryResult = dbAdaptor.get(query, new QueryOptions()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, allOf( withSampleData("NA19600", "GT", containsString("1")), withSampleData("NA19685", "GT", is("0|0")))))); } @Test public void testGetAllVariants_genotypes_wrong_values() { Query query = new Query(GENOTYPE.key(), "WRONG_SAMPLE:1|1"); thrown.expect(VariantQueryException.class); queryResult = dbAdaptor.get(query, new QueryOptions()); } @Test public void groupBy_gene_limit_0() throws Exception { QueryResult queryResult = dbAdaptor.groupBy(new Query(), "gene", new QueryOptions("limit", 0).append("count", true)); assertTrue(queryResult.getNumResults() > 0); } @Test public void groupBy_gene() throws Exception { int limit = 10; QueryResult<Map<String, Object>> queryResult_count = dbAdaptor.groupBy(new Query(), "gene", new QueryOptions("limit", limit) .append("count", true)); Map<String, Long> counts = queryResult_count.getResult().stream().collect(Collectors.toMap(o -> ((Map<String, Object>) o).get ("id").toString(), o -> Long.parseLong(((Map<String, Object>) o).get("count").toString()))); QueryResult<Map<String, Object>> queryResult_group = dbAdaptor.groupBy(new Query(), "gene", new QueryOptions("limit", limit)); // System.out.println(new 
ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(queryResult_group)); System.out.println(new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(queryResult_count)); assertEquals(limit, queryResult_count.getNumResults()); assertEquals(limit, queryResult_group.getNumResults()); for (Map<String, Object> resultMap : queryResult_group.getResult()) { System.out.println("resultMap = " + resultMap); String id = resultMap.get("id").toString(); assertTrue("Should contain key " + id, counts.containsKey(id)); assertEquals("Size and count for id (" + id + ")are different", ((List) resultMap.get("values")).size(), counts.get(id) .intValue()); QueryOptions queryOptions = new QueryOptions("limit", 1).append("skipCount", false); QueryResult<Variant> queryResult3 = dbAdaptor.get(new Query(GENE.key(), id), queryOptions); assertEquals("Count for ID " + id, counts.get(id).longValue(), queryResult3.getNumTotalResults()); assertEquals(1, queryResult3.getNumResults()); } } @Test public void rank_gene() throws Exception { int limit = 40; QueryResult<Map<String, Object>> queryResult_rank = dbAdaptor.rank(new Query(), "gene", limit, false); System.out.println(new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(queryResult_rank)); assertEquals(limit, queryResult_rank.getNumResults()); for (Map<String, Object> map : queryResult_rank.getResult()) { QueryResult<Long> variantQueryResult = dbAdaptor.count(new Query(GENE.key(), map.get ("id"))); assertEquals(((Number) variantQueryResult.first()).intValue(), ((Number) map.get("count")).intValue()); } } @Test public void rank_ct() throws Exception { int limit = 20; QueryResult<Map<String, Object>> queryResult_rank = dbAdaptor.rank(new Query(), "ct", limit, false); System.out.println(new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(queryResult_rank)); assertEquals(limit, queryResult_rank.getNumResults()); for (Map<String, Object> map : queryResult_rank.getResult()) { 
QueryResult<Long> variantQueryResult = dbAdaptor.count(new Query(ANNOT_CONSEQUENCE_TYPE .key(), map.get("id"))); assertEquals(((Number) variantQueryResult.first()).intValue(), ((Number) map.get("count")).intValue()); } } @Test public void testGetAllVariants_maf() throws Exception { QueryResult<Variant> queryResult; long numResults; // numResults = dbAdaptor.count(new Query(STATS_MAF.key(), ">0.2")).first(); // System.out.println("queryResult.getNumTotalResults() = " + numResults); queryResult = dbAdaptor.get(new Query(STATS_MAF.key(), STUDY_NAME + ":" + StudyEntry.DEFAULT_COHORT + ">0.2"), null); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withStats(StudyEntry.DEFAULT_COHORT, withMaf(gt(0.2)))))); int expectedCount = (int) count(allVariants.getResult(), withStudy(STUDY_NAME, withStats("cohort1", withMaf(gt(0.2))))); numResults = dbAdaptor.count(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort1>0.2")).first(); assertEquals(expectedCount, numResults); numResults = dbAdaptor.count(new Query(STATS_MAF.key(), "1:10>0.2")).first(); assertEquals(expectedCount, numResults); numResults = dbAdaptor.count(new Query(STATS_MAF.key(), STUDY_NAME + ":10>0.2")).first(); assertEquals(expectedCount, numResults); queryResult = dbAdaptor.get(new Query(STATS_MAF.key(), "1:cohort1>0.2"), null); assertEquals(expectedCount, queryResult.getNumResults()); queryResult = dbAdaptor.get(new Query(STUDIES.key(), STUDY_NAME).append(STATS_MAF.key(), "cohort1>0.2"), null); assertEquals(expectedCount, queryResult.getNumResults()); queryResult = dbAdaptor.get(new Query(STUDIES.key(), STUDY_NAME).append(STATS_MAF.key(), "10>0.2"), null); assertEquals(expectedCount, queryResult.getNumResults()); queryResult = dbAdaptor.get(new Query(STUDIES.key(), 1).append(STATS_MAF.key(), "10>0.2"), null); assertEquals(expectedCount, queryResult.getNumResults()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withStats("cohort1", withMaf(gt(0.2)))))); queryResult = 
dbAdaptor.get(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort2>0.2"), null); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withStats("cohort2", withMaf(gt(0.2)))))); queryResult = dbAdaptor.get(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort2>0.2," + STUDY_NAME + ":cohort2<=0.2"), null); assertThat(queryResult, numResults(is(allVariants.getNumResults()))); queryResult = dbAdaptor.get(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort2>0.2;" + STUDY_NAME + ":cohort2<=0.2"), null); assertThat(queryResult, numResults(is(0))); queryResult = dbAdaptor.get(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort2>0.2;" + STUDY_NAME + ":cohort1<0.2"), null); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, allOf( withStats("cohort2", withMaf(gt(0.2))), withStats("cohort1", withMaf(lt(0.2))))))); queryResult = dbAdaptor.get(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort2>0.2," + STUDY_NAME + ":cohort1<0.2"), null); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, anyOf( withStats("cohort2", withMaf(gt(0.2))), withStats("cohort1", withMaf(lt(0.2))))))); } @Test public void testGetAllVariants_maf_cohortNotFound() throws Exception { VariantQueryException exception = VariantQueryException.cohortNotFound("cohort3", studyConfiguration.getStudyId(), studyConfiguration.getCohortIds().keySet()); thrown.expect(instanceOf(exception.getClass())); thrown.expectCause(is(exception.getCause())); dbAdaptor.get(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort3>0.2"), null); } @Test public void testGetAllVariants_mgf() throws Exception { queryResult = dbAdaptor.get(new Query(STATS_MGF.key(), STUDY_NAME + ":ALL>0.2"), null); System.out.println(queryResult.getNumResults()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withStats("ALL", withMgf(gt(0.2)))))); queryResult = dbAdaptor.get(new Query(STATS_MGF.key(), STUDY_NAME + ":ALL<0.2"), null); System.out.println(queryResult.getNumResults()); 
assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withStats("ALL", withMgf(lt(0.2)))))); } @Test public void testGetAllVariants_cohorts() throws Exception { queryResult = dbAdaptor.get(new Query(COHORTS.key(), STUDY_NAME + ":cohort2"), null); assertEquals(allVariants.getNumResults(), queryResult.getNumResults()); queryResult = dbAdaptor.get(new Query(COHORTS.key(), STUDY_NAME + ":cohort1"), null); assertEquals(allVariants.getNumResults(), queryResult.getNumResults()); queryResult = dbAdaptor.get(new Query(STUDIES.key(), STUDY_NAME) .append(COHORTS.key(), "cohort1"), null); assertEquals(allVariants.getNumResults(), queryResult.getNumResults()); queryResult = dbAdaptor.get(new Query(STUDIES.key(), 1) .append(COHORTS.key(), "cohort1"), null); assertEquals(allVariants.getNumResults(), queryResult.getNumResults()); queryResult = dbAdaptor.get(new Query(STUDIES.key(), 1) .append(COHORTS.key(), 10), null); assertEquals(allVariants.getNumResults(), queryResult.getNumResults()); queryResult = dbAdaptor.get(new Query(STUDIES.key(), 1) .append(COHORTS.key(), "!cohort1"), null); assertEquals(0, queryResult.getNumResults()); } @Test public void testGetAllVariants_cohorts_fail1() throws Exception { VariantQueryException expected = VariantQueryException.cohortNotFound("cohort5_dont_exists", 1, studyConfiguration.getCohortIds().keySet()); thrown.expect(expected.getClass()); thrown.expectMessage(expected.getMessage()); queryResult = dbAdaptor.get(new Query(STUDIES.key(), 1) .append(COHORTS.key(), "!cohort5_dont_exists"), null); } @Test public void testGetAllVariants_missingAllele() throws Exception { queryResult = dbAdaptor.get(new Query(MISSING_ALLELES.key(), STUDY_NAME + ":" + StudyEntry.DEFAULT_COHORT + ">4"), null); assertEquals(9, queryResult.getNumTotalResults()); queryResult.getResult().stream().map(variant -> variant.getStudiesMap().get(STUDY_NAME).getStats()) .forEach(map -> assertTrue(map.get(StudyEntry.DEFAULT_COHORT).getMissingAlleles() > 4)); } 
/**
     * With no projection, every variant in {@code allVariants} must carry studies, stats, files,
     * samples data and an annotation.
     */
    @Test
    public void testIncludeAll() {
        for (Variant variant : allVariants.getResult()) {
            assertThat(variant.getStudies(), not(is(Collections.emptyList())));
            // Stats are a map keyed by cohort name: compare against an empty map, since a comparison
            // against an empty list could never match and would make this check vacuous.
            assertThat(variant.getStudies().get(0).getStats(), not(is(Collections.emptyMap())));
            assertThat(variant.getStudies().get(0).getFiles(), not(is(Collections.emptyList())));
            assertThat(variant.getStudies().get(0).getSamplesData(), not(is(Collections.emptyList())));
            assertNotNull(variant.getAnnotation());
        }
    }

    /**
     * Excluding "chromosome" must not drop any variant, and the chromosome is still expected to be
     * present in every returned variant.
     */
    @Test
    public void testExcludeChromosome() {
        queryResult = dbAdaptor.get(new Query(), new QueryOptions(QueryOptions.EXCLUDE, "chromosome"));
        assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
        for (Variant variant : queryResult.getResult()) {
            assertNotNull(variant.getChromosome());
        }
    }

    /**
     * Excluding "studies" must return every variant with an empty list of studies.
     */
    @Test
    public void testExcludeStudies() {
        queryResult = dbAdaptor.get(new Query(), new QueryOptions(QueryOptions.EXCLUDE, "studies"));
        assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
        for (Variant variant : queryResult.getResult()) {
            assertThat(variant.getStudies(), is(Collections.emptyList()));
        }
    }

    /**
     * Requesting no returned studies through the query must return every variant with an empty list
     * of studies.
     */
    @Test
    public void testReturnNoneStudies() {
        queryResult = dbAdaptor.get(new Query(RETURNED_STUDIES.key(), VariantDBAdaptorUtils.NONE), new QueryOptions());
        assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
        for (Variant variant : queryResult.getResult()) {
            assertThat(variant.getStudies(), is(Collections.emptyList()));
        }
    }

    /**
     * Excluding the stats, under either accepted field name, must return every variant with no
     * stats in its study.
     */
    @Test
    public void testExcludeStats() {
        for (String exclude : Arrays.asList("studies.stats", "stats")) {
            queryResult = dbAdaptor.get(new Query(), new QueryOptions(QueryOptions.EXCLUDE, exclude));
            assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
            for (Variant variant : queryResult.getResult()) {
                // The stats were excluded, so the per-cohort stats map must be empty. The previous
                // "not an empty list" check held for any map and never verified the exclusion.
                assertThat(variant.getStudies().get(0).getStats(), is(Collections.emptyMap()));
            }
        }
    }

    /**
     * Excluding the files, under either accepted field name, must return every variant with no files
     * in its study while the format still matches {@code FORMAT}.
     */
    @Test
    public void testExcludeFiles() {
        for (String exclude : Arrays.asList("studies.files", "files")) {
            queryResult = dbAdaptor.get(new Query(), new QueryOptions(QueryOptions.EXCLUDE, exclude));
            assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
            for (Variant variant : queryResult.getResult()) {
                assertThat(variant.getStudies().get(0).getFiles(), is(Collections.emptyList()));
                assertThat(new HashSet<>(variant.getStudies().get(0).getFormat()), is(FORMAT));
            }
        }
    }

    /**
     * Requesting no returned files through the query must return every variant with no files in its
     * study while the format still matches {@code FORMAT}.
     */
    @Test
    public void testReturnNoneFiles() {
        queryResult = dbAdaptor.get(new Query(RETURNED_FILES.key(), VariantDBAdaptorUtils.NONE), new QueryOptions());
        assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
        for (Variant variant : queryResult.getResult()) {
            assertThat(variant.getStudies().get(0).getFiles(), is(Collections.emptyList()));
            assertThat(new HashSet<>(variant.getStudies().get(0).getFormat()), is(FORMAT));
        }
    }

    /**
     * Excluding the samples data, under any of the accepted field names, must return every variant
     * with no samples data in its study.
     */
    @Test
    public void testExcludeSamples() {
        for (String exclude : Arrays.asList("studies.samplesData", "samplesData", "samples")) {
            queryResult = dbAdaptor.get(new Query(), new QueryOptions(QueryOptions.EXCLUDE, exclude));
            assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
            for (Variant variant : queryResult.getResult()) {
                assertThat(variant.getStudies().get(0).getSamplesData(), is(Collections.emptyList()));
            }
        }
    }

    /**
     * Requesting no returned samples through the query must return every variant with no samples
     * data in its study.
     */
    @Test
    public void testReturnNoneSamples() {
        queryResult = dbAdaptor.get(new Query(RETURNED_SAMPLES.key(), VariantDBAdaptorUtils.NONE), new QueryOptions());
        assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
        for (Variant variant : queryResult.getResult()) {
            assertThat(variant.getStudies().get(0).getSamplesData(), is(Collections.emptyList()));
        }
    }

    /**
     * Excluding "annotation" must return every variant with either no annotation at all or an
     * annotation equal to a freshly built one holding only an empty consequence type list.
     */
    @Test
    public void testExcludeAnnotation() {
        queryResult = dbAdaptor.get(new Query(), new QueryOptions(QueryOptions.EXCLUDE, "annotation"));
        assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
        VariantAnnotation defaultAnnotation = new VariantAnnotation();
        defaultAnnotation.setConsequenceTypes(Collections.emptyList());
        for (Variant variant : queryResult.getResult()) {
            assertThat(variant.getAnnotation(), anyOf(is((VariantAnnotation) null), is(defaultAnnotation)));
        }
    }

    /**
     * Excluding only the annotation xrefs must return the same sorted variants, whose annotations
     * equal the complete ones once the xrefs of the latter are cleared.
     */
    @Test
    public void testExcludeAnnotationParts() {
        // Named differently from the "allVariants" field so the field is not shadowed by a local of
        // another type.
        List<Variant> expectedVariants = dbAdaptor.get(new Query(), new QueryOptions(QueryOptions.SORT, true)).getResult();
        queryResult = dbAdaptor.get(new Query(),
                new QueryOptions(QueryOptions.SORT, true).append(QueryOptions.EXCLUDE, VariantField.ANNOTATION_XREFS));
        assertEquals(expectedVariants.size(), queryResult.getResult().size());

        List<Variant> result = queryResult.getResult();
        for (int i = 0; i < result.size(); i++) {
            // Both queries are sorted, so variants are compared position by position.
            Variant expectedVariant = expectedVariants.get(i);
            Variant variant = result.get(i);
            assertEquals(expectedVariant.toString(), variant.toString());

            assertNotNull(expectedVariant.getAnnotation());
            assertNotNull(variant.getAnnotation());
            VariantAnnotation expectedAnnotation = expectedVariant.getAnnotation();
            VariantAnnotation annotation = variant.getAnnotation();

            // Clear the xrefs of the complete annotation so it can be compared with the projected one.
            expectedAnnotation.setXrefs(null);
            assertEquals(expectedAnnotation, annotation);
        }
    }

    /**
     * Including only "studies" must return fully populated studies and no annotation; including only
     * "annotation" must return the annotation and an empty list of studies.
     */
    @Test
    public void testInclude() {
        queryResult = dbAdaptor.get(new Query(), new QueryOptions(QueryOptions.INCLUDE, "studies"));
        assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
        for (Variant variant : queryResult.getResult()) {
            assertThat(variant.getStudies(), not(is(Collections.emptyList())));
            // Stats are a map keyed by cohort name: an empty-list comparison could never match.
            assertThat(variant.getStudies().get(0).getStats(), not(is(Collections.emptyMap())));
            assertThat(variant.getStudies().get(0).getFiles(), not(is(Collections.emptyList())));
            assertThat(variant.getStudies().get(0).getSamplesData(), not(is(Collections.emptyList())));
            assertNull(variant.getAnnotation());
        }

        queryResult = dbAdaptor.get(new Query(), new QueryOptions(QueryOptions.INCLUDE, "annotation"));
        assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
        for (Variant variant : queryResult.getResult()) {
            assertThat(variant.getStudies(), is(Collections.emptyList()));
            assertNotNull(variant.getAnnotation());
        }
    }

    /*
    @Test
public void testGetAllVariants() { QueryOptions options = new QueryOptions(); options.put("id", "rs1137005,rs150535390"); options.put("region", "1:13910417-13910417,1:165389129-165389129"); options.put("gene", "RCC2,HRNR"); options.put("mgf", "<=0.5"); QueryResult queryResult = vqb.getAllVariants(options); assertEquals(5, queryResult.getNumResults()); // System.out.println(queryResult); } @Test public void testGetVariantById() { QueryResult queryResult; // This test queries a single ID with no more options queryResult = vqb.getVariantById("rs1137005", null); Variant object = (Variant) queryResult.getResult().get(0); assertEquals(object.getStart(), 1650807); // This test adds a few other options. Options related with genomic coordinates must be // added as a logical OR while others as and logical AND. QueryOptions options = new QueryOptions("type", "SNV"); options.put("id", "rs150535390"); options.put("region", "1:13910417-13910417,1:165389129-165389129"); options.put("gene", "RCC2,HRNR"); options.put("mgf", "<=0.5"); queryResult = vqb.getVariantById("rs1137005", options); assertEquals(5, queryResult.getNumResults()); // System.out.println("queryResult = " + queryResult); } @Test public void testGetAllVariantsByRegion() { QueryResult queryResult; // Basic queries queryResult = vqb.getAllVariantsByRegion(new Region("1:1000000-2000000"), null); assertEquals(3, queryResult.getNumResults()); queryResult = vqb.getAllVariantsByRegion(new Region("1:10000000-20000000"), null); assertEquals(11, queryResult.getNumResults()); queryResult = vqb.getAllVariantsByRegion(new Region("3:1-200000000"), null); assertEquals(50, queryResult.getNumResults()); queryResult = vqb.getAllVariantsByRegion(new Region("X:1-200000000"), null); assertEquals(11, queryResult.getNumResults()); // Exactly in the limits queryResult = vqb.getAllVariantsByRegion(new Region("20:238441-7980390"), null); assertEquals(5, queryResult.getNumResults()); // Just inside the limits queryResult = 
vqb.getAllVariantsByRegion(new Region("20:238440-7980391"), null); assertEquals(5, queryResult.getNumResults()); // Just outside the limits queryResult = vqb.getAllVariantsByRegion(new Region("20:238441-7980389"), null); assertEquals(4, queryResult.getNumResults()); queryResult = vqb.getAllVariantsByRegion(new Region("20:238442-7980390"), null); assertEquals(4, queryResult.getNumResults()); queryResult = vqb.getAllVariantsByRegion(new Region("20:238442-7980389"), null); assertEquals(3, queryResult.getNumResults()); } @Test public void testGetAllVariantFrequencyByRegion() { QueryResult queryResult; QueryOptions options = new QueryOptions("interval", 100000); options.put("mgf", "<=0.5"); // Basic queries queryResult = vqb.getVariantFrequencyByRegion(new Region("1:10000000-20000000"), options); System.out.println("queryResult = " + queryResult); // assertEquals(3, queryResult.getNumResults()); } @Test public void testGetAllVariantsByRegionAndStudy() { QueryResult queryResult; // Basic queries queryResult = vqb.getAllVariantsByRegionAndStudies(new Region("1:1000000-2000000"), Arrays.asList(study.getStudyId()), null); System.out.println("queryResult = " + queryResult); assertEquals(3, queryResult.getNumResults()); queryResult = vqb.getAllVariantsByRegionAndStudies(new Region("1:10000000-20000000"), Arrays.asList(study.getStudyId()), null); assertEquals(11, queryResult.getNumResults()); queryResult = vqb.getAllVariantsByRegionAndStudies(new Region("3:1-200000000"), Arrays.asList(study.getStudyId()), null); assertEquals(50, queryResult.getNumResults()); queryResult = vqb.getAllVariantsByRegionAndStudies(new Region("X:1-200000000"), Arrays.asList(study.getStudyId()), null); assertEquals(11, queryResult.getNumResults()); // Exactly in the limits queryResult = vqb.getAllVariantsByRegionAndStudies(new Region("20:238441-7980390"), Arrays.asList(study.getStudyId()), null); assertEquals(5, queryResult.getNumResults()); // Just inside the limits queryResult = 
vqb.getAllVariantsByRegionAndStudies(new Region("20:238440-7980391"), Arrays.asList(study.getStudyId()), null); assertEquals(5, queryResult.getNumResults()); // Just outside the limits queryResult = vqb.getAllVariantsByRegionAndStudies(new Region("20:238441-7980389"), Arrays.asList(study.getStudyId()), null); assertEquals(4, queryResult.getNumResults()); queryResult = vqb.getAllVariantsByRegionAndStudies(new Region("20:238442-7980390"), Arrays.asList(study.getStudyId()), null); assertEquals(4, queryResult.getNumResults()); queryResult = vqb.getAllVariantsByRegionAndStudies(new Region("20:238442-7980389"), Arrays.asList(study.getStudyId()), null); assertEquals(3, queryResult.getNumResults()); // Non-existing study queryResult = vqb.getAllVariantsByRegionAndStudies(new Region("1:1000000-2000000"), Arrays.asList("FalseStudy"), null); assertEquals(0, queryResult.getNumResults()); } @Test public void testGetAllVariantsByGene() { QueryResult queryResult; // Gene present in the dataset queryResult = vqb.getAllVariantsByGene("MIB2", null); assertNotEquals(0, queryResult.getNumResults()); List<Variant> variantsInGene = queryResult.getResult(); for (Variant v : variantsInGene) { assertEquals("1", v.getChromosome()); } // Gene not present in the dataset queryResult = vqb.getAllVariantsByGene("NonExistingGene", null); assertEquals(0, queryResult.getNumResults()); } @Test public void testGetMostAffectedGenes() { // QueryResult queryResult = vqb.getMostAffectedGenes(10, null); QueryResult queryResult = vqb.groupBy("gene", null); assertEquals(10, queryResult.getNumResults()); System.out.println(Arrays.deepToString(queryResult.getResult().toArray())); System.out.println("queryResult = " + queryResult); List<DBObject> result = queryResult.getResult(); for (int i = 1; i < queryResult.getNumResults(); i++) { DBObject prevObject = result.get(i-1); DBObject object = result.get(i); assertTrue(((int) prevObject.get("count")) >= ((int) object.get("count"))); } } @Test public void 
testGetLeastAffectedGenes() { QueryResult queryResult = vqb.getLeastAffectedGenes(10, null); assertEquals(10, queryResult.getNumResults()); System.out.println(Arrays.deepToString(queryResult.getResult().toArray())); List<DBObject> result = queryResult.getResult(); for (int i = 1; i < queryResult.getNumResults(); i++) { DBObject prevObject = result.get(i-1); DBObject object = result.get(i); assertTrue(((int) prevObject.get("count")) <= ((int) object.get("count"))); } } @Test public void testGetTopConsequenceTypes() { QueryResult queryResult = vqb.getTopConsequenceTypes(5, null); assertEquals(5, queryResult.getNumResults()); System.out.println(Arrays.deepToString(queryResult.getResult().toArray())); List<DBObject> result = queryResult.getResult(); for (int i = 1; i < queryResult.getNumResults(); i++) { DBObject prevObject = result.get(i-1); DBObject object = result.get(i); assertTrue(((int) prevObject.get("count")) >= ((int) object.get("count"))); } } @Test public void testGetBottomConsequenceTypes() { QueryResult queryResult = vqb.getBottomConsequenceTypes(5, null); assertEquals(5, queryResult.getNumResults()); System.out.println(Arrays.deepToString(queryResult.getResult().toArray())); List<DBObject> result = queryResult.getResult(); for (int i = 1; i < queryResult.getNumResults(); i++) { DBObject prevObject = result.get(i-1); DBObject object = result.get(i); assertTrue(((int) prevObject.get("count")) <= ((int) object.get("count"))); } } // @Test // public void testGetRecords() throws Exception { // // Map<String, String> opts = new HashMap<>(); // opts.put("studyId", "aaleman_-_XOidGTJMUq1Cr1J"); //// opts.put("region_list", "6:1-15021068"); //// opts.put("sampleGT_D801[]", "1/1,0/1"); //// opts.put("sampleGT_muestra_B[]", "0/1"); //// opts.put("conseq_type[]", "non_synonymous_codon,intron_variant"); //// opts.put("mend_error", "1"); //// opts.put("option_mend_error", ">="); //// opts.put("maf", "0.1"); //// opts.put("option_maf", "<="); // // MutableInt count = 
new MutableInt(-1); // // QueryResult<VariantInfo> records = ((VariantMongoDBAdaptor) vqb).getRecordsMongo(1, 0, 25, count, opts); //// // System.out.println(records.getResult().get(0).getSampleGenotypes()); // } // // @Test // public void testAnalysisInfo() throws Exception { // // QueryResult<VariantAnalysisInfo> res = ((VariantMongoDBAdaptor) vqb).getAnalysisInfo("aaleman_-_XOidGTJMUq1Cr1J"); // VariantAnalysisInfo vi = res.getResult().get(0); // // System.out.println("vi.getSamples() = " + vi.getSamples()); // System.out.println("vi.getConsequenceTypes() = " + vi.getConsequenceTypes()); // System.out.println("vi.getGlobalStats() = " + vi.getGlobalStats()); // // // } */ }