/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.storage.core.manager.variant.operations; import org.junit.After; import org.junit.Test; import org.opencb.biodata.models.variant.StudyEntry; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.VariantSource; import org.opencb.biodata.models.variant.stats.VariantStats; import org.opencb.biodata.tools.variant.stats.VariantAggregatedStatsCalculator; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.core.QueryResult; import org.opencb.opencga.catalog.db.api.CohortDBAdaptor; import org.opencb.opencga.catalog.db.api.StudyDBAdaptor; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.catalog.managers.CatalogManager; import org.opencb.opencga.catalog.models.Cohort; import org.opencb.opencga.catalog.models.File; import org.opencb.opencga.catalog.models.Job; import org.opencb.opencga.catalog.models.Study; import org.opencb.opencga.storage.core.StorageEngineFactory; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.manager.variant.AbstractVariantStorageOperationTest; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import org.opencb.opencga.storage.core.variant.dummy.DummyVariantStorageEngine; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.FileInputStream; import java.io.IOException; import java.util.*; import java.util.stream.Collectors; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.*; import static org.junit.internal.matchers.ThrowableMessageMatcher.hasMessage; import static org.mockito.Mockito.any; import static org.mockito.Mockito.doThrow; import static org.opencb.biodata.models.variant.StudyEntry.DEFAULT_COHORT; import static org.opencb.opencga.storage.core.variant.VariantStorageBaseTest.getResourceUri; /** * * Created by hpccoll1 on 08/07/15. */ public class StatsVariantStorageTest extends AbstractVariantStorageOperationTest { Logger logger = LoggerFactory.getLogger(StatsVariantStorageTest.class); private long all; private long[] coh = new long[5]; public void before () throws Exception { File file = opencga.createFile(studyId, "1000g_batches/1-500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz", sessionId); List<Long> sampleIds = file.getSampleIds(); for (int i = 0; i < coh.length; i++) { coh[i] = catalogManager.createCohort(studyId, "coh" + i, Study.Type.CONTROL_SET, "", sampleIds.subList(sampleIds.size() / coh.length * i, sampleIds.size() / coh.length * (i + 1)), null, sessionId).first().getId(); } QueryOptions queryOptions = new QueryOptions(VariantStorageEngine.Options.ANNOTATE.key(), false); queryOptions.putIfNotNull(StorageOperation.CATALOG_PATH, String.valueOf(outputId)); variantManager.index(null, String.valueOf(file.getId()), createTmpOutdir(file), queryOptions, sessionId); all = catalogManager.getAllCohorts(studyId, new Query(CohortDBAdaptor.QueryParams.NAME.key(), DEFAULT_COHORT), new QueryOptions(), sessionId).first().getId(); } public File beforeAggregated(String fileName, VariantSource.Aggregation aggregation) throws Exception { Map<String, Object> attributes; if (aggregation != null) { attributes = Collections.singletonMap(VariantStorageEngine.Options.AGGREGATED_TYPE.key(), aggregation); } else { attributes = Collections.emptyMap(); } catalogManager.modifyStudy(studyId, new ObjectMap(StudyDBAdaptor.QueryParams.ATTRIBUTES.key(), attributes), sessionId); File file1 = opencga.createFile(studyId, fileName, sessionId); // coh0 = catalogManager.createCohort(studyId, "coh0", Cohort.Type.CONTROL_SET, "", file1.getSampleIds(), null, sessionId).first().getId(); QueryOptions queryOptions = new QueryOptions(VariantStorageEngine.Options.ANNOTATE.key(), false); queryOptions.putIfNotNull(StorageOperation.CATALOG_PATH, String.valueOf(outputId)); variantManager.index(null, String.valueOf(file1.getId()), createTmpOutdir(file1), queryOptions, sessionId); return file1; } public String createTmpOutdir(File file) throws CatalogException { return createTmpOutdir("_FILE_" + file.getId()); } public String createTmpOutdir(String sufix) throws CatalogException { return opencga.createTmpOutdir(studyId, sufix, sessionId); } public static List<Cohort> createCohorts(String sessionId, long studyId, String tagmapPath, CatalogManager catalogManager, Logger logger) throws IOException, CatalogException { List<Cohort> queryResults = new ArrayList<>(); Properties tagmap = new Properties(); tagmap.load(new FileInputStream(tagmapPath)); Map<String, Cohort> cohorts = catalogManager.getAllCohorts(studyId, null, null, sessionId).getResult().stream().collect(Collectors.toMap(Cohort::getName, c->c)); Set<String> catalogCohorts = cohorts.keySet(); for (String cohortName : VariantAggregatedStatsCalculator.getCohorts(tagmap)) { if (!catalogCohorts.contains(cohortName)) { QueryResult<Cohort> cohort = catalogManager.createCohort(studyId, cohortName, Study.Type.COLLECTION, "", Collections.emptyList(), null, sessionId); queryResults.add(cohort.first()); } else { logger.warn("cohort {} was already created", cohortName); queryResults.add(cohorts.get(cohortName)); } } return queryResults; } @Override protected VariantSource.Aggregation getAggregation() { return VariantSource.Aggregation.NONE; } @After public void after () throws Exception { // catalogManagerTest.tearDown(); } @Test public void testCalculateStatsOneByOne() throws Exception { before(); Map<String, Cohort> cohorts = new HashMap<>(); calculateStats(coh[0]); cohorts.put("coh0", catalogManager.getCohort(coh[0], null, sessionId).first()); // cohorts.put("all", null); checkCalculatedStats(cohorts); // Job job = variantStorage.calculateStats(outputId, Collections.singletonList(coh1), sessionId, new QueryOptions(ExecutorManager.EXECUTE, true)).first(); // assertEquals(Status.READY, job.getStatus().getName()); cohorts.put("coh1", catalogManager.getCohort(coh[1], null, sessionId).first()); calculateStats(coh[1]); checkCalculatedStats(cohorts); calculateStats(coh[2]); cohorts.put("coh2", catalogManager.getCohort(coh[2], null, sessionId).first()); checkCalculatedStats(cohorts); calculateStats(coh[3]); cohorts.put("coh3", catalogManager.getCohort(coh[3], null, sessionId).first()); checkCalculatedStats(cohorts); calculateStats(coh[4]); cohorts.put("coh4", catalogManager.getCohort(coh[4], null, sessionId).first()); checkCalculatedStats(cohorts); } public void calculateStats(long cohortId) throws Exception { calculateStats(cohortId, new QueryOptions()); } public void calculateStats(long cohortId, QueryOptions options) throws Exception { String tmpOutdir = createTmpOutdir("_STATS_" + cohortId); List<String> cohortIds = Collections.singletonList(String.valueOf(cohortId)); options.put(StorageOperation.CATALOG_PATH, String.valueOf(outputId)); variantManager.stats(String.valueOf(catalogManager.getStudyIdByCohortId(cohortId)), cohortIds, tmpOutdir, options, sessionId); } public void calculateStats(QueryOptions options, Long... cohortIds) throws Exception { calculateStats(options, Arrays.stream(cohortIds).map(Object::toString).collect(Collectors.toList())); } public void calculateStats(QueryOptions options, List<String> cohorts) throws Exception { String tmpOutdir = createTmpOutdir("_STATS_" + cohorts.stream().collect(Collectors.joining("_"))); options.put(StorageOperation.CATALOG_PATH, String.valueOf(outputId)); variantManager.stats(String.valueOf(studyId), cohorts, tmpOutdir, options, sessionId); } @Test public void testCalculateStatsGroups() throws Exception { before(); Map<String, Cohort> cohorts = new HashMap<>(); calculateStats(new QueryOptions(), coh[0], coh[1], coh[2]); cohorts.put("coh0", catalogManager.getCohort(coh[0], null, sessionId).first()); cohorts.put("coh1", catalogManager.getCohort(coh[1], null, sessionId).first()); cohorts.put("coh2", catalogManager.getCohort(coh[2], null, sessionId).first()); checkCalculatedStats(cohorts); try { calculateStats(new QueryOptions(), all, coh[3], -coh[4]); fail(); } catch (CatalogException e) { logger.info("received expected exception. this is OK, there is no cohort " + (-coh[4]) + '\n'); } assertEquals(Cohort.CohortStatus.NONE, catalogManager.getCohort(all, null, sessionId).first().getStatus().getName()); assertEquals(Cohort.CohortStatus.NONE, catalogManager.getCohort(coh[3], null, sessionId).first().getStatus().getName()); assertEquals(Cohort.CohortStatus.NONE, catalogManager.getCohort(coh[4], null, sessionId).first().getStatus().getName()); calculateStats(new QueryOptions(), all, coh[3], coh[4]); cohorts.put(DEFAULT_COHORT, catalogManager.getCohort(all, null, sessionId).first()); cohorts.put("coh3", catalogManager.getCohort(coh[3], null, sessionId).first()); cohorts.put("coh4", catalogManager.getCohort(coh[4], null, sessionId).first()); checkCalculatedStats(cohorts); } @Test public void testCalculateStats() throws Exception { before(); assertEquals(Cohort.CohortStatus.NONE, catalogManager.getCohort(coh[0], null, sessionId).first().getStatus().getName()); calculateStats(coh[0]); // TODO: Check status "CALCULATING" // Job job = variantStorage.calculateStats(outputId, Collections.singletonList(coh0), sessionId, new QueryOptions()).first(); // assertEquals(Cohort.CohortStatus.CALCULATING, catalogManager.getCohort(coh0, null, sessionId).first().getStatus().getName()); // runStorageJob(job, sessionId); assertEquals(Cohort.CohortStatus.READY, catalogManager.getCohort(coh[0], null, sessionId).first().getStatus().getName()); Map<String, Cohort> cohorts = new HashMap<>(); cohorts.put("coh0", catalogManager.getCohort(coh[0], null, sessionId).first()); checkCalculatedStats(cohorts); catalogManager.modifyCohort(coh[0], new ObjectMap("description", "NewDescription"), new QueryOptions(), sessionId); assertEquals(Cohort.CohortStatus.READY, catalogManager.getCohort(coh[0], null, sessionId).first().getStatus().getName()); catalogManager.modifyCohort(coh[0], new ObjectMap("samples", catalogManager.getCohort(coh[0], null, sessionId).first() .getSamples().subList(0, 100)), new QueryOptions(), sessionId); assertEquals(Cohort.CohortStatus.INVALID, catalogManager.getCohort(coh[0], null, sessionId).first().getStatus().getName()); calculateStats(coh[0]); assertEquals(Cohort.CohortStatus.READY, catalogManager.getCohort(coh[0], null, sessionId).first().getStatus().getName()); cohorts.put("coh0", catalogManager.getCohort(coh[0], null, sessionId).first()); checkCalculatedStats(cohorts); } @Test public void testCalculateInvalidStats() throws Exception { before(); calculateStats(coh[0]); DummyVariantStorageEngine vsm = mockVariantStorageManager(); String message = "Error"; doThrow(new StorageEngineException(message)).when(vsm).calculateStats(any(), any(), any(), any()); try { calculateStats(coh[1]); fail(); } catch (StorageEngineException e) { assertEquals(message, e.getCause().getMessage()); } Cohort coh1 = catalogManager.getCohort(coh[1], null, sessionId).first(); assertEquals(Cohort.CohortStatus.INVALID, coh1.getStatus().getName()); vsm = mockVariantStorageManager(); calculateStats(coh[1]); } @Test public void testResumeCalculateStats() throws Exception { before(); calculateStats(coh[0]); catalogManager.getCohortManager().setStatus(String.valueOf(coh[1]), Cohort.CohortStatus.CALCULATING, "", sessionId); Cohort coh1 = catalogManager.getCohort(coh[1], null, sessionId).first(); Exception expected = VariantStatsStorageOperation.unableToCalculateCohortCalculating(coh1); try { calculateStats(coh[1]); fail(); } catch (Exception e) { assertThat(e, instanceOf(expected.getClass())); assertThat(e, hasMessage(is(expected.getMessage()))); } calculateStats(coh[1], new QueryOptions(VariantStorageEngine.Options.RESUME.key(), true)); } @Test public void testCalculateAggregatedStats() throws Exception { beforeAggregated("variant-test-aggregated-file.vcf.gz", VariantSource.Aggregation.BASIC); calculateAggregatedStats(new QueryOptions()); } @Test public void testCalculateAggregatedStatsWithoutCohorts() throws Exception { beforeAggregated("variant-test-aggregated-file.vcf.gz", VariantSource.Aggregation.BASIC); calculateStats(new QueryOptions()); } @Test public void testCalculateAggregatedStatsNonAggregatedStudy() throws Exception { beforeAggregated("variant-test-aggregated-file.vcf.gz", null); calculateAggregatedStats(new QueryOptions(VariantStorageEngine.Options.AGGREGATED_TYPE.key(), VariantSource.Aggregation.BASIC)); Study study = catalogManager.getStudy(studyId, sessionId).first(); String agg = study.getAttributes().get(VariantStorageEngine.Options.AGGREGATED_TYPE.key()).toString(); assertNotNull(agg); assertEquals(VariantSource.Aggregation.BASIC.toString(), agg); } public void calculateAggregatedStats(QueryOptions options) throws Exception { // coh0 = catalogManager.createCohort(studyId, "ALL", Cohort.Type.COLLECTION, "", file.getSampleIds(), null, sessionId).first().getId(); long cohId = catalogManager.getAllCohorts(studyId, null, null, sessionId).first().getId(); calculateStats(cohId, options); checkCalculatedAggregatedStats(Collections.singleton(DEFAULT_COHORT), dbName); } @Test public void testCalculateAggregatedExacStats() throws Exception { beforeAggregated("exachead.vcf.gz", VariantSource.Aggregation.EXAC); String tagMap = getResourceUri("exac-tag-mapping.properties").getPath(); List<String> cohortIds = createCohorts(sessionId, studyId, tagMap, catalogManager, logger) .stream().map(Cohort::getId).map(Object::toString).collect(Collectors.toList()); QueryOptions options = new QueryOptions(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), tagMap); calculateStats(options, cohortIds); List<Cohort> cohorts = catalogManager.getAllCohorts(studyId, null, null, sessionId).getResult(); Set<String> cohortNames = cohorts .stream() .map(Cohort::getName) .collect(Collectors.toSet()); assertEquals(8, cohortNames.size()); for (Cohort cohort : cohorts) { assertEquals(Cohort.CohortStatus.READY, cohort.getStatus().getName()); } // checkCalculatedAggregatedStats(cohorts, dbName); } @Test public void testCalculateAggregatedExacStatsExplicitCohorts() throws Exception { beforeAggregated("exachead.vcf.gz", VariantSource.Aggregation.EXAC); String tagMap = getResourceUri("exac-tag-mapping.properties").getPath(); QueryOptions options = new QueryOptions(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), tagMap); calculateStats(options, Arrays.asList("AFR", "ALL", "AMR", "EAS", "FIN", "NFE", "OTH", "SAS")); List<Cohort> cohorts = catalogManager.getAllCohorts(studyId, null, null, sessionId).getResult(); Set<String> cohortNames = cohorts .stream() .map(Cohort::getName) .collect(Collectors.toSet()); assertEquals(8, cohortNames.size()); for (Cohort cohort : cohorts) { assertEquals(Cohort.CohortStatus.READY, cohort.getStatus().getName()); } // checkCalculatedAggregatedStats(cohorts, dbName); } @Test public void testCalculateAggregatedExacStatsWrongExplicitCohorts() throws Exception { beforeAggregated("exachead.vcf.gz", VariantSource.Aggregation.EXAC); String tagMap = getResourceUri("exac-tag-mapping.properties").getPath(); QueryOptions options = new QueryOptions(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), tagMap); thrown.expectMessage(VariantStatsStorageOperation.differentCohortsThanMappingFile().getMessage()); calculateStats(options, Arrays.asList("AFR", "ALL")); } @Test public void testCalculateAggregatedExacMissingAggregationMappingFile() throws Exception { beforeAggregated("exachead.vcf.gz", VariantSource.Aggregation.EXAC); QueryOptions options = new QueryOptions(); thrown.expectMessage(VariantStatsStorageOperation.missingAggregationMappingFile(VariantSource.Aggregation.EXAC).getMessage()); calculateStats(options, Collections.emptyList()); } @Test public void testCalculateNonAggregatedWithAggregationMappingFile() throws Exception { before(); String tagMap = getResourceUri("exac-tag-mapping.properties").getPath(); QueryOptions options = new QueryOptions(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), tagMap); thrown.expectMessage(VariantStatsStorageOperation.nonAggregatedWithMappingFile().getMessage()); calculateStats(options, Arrays.asList("ALL")); } @Test public void testCalculateAggregatedExacStatsWithoutCohorts() throws Exception { beforeAggregated("exachead.vcf.gz", VariantSource.Aggregation.EXAC); String tagMap = getResourceUri("exac-tag-mapping.properties").getPath(); QueryOptions options = new QueryOptions(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), tagMap); calculateStats(options); List<Cohort> cohorts = catalogManager.getAllCohorts(studyId, null, null, sessionId).getResult(); Set<String> cohortNames = cohorts .stream() .map(Cohort::getName) .collect(Collectors.toSet()); assertEquals(8, cohortNames.size()); for (Cohort cohort : cohorts) { assertEquals(Cohort.CohortStatus.READY, cohort.getStatus().getName()); } // checkCalculatedAggregatedStats(cohorts, dbName); } public void checkCalculatedStats(Map<String, Cohort> cohorts) throws Exception { checkCalculatedStats(cohorts, catalogManager, dbName, sessionId); } public static void checkCalculatedStats(Map<String, Cohort> cohorts, CatalogManager catalogManager, String dbName, String sessionId) throws Exception { VariantDBAdaptor dbAdaptor = StorageEngineFactory.get().getVariantStorageEngine().getDBAdaptor(dbName); for (Variant variant : dbAdaptor) { for (StudyEntry sourceEntry : variant.getStudies()) { assertEquals("In variant " + variant.toString(), cohorts.size(), sourceEntry.getStats().size()); for (Map.Entry<String, VariantStats> entry : sourceEntry.getStats().entrySet()) { assertTrue("In variant " + variant.toString(), cohorts.containsKey(entry.getKey())); if (cohorts.get(entry.getKey()) != null) { assertEquals("Variant: " + variant.toString() + " does not have the correct number of samples in cohort '" + entry.getKey() + "'.", cohorts.get(entry.getKey()).getSamples().size(), entry.getValue().getGenotypesCount().values().stream().reduce((integer, integer2) -> integer + integer2).orElse(0).intValue()); } } } } for (Cohort cohort : cohorts.values()) { cohort = catalogManager.getCohort(cohort.getId(), null, sessionId).first(); assertEquals(Cohort.CohortStatus.READY, cohort.getStatus().getName()); } } public static void checkCalculatedAggregatedStats(Set<String> cohortNames, String dbName) throws Exception { VariantDBAdaptor dbAdaptor = StorageEngineFactory.get().getVariantStorageEngine().getDBAdaptor(dbName); for (Variant variant : dbAdaptor) { for (StudyEntry sourceEntry : variant.getStudies()) { assertEquals(cohortNames, sourceEntry.getStats().keySet()); for (Map.Entry<String, VariantStats> entry : sourceEntry.getStats().entrySet()) { assertTrue(cohortNames.contains(entry.getKey())); } } } } /** * Do not execute Job using its command line, won't find the opencga-storage.sh * Call directly to the OpenCGAStorageMain */ private Job runStorageJob(Job storageJob, String sessionId) throws IOException, CatalogException { // storageJob.setCommandLine(storageJob.getCommandLine() + " --job-id " + storageJob.getId()); Job job = opencga.runStorageJob(storageJob, sessionId); assertEquals(Job.JobStatus.READY, job.getStatus().getName()); return job; } }