/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.core.variant.stats;
import org.junit.*;
import org.junit.rules.ExpectedException;
import org.opencb.biodata.models.variant.StudyEntry;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.stats.VariantStats;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.opencb.opencga.storage.core.variant.VariantStorageManagerTest;
import org.opencb.opencga.storage.core.variant.VariantStorageBaseTest;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
* Created by hpccoll1 on 01/06/15.
*/
@Ignore
public abstract class VariantStatisticsManagerTest extends VariantStorageBaseTest {
public static final String VCF_TEST_FILE_NAME = "variant-test-file.vcf.gz";
private StudyConfiguration studyConfiguration;
private VariantDBAdaptor dbAdaptor;
@Rule
public ExpectedException thrown = ExpectedException.none();
@BeforeClass
public static void beforeClass() throws IOException {
Path rootDir = getTmpRootDir();
Path inputPath = rootDir.resolve(VCF_TEST_FILE_NAME);
Files.copy(VariantStorageManagerTest.class.getClassLoader().getResourceAsStream(VCF_TEST_FILE_NAME), inputPath,
StandardCopyOption.REPLACE_EXISTING);
inputUri = inputPath.toUri();
}
@Override
@Before
public void before() throws Exception {
studyConfiguration = newStudyConfiguration();
clearDB(DB_NAME);
runDefaultETL(inputUri, getVariantStorageEngine(), studyConfiguration,
new ObjectMap(VariantStorageEngine.Options.ANNOTATE.key(), false));
dbAdaptor = getVariantStorageEngine().getDBAdaptor(DB_NAME);
}
@Test
public void calculateStatsMultiCohortsTest() throws Exception {
//Calculate stats for 2 cohorts at one time
DefaultVariantStatisticsManager vsm = (DefaultVariantStatisticsManager) variantStorageManager.newVariantStatisticsManager(dbAdaptor);
checkCohorts(dbAdaptor, studyConfiguration);
Integer fileId = studyConfiguration.getFileIds().get(Paths.get(inputUri).getFileName().toString());
QueryOptions options = new QueryOptions(VariantStorageEngine.Options.FILE_ID.key(), fileId);
options.put(VariantStorageEngine.Options.LOAD_BATCH_SIZE.key(), 100);
Iterator<String> iterator = studyConfiguration.getSampleIds().keySet().iterator();
/** Create cohorts **/
HashSet<String> cohort1 = new HashSet<>();
cohort1.add(iterator.next());
cohort1.add(iterator.next());
HashSet<String> cohort2 = new HashSet<>();
cohort2.add(iterator.next());
cohort2.add(iterator.next());
Map<String, Set<String>> cohorts = new HashMap<>();
Map<String, Integer> cohortIds = new HashMap<>();
cohorts.put("cohort1", cohort1);
cohorts.put("cohort2", cohort2);
cohortIds.put("cohort1", 10);
cohortIds.put("cohort2", 11);
//Calculate stats
URI stats = vsm.createStats(dbAdaptor, outputUri.resolve("cohort1.cohort2.stats"), cohorts, cohortIds, studyConfiguration, options);
vsm.loadStats(dbAdaptor, stats, studyConfiguration, options);
checkCohorts(dbAdaptor, studyConfiguration);
}
@Test
public void calculateStatsSeparatedCohortsTest() throws Exception {
//Calculate stats for 2 cohorts separately
DefaultVariantStatisticsManager vsm = (DefaultVariantStatisticsManager) variantStorageManager.newVariantStatisticsManager(dbAdaptor);
int studyId = studyConfiguration.getStudyId();
String studyName = studyConfiguration.getStudyName();
Integer fileId = studyConfiguration.getFileIds().get(Paths.get(inputUri).getFileName().toString());
QueryOptions options = new QueryOptions(VariantStorageEngine.Options.FILE_ID.key(), fileId);
options.put(VariantStorageEngine.Options.LOAD_BATCH_SIZE.key(), 100);
Iterator<String> iterator = studyConfiguration.getSampleIds().keySet().iterator();
StudyConfiguration studyConfiguration;
/** Create first cohort **/
studyConfiguration = dbAdaptor.getStudyConfigurationManager().getStudyConfiguration(studyName, null).first();
HashSet<String> cohort1 = new HashSet<>();
cohort1.add(iterator.next());
cohort1.add(iterator.next());
Map<String, Set<String>> cohorts;
Map<String, Integer> cohortIds;
cohorts = new HashMap<>();
cohortIds = new HashMap<>();
cohorts.put("cohort1", cohort1);
cohortIds.put("cohort1", 10);
//Calculate stats for cohort1
URI stats = vsm.createStats(dbAdaptor, outputUri.resolve("cohort1.stats"), cohorts, cohortIds, studyConfiguration, options);
vsm.loadStats(dbAdaptor, stats, studyConfiguration, options);
assertTrue(studyConfiguration.getCalculatedStats().contains(10));
checkCohorts(dbAdaptor, studyConfiguration);
/** Create second cohort **/
studyConfiguration = dbAdaptor.getStudyConfigurationManager().getStudyConfiguration(studyName, null).first();
HashSet<String> cohort2 = new HashSet<>();
cohort2.add(iterator.next());
cohort2.add(iterator.next());
cohorts = new HashMap<>();
cohortIds = new HashMap<>();
cohorts.put("cohort2", cohort2);
cohortIds.put("cohort2", 11);
//Calculate stats for cohort2
stats = vsm.createStats(dbAdaptor, outputUri.resolve("cohort2.stats"), cohorts, cohortIds, studyConfiguration, options);
vsm.loadStats(dbAdaptor, stats, studyConfiguration, options);
assertTrue(studyConfiguration.getCalculatedStats().contains(10));
assertTrue(studyConfiguration.getCalculatedStats().contains(11));
checkCohorts(dbAdaptor, studyConfiguration);
//Try to recalculate stats for cohort2. Will fail
studyConfiguration = dbAdaptor.getStudyConfigurationManager().getStudyConfiguration(studyName, null).first();
thrown.expect(StorageEngineException.class);
stats = vsm.createStats(dbAdaptor, outputUri.resolve("cohort2.stats"), cohorts, cohortIds, studyConfiguration, options);
vsm.loadStats(dbAdaptor, stats, studyConfiguration, options);
}
private static void checkCohorts(VariantDBAdaptor dbAdaptor, StudyConfiguration studyConfiguration) {
for (Variant variant : dbAdaptor) {
for (StudyEntry sourceEntry : variant.getStudies()) {
Map<String, VariantStats> cohortStats = sourceEntry.getStats();
String calculatedCohorts = cohortStats.keySet().toString();
for (Map.Entry<String, Integer> entry : studyConfiguration.getCohortIds().entrySet()) {
assertTrue("CohortStats should contain stats for cohort " + entry.getKey() + ". Only contains stats for " +
calculatedCohorts,
cohortStats.containsKey(entry.getKey())); //Check stats are calculated
assertEquals("Stats have less genotypes than expected.",
studyConfiguration.getCohorts().get(entry.getValue()).size(), //Check numGenotypes are correct (equals to
// the number of samples)
cohortStats.get(entry.getKey()).getGenotypesCount().values().stream().reduce(0, (a, b) -> a + b).intValue());
}
}
}
}
}