/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.core.variant;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.avro.generic.GenericRecord;
import org.junit.Ignore;
import org.junit.Test;
import org.opencb.biodata.formats.variant.io.VariantReader;
import org.opencb.biodata.models.variant.*;
import org.opencb.biodata.models.variant.avro.FileEntry;
import org.opencb.biodata.models.variant.stats.VariantStats;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.opencga.storage.core.StoragePipelineResult;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.metadata.StudyConfigurationManager;
import org.opencb.opencga.storage.core.search.VariantSearchManager;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBIterator;
import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils;
import org.opencb.opencga.storage.core.variant.io.json.mixin.GenericRecordAvroJsonMixin;
import org.opencb.opencga.storage.core.variant.io.json.mixin.VariantStatsJsonMixin;
import org.opencb.opencga.storage.core.variant.stats.VariantStatsWrapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileInputStream;
import java.net.URI;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import static org.junit.Assert.*;
/**
* @author Jacobo Coll <jacobo167@gmail.com>
*/
@Ignore
public abstract class VariantStorageManagerTest extends VariantStorageBaseTest {
// Class-wide SLF4J logger. Declared final so the shared reference cannot be reassigned.
private static final Logger logger = LoggerFactory.getLogger(VariantStorageManagerTest.class);
/**
 * Runs {@code runDefaultETL} over the small input file requesting "json" as transform format, then checks
 * the name of the transformed file, that file id 6 is registered as indexed, and both the transformed and
 * the loaded variants.
 */
@Test
public void basicIndex() throws Exception {
    clearDB(DB_NAME);

    StudyConfiguration config = newStudyConfiguration();
    ObjectMap jsonOptions = new ObjectMap(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "json");
    StoragePipelineResult pipelineResult = runDefaultETL(smallInputUri, variantStorageManager, config, jsonOptions);

    // The transformed file must carry the JSON + gzip suffix
    URI transformedUri = pipelineResult.getTransformResult();
    String transformedFileName = Paths.get(transformedUri).toFile().getName();
    assertTrue("Incorrect transform file extension " + transformedUri + ". Expected 'variants.json.gz'",
            transformedFileName.endsWith("variants.json.gz"));

    VariantSource source = VariantReaderUtils.readVariantSource(Paths.get(transformedUri.getPath()), null);
    assertTrue(config.getIndexedFiles().contains(6));

    checkTransformedVariants(transformedUri, config);
    VariantDBAdaptor adaptor = variantStorageManager.getDBAdaptor(DB_NAME);
    checkLoadedVariants(adaptor, config, true, false, getExpectedNumLoadedVariants(source));
}
/**
 * Same scenario as the JSON based index test, but requesting "avro" as transform format: checks the name
 * of the transformed file, that file id 6 is registered as indexed, and the transformed and loaded variants.
 */
@Test
public void avroBasicIndex() throws Exception {
    clearDB(DB_NAME);

    StudyConfiguration config = newStudyConfiguration();
    ObjectMap avroOptions = new ObjectMap(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro");
    StoragePipelineResult pipelineResult = runDefaultETL(smallInputUri, variantStorageManager, config, avroOptions);

    // The transformed file must carry the Avro + gzip suffix
    URI transformedUri = pipelineResult.getTransformResult();
    String transformedFileName = Paths.get(transformedUri).toFile().getName();
    assertTrue("Incorrect transform file extension " + transformedUri + ". Expected 'variants.avro.gz'",
            transformedFileName.endsWith("variants.avro.gz"));

    assertTrue(config.getIndexedFiles().contains(6));

    VariantSource source = checkTransformedVariants(transformedUri, config);
    VariantDBAdaptor adaptor = variantStorageManager.getDBAdaptor(DB_NAME);
    checkLoadedVariants(adaptor, config, true, false, getExpectedNumLoadedVariants(source));
}
/**
 * Loads the chr22 dataset split in five batch files (file ids 5 to 9) into the study "multi", switching
 * CALCULATE_STATS between loads and checking the default cohort size, the calculated/invalid stats and the
 * indexed files along the way. Afterwards the same dataset is loaded as one single file (file id 10) into
 * the study "single", and every variant is required to hold the same samples data in both studies.
 */
@Test
public void multiIndex() throws Exception {
    clearDB(DB_NAME);

    // 37 variants have been removed from this dataset because they had the genotype 0|0 for each sample
    int expectedCount = NUM_VARIANTS - 37;
    String batchSuffix = ".filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz";
    String fileIdKey = VariantStorageEngine.Options.FILE_ID.key();
    String calculateStatsKey = VariantStorageEngine.Options.CALCULATE_STATS.key();

    StudyConfiguration multiStudy = new StudyConfiguration(1, "multi");
    ObjectMap options = new ObjectMap()
            .append(VariantStorageEngine.Options.STUDY_TYPE.key(), VariantStudy.StudyType.CONTROL)
            .append(calculateStatsKey, false)
            .append(VariantStorageEngine.Options.ANNOTATE.key(), false);

    // Batch 1 (samples 1-500): stats disabled
    URI firstBatch = getResourceUri("1000g_batches/1-500" + batchSuffix);
    runDefaultETL(firstBatch, variantStorageManager, multiStudy, options.append(fileIdKey, 5));
    Integer defaultCohortId = multiStudy.getCohortIds().get(StudyEntry.DEFAULT_COHORT);
    assertTrue(multiStudy.getCohorts().containsKey(defaultCohortId));
    assertEquals(500, multiStudy.getCohorts().get(defaultCohortId).size());
    assertEquals(Collections.emptySet(), multiStudy.getCalculatedStats());
    assertEquals(Collections.emptySet(), multiStudy.getInvalidStats());
    assertTrue(multiStudy.getIndexedFiles().contains(5));

    // Batch 2 (samples 501-1000): stats enabled, the default cohort becomes calculated
    options.append(calculateStatsKey, true);
    URI secondBatch = getResourceUri("1000g_batches/501-1000" + batchSuffix);
    runDefaultETL(secondBatch, variantStorageManager, multiStudy, options.append(fileIdKey, 6));
    assertEquals(1000, multiStudy.getCohorts().get(defaultCohortId).size());
    assertEquals(Collections.singleton(defaultCohortId), multiStudy.getCalculatedStats());
    assertEquals(Collections.emptySet(), multiStudy.getInvalidStats());
    assertTrue(multiStudy.getIndexedFiles().contains(6));

    // Batch 3 (samples 1001-1500): stats disabled, the default cohort stats become invalid
    options.append(calculateStatsKey, false);
    URI thirdBatch = getResourceUri("1000g_batches/1001-1500" + batchSuffix);
    runDefaultETL(thirdBatch, variantStorageManager, multiStudy, options.append(fileIdKey, 7));
    assertEquals(1500, multiStudy.getCohorts().get(defaultCohortId).size());
    assertEquals(Collections.emptySet(), multiStudy.getCalculatedStats());
    assertEquals(Collections.singleton(defaultCohortId), multiStudy.getInvalidStats());
    assertTrue(multiStudy.getIndexedFiles().contains(7));

    // Batch 4 (samples 1501-2000): stats enabled again
    options.append(calculateStatsKey, true);
    URI fourthBatch = getResourceUri("1000g_batches/1501-2000" + batchSuffix);
    runDefaultETL(fourthBatch, variantStorageManager, multiStudy, options.append(fileIdKey, 8));
    assertEquals(2000, multiStudy.getCohorts().get(defaultCohortId).size());
    assertTrue(multiStudy.getIndexedFiles().contains(8));

    // Batch 5 (samples 2001-2504)
    URI fifthBatch = getResourceUri("1000g_batches/2001-2504" + batchSuffix);
    runDefaultETL(fifthBatch, variantStorageManager, multiStudy, options.append(fileIdKey, 9));
    assertEquals(2504, multiStudy.getCohorts().get(defaultCohortId).size());
    for (int indexedFileId = 5; indexedFileId <= 9; indexedFileId++) {
        assertTrue(multiStudy.getIndexedFiles().contains(indexedFileId));
    }

    VariantDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor(DB_NAME);
    checkLoadedVariants(dbAdaptor, multiStudy, true, false, expectedCount);

    // Load, in a new study, the same dataset in one single file
    StudyConfiguration singleStudy = new StudyConfiguration(2, "single");
    URI wholeFile = getResourceUri("filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz");
    StoragePipelineResult singleFileResult = runDefaultETL(wholeFile, variantStorageManager, singleStudy,
            options.append(fileIdKey, 10));
    assertTrue(singleStudy.getIndexedFiles().contains(10));
    checkTransformedVariants(singleFileResult.getTransformResult(), singleStudy);

    // Check that both studies contain the same information
    String bothStudies = multiStudy.getStudyId() + "," + singleStudy.getStudyId();
    VariantDBIterator iterator = dbAdaptor.iterator(
            new Query(VariantDBAdaptor.VariantQueryParams.STUDIES.key(), bothStudies), new QueryOptions());
    int seenVariants = 0;
    while (iterator.hasNext()) {
        Variant variant = iterator.next();
        seenVariants++;
        Map<String, StudyEntry> studiesByName = variant.getStudiesMap();
        assertTrue(variant.toString(), studiesByName.containsKey(multiStudy.getStudyName()));
        assertTrue(variant.toString(), studiesByName.containsKey(singleStudy.getStudyName()));
        String expected = studiesByName.get(singleStudy.getStudyName()).getSamplesData().toString();
        String actual = studiesByName.get(multiStudy.getStudyName()).getSamplesData().toString();
        assertWithConflicts(variant, () -> assertEquals(variant.toString(), expected, actual));
    }
    assertEquals(expectedCount, seenVariants);
}
/**
 * Runs the platinum multi-file scenario starting from an empty set of options.
 */
@Test
public void multiIndexPlatinum() throws Exception {
    ObjectMap noExtraOptions = new ObjectMap();
    multiIndexPlatinum(noExtraOptions);
}
/**
 * Indexes the platinum files NA12877 to NA12893 twice: first one by one into the study "multi" (file ids
 * 1 to 17, one sample per file), and then all together in a single {@code index} call into the study
 * "batch". Each pipeline result of the batch load must report transform and load as executed without
 * errors, and the loaded variants of the "batch" study are checked at the end.
 *
 * @param options extra options for the pipeline. NOTE: this map is modified in place. STUDY_TYPE,
 *                CALCULATE_STATS and ANNOTATE are filled in only when absent, and DB_NAME is always
 *                overwritten.
 * @throws Exception if any step of the pipeline fails
 */
public void multiIndexPlatinum(ObjectMap options) throws Exception {
    clearDB(DB_NAME);
    StudyConfiguration studyConfigurationMultiFile = new StudyConfiguration(1, "multi");
    StudyConfiguration studyConfigurationBatchFile = new StudyConfiguration(2, "batch");

    options.putIfAbsent(VariantStorageEngine.Options.STUDY_TYPE.key(), VariantStudy.StudyType.COLLECTION);
    options.putIfAbsent(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);
    options.putIfAbsent(VariantStorageEngine.Options.ANNOTATE.key(), false);
    options.put(VariantStorageEngine.Options.DB_NAME.key(), DB_NAME);

    VariantStorageEngine variantStorageManager = getVariantStorageEngine();
    VariantDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor(DB_NAME);
    StudyConfigurationManager studyConfigurationManager = dbAdaptor.getStudyConfigurationManager();
    // Release both resources even when an assertion or a pipeline step fails
    try {
        // Load the files one by one. File ids start at 1, sample ids at 0.
        int i = 1;
        for (int fileId = 77; fileId <= 93; fileId++) {
            ObjectMap fileOptions = new ObjectMap();
            // Caller options are applied last, so they take precedence over the per-file values
            fileOptions.append(VariantStorageEngine.Options.SAMPLE_IDS.key(), "NA128" + fileId + ':' + (i - 1))
                    .append(VariantStorageEngine.Options.FILE_ID.key(), i)
                    .putAll(options);
            runDefaultETL(getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA128" + fileId + "_S1.genome.vcf.gz"),
                    variantStorageManager, studyConfigurationMultiFile, fileOptions);
            // Refresh the study configuration from the manager before checking the indexed files
            studyConfigurationMultiFile = studyConfigurationManager
                    .getStudyConfiguration(studyConfigurationMultiFile.getStudyId(), null).first();
            assertTrue(studyConfigurationMultiFile.getIndexedFiles().contains(i));
            i++;
        }

        // Load the same files again, all of them in one single call, into the "batch" study
        List<URI> uris = new LinkedList<>();
        for (int fileId = 77; fileId <= 93; fileId++) {
            uris.add(getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA128" + fileId + "_S1.genome.vcf.gz"));
        }
        variantStorageManager = getVariantStorageEngine();
        variantStorageManager.getConfiguration().getStorageEngine(variantStorageManager.getStorageEngineId()).getVariant().getOptions()
                .append(VariantStorageEngine.Options.STUDY_NAME.key(), studyConfigurationBatchFile.getStudyName())
                .append(VariantStorageEngine.Options.STUDY_ID.key(), studyConfigurationBatchFile.getStudyId())
                .putAll(options);
        List<StoragePipelineResult> results = variantStorageManager.index(uris, outputUri, true, true, true);
        for (StoragePipelineResult result : results) {
            System.out.println(result.toString());
            assertTrue(result.isTransformExecuted());
            assertNull(result.getTransformError());
            assertTrue(result.isLoadExecuted());
            assertNull(result.getLoadError());
        }

        studyConfigurationBatchFile = studyConfigurationManager
                .getStudyConfiguration(studyConfigurationBatchFile.getStudyId(), null).first();
        checkLoadedVariants(dbAdaptor, studyConfigurationBatchFile, true, false, -1);
        // TODO: the samples data of the "multi" and "batch" studies is not compared variant by variant
    } finally {
        try {
            dbAdaptor.close();
        } finally {
            studyConfigurationManager.close();
        }
    }
}
/**
 * Indexes the chr1 and the chr22 files with one single {@code index} call and checks that every pipeline
 * result reports transform and load as executed without errors.
 *
 * NOTE(review): earlier revisions declared a StudyConfiguration(1, "multiRegion") and an options map
 * (STUDY_TYPE=CONTROL, CALCULATE_STATS=true, ANNOTATE=false) here, but neither was ever handed to the
 * storage engine. They have been removed as dead code. Confirm whether those options were meant to be
 * applied to this run.
 */
@Test
public void multiRegionBatchIndex() throws Exception {
    clearDB(DB_NAME);

    VariantStorageEngine variantStorageManager = getVariantStorageEngine();
    URI chr1 = getResourceUri("1k.chr1.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz");
    URI chr22 = getResourceUri("10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz");

    // The target database and study are configured through the engine options
    variantStorageManager.getOptions().append(VariantStorageEngine.Options.DB_NAME.key(), DB_NAME)
            .append(VariantStorageEngine.Options.STUDY_NAME.key(), STUDY_NAME)
            .append(VariantStorageEngine.Options.STUDY_ID.key(), STUDY_ID);

    List<StoragePipelineResult> results = variantStorageManager.index(Arrays.asList(chr1, chr22), outputUri, true, true, true);
    for (StoragePipelineResult result : results) {
        System.out.println(result.toString());
        assertTrue(result.isTransformExecuted());
        assertNull(result.getTransformError());
        assertTrue(result.isLoadExecuted());
        assertNull(result.getLoadError());
    }
}
/**
 * Loads a chr1 file (file id 5) and a chr22 file (file id 6) into the same study with CALCULATE_STATS
 * enabled. Checks the default cohort size, the indexed files, that both files hold the same samples, and
 * that the file generated for each load only contains entries of the expected chromosome.
 */
@Test
public void multiRegionIndex() throws Exception {
    clearDB(DB_NAME);
    StudyConfiguration studyConfiguration = new StudyConfiguration(1, "multiRegion");
    ObjectMap options = new ObjectMap()
            .append(VariantStorageEngine.Options.STUDY_TYPE.key(), VariantStudy.StudyType.CONTROL)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), true)
            .append(VariantStorageEngine.Options.ANNOTATE.key(), false);

    runDefaultETL(getResourceUri("1k.chr1.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageManager,
            studyConfiguration, options.append(VariantStorageEngine.Options.FILE_ID.key(), 5));
    Integer defaultCohortId = studyConfiguration.getCohortIds().get(StudyEntry.DEFAULT_COHORT);
    assertTrue(studyConfiguration.getCohorts().containsKey(defaultCohortId));
    assertEquals(2504, studyConfiguration.getCohorts().get(defaultCohortId).size());
    assertTrue(studyConfiguration.getIndexedFiles().contains(5));
    checkLoadedVariants(getVariantStorageEngine().getDBAdaptor(DB_NAME), studyConfiguration, true, false, -1);

    runDefaultETL(getResourceUri("10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageManager,
            studyConfiguration, options.append(VariantStorageEngine.Options.FILE_ID.key(), 6));
    assertTrue(studyConfiguration.getIndexedFiles().contains(6));
    checkLoadedVariants(getVariantStorageEngine().getDBAdaptor(DB_NAME), studyConfiguration, true, false, -1);
    assertEquals(studyConfiguration.getSamplesInFiles().get(5), studyConfiguration.getSamplesInFiles().get(6));

    //Check generated stats files
    assertEquals(2504, studyConfiguration.getCohorts().get(defaultCohortId).size());

    JsonFactory jsonFactory = new JsonFactory();
    // NOTE(review): the mapper is never referenced again. Building it on top of jsonFactory is expected to
    // register it as the codec of the factory, which is what parser.readValueAs relies on below. Confirm
    // against the Jackson version in use.
    ObjectMapper jsonObjectMapper = new ObjectMapper(jsonFactory);
    jsonObjectMapper.addMixIn(VariantStats.class, VariantStatsJsonMixin.class);
    jsonObjectMapper.addMixIn(GenericRecord.class, GenericRecordAvroJsonMixin.class);

    int[] fileIds = {5, 6};
    String[] expectedChromosomes = {"1", "22"};
    for (int k = 0; k < fileIds.length; k++) {
        String prefix = VariantStorageEngine.buildFilename(studyConfiguration.getStudyName(), fileIds[k]);
        File[] statsFiles = getTmpRootDir().toFile().listFiles((dir, fileName) ->
                fileName.startsWith(prefix) && fileName.contains("variants"));
        // listFiles returns null when the directory can not be listed: fail with a message, not with a NPE
        assertNotNull("Unable to list the files with prefix '" + prefix + "'", statsFiles);
        assertEquals(1, statsFiles.length);

        String expectedChromosome = expectedChromosomes[k];
        // Every stream is declared as its own resource, so none is leaked if a wrapping constructor fails
        try (FileInputStream fileStream = new FileInputStream(statsFiles[0]);
             GZIPInputStream gzipStream = new GZIPInputStream(fileStream);
             JsonParser parser = jsonFactory.createParser(gzipStream)) {
            while (parser.nextToken() != null) {
                VariantStatsWrapper variantStatsWrapper = parser.readValueAs(VariantStatsWrapper.class);
                assertEquals(expectedChromosome, variantStatsWrapper.getChromosome());
            }
        }
    }
}
/**
 * Single thread indexation ("old style"): one transform thread and one load thread, "json" as transform
 * format and "gZiP" as compress method. The transformed file must end with 'variants.json.gz', file id 6
 * must be indexed, and the transformed and loaded variants are checked.
 **/
@Test
public void singleThreadIndex() throws Exception {
    clearDB(DB_NAME);

    StudyConfiguration studyConfiguration = newStudyConfiguration();
    ObjectMap params = new ObjectMap()
            .append(VariantStorageEngine.Options.STUDY_ID.key(), studyConfiguration.getStudyId())
            .append(VariantStorageEngine.Options.STUDY_NAME.key(), studyConfiguration.getStudyName())
            .append(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "json")
            .append(VariantStorageEngine.Options.FILE_ID.key(), 6)
            .append(VariantStorageEngine.Options.COMPRESS_METHOD.key(), "gZiP")
            .append(VariantStorageEngine.Options.TRANSFORM_THREADS.key(), 1)
            .append(VariantStorageEngine.Options.LOAD_THREADS.key(), 1)
//          .append(VariantStorageEngine.Options.INCLUDE_GENOTYPES.key(), true)
//          .append(VariantStorageEngine.Options.INCLUDE_SRC.key(), true)
            .append(VariantStorageEngine.Options.DB_NAME.key(), DB_NAME);

    StoragePipelineResult pipelineResult = runETL(variantStorageManager, params, true, true, true);

    // Read back the study configuration after the load
    VariantDBAdaptor freshAdaptor = getVariantStorageEngine().getDBAdaptor(DB_NAME);
    studyConfiguration = freshAdaptor.getStudyConfigurationManager()
            .getStudyConfiguration(studyConfiguration.getStudyId(), null)
            .first();

    URI transformedUri = pipelineResult.getTransformResult();
    String transformedFileName = Paths.get(transformedUri).toFile().getName();
    assertTrue("Incorrect transform file extension " + transformedUri + ". Expected 'variants.json.gz'",
            transformedFileName.endsWith("variants.json.gz"));
    assertTrue(studyConfiguration.getIndexedFiles().contains(6));

    VariantSource source = checkTransformedVariants(transformedUri, studyConfiguration);
    checkLoadedVariants(variantStorageManager.getDBAdaptor(DB_NAME), studyConfiguration, true, false,
            getExpectedNumLoadedVariants(source));
}
/**
 * Fast indexation: eight transform threads, eight load threads and "snappy" as compress method, with no
 * explicit transform format. The transformed file must end with 'variants.avro.snappy', file id 6 must be
 * indexed, and the transformed and loaded variants are checked.
 **/
@Test
public void fastIndex() throws Exception {
    clearDB(DB_NAME);

    StudyConfiguration studyConfiguration = newStudyConfiguration();
    ObjectMap params = new ObjectMap()
            .append(VariantStorageEngine.Options.STUDY_ID.key(), studyConfiguration.getStudyId())
            .append(VariantStorageEngine.Options.STUDY_NAME.key(), studyConfiguration.getStudyName())
            .append(VariantStorageEngine.Options.FILE_ID.key(), 6)
            .append(VariantStorageEngine.Options.COMPRESS_METHOD.key(), "snappy")
            .append(VariantStorageEngine.Options.TRANSFORM_THREADS.key(), 8)
            .append(VariantStorageEngine.Options.LOAD_THREADS.key(), 8)
//          .append(VariantStorageEngine.Options.INCLUDE_GENOTYPES.key(), false)
//          .append(VariantStorageEngine.Options.INCLUDE_SRC.key(), false)
            .append(VariantStorageEngine.Options.DB_NAME.key(), DB_NAME);

    StoragePipelineResult pipelineResult = runETL(variantStorageManager, params, true, true, true);
    System.out.println("etlResult = " + pipelineResult);

    // Read back the study configuration after the load
    VariantDBAdaptor freshAdaptor = getVariantStorageEngine().getDBAdaptor(DB_NAME);
    studyConfiguration = freshAdaptor.getStudyConfigurationManager()
            .getStudyConfiguration(studyConfiguration.getStudyId(), null)
            .first();

    URI transformedUri = pipelineResult.getTransformResult();
    String transformedFileName = Paths.get(transformedUri).toFile().getName();
    assertTrue("Incorrect transform file extension " + transformedUri + ". Expected 'variants.avro.snappy'",
            transformedFileName.endsWith("variants.avro.snappy"));
    assertTrue(studyConfiguration.getIndexedFiles().contains(6));

    VariantSource source = checkTransformedVariants(transformedUri, studyConfiguration);
    checkLoadedVariants(variantStorageManager.getDBAdaptor(DB_NAME), studyConfiguration, false, false,
            getExpectedNumLoadedVariants(source));
}
/**
 * Indexes the small input file keeping the extra genotype fields "GL" and "DS" (file id 2, without
 * annotation nor stats). Then reads up to 999 variants from the transformed file and compares each one
 * with the variant returned by the database, after aligning field order and formatting on the loaded side.
 * Variants whose alternate starts with "<", and the positions 70146475 and 107976940, are skipped.
 */
@Test
public void indexWithOtherFields() throws Exception {
    //GT:DS:GL
    StudyConfiguration studyConfiguration = newStudyConfiguration();
    StoragePipelineResult etlResult = runDefaultETL(smallInputUri, getVariantStorageEngine(), studyConfiguration,
            new ObjectMap(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), Arrays.asList("GL", "DS"))
                    .append(VariantStorageEngine.Options.FILE_ID.key(), 2)
                    .append(VariantStorageEngine.Options.ANNOTATE.key(), false)
                    .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false)
    );
    VariantSource source = VariantReaderUtils.readVariantSource(Paths.get(etlResult.getTransformResult().getPath()), null);
    checkTransformedVariants(etlResult.getTransformResult(), studyConfiguration, source.getStats().getNumRecords());

    VariantDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor(DB_NAME);
    // The adaptor and the reader are released even when one of the assertions below fails
    try {
        checkLoadedVariants(dbAdaptor, studyConfiguration, true, false, getExpectedNumLoadedVariants(source));

        VariantReader reader = VariantReaderUtils.getVariantReader(Paths.get(etlResult.getTransformResult().getPath()),
                new VariantSource("", "2", STUDY_NAME, STUDY_NAME));
        reader.open();
        try {
            reader.pre();
            for (Variant variant : reader.read(999)) {
                if (variant.getAlternate().startsWith("<") || variant.getStart().equals(70146475) || variant.getStart().equals(107976940)) {
                    continue;
                }
                // Rewrite the study and file ids of the transformed variant to the values used in the comparison
                StudyEntry studyEntry = variant.getStudies().get(0);
                studyEntry.setStudyId(STUDY_NAME);
                studyEntry.getFiles().get(0).setFileId("2");
                variant.setStudies(Collections.singletonList(studyEntry));

                Variant loadedVariant = dbAdaptor.get(new Query(VariantDBAdaptor.VariantQueryParams.ID.key(), variant.toString()), new QueryOptions()).first();
                // Fail with a readable message instead of a NullPointerException if the variant was not loaded
                assertNotNull("Variant " + variant + " not found in the database", loadedVariant);
                loadedVariant.setAnnotation(null); //Remove annotation
                StudyEntry loadedStudy = loadedVariant.getStudy(STUDY_NAME);
                // Swap the 2nd and 3rd format fields of the loaded study before comparing
                loadedStudy.setFormat(Arrays.asList(loadedStudy.getFormat().get(0), loadedStudy.getFormat().get(2), loadedStudy.getFormat().get(1)));
                loadedStudy.setStats(Collections.emptyMap()); //Remove calculated stats
                loadedStudy.getSamplesData().forEach(values -> {
                    values.set(0, values.get(0).replace("0/0", "0|0"));
                    // Same swap for the sample values
                    String v1 = values.get(1);
                    values.set(1, values.get(2));
                    values.set(2, v1);
                    while (values.get(2).length() < 5) {
                        values.set(2, values.get(2) + "0"); //Set lost zeros
                    }
                });
                variant.resetLength();
                assertEquals("\n" + variant.toJson() + "\n" + loadedVariant.toJson(), variant, loadedVariant);
            }
            reader.post();
        } finally {
            reader.close();
        }
    } finally {
        dbAdaptor.close();
    }
}
/**
 * Indexes "variant-test-somatic.vcf" (file id 2, no annotation) keeping the extra genotype fields GL, DP,
 * AU, CU, GU and TU. Every variant returned when asking for "./." as unknown genotype must expose "./." as
 * GT of SAMPLE_1, and a non null value for each one of the extra fields.
 */
@Test
public void indexWithOtherFieldsNoGT() throws Exception {
    //GL:DP:GU:TU:AU:CU
    StudyConfiguration studyConfiguration = newStudyConfiguration();
    ObjectMap indexOptions = new ObjectMap(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(),
            Arrays.asList("GL", "DP", "AU", "CU", "GU", "TU"))
            .append(VariantStorageEngine.Options.FILE_ID.key(), 2)
            .append(VariantStorageEngine.Options.ANNOTATE.key(), false);
    runDefaultETL(getResourceUri("variant-test-somatic.vcf"), getVariantStorageEngine(), studyConfiguration, indexOptions);

    Query unknownGenotypeQuery = new Query(VariantDBAdaptor.VariantQueryParams.UNKNOWN_GENOTYPE.key(), "./.");
    VariantDBIterator variants = getVariantStorageEngine().getDBAdaptor(DB_NAME)
            .iterator(unknownGenotypeQuery, new QueryOptions());
    while (variants.hasNext()) {
        StudyEntry study = variants.next().getStudy(STUDY_NAME);
        assertEquals("./.", study.getSampleData("SAMPLE_1", "GT"));
        for (String field : Arrays.asList("DP", "GL", "AU", "CU", "GU", "TU")) {
            assertNotNull(study.getSampleData("SAMPLE_1", field));
        }
    }
}
/**
 * Indexes two somatic files keeping the extra genotype fields GL, DP, AU, CU, GU and TU: file id 2 with
 * EXCLUDE_GENOTYPES=true and uncompressed extra fields, file id 3 with EXCLUDE_GENOTYPES=false and
 * compressed extra fields. The stored study configuration must keep EXCLUDE_GENOTYPES=true and the list of
 * extra fields; no variant may expose a GT for SAMPLE_1, and the queries by returned sample and file must
 * only return the requested sample and file.
 */
@Test
public void indexWithOtherFieldsExcludeGT() throws Exception {
    //GL:DP:GU:TU:AU:CU
    StudyConfiguration studyConfiguration = newStudyConfiguration();
    List<String> extraFields = Arrays.asList("GL", "DP", "AU", "CU", "GU", "TU");

    // First file: genotypes excluded, extra fields not compressed
    ObjectMap firstFileOptions = new ObjectMap(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), extraFields)
            .append(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS_COMPRESS.key(), false)
            .append(VariantStorageEngine.Options.EXCLUDE_GENOTYPES.key(), true)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false)
            .append(VariantStorageEngine.Options.FILE_ID.key(), 2)
            .append(VariantStorageEngine.Options.ANNOTATE.key(), false);
    runDefaultETL(getResourceUri("variant-test-somatic.vcf"), getVariantStorageEngine(), studyConfiguration, firstFileOptions);

    // Second file: genotypes requested, extra fields compressed
    ObjectMap secondFileOptions = new ObjectMap(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), extraFields)
            .append(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS_COMPRESS.key(), true)
            .append(VariantStorageEngine.Options.EXCLUDE_GENOTYPES.key(), false)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false)
            .append(VariantStorageEngine.Options.FILE_ID.key(), 3)
            .append(VariantStorageEngine.Options.ANNOTATE.key(), false);
    runDefaultETL(getResourceUri("variant-test-somatic_2.vcf"), getVariantStorageEngine(), studyConfiguration, secondFileOptions);

    VariantDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor(DB_NAME);
    studyConfiguration = dbAdaptor.getStudyConfigurationManager()
            .getStudyConfiguration(studyConfiguration.getStudyId(), null)
            .first();
    assertEquals(true, studyConfiguration.getAttributes().getBoolean(VariantStorageEngine.Options.EXCLUDE_GENOTYPES.key(), false));
    assertEquals(extraFields, studyConfiguration.getAttributes().getAsStringList(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key()));

    // No GT for SAMPLE_1, but all the extra fields must be there
    for (Variant variant : dbAdaptor) {
        System.out.println(variant.toJson());
        StudyEntry study = variant.getStudy(STUDY_NAME);
        assertNull(study.getSampleData("SAMPLE_1", "GT"));
        for (String field : Arrays.asList("DP", "GL", "AU", "CU", "GU", "TU")) {
            assertNotNull(study.getSampleData("SAMPLE_1", field));
        }
    }

    // Asking for one sample returns only that sample, coming from one or two files
    for (String sampleName : Arrays.asList("SAMPLE_1", "SAMPLE_2")) {
        Query sampleQuery = new Query(VariantDBAdaptor.VariantQueryParams.RETURNED_SAMPLES.key(), sampleName);
        dbAdaptor.iterator(sampleQuery, new QueryOptions()).forEachRemaining(variant -> {
            StudyEntry study = variant.getStudy(STUDY_NAME);
            assertEquals(1, study.getSamplesData().size());
            assertEquals(Collections.singleton(sampleName), study.getSamplesName());
            assertTrue(study.getFiles().size() > 0);
            assertTrue(study.getFiles().size() <= 2);
        });
    }

    // Asking for SAMPLE_2 restricted to file 3: if any file is returned, the first one must be file "3"
    Query sampleAndFileQuery = new Query(VariantDBAdaptor.VariantQueryParams.RETURNED_SAMPLES.key(), "SAMPLE_2");
    sampleAndFileQuery.append(VariantDBAdaptor.VariantQueryParams.FILES.key(), 3);
    sampleAndFileQuery.append(VariantDBAdaptor.VariantQueryParams.RETURNED_FILES.key(), 3);
    dbAdaptor.iterator(sampleAndFileQuery, new QueryOptions()).forEachRemaining(variant -> {
        System.out.println("variant.toJson() = " + variant.toJson());
        StudyEntry study = variant.getStudy(STUDY_NAME);
        assertEquals(1, study.getSamplesData().size());
        assertEquals(Collections.singleton("SAMPLE_2"), study.getSamplesName());
        if (!study.getFiles().isEmpty()) {
            assertEquals("3", study.getFiles().get(0).getFileId());
        }
    });
}
/**
 * Calls {@code VariantStoragePipeline.checkAndUpdateStudyConfiguration} without the SAMPLE_IDS option on a
 * study configuration that already knows the samples "s0" (id 1) and "s10" (id 4). Afterwards the samples
 * s0 to s5 must be registered, the samples of the file must be s0 to s5 in that order, and "s0" must keep
 * its previous id.
 */
@Test
public void checkAndUpdateStudyConfigurationWithoutSampleIdsTest() throws StorageEngineException {
    StudyConfiguration studyConfiguration = newStudyConfiguration();
    studyConfiguration.getSampleIds().put("s0", 1);
    studyConfiguration.getSampleIds().put("s10", 4);
    Integer fileId = 5;
    VariantSource source = createVariantSource(studyConfiguration, fileId);
    ObjectMap options = new ObjectMap();
    VariantStoragePipeline.checkAndUpdateStudyConfiguration(studyConfiguration, fileId, source, options);

    List<String> expectedSamples = Arrays.asList("s0", "s1", "s2", "s3", "s4", "s5");
    assertTrue(studyConfiguration.getSampleIds().keySet().containsAll(expectedSamples));
    // assertEquals instead of assertTrue(a.equals(b)): a failure reports both the expected and the actual list
    assertEquals(expectedSamples,
            studyConfiguration.getSamplesInFiles().get(fileId).stream()
                    .map(s -> studyConfiguration.getSampleIds().inverse().get(s))
                    .collect(Collectors.toList())
    );
    assertEquals(Integer.valueOf(1), studyConfiguration.getSampleIds().get("s0"));
    studyConfiguration.getSamplesInFiles().get(fileId).forEach((i) -> System.out.println(studyConfiguration.getSampleIds().inverse()
            .get(i) + " = " + i));
}
/**
 * Runs {@code checkAndUpdateStudyConfiguration} with an explicit SAMPLE_IDS option
 * ("s0:20" .. "s5:25") on a study that already contains the unrelated sample "s10".
 * <p>
 * Verifies that the samples registered for the file are s0..s5, in that order, carrying
 * exactly the ids 20..25 requested through the option.
 *
 * @throws StorageEngineException if the study configuration check fails
 */
@Test
public void checkAndUpdateStudyConfigurationWithSampleIdsTest() throws StorageEngineException {
    final StudyConfiguration config = newStudyConfiguration();
    final Integer loadedFileId = 5;
    final VariantSource variantSource = createVariantSource(config, loadedFileId);
    config.getSampleIds().put("s10", 4);
    final String requestedIds = "s0:20,s1:21,s2:22,s3:23,s4:24,s5:25";
    final ObjectMap pipelineOptions = new ObjectMap(VariantStorageEngine.Options.SAMPLE_IDS.key(), requestedIds);

    VariantStoragePipeline.checkAndUpdateStudyConfiguration(config, loadedFileId, variantSource, pipelineOptions);

    assertTrue(config.getSampleIds().keySet().containsAll(Arrays.asList("s0", "s1", "s2", "s3", "s4", "s5")));

    // Sample names of the file, resolved from the stored sample ids.
    List<String> namesInFile = config.getSamplesInFiles().get(loadedFileId).stream()
            .map(sampleId -> config.getSampleIds().inverse().get(sampleId))
            .collect(Collectors.toList());
    assertEquals(Arrays.asList("s0", "s1", "s2", "s3", "s4", "s5"), namesInFile);
    assertEquals(Arrays.asList(20, 21, 22, 23, 24, 25), new ArrayList<>(config.getSamplesInFiles().get(loadedFileId)));

    // Sample "s<k>" must have received id 20 + k.
    for (int k = 0; k <= 5; k++) {
        assertEquals(Integer.valueOf(20 + k), config.getSampleIds().get("s" + k));
    }

    // Print the resulting "name = id" pairs.
    for (Integer sampleId : config.getSamplesInFiles().get(loadedFileId)) {
        System.out.println(config.getSampleIds().inverse().get(sampleId) + " = " + sampleId);
    }
}
/**
 * Runs {@code checkAndUpdateStudyConfiguration} when the file already has its sample ids
 * (20..25) registered in the study and the SAMPLE_IDS option requests those same ids.
 * <p>
 * The test contains no assertions: it passes as long as the call does not throw.
 *
 * @throws StorageEngineException if the study configuration check fails
 */
@Test
public void checkAndUpdateStudyConfigurationWithSamplesInFilesTest() throws StorageEngineException {
    final StudyConfiguration config = newStudyConfiguration();
    final Integer loadedFileId = 5;
    final VariantSource variantSource = createVariantSource(config, loadedFileId);

    final String requestedIds = "s0:20,s1:21,s2:22,s3:23,s4:24,s5:25";
    final ObjectMap pipelineOptions = new ObjectMap(VariantStorageEngine.Options.SAMPLE_IDS.key(), requestedIds);

    // Pre-register the samples of the file with the same ids as requested above.
    final LinkedHashSet<Integer> registeredSampleIds = new LinkedHashSet<>(Arrays.asList(20, 21, 22, 23, 24, 25));
    config.getSamplesInFiles().put(loadedFileId, registeredSampleIds);

    VariantStoragePipeline.checkAndUpdateStudyConfiguration(config, loadedFileId, variantSource, pipelineOptions);
}
/**
 * Expects a {@link StorageEngineException} mentioning "s0:20" when the SAMPLE_IDS option
 * requests id 20 for "s0" while the study already maps "s0" to id 0.
 *
 * @throws StorageEngineException the exception expected by this test
 */
@Test
public void checkAndUpdateStudyConfigurationWithRepeatedSampleIdsTest() throws StorageEngineException {
    final StudyConfiguration config = newStudyConfiguration();
    final Integer loadedFileId = 5;
    final VariantSource variantSource = createVariantSource(config, loadedFileId);

    // "s0" is registered beforehand with an id different from the one requested below.
    config.getSampleIds().put("s0", 0);
    final String requestedIds = "s0:20,s1:21,s2:22,s3:23,s4:24,s5:25";
    final ObjectMap pipelineOptions = new ObjectMap(VariantStorageEngine.Options.SAMPLE_IDS.key(), requestedIds);

    thrown.expect(StorageEngineException.class);
    thrown.expectMessage("s0:20"); //Already present
    VariantStoragePipeline.checkAndUpdateStudyConfiguration(config, loadedFileId, variantSource, pipelineOptions);
}
/**
 * Expects a {@link StorageEngineException} mentioning "UNEXISTING_SAMPLE" when the SAMPLE_IDS
 * option names a sample that is not among the samples of the source (s0..s5).
 *
 * @throws StorageEngineException the exception expected by this test
 */
@Test
public void checkAndUpdateStudyConfigurationWithExtraSampleIdsTest() throws StorageEngineException {
    final StudyConfiguration config = newStudyConfiguration();
    final Integer loadedFileId = 5;
    final VariantSource variantSource = createVariantSource(config, loadedFileId);

    // Valid ids for every sample of the file, followed by one sample the file does not contain.
    final String requestedIds = "s0:20,s1:21,s2:22,s3:23,s4:24,s5:25," + "UNEXISTING_SAMPLE:30";
    final ObjectMap pipelineOptions = new ObjectMap(VariantStorageEngine.Options.SAMPLE_IDS.key(), requestedIds);

    thrown.expect(StorageEngineException.class);
    thrown.expectMessage("UNEXISTING_SAMPLE"); //Not in file
    VariantStoragePipeline.checkAndUpdateStudyConfiguration(config, loadedFileId, variantSource, pipelineOptions);
}
/**
 * Expects a {@link StorageEngineException} mentioning "NaN" when the SAMPLE_IDS option
 * carries a non-numeric id ("s4:NaN").
 *
 * @throws StorageEngineException the exception expected by this test
 */
@Test
public void checkAndUpdateStudyConfigurationWithAlphanumericSampleIdsTest() throws StorageEngineException {
    final StudyConfiguration config = newStudyConfiguration();
    final Integer loadedFileId = 5;
    final VariantSource variantSource = createVariantSource(config, loadedFileId);

    final String requestedIds = "s0:20,s1:21,s2:22,s3:23,s4:NaN,s5:25";
    final ObjectMap pipelineOptions = new ObjectMap(VariantStorageEngine.Options.SAMPLE_IDS.key(), requestedIds);

    thrown.expect(StorageEngineException.class);
    thrown.expectMessage("NaN"); //Not a number
    VariantStoragePipeline.checkAndUpdateStudyConfiguration(config, loadedFileId, variantSource, pipelineOptions);
}
/**
 * Expects a {@link StorageEngineException} mentioning "s5:" when the SAMPLE_IDS option
 * contains an entry with a sample name but no id after the colon.
 *
 * @throws StorageEngineException the exception expected by this test
 */
@Test
public void checkAndUpdateStudyConfigurationWithMalformedSampleIds1Test() throws StorageEngineException {
    final StudyConfiguration config = newStudyConfiguration();
    final Integer loadedFileId = 5;
    final VariantSource variantSource = createVariantSource(config, loadedFileId);

    final String requestedIds = "s0:20,s1:21,s2:22,s3:23,s4:24,s5:";
    final ObjectMap pipelineOptions = new ObjectMap(VariantStorageEngine.Options.SAMPLE_IDS.key(), requestedIds);

    thrown.expect(StorageEngineException.class);
    thrown.expectMessage("s5:"); //Malformed
    VariantStoragePipeline.checkAndUpdateStudyConfiguration(config, loadedFileId, variantSource, pipelineOptions);
}
/**
 * Expects a {@link StorageEngineException} mentioning "s3" when the SAMPLE_IDS option
 * contains an entry that is a bare sample name without the ":id" part.
 *
 * @throws StorageEngineException the exception expected by this test
 */
@Test
public void checkAndUpdateStudyConfigurationWithMalformedSampleIds2Test() throws StorageEngineException {
    final StudyConfiguration config = newStudyConfiguration();
    final Integer loadedFileId = 5;
    final VariantSource variantSource = createVariantSource(config, loadedFileId);

    final String requestedIds = "s0:20,s1:21,s2:22,s3,s4:24,s5:25";
    final ObjectMap pipelineOptions = new ObjectMap(VariantStorageEngine.Options.SAMPLE_IDS.key(), requestedIds);

    thrown.expect(StorageEngineException.class);
    thrown.expectMessage("s3"); //Malformed
    VariantStoragePipeline.checkAndUpdateStudyConfiguration(config, loadedFileId, variantSource, pipelineOptions);
}
/**
 * Expects a {@link StorageEngineException} mentioning "[s1, s2, s3, s4, s5]" when the
 * SAMPLE_IDS option only provides an id for "s0" although the source holds s0..s5.
 *
 * @throws StorageEngineException the exception expected by this test
 */
@Test
public void checkAndUpdateStudyConfigurationWithMissingSampleIdsTest() throws StorageEngineException {
    final StudyConfiguration config = newStudyConfiguration();
    final Integer loadedFileId = 5;
    final VariantSource variantSource = createVariantSource(config, loadedFileId);

    final String requestedIds = "s0:20";
    final ObjectMap pipelineOptions = new ObjectMap(VariantStorageEngine.Options.SAMPLE_IDS.key(), requestedIds);

    thrown.expect(StorageEngineException.class);
    thrown.expectMessage("[s1, s2, s3, s4, s5]"); //Missing samples
    VariantStoragePipeline.checkAndUpdateStudyConfiguration(config, loadedFileId, variantSource, pipelineOptions);
}
/**
 * Expects a {@link StorageEngineException} mentioning "s5" when the sample ids already
 * registered for the file (20..24) lack the id requested for "s5" (25).
 *
 * @throws StorageEngineException the exception expected by this test
 */
@Test
public void checkAndUpdateStudyConfigurationWithMissingSamplesInFilesTest() throws StorageEngineException {
    final StudyConfiguration config = newStudyConfiguration();
    final Integer loadedFileId = 5;
    final VariantSource variantSource = createVariantSource(config, loadedFileId);

    final String requestedIds = "s0:20,s1:21,s2:22,s3:23,s4:24,s5:25";
    final ObjectMap pipelineOptions = new ObjectMap(VariantStorageEngine.Options.SAMPLE_IDS.key(), requestedIds);

    // Registered set stops at 24, so the id 25 requested for "s5" is not part of it.
    final LinkedHashSet<Integer> registeredSampleIds = new LinkedHashSet<>(Arrays.asList(20, 21, 22, 23, 24));
    config.getSamplesInFiles().put(loadedFileId, registeredSampleIds);

    thrown.expect(StorageEngineException.class);
    thrown.expectMessage("s5");
    VariantStoragePipeline.checkAndUpdateStudyConfiguration(config, loadedFileId, variantSource, pipelineOptions);
}
/**
 * Expects a {@link StorageEngineException} when the sample ids already registered for the
 * file include one extra sample ("GhostSample", id 0) on top of the ids 20..25 requested
 * through the SAMPLE_IDS option.
 *
 * @throws StorageEngineException the exception expected by this test
 */
@Test
public void checkAndUpdateStudyConfigurationWithExtraSamplesInFilesTest() throws StorageEngineException {
    final StudyConfiguration config = newStudyConfiguration();
    final Integer loadedFileId = 5;
    final VariantSource variantSource = createVariantSource(config, loadedFileId);

    final String requestedIds = "s0:20,s1:21,s2:22,s3:23,s4:24,s5:25";
    final ObjectMap pipelineOptions = new ObjectMap(VariantStorageEngine.Options.SAMPLE_IDS.key(), requestedIds);

    // Register an additional sample in the study and list its id among the samples of the file.
    config.getSampleIds().put("GhostSample", 0);
    final LinkedHashSet<Integer> registeredSampleIds = new LinkedHashSet<>(Arrays.asList(20, 21, 22, 23, 24, 25, 0));
    config.getSamplesInFiles().put(loadedFileId, registeredSampleIds);

    thrown.expect(StorageEngineException.class);
    VariantStoragePipeline.checkAndUpdateStudyConfiguration(config, loadedFileId, variantSource, pipelineOptions);
}
/**
 * Registers the file "fileName" under the given id in the study and builds a matching
 * {@link VariantSource} whose samples are "s0".."s5" at positions 0..5.
 *
 * @param studyConfiguration study that receives the "fileName" to {@code fileId} mapping and
 *                           provides the study id and name of the source
 * @param fileId             id of the file; also used, as a string, as the source file id
 * @return the newly created source
 */
protected VariantSource createVariantSource(StudyConfiguration studyConfiguration, Integer fileId) {
    final String fileName = "fileName";
    studyConfiguration.getFileIds().put(fileName, fileId);

    final String studyId = String.valueOf(studyConfiguration.getStudyId());
    VariantSource variantSource = new VariantSource(fileName, fileId.toString(), studyId, studyConfiguration.getStudyName());

    // Sample "s<k>" sits at position k.
    Map<String, Integer> positions = new HashMap<>();
    for (int k = 0; k < 6; k++) {
        positions.put("s" + k, k);
    }
    variantSource.setSamplesPosition(positions);
    return variantSource;
}
/* ---------------------------------------------------- */
/* Check methods for loaded and transformed Variants */
/* ---------------------------------------------------- */
/**
 * Convenience overload of the three-argument {@code checkTransformedVariants} that passes
 * {@code -1} as the expected number of variants.
 *
 * @param variantsJson       location of the transformed variants file
 * @param studyConfiguration study configuration, forwarded unchanged
 * @return the source returned by the three-argument overload
 * @throws StorageEngineException propagated from the three-argument overload
 */
private VariantSource checkTransformedVariants(URI variantsJson, StudyConfiguration studyConfiguration) throws StorageEngineException {
    final int noExpectedCount = -1;
    return checkTransformedVariants(variantsJson, studyConfiguration, noExpectedCount);
}
/**
 * Reads a transformed variants file from start to end and checks that the number of variants
 * read matches the number of records reported by the source statistics.
 *
 * @param variantsJson        location of the transformed variants file; only its path is used
 * @param studyConfiguration  not used by this method
 * @param expectedNumVariants expected number of variants; when negative, the number of records
 *                            reported by the source statistics is used as the expectation
 * @return the {@link VariantSource} that was handed to the reader
 * @throws StorageEngineException declared by the original signature; NOTE(review): presumably
 *                                raised while creating the reader, confirm against VariantReaderUtils
 */
private VariantSource checkTransformedVariants(URI variantsJson, StudyConfiguration studyConfiguration, int expectedNumVariants)
        throws StorageEngineException {
    long start = System.currentTimeMillis();
    VariantSource source = new VariantSource(VCF_TEST_FILE_NAME, "6", "", "");
    VariantReader variantReader = VariantReaderUtils.getVariantReader(Paths.get(variantsJson.getPath()), source);

    int numVariants = 0;
    variantReader.open();
    try {
        variantReader.pre();
        List<Variant> read;
        // Read in batches of 100 until the reader returns nothing.
        while ((read = variantReader.read(100)) != null && !read.isEmpty()) {
            numVariants += read.size();
        }
        variantReader.post();
    } finally {
        // Always release the reader, even when reading fails part-way through.
        variantReader.close();
    }

    if (expectedNumVariants < 0) {
        expectedNumVariants = source.getStats().getNumRecords();
    } else {
        assertEquals(expectedNumVariants, source.getStats().getNumRecords());
    }
    assertEquals(expectedNumVariants, numVariants);
    logger.info("checkTransformedVariants time : " + (System.currentTimeMillis() - start) / 1000.0 + "s");
    return source;
}
/**
 * Convenience overload of the five-argument {@code checkLoadedVariants} that uses
 * {@code NUM_VARIANTS} as the expected number of variants.
 *
 * @param dbAdaptor          adaptor over the database to check
 * @param studyConfiguration study the loaded variants are checked against
 * @param includeSamples     forwarded unchanged
 * @param includeSrc         forwarded unchanged
 */
private void checkLoadedVariants(VariantDBAdaptor dbAdaptor, StudyConfiguration studyConfiguration,
                                 boolean includeSamples, boolean includeSrc) {
    final int expectedVariants = NUM_VARIANTS;
    checkLoadedVariants(dbAdaptor, studyConfiguration, includeSamples, includeSrc, expectedVariants);
}
/**
 * Checks the variants stored in the database against the given study configuration.
 * <p>
 * For every study entry whose study id equals the study name of {@code studyConfiguration}:
 * <ul>
 * <li>when {@code includeSamples} is set, the samples data must be present and cover exactly
 * the samples of the study;</li>
 * <li>every file entry must (or, when {@code includeSrc} is false, must not) carry the
 * {@code VariantVcfFactory.SRC} attribute;</li>
 * <li>for every cohort with calculated stats, the genotype counts must add up to the cohort size.</li>
 * </ul>
 *
 * @param dbAdaptor           adaptor used both to count and to iterate over the stored variants
 * @param studyConfiguration  study the stored variants are checked against
 * @param includeSamples      whether samples data is expected in the study entries
 * @param includeSrc          whether the SRC attribute is expected in the file entries
 * @param expectedNumVariants expected number of variants; a negative value disables the count checks
 */
private void checkLoadedVariants(VariantDBAdaptor dbAdaptor, StudyConfiguration studyConfiguration,
                                 boolean includeSamples, boolean includeSrc, int expectedNumVariants) {
    long start = System.currentTimeMillis();
    int numVariants = 0;
    // Study entries are matched by comparing their study id with the study *name*.
    String expectedStudyId = studyConfiguration.getStudyName();

    QueryResult<Long> count = dbAdaptor.count(new Query());
    assertEquals(1, count.getNumResults());
    if (expectedNumVariants >= 0) {
        assertEquals(expectedNumVariants, count.first().intValue());
    }

    for (Variant variant : dbAdaptor) {
        for (StudyEntry studyEntry : variant.getStudiesMap().values()) {
            if (!studyEntry.getStudyId().equals(expectedStudyId)) {
                // Entry of another study: not counted and not checked.
                continue;
            }
            numVariants++;

            if (includeSamples) {
                assertNotNull(studyEntry.getSamplesData());
                assertEquals(studyConfiguration.getSampleIds().size(), studyEntry.getSamplesData().size());
                assertEquals(studyConfiguration.getSampleIds().keySet(), studyEntry.getSamplesDataAsMap().keySet());
            }

            for (FileEntry fileEntry : studyEntry.getFiles()) {
                if (includeSrc) {
                    assertNotNull(fileEntry.getAttributes().get(VariantVcfFactory.SRC));
                } else {
                    assertNull(fileEntry.getAttributes().get(VariantVcfFactory.SRC));
                }
            }

            for (Integer cohortId : studyConfiguration.getCalculatedStats()) {
                String cohortName = StudyConfiguration.inverseMap(studyConfiguration.getCohortIds()).get(cohortId);
                assertTrue(studyEntry.getStats().containsKey(cohortName));
                // The genotype counts of the cohort must add up to the number of samples in it.
                assertEquals(variant + " has incorrect stats for cohort \"" + cohortName + "\":" + cohortId,
                        studyConfiguration.getCohorts().get(cohortId).size(),
                        studyEntry.getStats().get(cohortName).getGenotypesCount().values().stream()
                                .reduce((a, b) -> a + b)
                                .orElse(0).intValue());
            }
        }
    }

    if (expectedNumVariants >= 0) {
        assertEquals(expectedNumVariants, numVariants);
    }
    logger.info("checkLoadedVariants time : " + (System.currentTimeMillis() - start) / 1000.0 + "s");
}
/**
 * Ignored test: clears the database, runs the ETL with annotation enabled and then hands
 * an iterator over the stored variants to a {@link VariantSearchManager} for loading into
 * the configured search collection.
 *
 * @throws Exception if any of the steps fails
 */
@Test
@Ignore
public void insertVariantIntoSolr() throws Exception {
    clearDB(DB_NAME);

    StudyConfiguration study = newStudyConfiguration();
    ObjectMap etlParams = new ObjectMap();
    // Study and file identification.
    etlParams.put(VariantStorageEngine.Options.STUDY_ID.key(), study.getStudyId());
    etlParams.put(VariantStorageEngine.Options.STUDY_NAME.key(), study.getStudyName());
    // Transform settings.
    etlParams.put(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "json");
    etlParams.put(VariantStorageEngine.Options.FILE_ID.key(), 6);
    etlParams.put(VariantStorageEngine.Options.COMPRESS_METHOD.key(), "gZiP");
    etlParams.put(VariantStorageEngine.Options.TRANSFORM_THREADS.key(), 1);
    // Load settings.
    etlParams.put(VariantStorageEngine.Options.LOAD_THREADS.key(), 1);
    etlParams.put(VariantStorageEngine.Options.DB_NAME.key(), DB_NAME);
    etlParams.put(VariantStorageEngine.Options.ANNOTATE.key(), true);

    runETL(variantStorageManager, etlParams, true, true, true);

    VariantDBAdaptor adaptor = getVariantStorageEngine().getDBAdaptor(DB_NAME);
    VariantSearchManager searchManager = new VariantSearchManager(null, variantStorageManager.getConfiguration());
    searchManager.load(variantStorageManager.getConfiguration().getSearch().getCollection(), adaptor.iterator());
}
}