/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.core.manager.variant;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.ExpectedException;
import org.mockito.Mockito;
import org.opencb.biodata.models.variant.VariantSource;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.mongodb.MongoDataStore;
import org.opencb.commons.datastore.mongodb.MongoDataStoreManager;
import org.opencb.commons.test.GenericTest;
import org.opencb.opencga.catalog.config.Policies;
import org.opencb.opencga.catalog.db.api.CohortDBAdaptor;
import org.opencb.opencga.catalog.db.api.FileDBAdaptor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.CatalogFileUtils;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.catalog.models.*;
import org.opencb.opencga.catalog.utils.FileMetadataReader;
import org.opencb.opencga.storage.core.StoragePipelineResult;
import org.opencb.opencga.storage.core.StorageEngineFactory;
import org.opencb.opencga.storage.core.config.DatabaseCredentials;
import org.opencb.opencga.storage.core.config.StorageConfiguration;
import org.opencb.opencga.storage.core.config.StorageEngineConfiguration;
import org.opencb.opencga.storage.core.config.StorageEtlConfiguration;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.manager.OpenCGATestExternalResource;
import org.opencb.opencga.storage.core.manager.variant.operations.StorageOperation;
import org.opencb.opencga.storage.core.manager.variant.operations.VariantFileIndexerStorageOperation;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.opencb.opencga.storage.core.variant.dummy.DummyStudyConfigurationManager;
import org.opencb.opencga.storage.core.variant.dummy.DummyVariantDBAdaptor;
import org.opencb.opencga.storage.core.variant.dummy.DummyVariantStoragePipeline;
import org.opencb.opencga.storage.core.variant.dummy.DummyVariantStorageEngine;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.Assert.*;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.spy;
import static org.opencb.biodata.models.variant.StudyEntry.DEFAULT_COHORT;
import static org.opencb.opencga.storage.core.manager.variant.operations.StatsVariantStorageTest.checkCalculatedStats;
import static org.opencb.opencga.storage.core.variant.VariantStorageBaseTest.DB_NAME;
import static org.opencb.opencga.storage.core.variant.VariantStorageBaseTest.getResourceUri;
/**
* Created on 05/05/16
*
* @author Jacobo Coll <jacobo167@gmail.com>
*/
public abstract class AbstractVariantStorageOperationTest extends GenericTest {
protected CatalogManager catalogManager;
protected String sessionId;
protected final String userId = "user";
protected long projectId;
protected long studyId;
protected String studyStr;
protected long outputId;
protected String outputStr;
protected String outputPath;
protected long studyId2;
protected long outputId2;
private List<File> files;
private final static String[] FILE_NAMES = {
"1000g_batches/1-500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz",
"1000g_batches/501-1000.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz",
"1000g_batches/1001-1500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz",
"1000g_batches/1501-2000.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz",
"1000g_batches/2001-2504.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"};
protected FileMetadataReader fileMetadataReader;
protected CatalogFileUtils catalogFileUtils;
protected org.opencb.opencga.storage.core.manager.variant.VariantStorageManager variantManager;
protected final String dbName = DB_NAME;
protected static final String STORAGE_ENGINE_DUMMY = DummyVariantStorageEngine.STORAGE_ENGINE_ID;
protected static final String STORAGE_ENGINE_MONGODB = "mongodb";
protected static final String STORAGE_ENGINE_HADOOP = "hadoop";
private Logger logger = LoggerFactory.getLogger(AbstractVariantStorageOperationTest.class);
@Rule
public ExpectedException thrown = ExpectedException.none();
@Rule
public OpenCGATestExternalResource opencga = new OpenCGATestExternalResource(getStorageEngine().equals(STORAGE_ENGINE_HADOOP));
private File smallFile;
@Before
public final void setUpAbstract() throws Exception {
catalogManager = opencga.getCatalogManager();
StorageConfiguration storageConfiguration = opencga.getStorageConfiguration();
storageConfiguration.setDefaultStorageEngineId(STORAGE_ENGINE_DUMMY);
storageConfiguration.getStorageEngines().add(new StorageEngineConfiguration(
STORAGE_ENGINE_DUMMY,
new StorageEtlConfiguration(),
new StorageEtlConfiguration(DummyVariantStorageEngine.class.getName(), new ObjectMap(), new DatabaseCredentials()),
new ObjectMap()
));
StorageEngineFactory factory = StorageEngineFactory.get(storageConfiguration);
factory.unregisterVariantStorageManager(DummyVariantStorageEngine.STORAGE_ENGINE_ID);
DummyStudyConfigurationManager.clear();
variantManager = new org.opencb.opencga.storage.core.manager.variant.VariantStorageManager(catalogManager, factory);
clearDB(dbName);
fileMetadataReader = FileMetadataReader.get(catalogManager);
catalogFileUtils = new CatalogFileUtils(catalogManager);
Policies policies = new Policies();
policies.setUserCreation(Policies.UserCreation.ALWAYS);
User user = catalogManager.createUser(userId, "User", "user@email.org", "user", "ACME", null, null).first();
sessionId = catalogManager.login(userId, "user", "localhost").first().getId();
projectId = catalogManager.getProjectManager().create("p1", "p1", "Project 1", "ACME", "Homo sapiens",
null, null, "GRCh38", new QueryOptions(), sessionId).first().getId();
studyId = catalogManager.createStudy(projectId, "s1", "s1", Study.Type.CASE_CONTROL, null, "Study 1", null,
null, null, null, Collections.singletonMap(File.Bioformat.VARIANT, new DataStore(getStorageEngine(), dbName)), null,
Collections.singletonMap(VariantStorageEngine.Options.AGGREGATED_TYPE.key(), getAggregation()),
null, sessionId).first().getId();
studyStr = String.valueOf(studyId);
outputId = catalogManager.getFileManager().createFolder(studyStr, Paths.get("data", "index").toString(), null, true, null,
QueryOptions.empty(), sessionId).first().getId();
outputStr = String.valueOf(outputId);
outputPath = "data/index/";
studyId2 = catalogManager.createStudy(projectId, "s2", "s2", Study.Type.CASE_CONTROL, null, "Study 2", null,
null, null, null, Collections.singletonMap(File.Bioformat.VARIANT, new DataStore(getStorageEngine(), dbName)), null,
Collections.singletonMap(VariantStorageEngine.Options.AGGREGATED_TYPE.key(), getAggregation()),
null, sessionId).first().getId();
outputId2 = catalogManager.getFileManager().createFolder(Long.toString(studyId2), Paths.get("data", "index").toString(), null,
true, null, QueryOptions.empty(), sessionId).first().getId();
files = Arrays.asList(new File[5]);
}
@After
public void tearDown() throws Exception {
DummyStudyConfigurationManager.writeAndClear(opencga.getOpencgaHome());
}
protected String getStorageEngine() {
return STORAGE_ENGINE_DUMMY;
}
protected abstract VariantSource.Aggregation getAggregation();
protected void clearDB(String dbName) {
logger.info("Cleaning MongoDB {}" , dbName);
MongoDataStoreManager mongoManager = new MongoDataStoreManager("localhost", 27017);
MongoDataStore mongoDataStore = mongoManager.get(dbName);
mongoManager.drop(dbName);
}
protected File getFile(int index) throws IOException, CatalogException {
if (files.get(index) == null) {
files.set(index, create(FILE_NAMES[index]));
}
return files.get(index);
}
protected File getSmallFile() throws IOException, CatalogException {
if (smallFile == null) {
smallFile = create("variant-test-file.vcf.gz");
}
return smallFile;
}
protected File create(String resourceName) throws IOException, CatalogException {
return create(studyId, getResourceUri(resourceName));
}
protected File create(long studyId, URI uri) throws IOException, CatalogException {
File file;
file = fileMetadataReader.create(studyId, uri, "data/vcfs/", "", true, null, sessionId).first();
// File.Format format = FormatDetector.detect(uri);
// File.Bioformat bioformat = BioformatDetector.detect(uri);
// file = catalogManager.createFile(studyId, format, bioformat, "data/vcfs/", "", true, -1, sessionId).first();
catalogFileUtils.upload(uri, file, null, sessionId, false, false, true, false, Long.MAX_VALUE);
return catalogManager.getFile(file.getId(), sessionId).first();
}
protected Cohort getDefaultCohort(long studyId) throws CatalogException {
return catalogManager.getAllCohorts(studyId, new Query(CohortDBAdaptor.QueryParams.NAME.key(), DEFAULT_COHORT),
new QueryOptions(), sessionId).first();
}
protected File transformFile(File inputFile, QueryOptions queryOptions) throws CatalogException, IOException, StorageEngineException, URISyntaxException {
queryOptions.append(VariantFileIndexerStorageOperation.TRANSFORM, true);
queryOptions.append(VariantFileIndexerStorageOperation.LOAD, false);
queryOptions.append(StorageOperation.CATALOG_PATH, "data/index/");
boolean calculateStats = queryOptions.getBoolean(VariantStorageEngine.Options.CALCULATE_STATS.key());
long studyId = catalogManager.getStudyIdByFileId(inputFile.getId());
//Default cohort should not be modified
Cohort defaultCohort = getDefaultCohort(studyId);
String outdir = opencga.createTmpOutdir(studyId, "_TRANSFORM_", sessionId);
variantManager.index(null, String.valueOf(inputFile.getId()), outdir, queryOptions, sessionId);
inputFile = catalogManager.getFile(inputFile.getId(), sessionId).first();
assertEquals(FileIndex.IndexStatus.TRANSFORMED, inputFile.getIndex().getStatus().getName());
// Default cohort should not be modified
assertEquals(defaultCohort, getDefaultCohort(studyId));
//Get transformed file
Query searchQuery = new Query(FileDBAdaptor.QueryParams.DIRECTORY.key(), "data/index/")
.append(FileDBAdaptor.QueryParams.NAME.key(), "~" + inputFile.getName() + ".variants.(json|avro)");
File transformedFile = catalogManager.getAllFiles(studyId, searchQuery, new QueryOptions(), sessionId).first();
assertNotNull(inputFile.getStats().get(FileMetadataReader.VARIANT_STATS));
return transformedFile;
}
protected List<StoragePipelineResult> loadFile(File file, QueryOptions queryOptions, long outputId) throws Exception {
return loadFiles(Collections.singletonList(file), queryOptions, outputId);
}
protected List<StoragePipelineResult> loadFiles(List<File> files, QueryOptions queryOptions, long outputId) throws Exception {
return loadFiles(files, files, queryOptions, outputId);
}
protected List<StoragePipelineResult> loadFiles(List<File> files, List<File> expectedLoadedFiles, QueryOptions queryOptions, long outputId) throws Exception {
queryOptions.append(VariantFileIndexerStorageOperation.TRANSFORM, false);
queryOptions.append(VariantFileIndexerStorageOperation.LOAD, true);
queryOptions.append(StorageOperation.CATALOG_PATH, String.valueOf(outputId));
boolean calculateStats = queryOptions.getBoolean(VariantStorageEngine.Options.CALCULATE_STATS.key());
Long studyId = catalogManager.getStudyIdByFileId(files.get(0).getId());
List<String> fileIds = files.stream().map(File::getId).map(Object::toString).collect(Collectors.toList());
String outdir = opencga.createTmpOutdir(studyId, "_LOAD_", sessionId);
List<StoragePipelineResult> etlResults = variantManager.index(studyId.toString(), fileIds, outdir, queryOptions, sessionId);
assertEquals(expectedLoadedFiles.size(), etlResults.size());
checkEtlResults(studyId, etlResults, FileIndex.IndexStatus.READY);
Cohort defaultCohort = getDefaultCohort(studyId);
for (File file : expectedLoadedFiles) {
assertTrue(defaultCohort.getSamples().containsAll(file.getSampleIds()));
}
if (calculateStats) {
assertEquals(Cohort.CohortStatus.READY, defaultCohort.getStatus().getName());
checkCalculatedStats(Collections.singletonMap(DEFAULT_COHORT, defaultCohort), catalogManager, dbName, sessionId);
}
return etlResults;
}
protected List<StoragePipelineResult> indexFile(File file, QueryOptions queryOptions, long outputId) throws Exception {
return indexFiles(Collections.singletonList(file), queryOptions, outputId);
}
protected List<StoragePipelineResult> indexFiles(List<File> files, QueryOptions queryOptions, long outputId) throws Exception {
return indexFiles(files, files, queryOptions, outputId);
}
protected List<StoragePipelineResult> indexFiles(List<File> files, List<File> expectedLoadedFiles, QueryOptions queryOptions, long outputId) throws Exception {
queryOptions.append(VariantFileIndexerStorageOperation.TRANSFORM, true);
queryOptions.append(VariantFileIndexerStorageOperation.LOAD, true);
queryOptions.append(StorageOperation.CATALOG_PATH, String.valueOf(outputId));
boolean calculateStats = queryOptions.getBoolean(VariantStorageEngine.Options.CALCULATE_STATS.key());
Long studyId = catalogManager.getStudyIdByFileId(files.get(0).getId());
String outdir = opencga.createTmpOutdir(studyId, "_INDEX_", sessionId);
List<String> fileIds = files.stream().map(File::getId).map(Object::toString).collect(Collectors.toList());
List<StoragePipelineResult> etlResults = variantManager.index(studyId.toString(), fileIds, outdir, queryOptions, sessionId);
assertEquals(expectedLoadedFiles.size(), etlResults.size());
checkEtlResults(studyId, etlResults, FileIndex.IndexStatus.READY);
Cohort defaultCohort = getDefaultCohort(studyId);
for (File file : expectedLoadedFiles) {
assertTrue(defaultCohort.getSamples().containsAll(file.getSampleIds()));
}
if (calculateStats) {
assertEquals(Cohort.CohortStatus.READY, defaultCohort.getStatus().getName());
checkCalculatedStats(Collections.singletonMap(DEFAULT_COHORT, defaultCohort), catalogManager, dbName, sessionId);
}
return etlResults;
}
protected void checkEtlResults(long studyId, List<StoragePipelineResult> etlResults, String expectedStatus) throws CatalogException {
for (StoragePipelineResult etlResult : etlResults) {
File input = catalogManager.searchFile(studyId, new Query(FileDBAdaptor.QueryParams.URI.key(), etlResult.getInput()), sessionId).first();
long indexedFileId;
if (input.getRelatedFiles().isEmpty()) {
indexedFileId = input.getId();
} else {
indexedFileId = input.getRelatedFiles().get(0).getFileId();
}
assertEquals(expectedStatus, catalogManager.getFile(indexedFileId, sessionId).first().getIndex().getStatus().getName());
System.out.println("etlResult = " + etlResult);
}
}
protected DummyVariantStorageEngine mockVariantStorageManager() {
DummyVariantStorageEngine vsm = spy(new DummyVariantStorageEngine());
vsm.setConfiguration(opencga.getStorageConfiguration(), DummyVariantStorageEngine.STORAGE_ENGINE_ID);
StorageEngineFactory.get(opencga.getStorageConfiguration()).registerStorageManager(vsm);
return vsm;
}
protected DummyVariantDBAdaptor mockVariantDBAdaptor() throws StorageEngineException {
DummyVariantStorageEngine vsm = mockVariantStorageManager();
return mockVariantDBAdaptor(vsm);
}
protected DummyVariantDBAdaptor mockVariantDBAdaptor(DummyVariantStorageEngine vsm) throws StorageEngineException {
DummyVariantDBAdaptor dbAdaptor = spy(new DummyVariantDBAdaptor(""));
doReturn(dbAdaptor).when(vsm).getDBAdaptor();
doReturn(dbAdaptor).when(vsm).getDBAdaptor(anyString());
return dbAdaptor;
}
protected DummyVariantStoragePipeline mockVariantStorageETL() throws StorageEngineException {
DummyVariantStorageEngine vsm = mockVariantStorageManager();
return mockVariantStorageETL(vsm);
}
protected DummyVariantStoragePipeline mockVariantStorageETL(DummyVariantStorageEngine vsm) throws StorageEngineException {
DummyVariantStoragePipeline storageETL = spy(vsm.newStoragePipeline(true));
// doReturn(storageETL).when(vsm).newStoragePipeline(anyBoolean());
Mockito.doAnswer(invocation -> {
DummyVariantStoragePipeline etl = (DummyVariantStoragePipeline) invocation.callRealMethod();
storageETL.init(etl.getOptions());
return storageETL;
}).when(vsm).newStoragePipeline(anyBoolean());
return storageETL;
}
}