/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.core.manager.variant.operations;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.mockito.ArgumentMatchers;
import org.mockito.Mockito;
import org.opencb.biodata.models.variant.VariantSource;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.catalog.db.api.CohortDBAdaptor;
import org.opencb.opencga.catalog.db.api.FileDBAdaptor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.models.Cohort;
import org.opencb.opencga.catalog.models.File;
import org.opencb.opencga.catalog.models.FileIndex;
import org.opencb.opencga.catalog.utils.FileMetadataReader;
import org.opencb.opencga.core.common.UriUtils;
import org.opencb.opencga.storage.core.StoragePipelineResult;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.exceptions.StoragePipelineException;
import org.opencb.opencga.storage.core.manager.variant.AbstractVariantStorageOperationTest;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.opencb.opencga.storage.core.variant.dummy.DummyVariantStoragePipeline;
import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import static java.util.Collections.singletonList;
import static org.junit.Assert.*;
import static org.opencb.biodata.models.variant.StudyEntry.DEFAULT_COHORT;
import static org.opencb.opencga.storage.core.manager.variant.operations.StatsVariantStorageTest.checkCalculatedStats;
/**
* Created by hpccoll1 on 13/07/15.
*/
public class VariantFileIndexerStorageOperationTest extends AbstractVariantStorageOperationTest {
// JUnit rule for declaring expected exceptions; initialised to expect none.
@Rule
public ExpectedException thrown = ExpectedException.none();
// Logger named after this test class.
Logger logger = LoggerFactory.getLogger(VariantFileIndexerStorageOperationTest.class);
/**
 * Indexes files 0 to 4 one at a time with annotation disabled, enabling
 * CALCULATE_STATS only for files 2 and 4.
 *
 * After every step the default cohort must hold the accumulated number of samples
 * (500, 1000, 1500, 2000, 2504) and report the expected status
 * (NONE, NONE, READY, INVALID, READY), and the indexed file must carry
 * {@code FileMetadataReader.VARIANT_STATS} in its stats. After the two steps that
 * calculate stats, the default cohort stats are verified with checkCalculatedStats.
 */
@Test
public void testIndexWithStats() throws Exception {
    String calculateStats = VariantStorageEngine.Options.CALCULATE_STATS.key();
    QueryOptions options = new QueryOptions(VariantStorageEngine.Options.ANNOTATE.key(), false);
    options.put(calculateStats, false);
    options.putIfNotNull(StorageOperation.CATALOG_PATH, String.valueOf(outputId));

    indexAndCheckDefaultCohort(0, options, 500, Cohort.CohortStatus.NONE);
    assertVariantStatsStored(0);

    indexAndCheckDefaultCohort(1, options, 1000, Cohort.CohortStatus.NONE);
    assertVariantStatsStored(1);

    options.put(calculateStats, true);
    indexAndCheckDefaultCohort(2, options, 1500, Cohort.CohortStatus.READY);
    checkDefaultCohortStats();
    assertVariantStatsStored(2);

    options.put(calculateStats, false);
    indexAndCheckDefaultCohort(3, options, 2000, Cohort.CohortStatus.INVALID);
    assertVariantStatsStored(3);

    options.put(calculateStats, true);
    indexAndCheckDefaultCohort(4, options, 2504, Cohort.CohortStatus.READY);
    assertVariantStatsStored(4);
    checkDefaultCohortStats();
}

// Indexes the file at the given position and checks the default cohort size and status afterwards.
private void indexAndCheckDefaultCohort(int fileIndex, QueryOptions options, int expectedSamples, Object expectedStatus)
        throws Exception {
    variantManager.index(null, String.valueOf(getFile(fileIndex).getId()), newTmpOutdir(), options, sessionId);
    assertEquals(expectedSamples, getDefaultCohort(studyId).getSamples().size());
    assertEquals(expectedStatus, getDefaultCohort(studyId).getStatus().getName());
}

// Asserts that the catalog entry of the file at the given position has VARIANT_STATS among its stats.
private void assertVariantStatsStored(int fileIndex) throws Exception {
    assertNotNull(catalogManager.getFile(getFile(fileIndex).getId(), sessionId).first().getStats()
            .get(FileMetadataReader.VARIANT_STATS));
}

// Looks up the default cohort by name and hands it to checkCalculatedStats.
private void checkDefaultCohortStats() throws Exception {
    Query byName = new Query(CohortDBAdaptor.QueryParams.NAME.key(), DEFAULT_COHORT);
    checkCalculatedStats(
            Collections.singletonMap(DEFAULT_COHORT,
                    catalogManager.getAllCohorts(studyId, byName, new QueryOptions(), sessionId).first()),
            catalogManager, dbName, sessionId);
}
/**
 * Requests a temporary output directory for {@code studyId}, labelled "index",
 * through {@code opencga.createTmpOutdir}.
 *
 * @return the value returned by {@code createTmpOutdir}
 * @throws CatalogException propagated from {@code createTmpOutdir}
 */
String newTmpOutdir() throws CatalogException {
    String outdir = opencga.createTmpOutdir(studyId, "index", sessionId);
    return outdir;
}
/**
 * Indexes file 0 through the inherited indexFile helper, with annotation and
 * stats calculation both switched off.
 */
@Test
public void testIndex() throws Exception {
    QueryOptions options = new QueryOptions();
    options.put(VariantStorageEngine.Options.ANNOTATE.key(), false);
    options.put(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);

    indexFile(getFile(0), options, outputId);
}
/**
 * Passes the parent folder of file 0 as the input to index and expects file 0
 * itself as the resulting file. Annotation and stats calculation are disabled.
 */
@Test
public void testIndexFromFolder() throws Exception {
    QueryOptions options = new QueryOptions();
    options.put(VariantStorageEngine.Options.ANNOTATE.key(), false);
    options.put(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);

    File vcf = getFile(0);
    File folder = catalogManager.getFileParent(vcf.getId(), null, sessionId).first();

    List<File> input = singletonList(folder);
    List<File> expected = singletonList(vcf);
    indexFiles(input, expected, options, outputId);
}
/**
 * Runs the index in two separate steps: transforms file 0 first, then loads the
 * file returned by the transform step. Annotation and stats calculation are disabled.
 */
@Test
public void testIndexBySteps() throws Exception {
    QueryOptions options = new QueryOptions();
    options.put(VariantStorageEngine.Options.ANNOTATE.key(), false);
    options.put(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);

    File input = getFile(0);
    File transformed = transformFile(input, options);
    loadFile(transformed, options, outputId);
}
/**
 * Same two-step flow as a plain transform followed by load, but with
 * CALCULATE_STATS enabled (annotation stays disabled).
 */
@Test
public void testIndexByStepsWithStats() throws Exception {
    QueryOptions options = new QueryOptions();
    options.put(VariantStorageEngine.Options.ANNOTATE.key(), false);
    options.put(VariantStorageEngine.Options.CALCULATE_STATS.key(), true);

    File input = getFile(0);
    File transformed = transformFile(input, options);
    loadFile(transformed, options, outputId);
}
/**
 * Transforms file 0 and then asks to load the original file 0 again, rather than
 * the file returned by the transform step. Stats calculation is enabled and
 * annotation disabled.
 */
@Test
public void testIndexByStepsSameInput() throws Exception {
    QueryOptions options = new QueryOptions();
    options.put(VariantStorageEngine.Options.ANNOTATE.key(), false);
    options.put(VariantStorageEngine.Options.CALCULATE_STATS.key(), true);

    // The transform result is deliberately ignored: the load step receives the original input.
    transformFile(getFile(0), options);
    loadFile(getFile(0), options, outputId);
}
/**
 * Makes the mocked storage pipeline throw on the transform of file 1 and checks
 * the per-file results attached to the resulting {@link StoragePipelineException}:
 * file 0 transformed without error, file 1 transformed with the injected error and
 * never loaded. Afterwards the mock is reset and the same two files are indexed
 * again, expecting only file 1 in the results.
 */
@Test
public void testIndexWithTransformError() throws Exception {
    QueryOptions queryOptions = new QueryOptions(VariantStorageEngine.Options.ANNOTATE.key(), false)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);

    DummyVariantStoragePipeline storageETL = mockVariantStorageETL();
    List<File> files = Arrays.asList(getFile(0), getFile(1));
    StorageEngineException transformException = StorageEngineException.unableToExecute("transform", 0, "");
    // Fail only when the transform input mentions the name of file 1.
    Mockito.doThrow(transformException).when(storageETL)
            .transform(ArgumentMatchers.argThat(argument -> argument.toString().contains(files.get(1).getName())),
                    Mockito.any(), Mockito.any());

    try {
        indexFiles(files, queryOptions, outputId);
        // Without this, the test would silently pass when the injected error is not propagated.
        fail("Expected a StoragePipelineException caused by the transform error of file 1");
    } catch (StoragePipelineException exception) {
        List<StoragePipelineResult> results = exception.getResults();
        assertEquals(files.size(), results.size());

        assertTrue(results.get(0).isTransformExecuted());
        assertNull(results.get(0).getTransformError());

        assertTrue(results.get(1).isTransformExecuted());
        assertSame(transformException, results.get(1).getTransformError());

        // The file whose transform failed must not have reached the load step.
        // Only result 1 is checked here; nothing is asserted about the load state of file 0.
        assertFalse(results.get(1).isLoadExecuted());
        assertNull(results.get(1).getLoadError());
    }

    // Replace the failing mock before retrying.
    mockVariantStorageETL();
    // File 0 already transformed.
    // Expecting to transform and load only file 1
    indexFiles(files, singletonList(files.get(1)), queryOptions, outputId);
}
/**
 * Flags file 1 as TRANSFORMING in the catalog and then indexes files 0 and 1
 * without the RESUME option. Only file 0 is passed as the expected file.
 */
@Test
public void testTransformTransformingFiles() throws Exception {
    QueryOptions options = new QueryOptions();
    options.put(VariantStorageEngine.Options.ANNOTATE.key(), false);
    options.put(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);

    List<File> inputFiles = Arrays.asList(getFile(0), getFile(1));
    catalogManager.getFileManager()
            .updateFileIndexStatus(getFile(1), FileIndex.IndexStatus.TRANSFORMING, "", sessionId);

    // Expect only file 0 in the results: file 1 is marked as TRANSFORMING and RESUME is not set.
    List<File> expectedFiles = singletonList(getFile(0));
    indexFiles(inputFiles, expectedFiles, options, outputId);
}
/**
 * Flags file 1 as TRANSFORMING in the catalog and then indexes files 0 and 1 with
 * the RESUME option enabled. Both files are passed as the expected files.
 */
@Test
public void testResumeTransformTransformingFiles() throws Exception {
    QueryOptions options = new QueryOptions();
    options.put(VariantStorageEngine.Options.ANNOTATE.key(), false);
    options.put(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);
    options.put(VariantStorageEngine.Options.RESUME.key(), true);

    List<File> inputFiles = Arrays.asList(getFile(0), getFile(1));
    catalogManager.getFileManager()
            .updateFileIndexStatus(getFile(1), FileIndex.IndexStatus.TRANSFORMING, "", sessionId);

    // Expect both files in the results: with RESUME set, the TRANSFORMING file is included as well.
    List<File> expectedFiles = inputFiles;
    indexFiles(inputFiles, expectedFiles, options, outputId);
}
/**
 * Makes the mocked storage pipeline throw on the load of file 1 while indexing
 * into a temporary output directory created with the "_INDEX_" label, calling
 * {@code variantManager.index} directly. Checks the per-file results attached to
 * the {@link StoragePipelineException}: both files transformed without error,
 * file 0 loaded without error, file 1 loaded with the injected error. Afterwards
 * the mock is reset and the two files are indexed again, expecting only file 1 in
 * the results.
 */
@Test
public void testIndexWithLoadErrorExternalOutputFolder() throws Exception {
    QueryOptions queryOptions = new QueryOptions(VariantStorageEngine.Options.ANNOTATE.key(), false)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);

    DummyVariantStoragePipeline storageETL = mockVariantStorageETL();
    List<File> files = Arrays.asList(getFile(0), getFile(1));
    StorageEngineException loadException = StorageEngineException.unableToExecute("load", 0, "");
    // Fail only when the load input mentions the name of file 1.
    Mockito.doThrow(loadException).when(storageETL)
            .load(ArgumentMatchers.argThat(argument -> argument.toString().contains(files.get(1).getName())));

    List<String> fileIds = files.stream().map(File::getId).map(Object::toString).collect(Collectors.toList());
    String outdir = opencga.createTmpOutdir(studyId, "_INDEX_", sessionId);
    try {
        variantManager.index(String.valueOf(studyId), fileIds, outdir, queryOptions, sessionId);
        // Without this, the test would silently pass when the injected error is not propagated.
        fail("Expected a StoragePipelineException caused by the load error of file 1");
    } catch (StoragePipelineException exception) {
        List<StoragePipelineResult> results = exception.getResults();
        assertEquals(files.size(), results.size());

        for (StoragePipelineResult result : results) {
            assertTrue(result.isTransformExecuted());
            assertNull(result.getTransformError());
        }

        assertTrue(results.get(0).isLoadExecuted());
        assertNull(results.get(0).getLoadError());

        assertTrue(results.get(1).isLoadExecuted());
        assertSame(loadException, results.get(1).getLoadError());
    }

    // Replace the failing mock before retrying.
    mockVariantStorageETL();
    indexFiles(files, singletonList(files.get(1)), queryOptions, outputId);
}
/**
 * Makes the mocked storage pipeline throw on the load of file 1 and checks the
 * per-file results attached to the {@link StoragePipelineException}: both files
 * transformed without error, file 0 loaded without error, file 1 loaded with the
 * injected error. Afterwards the mock is reset and the two files are loaded again,
 * expecting only file 1 in the results.
 */
@Test
public void testIndexWithLoadError() throws Exception {
    QueryOptions queryOptions = new QueryOptions(VariantStorageEngine.Options.ANNOTATE.key(), false)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);

    DummyVariantStoragePipeline storageETL = mockVariantStorageETL();
    List<File> files = Arrays.asList(getFile(0), getFile(1));
    StorageEngineException loadException = StorageEngineException.unableToExecute("load", 0, "");
    // Fail only when the load input mentions the name of file 1.
    Mockito.doThrow(loadException).when(storageETL)
            .load(ArgumentMatchers.argThat(argument -> argument.toString().contains(files.get(1).getName())));

    try {
        indexFiles(files, queryOptions, outputId);
        // Without this, the test would silently pass when the injected error is not propagated.
        fail("Expected a StoragePipelineException caused by the load error of file 1");
    } catch (StoragePipelineException exception) {
        List<StoragePipelineResult> results = exception.getResults();
        assertEquals(files.size(), results.size());

        for (StoragePipelineResult result : results) {
            assertTrue(result.isTransformExecuted());
            assertNull(result.getTransformError());
        }

        assertTrue(results.get(0).isLoadExecuted());
        assertNull(results.get(0).getLoadError());

        assertTrue(results.get(1).isLoadExecuted());
        assertSame(loadException, results.get(1).getLoadError());
    }

    // Replace the failing mock before retrying.
    mockVariantStorageETL();
    // File 0 already loaded.
    // Expecting to load only file 1
    loadFiles(files, singletonList(files.get(1)), queryOptions, outputId);
}
/**
 * Transforms file 0 of the first study into a temporary "_TRANSFORM_" directory,
 * registers the original file and every file found in that directory in the second
 * study ({@code studyId2}), and finally loads the single transformed-variants file
 * from there with stats calculation enabled.
 *
 * Exactly one of the produced files must be recognised by
 * {@code VariantReaderUtils.isTransformedVariants}.
 */
@Test
public void testIndexByStepsExternallyTransformed() throws Exception {
    QueryOptions transformOptions = new QueryOptions(VariantFileIndexerStorageOperation.TRANSFORM, true)
            // TODO: Should work without isolating transformation?
            .append(VariantStorageEngine.Options.ISOLATE_FILE_FROM_STUDY_CONFIGURATION.key(), true);

    String outdir = opencga.createTmpOutdir(studyId, "_TRANSFORM_", sessionId);
    variantManager.index(String.valueOf(studyId), getFile(0).getPath(), outdir, transformOptions, sessionId);

    create(studyId2, catalogManager.getFileUri(getFile(0)));

    // listFiles() returns null when the path is not a directory or cannot be read;
    // fail with a clear message instead of a NullPointerException in the loop below.
    java.io.File[] producedFiles = Paths.get(UriUtils.createUri(outdir)).toFile().listFiles();
    assertNotNull("Unable to list the transform output directory: " + outdir, producedFiles);

    File transformFile = null;
    for (java.io.File file : producedFiles) {
        File f = create(studyId2, file.toURI());
        if (VariantReaderUtils.isTransformedVariants(file.toString())) {
            // Only one transformed-variants file is expected in the output directory.
            assertNull(transformFile);
            transformFile = f;
        }
    }
    assertNotNull(transformFile);
    catalogManager.getFileManager().matchUpVariantFiles(singletonList(transformFile), sessionId);

    QueryOptions loadOptions = new QueryOptions().append(VariantStorageEngine.Options.ANNOTATE.key(), false)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), true);
    loadFile(transformFile, loadOptions, outputId2);
}
/**
 * Aggregation setting supplied to the parent test class.
 *
 * @return always {@code VariantSource.Aggregation.NONE}
 */
@Override
protected VariantSource.Aggregation getAggregation() {
    final VariantSource.Aggregation aggregation = VariantSource.Aggregation.NONE;
    return aggregation;
}
}