/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.core.manager.variant.operations;
import org.opencb.biodata.models.variant.StudyEntry;
import org.opencb.biodata.models.variant.VariantSource;
import org.opencb.biodata.models.variant.stats.VariantGlobalStats;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.commons.utils.FileUtils;
import org.opencb.opencga.catalog.config.Configuration;
import org.opencb.opencga.catalog.db.api.CohortDBAdaptor;
import org.opencb.opencga.catalog.db.api.FileDBAdaptor;
import org.opencb.opencga.catalog.db.api.SampleDBAdaptor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.catalog.managers.api.IFileManager;
import org.opencb.opencga.catalog.models.*;
import org.opencb.opencga.catalog.utils.FileMetadataReader;
import org.opencb.opencga.core.common.UriUtils;
import org.opencb.opencga.storage.core.StoragePipelineResult;
import org.opencb.opencga.storage.core.StorageEngineFactory;
import org.opencb.opencga.storage.core.config.StorageConfiguration;
import org.opencb.opencga.storage.core.exceptions.StoragePipelineException;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.manager.models.FileInfo;
import org.opencb.opencga.storage.core.manager.models.StudyInfo;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager;
import org.opencb.opencga.storage.core.variant.annotation.annotators.AbstractCellBaseVariantAnnotator;
import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
import static org.opencb.opencga.catalog.utils.FileMetadataReader.VARIANT_STATS;
/**
* Created by imedina on 17/08/16.
*/
public class VariantFileIndexerStorageOperation extends StorageOperation {
// Catalog file manager, taken from the CatalogManager in both constructors.
private final IFileManager fileManager;
// Option keys read by #index to decide which steps (transform and/or load) are executed.
public static final String TRANSFORM = "transform";
public static final String LOAD = "load";
// FIXME : Needed?
// Option key read by #filterLoadFiles: list of already transformed files, one per input file.
public static final String TRANSFORMED_FILES = "transformedFiles";
// Step to execute. Derived by #getType from the transform and load flags.
private enum Type {
TRANSFORM,
LOAD,
INDEX
}
/**
 * Creates the operation on top of an existing catalog manager.
 *
 * @param catalogManager       Catalog manager. Its file manager is kept for file queries and updates
 * @param storageConfiguration Storage configuration used to obtain the {@link StorageEngineFactory}
 */
public VariantFileIndexerStorageOperation(CatalogManager catalogManager, StorageConfiguration storageConfiguration) {
super(catalogManager, StorageEngineFactory.get(storageConfiguration),
LoggerFactory.getLogger(VariantFileIndexerStorageOperation.class));
this.fileManager = catalogManager.getFileManager();
}
/**
 * Creates the operation building a new {@link CatalogManager} from the given catalog configuration.
 *
 * @param configuration        Catalog configuration used to instantiate the CatalogManager
 * @param storageConfiguration Storage configuration used to obtain the {@link StorageEngineFactory}
 * @throws CatalogException presumably raised while building the CatalogManager (verify against its constructor)
 */
public VariantFileIndexerStorageOperation(Configuration configuration, StorageConfiguration storageConfiguration)
throws CatalogException {
super(new CatalogManager(configuration), StorageEngineFactory.get(storageConfiguration),
LoggerFactory.getLogger(VariantFileIndexerStorageOperation.class));
// The file manager comes from the catalog manager passed to the super constructor above.
this.fileManager = catalogManager.getFileManager();
}
/**
 * Transforms and/or loads the variant files of a study and keeps the catalog file index status in sync.
 * <p>
 * Steps, in order: validate the output directory, resolve which steps to run from {@code options}, collect the
 * input files (expanding directories), select the files whose index status allows the step, flag them in catalog,
 * run {@code variantStorageEngine.index(...)} and finally update catalog with the results.
 * A StorageEngineException or RuntimeException raised by the storage engine is held back until catalog and the
 * job status file have been updated, and it is thrown at the end.
 *
 * @param studyInfo    Study context. Study id, study, data stores, input file ids and organism are read from it
 * @param outdirString Output directory. It is rejected when its URI is registered as a file in catalog
 * @param options      Execution options. {@link #TRANSFORM}, {@link #LOAD}, RESUME and CALCULATE_STATS are read here,
 *                     and DB_NAME, STUDY_ID, species and assembly are written into it. A new instance is created
 *                     when null, but only after the value has been handed to outdirMustBeEmpty
 * @param sessionId    Session id forwarded to every catalog call
 * @return Results reported by the storage engine, or an empty list when no input file qualifies for the step
 * @throws CatalogException       if the output directory is registered in catalog, an input is neither a file nor a
 *                                directory, or a catalog call fails
 * @throws IOException            declared by the output directory checks and the catalog updates called from here
 * @throws StorageEngineException if the storage engine cannot be created or the execution fails
 * @throws URISyntaxException     if a URI cannot be built
 */
public List<StoragePipelineResult> index(StudyInfo studyInfo, String outdirString, QueryOptions options, String sessionId)
throws CatalogException, IOException, StorageEngineException, URISyntaxException {
URI outdirUri = UriUtils.createDirectoryUri(outdirString);
Path outdir = Paths.get(outdirUri);
FileUtils.checkDirectory(outdir, true);
// Check the output directory does not correspond with a catalog directory
Query query = new Query(FileDBAdaptor.QueryParams.URI.key(), outdir.toUri().toString());
QueryResult<Long> count = fileManager.count(query, sessionId);
if (count.first() > 0) {
throw new CatalogException("The output directory is pointing to one in catalog. Please, choose other out of catalog "
+ "boundaries.");
}
// Outdir must be empty
// NOTE(review): options may still be null here, the null check only happens a few lines below. Confirm that
// outdirMustBeEmpty tolerates a null options.
outdirMustBeEmpty(outdir, options);
writeJobStatus(outdir, new Job.JobStatus(Job.JobStatus.RUNNING, "Job has just started"));
// TODO: This hook should #updateFileInfo
Thread hook = buildHook(outdir);
Runtime.getRuntime().addShutdownHook(hook);
if (options == null) {
options = new QueryOptions();
}
// By default both steps are executed (full index).
boolean transform = true;
boolean load = true;
// If any of them is defined and at least one is different from false then we set the values
if (options.getBoolean(TRANSFORM, false) || options.getBoolean(LOAD, false)) {
transform = options.getBoolean(TRANSFORM, false);
load = options.getBoolean(LOAD, false);
}
boolean resume = options.getBoolean(VariantStorageEngine.Options.RESUME.key());
// Obtain the type of analysis (transform, load or index)
Type step = getType(load, transform);
// We read all input files from fileId. This can either be a single file and then we just use it,
// or this can be a directory, in that case we use all VCF files in that directory or subdirectory
// long studyIdByInputFileId = getStudyId(fileIds);
long studyIdByInputFileId = studyInfo.getStudyId();
// Study study = catalogManager.getStudyManager().get(studyIdByInputFileId, new QueryOptions(), sessionId).getResult().get(0);
Study study = studyInfo.getStudy();
// We get the credentials of the Datastore to insert the variants
// DataStore dataStore = getDataStore(catalogManager, studyIdByInputFileId, File.Bioformat.VARIANT, sessionId);
DataStore dataStore = studyInfo.getDataStores().get(File.Bioformat.VARIANT);
// Update study configuration BEFORE executing the index and fetching files from Catalog
updateStudyConfiguration(sessionId, studyIdByInputFileId, dataStore);
List<File> inputFiles = new ArrayList<>();
// for (Long fileIdLong : fileIds) {
for (FileInfo fileInfo : studyInfo.getFileInfos()) {
long fileIdLong = fileInfo.getFileId();
File inputFile = fileManager.get(fileIdLong, new QueryOptions(), sessionId).first();
if (inputFile.getType() == File.Type.FILE) {
inputFiles.add(inputFile);
} else {
if (inputFile.getType() == File.Type.DIRECTORY) {
// Directory: add every VCF/gVCF file whose catalog path matches the directory path.
// NOTE(review): in the regular expression the trailing "*" quantifies the last character of the path
// instead of meaning "anything below". Confirm this is the intended pattern.
query = new Query(FileDBAdaptor.QueryParams.PATH.key(), "~^" + inputFile.getPath() + "*");
query.append(FileDBAdaptor.QueryParams.FORMAT.key(),
// Arrays.asList(File.Format.VCF, File.Format.GVCF, File.Format.AVRO));
Arrays.asList(File.Format.VCF, File.Format.GVCF));
// NOTE(review): the execution options are passed as the query options of this catalog search.
QueryResult<File> fileQueryResult = fileManager.get(studyIdByInputFileId, query, options, sessionId);
inputFiles.addAll(fileQueryResult.getResult());
} else {
throw new CatalogException(String.format("Expected file type %s or %s instead of %s",
File.Type.FILE, File.Type.DIRECTORY, inputFile.getType()));
}
}
}
// Check catalog path
// A non-null value means that the results have to be copied into catalog (see the update section below).
Long catalogOutDirId = getCatalogOutdirId(studyIdByInputFileId, options, sessionId);
logger.debug("Index - Number of files to be indexed: {}, list of files: {}", inputFiles.size(),
inputFiles.stream().map(File::getName).collect(Collectors.toList()));
options.put(VariantStorageEngine.Options.DB_NAME.key(), dataStore.getDbName());
options.put(VariantStorageEngine.Options.STUDY_ID.key(), studyIdByInputFileId);
VariantStorageEngine variantStorageEngine;
try {
variantStorageEngine = storageEngineFactory.getVariantStorageEngine(dataStore.getStorageEngine());
} catch (ClassNotFoundException | IllegalAccessException | InstantiationException e) {
throw new StorageEngineException("Unable to create StorageEngine", e);
}
// Add species and assembly
String scientificName = studyInfo.getOrganism().getScientificName();
scientificName = AbstractCellBaseVariantAnnotator.toCellBaseSpeciesName(scientificName);
options.put(VariantAnnotationManager.SPECIES, scientificName);
options.put(VariantAnnotationManager.ASSEMBLY, studyInfo.getOrganism().getAssembly());
variantStorageEngine.getOptions().putAll(options);
// Stats are only taken into account when the load step is executed.
boolean calculateStats = options.getBoolean(VariantStorageEngine.Options.CALCULATE_STATS.key())
&& (step.equals(Type.LOAD) || step.equals(Type.INDEX));
String fileStatus;
String fileStatusMessage;
List<File> filesToIndex;
List<URI> fileUris = new ArrayList<>(inputFiles.size());
// Select the files valid for the step, the URIs to process and the temporary status to set in catalog.
switch (step) {
case INDEX:
fileStatus = FileIndex.IndexStatus.INDEXING;
fileStatusMessage = "Start indexing file";
filesToIndex = filterTransformFiles(inputFiles, resume);
for (File file : filesToIndex) {
fileUris.add(file.getUri());
}
break;
case TRANSFORM:
fileStatus = FileIndex.IndexStatus.TRANSFORMING;
fileStatusMessage = "Start transforming file";
filesToIndex = filterTransformFiles(inputFiles, resume);
for (File file : filesToIndex) {
fileUris.add(file.getUri());
}
break;
case LOAD:
// filterLoadFiles fills fileUris with the URIs of the transformed files.
filesToIndex = filterLoadFiles(inputFiles, options, fileUris, resume, sessionId);
fileStatus = FileIndex.IndexStatus.LOADING;
fileStatusMessage = "Start loading file";
break;
default:
throw new IllegalArgumentException("Unknown step '" + step + "'.");
}
// NOTE(review): this early return leaves the shutdown hook registered and the job status file in RUNNING.
// Confirm whether this is intended.
if (filesToIndex.size() == 0) {
logger.warn("Nothing to do.");
return Collections.emptyList();
}
if (step.equals(Type.INDEX) || step.equals(Type.LOAD)) {
// Files are going to be loaded: add their samples to the default cohort.
boolean modified = false;
for (File file : filesToIndex) {
modified |= updateDefaultCohort(file, study, options, sessionId);
}
if (calculateStats) {
updateDefaultCohortStatus(study, Cohort.CohortStatus.CALCULATING, sessionId);
}
if (modified) {
// Update again the StudyConfiguration.
updateStudyConfiguration(sessionId, study.getId(), dataStore);
}
}
// Only if we are not transforming or if a path has been passed, we will update catalog information
if (!step.equals(Type.TRANSFORM) || catalogOutDirId != null) {
for (File file : filesToIndex) {
QueryResult<FileIndex> fileIndexQueryResult = fileManager.updateFileIndexStatus(file, fileStatus,
fileStatusMessage, sessionId);
file.setIndex(fileIndexQueryResult.first());
}
}
logger.info("Starting to {}", step);
List<StoragePipelineResult> storagePipelineResults;
// Save exception to throw at the end
StorageEngineException exception = null;
try {
storagePipelineResults = variantStorageEngine.index(fileUris, outdir.toUri(), false, transform, load);
} catch (StoragePipelineException e) {
// Partial results are available in the exception.
logger.error("Error executing " + step, e);
storagePipelineResults = e.getResults();
exception = e;
} catch (StorageEngineException e) {
logger.error("Error executing " + step, e);
storagePipelineResults = Collections.emptyList();
exception = e;
} catch (RuntimeException e) {
logger.error("Error executing " + step, e);
storagePipelineResults = Collections.emptyList();
exception = new StorageEngineException("Error executing " + step, e);
}
// logger.debug("Writing storagePipelineResults to file {}", outdir.resolve("storageETLresults"));
// objectMapper.writer().writeValue(outdir.resolve("storageETLresults").toFile(), storagePipelineResults);
// Only if we are not transforming or if a path has been passed, we will update catalog information
if (!step.equals(Type.TRANSFORM) || catalogOutDirId != null) {
boolean saveIntermediateFiles = catalogOutDirId != null;
if (saveIntermediateFiles) {
// Copy results to catalog
copyResults(outdir, catalogOutDirId, sessionId);
}
updateFileInfo(study, filesToIndex, storagePipelineResults, outdir, saveIntermediateFiles, options, sessionId);
if (calculateStats) {
// READY when the execution succeeded, INVALID otherwise.
updateDefaultCohortStatus(sessionId, study, exception);
}
}
if (exception == null) {
writeJobStatus(outdir, new Job.JobStatus(Job.JobStatus.DONE, "Job completed"));
} else {
writeJobStatus(outdir, new Job.JobStatus(Job.JobStatus.ERROR, "Job with errors: " + exception.getMessage()));
}
// The final job status has been written, the hook is not needed any more.
Runtime.getRuntime().removeShutdownHook(hook);
// Throw the exception!
if (exception != null) {
throw exception;
}
return storagePipelineResults;
}
/**
 * Resolves the study shared by all the given files.
 *
 * @param fileIds Catalog ids of the files
 * @return The id of the common study, or -1 when the list is empty
 * @throws CatalogException if the study of a file cannot be obtained, or the files belong to different studies
 */
private long getStudyId(List<Long> fileIds) throws CatalogException {
    long commonStudyId = -1;
    for (Long fileId : fileIds) {
        long currentStudyId = fileManager.getStudyId(fileId);
        if (currentStudyId == -1) {
            // Something strange occurred: the file has no study.
            logger.error("Could not obtain study of the file {}", fileId);
            throw new CatalogException("Could not obtain the study of the file " + fileId + ". Is it a correct file id?.");
        }
        // Every file after the first one must belong to the study already seen.
        if (commonStudyId != -1 && currentStudyId != commonStudyId) {
            throw new CatalogException("Cannot index files coming from different studies.");
        }
        commonStudyId = currentStudyId;
    }
    return commonStudyId;
}
/**
 * Copies the results into catalog through the parent implementation and, when any file was copied,
 * asks the file manager to match up the variant files.
 *
 * @param tmpOutdirPath     Directory containing the results
 * @param catalogPathOutDir Catalog id of the destination directory
 * @param sessionId         Session id
 * @return The files returned by the parent implementation
 * @throws CatalogException if a catalog operation fails
 * @throws IOException      if the parent implementation fails with an IO error
 */
@Override
protected List<File> copyResults(Path tmpOutdirPath, long catalogPathOutDir, String sessionId) throws CatalogException, IOException {
    List<File> copiedFiles = super.copyResults(tmpOutdirPath, catalogPathOutDir, sessionId);
    logger.info("{} files found", copiedFiles.size());
    if (!copiedFiles.isEmpty()) {
        fileManager.matchUpVariantFiles(copiedFiles, sessionId);
    }
    return copiedFiles;
}
/**
 * Maps the transform and load flags to the step to execute.
 *
 * @param load      Whether the load step is requested
 * @param transform Whether the transform step is requested
 * @return INDEX when both are requested, TRANSFORM when only transform is requested, LOAD otherwise
 */
private Type getType(Boolean load, Boolean transform) {
    if (!transform) {
        return Type.LOAD;
    }
    return load ? Type.INDEX : Type.TRANSFORM;
}
/**
 * Updates catalog once the storage pipeline has been executed.
 * <p>
 * For every file: resolves the final index status from the current one and from the pipeline result, updates the
 * variant file stats if the transform step succeeded, stores the pipeline result in the file attributes, updates
 * the index status and, when the file ends up READY and stats were requested, sets the default cohort to READY.
 *
 * @param study                  Study of the files
 * @param filesToIndex           Files processed by the pipeline
 * @param storagePipelineResults Results returned by the storage engine. Files without a result are treated as failed
 * @param outdir                 Output directory of the pipeline, where the metadata files are read from
 * @param saveIntermediateFiles  Whether the intermediate files were copied into catalog. Only then a failed
 *                               INDEXING file may be restored to TRANSFORMED
 * @param options                Options. CALCULATE_STATS is read
 * @param sessionId              Session id
 * @throws CatalogException      if a catalog operation fails or a file has no index information
 * @throws IOException           if the stats of a transformed file cannot be found
 * @throws IllegalStateException if two results refer to the same original file name, or the index status is unknown
 */
private void updateFileInfo(Study study, List<File> filesToIndex, List<StoragePipelineResult> storagePipelineResults, Path outdir,
                            boolean saveIntermediateFiles, QueryOptions options, String sessionId)
        throws CatalogException, IOException {

    // Results by original file name. Collectors.toMap throws IllegalStateException on duplicated keys.
    Map<String, StoragePipelineResult> map = storagePipelineResults
            .stream()
            .collect(Collectors.toMap(s -> {
                String input = s.getInput().getPath();
                String inputFileName = Paths.get(input).getFileName().toString();
                // Input file may be the transformed one. Convert into original file.
                return VariantReaderUtils.getOriginalFromTransformedFile(inputFileName);
            }, i -> i));

    // Same value for every file: read it once.
    boolean calculateStats = options.getBoolean(VariantStorageEngine.Options.CALCULATE_STATS.key());

    for (File indexedFile : filesToIndex) {
        // Fetch from catalog. {@link #copyResult} may modify the content
        indexedFile = catalogManager.getFile(indexedFile.getId(), sessionId).first();
        // Suppose that the missing results are due to errors, and those files were not indexed.
        StoragePipelineResult storagePipelineResult = map.get(indexedFile.getName());

        boolean jobFailed = storagePipelineResult == null || storagePipelineResult.getLoadError() != null
                || storagePipelineResult.getTransformError() != null;
        boolean transformedSuccess = storagePipelineResult != null && storagePipelineResult.isTransformExecuted()
                && storagePipelineResult.getTransformError() == null;
        boolean loadedSuccess = storagePipelineResult != null && storagePipelineResult.isLoadExecuted()
                && storagePipelineResult.getLoadError() == null;

        String indexStatusName;
        String indexStatusMessage = null;

        if (indexedFile.getIndex() != null) {
            FileIndex index = indexedFile.getIndex();
            switch (index.getStatus().getName()) {
                case FileIndex.IndexStatus.NONE:
                case FileIndex.IndexStatus.TRANSFORMED:
                    indexStatusMessage = "Unexpected index status. Expected " + FileIndex.IndexStatus.TRANSFORMING + ", "
                            + FileIndex.IndexStatus.LOADING + " or " + FileIndex.IndexStatus.INDEXING + " and got "
                            + index.getStatus();
                    logger.warn(indexStatusMessage);
                    // Intentional fall-through: the current status is kept.
                case FileIndex.IndexStatus.READY: //Do not show warn message when index status is READY.
                    indexStatusName = index.getStatus().getName();
                    break;
                case FileIndex.IndexStatus.TRANSFORMING:
                    if (jobFailed) {
                        indexStatusMessage = "Job failed. Restoring status from " + FileIndex.IndexStatus.TRANSFORMING + " to "
                                + FileIndex.IndexStatus.NONE;
                        logger.warn(indexStatusMessage);
                        indexStatusName = FileIndex.IndexStatus.NONE;
                    } else {
                        indexStatusMessage = "Job finished. File transformed";
                        indexStatusName = FileIndex.IndexStatus.TRANSFORMED;
                    }
                    break;
                case FileIndex.IndexStatus.LOADING:
                    if (jobFailed) {
                        indexStatusMessage = "Job failed. Restoring status from " + FileIndex.IndexStatus.LOADING + " to "
                                + FileIndex.IndexStatus.TRANSFORMED;
                        logger.warn(indexStatusMessage);
                        indexStatusName = FileIndex.IndexStatus.TRANSFORMED;
                    } else {
                        indexStatusMessage = "Job finished. File index ready";
                        indexStatusName = FileIndex.IndexStatus.READY;
                    }
                    break;
                case FileIndex.IndexStatus.INDEXING:
                    if (jobFailed) {
                        // If transform was executed, restore status to Transformed.
                        if (transformedSuccess && saveIntermediateFiles) {
                            indexStatusName = FileIndex.IndexStatus.TRANSFORMED;
                        } else {
                            indexStatusName = FileIndex.IndexStatus.NONE;
                        }
                        indexStatusMessage = "Job failed. Restoring status from " + FileIndex.IndexStatus.INDEXING
                                + " to " + indexStatusName;
                        // Log the full message, as the other failure branches do, not just the status name.
                        logger.warn(indexStatusMessage);
                    } else {
                        indexStatusName = FileIndex.IndexStatus.READY;
                        indexStatusMessage = "Job finished. File index ready";
                    }
                    break;
                default:
                    throw new IllegalStateException("Unknown Index Status " + index.getStatus().getName());
            }
        } else {
            logger.error("The execution should never get into this condition. Critical error.");
            throw new CatalogException("Critical error. Empty index parameter in file " + indexedFile.getId());
        }

        if (transformedSuccess) {
            updateVariantFileStats(indexedFile, outdir, sessionId);
        }

        // Update storagePipelineResult
        Map<String, Object> attributes = indexedFile.getAttributes();
        attributes.put("storagePipelineResult", storagePipelineResult);
        ObjectMap params = new ObjectMap(FileDBAdaptor.QueryParams.ATTRIBUTES.key(), attributes);
        fileManager.update(indexedFile.getId(), params, new QueryOptions(), sessionId);

        // Update index status
        fileManager.updateFileIndexStatus(indexedFile, indexStatusName, indexStatusMessage, sessionId);

        if (indexStatusName.equals(FileIndex.IndexStatus.READY) && calculateStats) {
            Query query = new Query(CohortDBAdaptor.QueryParams.NAME.key(), StudyEntry.DEFAULT_COHORT);
            QueryResult<Cohort> queryResult = catalogManager.getCohortManager().get(study.getId(), query, new QueryOptions(),
                    sessionId);
            if (queryResult.getNumResults() != 0) {
                logger.debug("Default cohort status set to READY");
                Cohort defaultCohort = queryResult.first();
                catalogManager.getCohortManager().setStatus(Long.toString(defaultCohort.getId()), Cohort.CohortStatus.READY, null,
                        sessionId);
            }
        }
    }
}
/**
 * Stores in catalog the global stats of a transformed variant file.
 * The stats are read from the metadata file generated on the transform step, expected in the output directory
 * under the name of the input file plus the metadata extension. Files with a bioformat other than VARIANT
 * are ignored.
 *
 * @param inputFile Catalog file that was transformed
 * @param outdir    Output directory of the transform step
 * @param sessionId Session id
 * @throws CatalogException if the metadata file cannot be read or a Catalog error occurs.
 * @throws IOException      if the metadata file does not exist.
 */
private void updateVariantFileStats(File inputFile, Path outdir, String sessionId) throws CatalogException, IOException {
    if (!inputFile.getBioformat().equals(File.Bioformat.VARIANT)) {
        return;
    }

    String metaFileName = inputFile.getName() + "." + VariantReaderUtils.METADATA_FILE_FORMAT_GZ;
    Path metaFile = outdir.resolve(metaFileName);
    if (!metaFile.toFile().exists()) {
        throw new IOException("Stats file not found.");
    }

    VariantGlobalStats stats;
    try {
        stats = VariantReaderUtils.readVariantSource(metaFile, null).getStats();
    } catch (StorageEngineException e) {
        throw new CatalogException("Error reading file \"" + metaFile + "\"", e);
    }

    ObjectMap statsParams = new ObjectMap(FileDBAdaptor.QueryParams.STATS.key(), new ObjectMap(VARIANT_STATS, stats));
    fileManager.update(inputFile.getId(), statsParams, new QueryOptions(), sessionId);
}
/**
 * Adds the samples of the file to the default cohort of the study, creating the cohort if it does not exist.
 *
 * @param file      File about to be loaded
 * @param study     Study of the file
 * @param options   Options. {@code FileMetadataReader.CREATE_MISSING_SAMPLES} is read when the samples have to be
 *                  obtained through the {@link FileMetadataReader}
 * @param sessionId Session id
 * @return true if the default cohort was created or its list of samples was updated
 * @throws CatalogException if a catalog operation fails
 */
private boolean updateDefaultCohort(File file, Study study, QueryOptions options, String sessionId) throws CatalogException {
    // Samples of the file: from catalog when the file has them registered, from the file metadata reader otherwise.
    List<Sample> fileSamples;
    boolean hasRegisteredSamples = file.getSampleIds() != null && !file.getSampleIds().isEmpty();
    if (hasRegisteredSamples) {
        Query sampleQuery = new Query(SampleDBAdaptor.QueryParams.ID.key(), file.getSampleIds());
        fileSamples = catalogManager.getSampleManager().get(study.getId(), sampleQuery, new QueryOptions(), sessionId).getResult();
    } else {
        final ObjectMap fileModifyParams = new ObjectMap(FileDBAdaptor.QueryParams.ATTRIBUTES.key(), new ObjectMap());
        fileSamples = FileMetadataReader.get(catalogManager).getFileSamples(study, file,
                catalogManager.getFileManager().getUri(file), fileModifyParams,
                options.getBoolean(FileMetadataReader.CREATE_MISSING_SAMPLES, true), false, options, sessionId);
    }

    // Default cohort: reuse the existing one or create it empty.
    boolean changed = false;
    Query cohortQuery = new Query(CohortDBAdaptor.QueryParams.NAME.key(), StudyEntry.DEFAULT_COHORT);
    QueryResult<Cohort> cohortResult = catalogManager.getAllCohorts(study.getId(), cohortQuery, new QueryOptions(), sessionId);
    Cohort defaultCohort;
    if (!cohortResult.getResult().isEmpty()) {
        defaultCohort = cohortResult.first();
    } else {
        defaultCohort = catalogManager.getCohortManager().create(study.getId(), StudyEntry.DEFAULT_COHORT, Study.Type.COLLECTION,
                "Default cohort with almost all indexed samples", Collections.emptyList(), null, sessionId).first();
        changed = true;
    }

    //Samples are the already indexed plus those that are going to be indexed
    Set<Long> mergedSamples = new HashSet<>(defaultCohort.getSamples());
    for (Sample sample : fileSamples) {
        mergedSamples.add(sample.getId());
    }

    // Only touch catalog when the merge changed the number of samples.
    if (mergedSamples.size() != defaultCohort.getSamples().size()) {
        logger.debug("Updating \"{}\" cohort", StudyEntry.DEFAULT_COHORT);
        ObjectMap cohortParams = new ObjectMap(CohortDBAdaptor.QueryParams.SAMPLES.key(), new ArrayList<>(mergedSamples));
        catalogManager.getCohortManager().update(defaultCohort.getId(), cohortParams, new QueryOptions(), sessionId);
        changed = true;
    }
    return changed;
}
/**
 * Sets the final status of the default cohort depending on the outcome of the execution.
 *
 * @param sessionId Session id
 * @param study     Study owning the default cohort
 * @param exception Error raised by the execution. READY is set when null, INVALID otherwise
 * @throws CatalogException if the catalog update fails
 */
private void updateDefaultCohortStatus(String sessionId, Study study, StorageEngineException exception) throws CatalogException {
    String finalStatus = exception == null ? Cohort.CohortStatus.READY : Cohort.CohortStatus.INVALID;
    updateDefaultCohortStatus(study, finalStatus, sessionId);
}
/**
 * Sets the status of the default cohort of the study.
 * When the study has no default cohort, a warning is logged and nothing is updated, in line with the guard
 * applied to the same lookup in {@code updateFileInfo}.
 *
 * @param study     Study owning the default cohort
 * @param status    Cohort status name to set
 * @param sessionId Session id
 * @throws CatalogException if a catalog operation fails
 */
private void updateDefaultCohortStatus(Study study, String status, String sessionId) throws CatalogException {
    Query query = new Query(CohortDBAdaptor.QueryParams.NAME.key(), StudyEntry.DEFAULT_COHORT);
    Cohort defaultCohort = catalogManager.getAllCohorts(study.getId(), query, new QueryOptions(), sessionId).first();
    if (defaultCohort == null) {
        // Avoid a NullPointerException: there is no cohort to update.
        logger.warn("Default cohort \"{}\" not found in study {}. Unable to set status {}",
                StudyEntry.DEFAULT_COHORT, study.getId(), status);
        return;
    }
    catalogManager.getCohortManager().setStatus(Long.toString(defaultCohort.getId()), status, null,
            sessionId);
}
/**
 * Get non transformed files.
 * Only files in READY status and VCF format are considered. From those, files with index status NONE, or
 * without index information, are always selected.
 *
 * @param fileList Files to filter
 * @param resume   If resume, get also TRANSFORMING and INDEXING files.
 * @return List of non transformed files. Empty if the input is null or empty
 */
private List<File> filterTransformFiles(List<File> fileList, boolean resume) {
    if (fileList == null || fileList.isEmpty()) {
        return new ArrayList<>();
    }

    List<File> filteredFiles = new ArrayList<>(fileList.size());
    for (File file : fileList) {
        if (!(file.getStatus().getName().equals(File.FileStatus.READY)
                && file.getFormat().equals(File.Format.VCF))) {
            // Say which file is skipped and why, instead of logging an empty warning.
            logger.warn("Skipping file {}. We can only transform VCF files with status {}, the format is {} and the status is {}",
                    file.getName(), File.FileStatus.READY, file.getFormat(), file.getStatus().getName());
            continue;
        }

        if (file.getIndex() == null) {
            // This block should not happen ever
            filteredFiles.add(file);
            logger.warn("This block should not happen ever");
            continue;
        }

        switch (file.getIndex().getStatus().getName()) {
            case FileIndex.IndexStatus.NONE:
                filteredFiles.add(file);
                break;
            case FileIndex.IndexStatus.INDEXING:
            case FileIndex.IndexStatus.TRANSFORMING:
                // A previous execution left the file in this status: only accepted when resuming.
                if (!resume) {
                    logger.warn("File already being transformed. "
                                    + "We can only transform VCF files not transformed, the status is {}. "
                                    + "Do '" + VariantStorageEngine.Options.RESUME.key() + "' to continue.",
                            file.getIndex().getStatus().getName());
                } else {
                    filteredFiles.add(file);
                }
                break;
            case FileIndex.IndexStatus.TRANSFORMED:
            case FileIndex.IndexStatus.LOADING:
            case FileIndex.IndexStatus.READY:
            default:
                logger.warn("We can only transform VCF files not transformed, the status is {}",
                        file.getIndex().getStatus().getName());
                break;
        }
    }
    return filteredFiles;
}
/**
 * Selects the files that can be loaded and collects the URI of the transformed file to load for each of them.
 * <p>
 * Input files may be original VCF/gVCF files or transformed files. A transformed file is replaced by its original
 * file before checking the index status. Files that cannot be loaded are skipped with a warning.
 *
 * @param fileList  Input files
 * @param options   Options. If {@link #TRANSFORMED_FILES} is present, it must contain one existing file per element
 *                  of fileList, matched by position
 * @param fileUris  Output parameter. One URI is appended for every file added to the returned list
 * @param resume    If true, files with index status INDEXING or LOADING are handled as TRANSFORMED files
 * @param sessionId Session id
 * @return Original files to be loaded. Empty if the input is null or empty
 * @throws CatalogException   if the transformed files given by the user are not valid or do not match the ones in
 *                            catalog, or a catalog operation fails
 * @throws URISyntaxException if a URI cannot be built from a transformed file
 */
private List<File> filterLoadFiles(List<File> fileList, QueryOptions options, List<URI> fileUris, boolean resume, String sessionId)
throws CatalogException, URISyntaxException {
if (fileList == null || fileList.isEmpty()) {
return new ArrayList<>();
}
// Transformed files given by the user. Null when the option is not present.
List<String> transformedFiles = null;
if (options.get(TRANSFORMED_FILES) != null) {
transformedFiles = options.getAsStringList(TRANSFORMED_FILES);
if (transformedFiles.size() != fileList.size()) {
throw new CatalogException("The number of transformed files (" + transformedFiles.size() + ") passed does not match the"
+ " number of vcf files (" + fileList.size() + ").");
}
// Check all the files are accessible and are actually files
for (String transformedFile : transformedFiles) {
java.io.File file = Paths.get(transformedFile).toFile();
if (!file.exists() || !file.isFile()) {
throw new CatalogException("File " + file + " does not exist or it is not an avro file");
}
}
}
List<File> filteredFiles = new ArrayList<>(fileList.size());
// Index based loop: transformedFiles.get(i) is the transformed file given for fileList.get(i).
for (int i = 0; i < fileList.size(); i++) {
File file = fileList.get(i);
// If is a transformed file, get the related VCF file
if (VariantReaderUtils.isTransformedVariants(file.getName())) {
if (transformedFiles != null) {
// Check if the uri is different
URI avroUri = UriUtils.createUri(transformedFiles.get(i));
if (!avroUri.equals(file.getUri())) {
// This case should never happen. If we are here, it means that the input file was already an avro file and that
// a list of avro uris have been found. However, the uri of the avro does not match the uri we have for the avro
// file. Nonsense.
throw new CatalogException("The avro file " + file.getId() + " in " + file.getUri() + " has been associated a "
+ "different uri " + avroUri + " by the user.");
}
}
file = getOriginalFromTransformed(sessionId, file);
}
if (file.getFormat().equals(File.Format.VCF) || file.getFormat().equals(File.Format.GVCF)) {
// Files without index information are handled as not indexed.
String status = file.getIndex() == null ? FileIndex.IndexStatus.NONE : file.getIndex().getStatus().getName();
switch (status) {
case FileIndex.IndexStatus.NONE:
// Not transformed through catalog: it can only be loaded from a transformed file given by the user.
if (transformedFiles != null) {
filteredFiles.add(file);
fileUris.add(UriUtils.createUri(transformedFiles.get(i)));
} else {
logger.warn("Cannot load vcf file " + file.getId() + " if no avro file is provided.");
}
break;
case FileIndex.IndexStatus.INDEXING:
case FileIndex.IndexStatus.LOADING:
if (!resume) {
logger.warn("Unable to load this file. Already being loaded. Skipping file {}", file.getName());
break;
}
// Resuming: intentional fall-through, the file is handled as a TRANSFORMED one.
case FileIndex.IndexStatus.TRANSFORMED:
// We will attempt to use the avro file registered in catalog
File transformed = getTransformedFromOriginal(sessionId, file);
if (transformedFiles != null) {
// Check that the uri from the avro file obtained from catalog is the same the user has put as input
URI uri = UriUtils.createUri(transformedFiles.get(i));
if (!uri.equals(transformed.getUri())) {
throw new CatalogException("A transformed file was found for file " + file.getId() + " in "
+ transformed.getUri() + ". However, the user selected a different one in " + uri);
}
}
filteredFiles.add(file);
fileUris.add(transformed.getUri());
break;
case FileIndex.IndexStatus.TRANSFORMING:
logger.warn("We can only load files previously transformed. Skipping file {}", file.getName());
break;
case FileIndex.IndexStatus.READY:
logger.warn("Already loaded file. Skipping file {}", file.getName());
break;
default:
logger.warn("We can only load files previously transformed, File {} with status is {}", file.getName(), status);
break;
}
} else {
logger.warn("The input file is not a variant file. Format {}", file.getFormat());
}
}
return filteredFiles;
}
/**
 * Gets the original VCF file a transformed file was produced from.
 * The original file is the one referenced by the PRODUCED_FROM relation of the transformed file. If the
 * transformed file has no related files, catalog is asked to match up the variant files first.
 *
 * @param sessionId Session id
 * @param file      Transformed file
 * @return The original VCF file
 * @throws URISyntaxException declared for the callers, not raised by the visible code
 * @throws CatalogException   if no original file is related to the transformed file, or it cannot be fetched
 */
private File getOriginalFromTransformed(String sessionId, File file)
        throws URISyntaxException, CatalogException {
    // Matchup variant files, if missing
    if (file.getRelatedFiles() == null || file.getRelatedFiles().isEmpty()) {
        catalogManager.getFileManager().matchUpVariantFiles(Collections.singletonList(file), sessionId);
    }

    // Look for the vcf file
    long vcfId = -1;
    // The related files may still be missing after the match up. In that case, fail below with a
    // CatalogException instead of a NullPointerException.
    if (file.getRelatedFiles() != null) {
        for (File.RelatedFile relatedFile : file.getRelatedFiles()) {
            if (File.RelatedFile.Relation.PRODUCED_FROM.equals(relatedFile.getRelation())) {
                vcfId = relatedFile.getFileId();
                break;
            }
        }
    }
    if (vcfId == -1) {
        logger.error("This code should never be executed. Every transformed avro file should come from a registered vcf file");
        throw new CatalogException("Internal error. No vcf file could be found for file " + file.getId());
    }

    QueryResult<File> vcfQueryResult = fileManager.get(vcfId, new QueryOptions(), sessionId);
    if (vcfQueryResult.getNumResults() != 1) {
        logger.error("This code should never be executed. No vcf file could be found for vcf id {}", vcfId);
        throw new CatalogException("Internal error. No vcf file could be found under id " + vcfId);
    }
    return vcfQueryResult.first();
}
/**
 * Gets the transformed file registered in the index information of an original VCF file.
 *
 * @param sessionId Session id
 * @param file      Original VCF file
 * @return The transformed file
 * @throws CatalogException   if the file has no transformed file registered, or it cannot be fetched
 * @throws URISyntaxException declared for the callers, not raised by the visible code
 */
private File getTransformedFromOriginal(String sessionId, File file)
        throws CatalogException, URISyntaxException {
    long transformedFile = file.getIndex() != null && file.getIndex().getTransformedFile() != null
            ? file.getIndex().getTransformedFile().getId()
            : -1;
    if (transformedFile == -1) {
        logger.error("This code should never be executed. Every vcf file containing the transformed status should have"
                + " a registered transformed file");
        throw new CatalogException("Internal error. No transformed file could be found for file " + file.getId());
    }

    QueryResult<File> queryResult = fileManager.get(transformedFile, new QueryOptions(), sessionId);
    if (queryResult.getNumResults() != 1) {
        // Include the id in the log: the original message stopped right before it.
        logger.error("This code should never be executed. No transformed file could be found under id {}", transformedFile);
        throw new CatalogException("Internal error. No transformed file could be found under id " + transformedFile);
    }
    return queryResult.first();
}
}