/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.core.manager.variant;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.opencb.biodata.models.variant.VariantSource;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.opencga.catalog.db.api.CohortDBAdaptor;
import org.opencb.opencga.catalog.db.api.FileDBAdaptor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.catalog.models.*;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.metadata.BatchFileOperation;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.metadata.StudyConfigurationManager;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import static java.lang.Math.toIntExact;
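/**
 * Builds storage {@link StudyConfiguration} objects from the information held in Catalog
 * (files, samples and cohorts of a study) and propagates cohort and file-index status
 * changes from a {@link StudyConfiguration} back to Catalog.
 *
 * @author Jacobo Coll &lt;jacobo167@gmail.com&gt;
 */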
public class CatalogStudyConfigurationFactory {
/** Projection for the "all files" listing: file id, name, path, sample ids and the variantFileHeader attribute. */
public static final QueryOptions ALL_FILES_QUERY_OPTIONS = new QueryOptions()
.append("include", Arrays.asList("projects.studies.files.id", "projects.studies.files.name", "projects.studies.files.path",
"projects.studies.files.sampleIds", "projects.studies.files.attributes.variantSource.metadata.variantFileHeader"));
/** Filter for the "all files" listing: files whose bioformat is VARIANT or ALIGNMENT. */
public static final Query ALL_FILES_QUERY = new Query()
.append(FileDBAdaptor.QueryParams.BIOFORMAT.key(), Arrays.asList(File.Bioformat.VARIANT, File.Bioformat.ALIGNMENT));
/** Projection (id, name, path) for indexed files. Only referenced from commented-out code in this file. */
public static final QueryOptions INDEXED_FILES_QUERY_OPTIONS = new QueryOptions()
.append("include", Arrays.asList("projects.studies.files.id", "projects.studies.files.name", "projects.studies.files.path"));
/** Filter selecting files whose index status name is READY. Only referenced from commented-out code in this file. */
public static final Query INDEXED_FILES_QUERY = new Query()
.append(FileDBAdaptor.QueryParams.INDEX_STATUS_NAME.key(), FileIndex.IndexStatus.READY);
/** Projection for the samples listing: sample id and name. */
public static final QueryOptions SAMPLES_QUERY_OPTIONS = new QueryOptions("include",
Arrays.asList("projects.studies.samples.id", "projects.studies.samples.name"));
/** Empty query: the cohorts listing is not filtered. */
public static final Query COHORTS_QUERY = new Query();
/** Empty options: the cohorts listing is fetched without any projection. */
public static final QueryOptions COHORTS_QUERY_OPTIONS = new QueryOptions();
/**
 * Options holding an INVALID cohort-status entry plus a projection of cohort name, id and status.
 * Not referenced elsewhere in this file.
 */
public static final QueryOptions INVALID_COHORTS_QUERY_OPTIONS = new QueryOptions()
.append(CohortDBAdaptor.QueryParams.STATUS_NAME.key(), Cohort.CohortStatus.INVALID)
.append("include",
Arrays.asList("projects.studies.cohorts.name", "projects.studies.cohorts.id", "projects.studies.cohorts.status"));
/** Class logger; protected and non-final, so it is visible to (and replaceable by) subclasses. */
protected static Logger logger = LoggerFactory.getLogger(CatalogStudyConfigurationFactory.class);
/** Catalog entry point used for every study, project, file, sample and cohort lookup. */
private final CatalogManager catalogManager;
/** Name of the study attribute included in {@link #STUDY_QUERY_OPTIONS}. Must be declared before that constant. */
public static final String STUDY_CONFIGURATION_FIELD = "studyConfiguration";
/** Projection used when fetching the study: id, alias, and the studyConfiguration and aggregation-type attributes. */
public static final QueryOptions STUDY_QUERY_OPTIONS = new QueryOptions("include", Arrays.asList(
"projects.studies.id",
"projects.studies.alias",
"projects.studies.attributes." + STUDY_CONFIGURATION_FIELD,
"projects.studies.attributes." + VariantStorageEngine.Options.AGGREGATED_TYPE.key()
));
/** JSON mapper created in the constructor; not used by any method visible in this file. */
private final ObjectMapper objectMapper;
/** Never assigned in this class, so it always holds {@code null}. */
private QueryOptions options;
/**
 * Creates a factory that reads from, and writes to, the given catalog.
 *
 * @param catalogManager catalog access point used by every operation of this factory
 */
public CatalogStudyConfigurationFactory(CatalogManager catalogManager) {
    this.objectMapper = new ObjectMapper();
    this.catalogManager = catalogManager;
}
/**
 * Builds a {@link StudyConfiguration} from Catalog only, without consulting any storage-side
 * {@link StudyConfigurationManager}.
 *
 * @param studyId   catalog id of the study
 * @param options   forwarded unchanged to the four-argument overload
 * @param sessionId catalog session used for every lookup
 * @return the study configuration filled with catalog information
 * @throws CatalogException if any catalog lookup fails
 */
public StudyConfiguration getStudyConfiguration(long studyId, QueryOptions options, String sessionId) throws CatalogException {
    // No storage manager: the configuration is created from scratch.
    final StudyConfigurationManager noStorageManager = null;
    return getStudyConfiguration(studyId, noStorageManager, options, sessionId);
}
/**
 * Builds a {@link StudyConfiguration} for a study, starting from the one stored by the given
 * manager (when provided) and then overwriting/adding the information held in Catalog.
 *
 * @param studyId                   catalog id of the study
 * @param studyConfigurationManager storage-side manager to read the current configuration from;
 *                                  when {@code null} a brand new configuration is created
 * @param options                   options passed (as a copy) to the manager; may be {@code null},
 *                                  in which case empty options are used
 * @param sessionId                 catalog session used for every lookup
 * @return the study configuration filled with catalog information
 * @throws CatalogException    if any catalog lookup fails
 * @throws ArithmeticException if {@code studyId} does not fit in an {@code int}
 */
public StudyConfiguration getStudyConfiguration(long studyId, StudyConfigurationManager studyConfigurationManager, QueryOptions options,
        String sessionId) throws CatalogException {
    Study study = catalogManager.getStudy(studyId, STUDY_QUERY_OPTIONS, sessionId).first();

    StudyConfiguration studyConfiguration = null;
    if (studyConfigurationManager != null) {
        // Copy the options only when they are actually needed, and never hand a null map to the
        // copy constructor.
        QueryOptions qOpts = options == null ? new QueryOptions() : new QueryOptions(options);
        // toIntExact (as used for sample ids in this class) fails loudly instead of silently
        // truncating the id and reading a different study.
        studyConfiguration = studyConfigurationManager.getStudyConfiguration(toIntExact(studyId), qOpts).first();
    }
    return fillStudyConfiguration(studyConfiguration, study, sessionId);
}
/**
 * Copies the catalog view of a study (name, aggregation, files, samples and cohorts) into a
 * {@link StudyConfiguration}.
 * <p>
 * Existing entries are kept; catalog entries are added or overwritten. The "indexed files" list
 * is deliberately NOT touched: it must only be modified by storage, never from catalog.
 *
 * @param studyConfiguration configuration to fill; when {@code null} a new one is created
 * @param study              catalog study to read from
 * @param sessionId          catalog session used for every lookup
 * @return the filled configuration (the same instance that was passed in, unless it was {@code null})
 * @throws CatalogException    if any catalog lookup fails
 * @throws ArithmeticException if a study, file, sample or cohort id does not fit in an {@code int}
 */
private StudyConfiguration fillStudyConfiguration(StudyConfiguration studyConfiguration, Study study, String sessionId)
        throws CatalogException {
    long studyId = study.getId();
    if (studyConfiguration == null) {
        studyConfiguration = new StudyConfiguration(0, "");
    }
    // All ids are narrowed with toIntExact so an out-of-range id fails instead of being truncated.
    studyConfiguration.setStudyId(toIntExact(studyId));

    // Study name has the form user@project:study
    long projectId = catalogManager.getProjectIdByStudyId(study.getId());
    String projectAlias = catalogManager.getProject(projectId, null, sessionId).first().getAlias();
    String userId = catalogManager.getUserIdByProjectId(projectId);
    studyConfiguration.setStudyName(userId + "@" + projectAlias + ":" + study.getAlias());

    fillNullMaps(studyConfiguration);

    Object aggregationObj = study.getAttributes().get(VariantStorageEngine.Options.AGGREGATED_TYPE.key());
    if (aggregationObj != null) {
        String aggregatedType = aggregationObj.toString();
        logger.debug("setting study aggregation to {}", aggregatedType);
        studyConfiguration.setAggregation(VariantSource.Aggregation.valueOf(aggregatedType));
    } else {
        studyConfiguration.setAggregation(VariantSource.Aggregation.NONE);
    }
    logger.debug("studyConfiguration aggregation: {}", studyConfiguration.getAggregation());

    // DO NOT update "indexed files" list. This MUST be modified only by storage.
    // This field will never be modified from catalog to storage.

    fillFiles(studyConfiguration, studyId, sessionId);
    fillSamples(studyConfiguration, studyId, sessionId);
    fillCohorts(studyConfiguration, studyId, sessionId);
    return studyConfiguration;
}

/** Registers every VARIANT/ALIGNMENT catalog file (name to id) together with the samples it contains. */
private void fillFiles(StudyConfiguration studyConfiguration, long studyId, String sessionId) throws CatalogException {
    logger.debug("Get Files");
    QueryResult<File> files = catalogManager.getAllFiles(studyId, ALL_FILES_QUERY, ALL_FILES_QUERY_OPTIONS, sessionId);
    for (File file : files.getResult()) {
        int fileId = toIntExact(file.getId());
        studyConfiguration.getFileIds().forcePut(file.getName(), fileId);
        List<Integer> sampleIds = new ArrayList<>(file.getSampleIds().size());
        for (Long sampleId : file.getSampleIds()) {
            sampleIds.add(toIntExact(sampleId));
        }
        // LinkedHashSet keeps the sample order reported by catalog.
        studyConfiguration.getSamplesInFiles().put(fileId, new LinkedHashSet<>(sampleIds));
    }
}

/** Registers every catalog sample of the study (name to id). */
private void fillSamples(StudyConfiguration studyConfiguration, long studyId, String sessionId) throws CatalogException {
    logger.debug("Get Samples");
    QueryResult<Sample> samples = catalogManager.getAllSamples(studyId, new Query(), SAMPLES_QUERY_OPTIONS, sessionId);
    for (Sample sample : samples.getResult()) {
        studyConfiguration.getSampleIds().forcePut(sample.getName(), toIntExact(sample.getId()));
    }
}

/** Registers every catalog cohort (name to id, id to samples) and mirrors its status into the stats sets. */
private void fillCohorts(StudyConfiguration studyConfiguration, long studyId, String sessionId) throws CatalogException {
    logger.debug("Get Cohorts");
    QueryResult<Cohort> cohorts = catalogManager.getAllCohorts(studyId, COHORTS_QUERY, COHORTS_QUERY_OPTIONS, sessionId);
    for (Cohort cohort : cohorts.getResult()) {
        int cohortId = toIntExact(cohort.getId());
        studyConfiguration.getCohortIds().forcePut(cohort.getName(), cohortId);
        List<Integer> sampleIds = new ArrayList<>(cohort.getSamples().size());
        for (Long sampleId : cohort.getSamples()) {
            sampleIds.add(toIntExact(sampleId));
        }
        studyConfiguration.getCohorts().put(cohortId, new HashSet<>(sampleIds));

        // A cohort without status is handled like CALCULATING/NONE instead of failing with a
        // NullPointerException (updateCatalogFromStudyConfiguration also tolerates a null status).
        String status = cohort.getStatus() == null ? null : cohort.getStatus().getName();
        if (Cohort.CohortStatus.READY.equals(status)) {
            studyConfiguration.getCalculatedStats().add(cohortId);
            studyConfiguration.getInvalidStats().remove(cohortId);
        } else if (Cohort.CohortStatus.INVALID.equals(status)) {
            studyConfiguration.getCalculatedStats().remove(cohortId);
            studyConfiguration.getInvalidStats().add(cohortId);
        } else { //CALCULATING || NONE || unknown
            studyConfiguration.getCalculatedStats().remove(cohortId);
            studyConfiguration.getInvalidStats().remove(cohortId);
        }
    }
}
/**
 * Replaces every {@code null} container of the given configuration (file ids, samples in files,
 * sample ids, cohort ids, cohorts and attributes) with an empty one, so that callers can add
 * entries without null checks. Containers that are already set are left untouched.
 *
 * @param studyConfiguration configuration whose missing containers are initialised
 */
private void fillNullMaps(StudyConfiguration studyConfiguration) {
    // Files
    if (null == studyConfiguration.getFileIds()) {
        studyConfiguration.setFileIds(new HashMap<>());
    }
    if (null == studyConfiguration.getSamplesInFiles()) {
        studyConfiguration.setSamplesInFiles(new HashMap<>());
    }

    // Samples
    if (null == studyConfiguration.getSampleIds()) {
        studyConfiguration.setSampleIds(new HashMap<>());
    }

    // Cohorts
    if (null == studyConfiguration.getCohortIds()) {
        studyConfiguration.setCohortIds(new HashMap<>());
    }
    if (null == studyConfiguration.getCohorts()) {
        studyConfiguration.setCohorts(new HashMap<>());
    }

    // Free-form attributes
    if (null == studyConfiguration.getAttributes()) {
        studyConfiguration.setAttributes(new ObjectMap());
    }
}
/**
 * Refreshes the stored study configuration with the current catalog information, delegating to
 * {@code lockAndUpdate} of the given manager. The replacement configuration is produced by
 * {@link #getStudyConfiguration(long, StudyConfigurationManager, QueryOptions, String)} with
 * empty options.
 *
 * @param studyId                   catalog id of the study
 * @param studyConfigurationManager storage-side manager that holds the configuration
 * @param sessionId                 catalog session used for every lookup
 * @throws CatalogException       if any catalog lookup fails
 * @throws StorageEngineException if the storage update fails
 */
public void updateStudyConfigurationFromCatalog(long studyId, StudyConfigurationManager studyConfigurationManager, String sessionId)
        throws CatalogException, StorageEngineException {
    final int storageStudyId = (int) studyId;
    studyConfigurationManager.lockAndUpdate(
            storageStudyId,
            current -> getStudyConfiguration(studyId, studyConfigurationManager, new QueryOptions(), sessionId));
}
/**
 * Propagates the state held in a storage {@link StudyConfiguration} back to Catalog:
 * <ol>
 *   <li>cohorts listed in the calculated stats are set to READY;</li>
 *   <li>cohorts listed in the invalid stats are set to INVALID;</li>
 *   <li>files listed as indexed get their index status set to READY;</li>
 *   <li>catalog files marked READY but not listed as indexed are reset to TRANSFORMED or NONE;</li>
 *   <li>catalog files marked LOADING or INDEXING whose last LOAD batch operation ended in ERROR,
 *       or that have no LOAD operation at all, are reset to TRANSFORMED or NONE.</li>
 * </ol>
 *
 * @param studyConfiguration storage-side configuration to read from
 * @param options            currently unused; kept for interface compatibility
 * @param sessionId          catalog session used for every lookup and update
 * @throws CatalogException if any catalog lookup or update fails
 */
public void updateCatalogFromStudyConfiguration(StudyConfiguration studyConfiguration, QueryOptions options, String sessionId)
        throws CatalogException {
    logger.info("Updating StudyConfiguration {}", studyConfiguration.getStudyId());
    long studyId = studyConfiguration.getStudyId();

    //Check if any cohort stat has been updated
    updateCohortsStatus(studyId, studyConfiguration.getCalculatedStats(), Cohort.CohortStatus.READY, sessionId);
    //Check if any cohort stat has been invalidated
    updateCohortsStatus(studyId, studyConfiguration.getInvalidStats(), Cohort.CohortStatus.INVALID, sessionId);

    markIndexedFilesAsReady(studyConfiguration, sessionId);

    // Projection shared by the two "reset" passes below.
    QueryOptions queryOptions = new QueryOptions(QueryOptions.INCLUDE,
            Arrays.asList(FileDBAdaptor.QueryParams.ID.key(),
                    FileDBAdaptor.QueryParams.NAME.key(),
                    FileDBAdaptor.QueryParams.INDEX.key()));
    resetReadyFilesNotIndexed(studyConfiguration, queryOptions, sessionId);
    resetFailedOngoingFiles(studyConfiguration, queryOptions, sessionId);
}

/** Sets the given status on every listed cohort whose catalog status is missing or different. */
private void updateCohortsStatus(long studyId, Collection<Integer> cohortIds, String status, String sessionId)
        throws CatalogException {
    if (cohortIds.isEmpty()) {
        return;
    }
    Query query = new Query(CohortDBAdaptor.QueryParams.ID.key(), new ArrayList<>(cohortIds));
    for (Cohort cohort : catalogManager.getAllCohorts(studyId, query, new QueryOptions(), sessionId).getResult()) {
        String currentStatus = cohort.getStatus() == null ? null : cohort.getStatus().getName();
        if (!status.equals(currentStatus)) {
            // Log the previous *status* (not the cohort stats map) so the message is meaningful.
            logger.debug("Cohort \"{}\":{} change status from {} to {}",
                    cohort.getName(), cohort.getId(), currentStatus, status);
            catalogManager.getCohortManager().setStatus(String.valueOf(cohort.getId()), status,
                    "Update status from Storage", sessionId);
        }
    }
}

/** Marks as READY in catalog every file that the study configuration lists as indexed. */
private void markIndexedFilesAsReady(StudyConfiguration studyConfiguration, String sessionId) throws CatalogException {
    if (studyConfiguration.getIndexedFiles().isEmpty()) {
        return;
    }
    Query query = new Query(FileDBAdaptor.QueryParams.ID.key(), new ArrayList<>(studyConfiguration.getIndexedFiles()));
    for (File file : catalogManager.getAllFiles(studyConfiguration.getStudyId(), query, new QueryOptions(), sessionId)
            .getResult()) {
        // Work on a non-null index from here on: reading file.getIndex() again would fail for
        // exactly the files that have no index yet.
        // NOTE(review): assumes new FileIndex() carries a non-null status, as the ongoing-files
        // pass also does - confirm.
        final FileIndex index = file.getIndex() == null ? new FileIndex() : file.getIndex();
        String prevStatus = index.getStatus().getName();
        if (!FileIndex.IndexStatus.READY.equals(prevStatus)) {
            logger.debug("File \"{}\":{} change status from {} to {}", file.getName(), file.getId(),
                    prevStatus, FileIndex.IndexStatus.READY);
            index.getStatus().setName(FileIndex.IndexStatus.READY);
            catalogManager.getFileManager().setFileIndex(file.getId(), index, sessionId);
        }
    }
}

/** Resets catalog files that claim to be READY but are not listed as indexed in the study configuration. */
private void resetReadyFilesNotIndexed(StudyConfiguration studyConfiguration, QueryOptions queryOptions, String sessionId)
        throws CatalogException {
    Query query = new Query(FileDBAdaptor.QueryParams.INDEX_STATUS_NAME.key(), FileIndex.IndexStatus.READY);
    // Catalog ids are long while the study configuration stores ints.
    Set<Long> indexedFiles = new HashSet<>();
    studyConfiguration.getIndexedFiles().forEach((e) -> indexedFiles.add(e.longValue()));
    for (File file : catalogManager.getAllFiles(studyConfiguration.getStudyId(), query, queryOptions, sessionId).getResult()) {
        if (!indexedFiles.contains(file.getId())) {
            String newStatus = statusWithoutLoad(file.getIndex());
            logger.info("File \"{}\":{} change status from {} to {}", file.getName(), file.getId(),
                    FileIndex.IndexStatus.READY, newStatus);
            catalogManager.getFileManager()
                    .updateFileIndexStatus(file, newStatus, "Not indexed, regarding StudyConfiguration", sessionId);
        }
    }
}

/** Resets LOADING/INDEXING catalog files whose last LOAD operation failed or does not exist. */
private void resetFailedOngoingFiles(StudyConfiguration studyConfiguration, QueryOptions queryOptions, String sessionId)
        throws CatalogException {
    Query query = new Query(FileDBAdaptor.QueryParams.INDEX_STATUS_NAME.key(), Arrays.asList(
            FileIndex.IndexStatus.LOADING,
            FileIndex.IndexStatus.INDEXING));
    for (File file : catalogManager.getAllFiles(studyConfiguration.getStudyId(), query, queryOptions, sessionId).getResult()) {
        // Find the last LOAD operation over this file (batches are scanned newest first).
        BatchFileOperation loadOperation = null;
        for (int i = studyConfiguration.getBatches().size() - 1; i >= 0; i--) {
            BatchFileOperation op = studyConfiguration.getBatches().get(i);
            if (op.getType().equals(BatchFileOperation.Type.LOAD) && op.getFileIds().contains((int) file.getId())) {
                loadOperation = op;
                break;
            }
        }
        // There is no LOAD operation, or the last one ended in ERROR.
        if (loadOperation == null
                || loadOperation.getStatus().lastEntry().getValue().equals(BatchFileOperation.Status.ERROR)) {
            final FileIndex index = file.getIndex() == null ? new FileIndex() : file.getIndex();
            String prevStatus = index.getStatus().getName();
            String newStatus = statusWithoutLoad(index);
            logger.info("File \"{}\":{} change status from {} to {}", file.getName(), file.getId(),
                    prevStatus, newStatus);
            catalogManager.getFileManager().updateFileIndexStatus(file, newStatus,
                    "Error loading. Reset status to " + newStatus,
                    sessionId);
        }
    }
}

/** Status for a file that is not loaded: TRANSFORMED when a transformed file exists, NONE otherwise. */
private String statusWithoutLoad(FileIndex index) {
    return hasTransformedFile(index) ? FileIndex.IndexStatus.TRANSFORMED : FileIndex.IndexStatus.NONE;
}
/**
 * Tells whether the given index references a transformed file.
 *
 * @param index file index to inspect; must not be {@code null}
 * @return {@code true} when the index has a transformed file whose id is greater than zero
 */
public boolean hasTransformedFile(FileIndex index) {
    if (index.getTransformedFile() == null) {
        return false;
    }
    return index.getTransformedFile().getId() > 0;
}
}