package org.opencb.opencga.storage.core.manager.variant.operations;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.core.Region;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.catalog.models.DataStore;
import org.opencb.opencga.catalog.models.File;
import org.opencb.opencga.catalog.models.Job;
import org.opencb.opencga.catalog.models.Project;
import org.opencb.opencga.core.common.TimeUtils;
import org.opencb.opencga.core.common.UriUtils;
import org.opencb.opencga.storage.core.StorageEngineFactory;
import org.opencb.opencga.storage.core.config.StorageConfiguration;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.manager.models.StudyInfo;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor.VariantQueryParams;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptorUtils;
import org.opencb.opencga.storage.core.variant.annotation.DefaultVariantAnnotationManager;
import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager;
import org.opencb.opencga.storage.core.variant.annotation.annotators.AbstractCellBaseVariantAnnotator;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Storage operation that annotates the variants of a project, or of a set of studies,
 * by delegating to the {@link VariantStorageEngine} configured for their variant data store.
 *
 * Created on 24/11/16.
 *
 * @author Jacobo Coll <jacobo167@gmail.com>
 */
public class VariantAnnotationStorageOperation extends StorageOperation {

    /**
     * Creates the operation using the storage engine factory obtained from the given storage configuration.
     *
     * @param catalogManager       Catalog manager used to resolve projects, studies and files
     * @param storageConfiguration Storage configuration used to obtain the {@link StorageEngineFactory}
     */
    public VariantAnnotationStorageOperation(CatalogManager catalogManager, StorageConfiguration storageConfiguration) {
        super(catalogManager, StorageEngineFactory.get(storageConfiguration),
                LoggerFactory.getLogger(VariantAnnotationStorageOperation.class));
    }

    /**
     * Annotates the variants of a project or of a list of studies.
     * <p>
     * When {@code studyInfos} is null or empty, the project referenced by {@code projectStr} is annotated.
     * Otherwise all the given studies are annotated together; they must share the same variant data store
     * and the same organism. An unset organism is accepted as long as it is unset for every study, in which
     * case the default species and assembly are used unless already present in the options.
     * <p>
     * The job status is written to the output directory when the operation starts, completes or fails.
     *
     * @param projectStr Project to annotate. Only used when {@code studyInfos} is null or empty
     * @param studyInfos Studies to annotate. May be null or empty to annotate at project level
     * @param query      Query selecting the variants to annotate. It is copied, never modified
     * @param outdirStr  Output directory. It is validated with {@code outdirMustBeEmpty}
     * @param sessionId  Session id used for the catalog calls
     * @param options    Annotation options. May be null, in which case an empty map is used
     * @return Files returned by {@code copyResults} when a catalog output directory was resolved,
     *         or an empty list otherwise
     * @throws CatalogException       declared by the signature; failures during the operation are wrapped
     *                                in a {@link StorageEngineException}
     * @throws StorageEngineException if any error occurs once the job has been marked as running
     * @throws URISyntaxException     if {@code outdirStr} can not be converted into a directory URI
     * @throws IOException            declared by the signature for the steps run before the annotation starts
     */
    public List<File> annotateVariants(@Nullable String projectStr, @Nullable List<StudyInfo> studyInfos, Query query, String outdirStr,
                                       String sessionId, ObjectMap options)
            throws CatalogException, StorageEngineException, URISyntaxException, IOException {
        if (options == null) {
            options = new ObjectMap();
        }

        // Outdir must be empty
        URI outdirUri = UriUtils.createDirectoryUri(outdirStr);
        final Path outdir = Paths.get(outdirUri);
        outdirMustBeEmpty(outdir, options);

        List<File> newFiles;
        Thread hook = buildHook(outdir);
        writeJobStatus(outdir, new Job.JobStatus(Job.JobStatus.RUNNING, "Job has just started"));
        Runtime.getRuntime().addShutdownHook(hook);
        // Up to this point, catalog has not been modified
        try {
            final List<Long> studyIds;
            final String studyStr;
            final String alias;
            final DataStore dataStore;
            final Project.Organism organism;

            if (studyInfos == null || studyInfos.isEmpty()) {
                // Project level annotation: no study filter is applied.
                long projectId = catalogManager.getProjectId(projectStr, sessionId);
                Project project = catalogManager.getProject(projectId, null, sessionId).first();
                studyStr = null;
                alias = project.getAlias();
                organism = project.getOrganism();
                dataStore = getDataStoreByProjectId(catalogManager, projectId, File.Bioformat.VARIANT, sessionId);
                studyIds = Collections.emptyList();
            } else {
                // The first study is the reference that every other study is compared against.
                StudyInfo info = studyInfos.get(0);
                if (studyInfos.size() == 1) {
                    studyStr = String.valueOf(info.getStudy().getId());
                    alias = info.getStudyAlias();
                } else {
                    studyStr = null;
                    alias = info.getProjectAlias();
                }
                dataStore = info.getDataStores().get(File.Bioformat.VARIANT);
                organism = info.getOrganism();
                studyIds = studyInfos.stream().map(StudyInfo::getStudyId).collect(Collectors.toList());

                for (StudyInfo other : studyInfos.subList(1, studyInfos.size())) {
                    if (!dataStore.equals(other.getDataStores().get(File.Bioformat.VARIANT))) {
                        throw new CatalogException("Can't annotate studies from different databases");
                    }
                    // Null-safe comparison: the organism may be unset (see the defaults applied below),
                    // so it can not be dereferenced unconditionally.
                    Project.Organism otherOrganism = other.getOrganism();
                    boolean sameOrganism = organism == null ? otherOrganism == null : organism.equals(otherOrganism);
                    if (!sameOrganism) {
                        throw new CatalogException("Can't annotate studies with different organisms");
                    }
                }
            }

            String outputFileName = options.getString(DefaultVariantAnnotationManager.FILE_NAME);
            if (StringUtils.isEmpty(outputFileName)) {
                outputFileName = buildOutputFileName(alias, query);
            }

            Long catalogOutDirId = getCatalogOutdirId(studyStr, options, sessionId);

            Query annotationQuery = new Query(query);
            if (!options.getBoolean(VariantAnnotationManager.OVERWRITE_ANNOTATIONS, false)) {
                // Unless overwriting, only select variants without annotation.
                annotationQuery.put(VariantQueryParams.ANNOTATION_EXISTS.key(), false);
            }
            if (!studyIds.isEmpty()) {
                annotationQuery.put(VariantQueryParams.STUDIES.key(), studyIds);
            }

            QueryOptions annotationOptions = new QueryOptions(options)
                    .append(DefaultVariantAnnotationManager.OUT_DIR, outdirUri.getPath());
            annotationOptions.put(DefaultVariantAnnotationManager.FILE_NAME, outputFileName);

            String loadFileStr = options.getString(VariantAnnotationManager.LOAD_FILE);
            if (StringUtils.isNotEmpty(loadFileStr)) {
                // If the file to load does not exist in the local file system, resolve it as a catalog file.
                if (!Paths.get(UriUtils.createUri(loadFileStr)).toFile().exists()) {
                    long fileId = catalogManager.getFileId(loadFileStr, studyStr, sessionId);
                    if (fileId < 0) {
                        throw new CatalogException("File '" + loadFileStr + "' does not exist!");
                    }
                    File loadFile = catalogManager.getFile(fileId, sessionId).first();
                    annotationOptions.put(VariantAnnotationManager.LOAD_FILE, loadFile.getUri().toString());
                }
            }

            if (organism == null) {
                // No organism defined: apply defaults without overriding values given in the options.
                // NOTE(review): the conventional casing is "GRCh37"; confirm that consumers of this value
                // are case-insensitive before changing the literal.
                annotationOptions.putIfAbsent(VariantAnnotationManager.SPECIES, "hsapiens");
                annotationOptions.putIfAbsent(VariantAnnotationManager.ASSEMBLY, "GRch37");
            } else {
                String scientificName = organism.getScientificName();
                scientificName = AbstractCellBaseVariantAnnotator.toCellBaseSpeciesName(scientificName);
                annotationOptions.put(VariantAnnotationManager.SPECIES, scientificName);
                annotationOptions.put(VariantAnnotationManager.ASSEMBLY, organism.getAssembly());
            }

            VariantStorageEngine variantStorageEngine = storageEngineFactory.getVariantStorageEngine(dataStore.getStorageEngine());
            variantStorageEngine.annotate(dataStore.getDbName(), annotationQuery, annotationOptions);

            if (catalogOutDirId != null) {
                newFiles = copyResults(outdir, catalogOutDirId, sessionId);
            } else {
                newFiles = Collections.emptyList();
            }

            writeJobStatus(outdir, new Job.JobStatus(Job.JobStatus.DONE, "Job completed"));
        } catch (Exception e) {
            // Error!
            logger.error("Error annotating variants.", e);
            writeJobStatus(outdir, new Job.JobStatus(Job.JobStatus.ERROR, "Job with error : " + e.getMessage()));
            throw new StorageEngineException("Error annotating variants.", e);
        } finally {
            // Remove hook
            Runtime.getRuntime().removeShutdownHook(hook);
        }

        return newFiles;
    }

    /**
     * Builds the default name of the annotation output file.
     * <p>
     * Regions are collected from the {@code REGION} and {@code CHROMOSOME} query params. When exactly one
     * region is found the name is {@code <alias>.region_<region>.<time>}; otherwise it is {@code <alias>.<time>}.
     *
     * @param alias Prefix of the file name
     * @param query Query from which the regions are read
     * @return Generated file name
     */
    private String buildOutputFileName(String alias, Query query) {
        List<Region> regions = new ArrayList<>();
        if (VariantDBAdaptorUtils.isValidParam(query, VariantQueryParams.REGION)) {
            List<Region> parsed = Region.parseRegions(query.getString(VariantQueryParams.REGION.key()));
            if (parsed != null) {
                regions.addAll(parsed);
            }
        }
        if (VariantDBAdaptorUtils.isValidParam(query, VariantQueryParams.CHROMOSOME)) {
            List<Region> parsed = Region.parseRegions(query.getString(VariantQueryParams.CHROMOSOME.key()));
            if (parsed != null) {
                regions.addAll(parsed);
            }
        }

        // Only a single region is descriptive enough to be part of the file name.
        if (regions.size() == 1) {
            return alias + ".region_" + regions.get(0).toString() + '.' + TimeUtils.getTime();
        } else {
            return alias + '.' + TimeUtils.getTime();
        }
    }
}