/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.analysis.storage.variant;
import org.apache.commons.lang.StringUtils;
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.tools.variant.converters.ga4gh.Ga4ghVariantConverter;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.manager.variant.operations.StorageOperation;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.catalog.db.api.SampleDBAdaptor;
import org.opencb.opencga.catalog.db.api.StudyDBAdaptor;
import org.opencb.opencga.catalog.exceptions.CatalogAuthorizationException;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.models.*;
import org.opencb.opencga.storage.core.StorageEngineFactory;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBIterator;
import org.opencb.opencga.storage.core.variant.adaptors.VariantSourceDBAdaptor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
/**
 * Runs variant queries against the variant storage of a Catalog study.
 *
 * <p>For every request the study is resolved through the {@link CatalogManager}, the
 * {@link VariantDBAdaptor} for that study is obtained (and cached in this instance), and the
 * samples involved are checked against Catalog before the query is delegated to the adaptor.
 *
 * <p>Adaptors are cached per {@code studyId + "_" + sessionId} and are only released by
 * {@link #close()}, so instances should be closed (e.g. with try-with-resources) once they are no
 * longer needed.
 *
 * @author Jacobo Coll <jacobo167@gmail.com>
 *
 * Created on 18/08/15.
 */
@Deprecated
public class VariantFetcher implements AutoCloseable {

    /** Query option that, when true, makes the query return the samples metadata instead of variants. */
    public static final String SAMPLES_METADATA = "samplesMetadata";
    /** Limit applied to variant queries when the caller does not provide a positive one. */
    public static final int LIMIT_DEFAULT = 1000;
    /** Upper bound for the "limit" option; bigger values are truncated to this one. */
    public static final int LIMIT_MAX = 5000;

    private final CatalogManager catalogManager;
    private final StorageEngineFactory storageEngineFactory;
    private final Logger logger;
    /** Cache of opened adaptors, keyed by {@code studyId + "_" + sessionId}. Emptied by {@link #close()}. */
    private final ConcurrentHashMap<String, VariantDBAdaptor> variantDBAdaptor = new ConcurrentHashMap<>();

    /**
     * Creates a fetcher backed by the given Catalog and storage engine factory.
     *
     * @param catalogManager       Catalog used to resolve studies, files and samples
     * @param storageEngineFactory factory used to obtain the variant storage engines
     */
    public VariantFetcher(CatalogManager catalogManager, StorageEngineFactory storageEngineFactory) {
        this.catalogManager = catalogManager;
        this.storageEngineFactory = storageEngineFactory;
        logger = LoggerFactory.getLogger(VariantFetcher.class);
    }

    /**
     * Ranks the variants matching the query by the given field.
     *
     * @param query        variant query; must reference a study
     * @param queryOptions query options; its "limit" is normalised (default 10, max {@link #LIMIT_MAX})
     * @param rank         field to rank by, optionally followed by {@code ":<order>"} (e.g. {@code gene:-1})
     * @param sessionId    user session
     * @return result of {@link VariantDBAdaptor#rank}
     * @throws Exception if the study cannot be resolved or the query fails
     */
    public QueryResult rank(Query query, QueryOptions queryOptions, String rank, String sessionId)
            throws Exception {
        return getVariantsPerStudy(getMainStudyId(query, sessionId), query, queryOptions, false, null, rank, 0, null, sessionId);
    }

    /**
     * Groups the variants matching the query by the given field.
     *
     * @param query        variant query; must reference a study
     * @param queryOptions query options
     * @param groupBy      field to group by
     * @param sessionId    user session
     * @return result of {@link VariantDBAdaptor#groupBy}
     * @throws Exception if the study cannot be resolved or the query fails
     */
    public QueryResult groupBy(Query query, QueryOptions queryOptions, String groupBy, String sessionId)
            throws Exception {
        return getVariantsPerStudy(getMainStudyId(query, sessionId), query, queryOptions, false, groupBy, null, 0, null, sessionId);
    }

    /**
     * Fetches variants using the default (OpenCB) model.
     *
     * @param query        variant query; must reference a study
     * @param queryOptions query options; any "model" entry is removed, and "limit" is normalised
     * @param sessionId    user session
     * @return the matching variants
     * @throws Exception if the study cannot be resolved or the query fails
     */
    public QueryResult<Variant> get(Query query, QueryOptions queryOptions, String sessionId)
            throws Exception {
        // Make sure a stale "model" option does not trigger the GA4GH conversion.
        queryOptions.remove("model");
        return getVariantsPerStudy(getMainStudyId(query, sessionId), query, queryOptions, false, null, null, 0, null, sessionId);
    }

    /**
     * Fetches variants converted to the GA4GH model.
     *
     * @param query        variant query; must reference a study
     * @param queryOptions query options; "model" is set to "ga4gh", and "limit" is normalised
     * @param sessionId    user session
     * @return the matching variants as GA4GH variants
     * @throws Exception if the study cannot be resolved or the query fails
     */
    public QueryResult<org.ga4gh.models.Variant> getGa4gh(Query query, QueryOptions queryOptions, String sessionId)
            throws Exception {
        queryOptions.put("model", "ga4gh");
        return getVariantsPerStudy(getMainStudyId(query, sessionId), query, queryOptions, false, null, null, 0, null, sessionId);
    }

    /**
     * Returns the samples involved in the query, grouped by study id, after checking permissions.
     *
     * @param studyId      study whose variant storage is used
     * @param query        variant query; may be extended with the returned samples
     * @param queryOptions query options
     * @param sessionId    user session
     * @return samples per study id
     * @throws CatalogException       if Catalog fails or the user cannot read all requested samples
     * @throws StorageEngineException if the variant adaptor cannot be created
     * @throws IOException            declared for backwards compatibility
     */
    public Map<Long, List<Sample>> getSamplesMetadata(long studyId, Query query, QueryOptions queryOptions, String sessionId)
            throws CatalogException, StorageEngineException, IOException {
        VariantDBAdaptor variantDBAdaptor = getVariantDBAdaptor(studyId, sessionId);
        return checkSamplesPermissions(query, queryOptions, variantDBAdaptor, sessionId);
    }

    /**
     * Reads the storage {@link StudyConfiguration} of a study.
     *
     * @param studyId   study id; narrowed to {@code int} for the storage layer
     * @param options   query options forwarded to the study configuration manager
     * @param sessionId user session
     * @return first result returned by the study configuration manager
     * @throws CatalogException       if Catalog fails
     * @throws StorageEngineException if the variant adaptor cannot be created
     * @throws IOException            declared for backwards compatibility
     */
    public StudyConfiguration getStudyConfiguration(long studyId, QueryOptions options, String sessionId)
            throws CatalogException, StorageEngineException, IOException {
        VariantDBAdaptor variantDBAdaptor = getVariantDBAdaptor(studyId, sessionId); // DB con closed by VariantFetcher
        return variantDBAdaptor.getStudyConfigurationManager().getStudyConfiguration((int) studyId, options).first();
    }

    /**
     * Queries the variants of one indexed variant file.
     *
     * @param region       region filter, added to {@code queryOptions}
     * @param histogram    if true, compute a frequency histogram instead of fetching variants
     * @param groupBy      field to group by, or empty
     * @param interval     histogram interval
     * @param fileId       file identifier, resolved through Catalog
     * @param sessionId    user session
     * @param queryOptions options holding both the variant filters and the query options
     * @return query result
     * @throws Exception if the file is not indexed, is not a variant file, or the query fails
     */
    public QueryResult getVariantsPerFile(String region, boolean histogram, String groupBy, int interval, String fileId, String sessionId, QueryOptions queryOptions)
            throws Exception {
        long fileIdNum = catalogManager.getFileId(fileId, null, sessionId);
        File file = catalogManager.getFile(fileIdNum, sessionId).first();

        if (file.getIndex() == null || !file.getIndex().getStatus().getName().equals(FileIndex.IndexStatus.READY)) {
            throw new Exception("File {id:" + file.getId() + " name:'" + file.getName() + "'} " +
                    " is not an indexed file.");
        }
        if (!file.getBioformat().equals(File.Bioformat.VARIANT)) {
            throw new Exception("File {id:" + file.getId() + " name:'" + file.getName() + "'} " +
                    " is not a Variant file.");
        }

        long studyId = catalogManager.getStudyIdByFileId(file.getId());
        return getVariantsPerStudy(studyId, region, histogram, groupBy, interval, fileIdNum, sessionId, queryOptions);
    }

    /**
     * Queries the variants of a study without restricting to a file.
     *
     * @param studyId      study id
     * @param region       region filter, added to {@code queryOptions}
     * @param histogram    if true, compute a frequency histogram instead of fetching variants
     * @param groupBy      field to group by, or empty
     * @param interval     histogram interval
     * @param sessionId    user session
     * @param queryOptions options holding both the variant filters and the query options
     * @return query result
     * @throws Exception if the query fails
     */
    public QueryResult getVariantsPerStudy(long studyId, String region, boolean histogram, String groupBy, int interval, String sessionId, QueryOptions queryOptions) throws Exception {
        return getVariantsPerStudy(studyId, region, histogram, groupBy, interval, null, sessionId, queryOptions);
    }

    /**
     * Queries the variants of a study, optionally restricted to a file.
     *
     * <p>The variant {@link Query} is extracted from {@code queryOptions} with
     * {@link #getVariantQuery(QueryOptions)} after adding the region to it.
     *
     * @param studyId      study id
     * @param regionStr    region filter, added to {@code queryOptions}
     * @param histogram    if true, compute a frequency histogram instead of fetching variants
     * @param groupBy      field to group by, or empty
     * @param interval     histogram interval
     * @param fileIdNum    file id filter, or null
     * @param sessionId    user session
     * @param queryOptions options holding both the variant filters and the query options
     * @return query result
     * @throws Exception if the query fails
     */
    public QueryResult getVariantsPerStudy(long studyId, String regionStr, boolean histogram, String groupBy, int interval, Long fileIdNum, String sessionId, QueryOptions queryOptions)
            throws Exception {
        queryOptions.add(VariantDBAdaptor.VariantQueryParams.REGION.key(), regionStr);
        return getVariantsPerStudy(studyId, getVariantQuery(queryOptions), queryOptions, histogram, groupBy, null, interval, fileIdNum, sessionId);
    }

    /**
     * Main query method. Depending on the arguments it, in this order of precedence: computes a
     * histogram, groups by a field, ranks by a field, returns the samples metadata
     * (option {@link #SAMPLES_METADATA}), or fetches the variants.
     *
     * <p>The adaptor used is the cached one from {@link #getVariantDBAdaptor(long, String)}; it is
     * intentionally left open here and released by {@link #close()}.
     *
     * @param studyId      study whose variant storage is queried; also used as default study filter
     * @param query        variant query; modified with the file, study and returned samples filters
     * @param queryOptions query options; "limit" is normalised for rank and plain queries
     * @param histogram    if true, compute a frequency histogram; requires exactly one region
     * @param groupBy      field to group by, or empty
     * @param rank         field to rank by, optionally followed by {@code ":<order>"}, or empty
     * @param interval     histogram interval
     * @param fileIdNum    file id filter, or null
     * @param sessionId    user session
     * @return query result
     * @throws IllegalArgumentException if a histogram is requested with a number of regions other
     *                                  than one, or if {@code rank} contains no field name
     * @throws Exception                if Catalog or the storage engine fail
     */
    public QueryResult getVariantsPerStudy(long studyId, Query query, QueryOptions queryOptions,
                                           boolean histogram, String groupBy, String rank, int interval, Long fileIdNum, String sessionId)
            throws Exception {
        QueryResult result;

        logger.debug("queryVariants = {}", query.toJson());
        //TODO: Check files and studies exists

        if (fileIdNum != null) {
            query.put(VariantDBAdaptor.VariantQueryParams.FILES.key(), fileIdNum);
        }
        if (!query.containsKey(VariantDBAdaptor.VariantQueryParams.STUDIES.key())) {
            query.put(VariantDBAdaptor.VariantQueryParams.STUDIES.key(), studyId);
        }

        // TODO: Check returned files

        // Do NOT close this adaptor here: it is shared through the cache and closing it would leave a
        // closed instance in the cache for every later call. It is closed by close().
        VariantDBAdaptor dbAdaptor = getVariantDBAdaptor(studyId, sessionId);

        final Map<Long, List<Sample>> samplesMap = checkSamplesPermissions(query, queryOptions, dbAdaptor, sessionId);

        String[] regions = getRegions(query);

        if (histogram) {
            if (regions.length != 1) {
                throw new IllegalArgumentException("Unable to calculate histogram with " + regions.length + " regions.");
            }
            result = dbAdaptor.getFrequency(query, Region.parseRegion(regions[0]), interval);
        } else if (StringUtils.isNotEmpty(groupBy)) {
            result = dbAdaptor.groupBy(query, groupBy, queryOptions);
        } else if (StringUtils.isNotEmpty(rank)) {
            int limit = addDefaultLimit(queryOptions, LIMIT_MAX, 10);
            boolean asc = false;
            if (rank.contains(":")) {  //  eg. gene:-1
                String[] arr = rank.split(":");
                // String.split drops trailing empty strings, so "gene:" yields one element and ":" none.
                if (arr.length == 0) {
                    throw new IllegalArgumentException("Malformed rank '" + rank + "'. Expected 'field' or 'field:order'.");
                }
                rank = arr[0];
                if (arr.length > 1 && arr[1].endsWith("-1")) {
                    asc = true;
                }
            }
            result = dbAdaptor.rank(query, rank, limit, asc);
        } else if (queryOptions.getBoolean(SAMPLES_METADATA)) {
            List<ObjectMap> list = samplesMap.entrySet().stream()
                    .map(entry -> new ObjectMap("id", entry.getKey()).append("samples", entry.getValue()))
                    .collect(Collectors.toList());
            result = new QueryResult("getVariantSamples", 0, list.size(), list.size(), "", "", list);
        } else {
            addDefaultLimit(queryOptions);
            logger.debug("getVariants {}, {}", query, queryOptions);
            result = dbAdaptor.get(query, queryOptions);
            logger.debug("gotVariants {}, {}, in {}ms", result.getNumResults(), result.getNumTotalResults(), result.getDbTime());
            if (queryOptions.getString("model", "opencb").equalsIgnoreCase("ga4gh")) {
                result = convertToGA4GH(result);
            }
        }
        return result;
    }

    /**
     * Opens an iterator over the variants matching the query. No default limit is applied.
     *
     * @param query        variant query; must reference a study
     * @param queryOptions query options
     * @param sessionId    user session
     * @return iterator created by the variant adaptor
     * @throws CatalogException       if Catalog fails or the user cannot read all requested samples
     * @throws StorageEngineException if the variant adaptor cannot be created
     */
    public VariantDBIterator iterator(Query query, QueryOptions queryOptions, String sessionId) throws CatalogException, StorageEngineException {
        long studyId = getMainStudyId(query, sessionId);
        VariantDBAdaptor dbAdaptor = getVariantDBAdaptor(studyId, sessionId);
        checkSamplesPermissions(query, queryOptions, dbAdaptor, sessionId);
        // TODO: Check returned files
        return dbAdaptor.iterator(query, queryOptions);
    }

    /**
     * Counts the variants of a file. If the filters do not select a study, the study owning the
     * file is used.
     *
     * @param fileId    file id used as file filter
     * @param params    options holding the variant filters
     * @param sessionId user session
     * @return number of matching variants
     * @throws CatalogException       if Catalog fails
     * @throws StorageEngineException if the variant adaptor cannot be created
     * @throws IOException            declared for backwards compatibility
     */
    public QueryResult<Long> countByFile(long fileId, QueryOptions params, String sessionId) throws CatalogException, StorageEngineException, IOException {
        Query query = getVariantQuery(params);
        if (getMainStudyId(query, VariantDBAdaptor.VariantQueryParams.STUDIES.key(), sessionId) == null) {
            long studyId = catalogManager.getStudyIdByFileId(fileId);
            query.put(VariantDBAdaptor.VariantQueryParams.STUDIES.key(), studyId);
        }
        query.put(VariantDBAdaptor.VariantQueryParams.FILES.key(), fileId);
        return count(query, sessionId);
    }

    /**
     * Counts the variants of a study. If the filters do not select a study, {@code studyId} is used.
     *
     * @param studyId   default study
     * @param params    options holding the variant filters
     * @param sessionId user session
     * @return number of matching variants
     * @throws CatalogException       if Catalog fails
     * @throws StorageEngineException if the variant adaptor cannot be created
     * @throws IOException            declared for backwards compatibility
     */
    public QueryResult<Long> count(long studyId, QueryOptions params, String sessionId) throws CatalogException, StorageEngineException, IOException {
        Query query = getVariantQuery(params);
        if (getMainStudyId(query, VariantDBAdaptor.VariantQueryParams.STUDIES.key(), sessionId) == null) {
            query.put(VariantDBAdaptor.VariantQueryParams.STUDIES.key(), studyId);
        }
        return count(query, sessionId);
    }

    /**
     * Counts the variants matching the query.
     *
     * @param query     variant query; must reference a study
     * @param sessionId user session
     * @return number of matching variants
     * @throws CatalogException       if Catalog fails
     * @throws StorageEngineException if the variant adaptor cannot be created
     * @throws IOException            declared for backwards compatibility
     */
    public QueryResult<Long> count(Query query, String sessionId) throws CatalogException, StorageEngineException, IOException {
        long studyId = getMainStudyId(query, sessionId);
        // Closed by Variant Fetcher
        VariantDBAdaptor dbAdaptor = getVariantDBAdaptor(studyId, sessionId);
        // TODO: Check permissions?
        return dbAdaptor.count(query);
    }

    /**
     * Normalises the "limit" option using {@link #LIMIT_MAX} and {@link #LIMIT_DEFAULT}.
     *
     * @param queryOptions options to update
     * @return the limit written into {@code queryOptions}
     */
    protected int addDefaultLimit(QueryOptions queryOptions) {
        return addDefaultLimit(queryOptions, LIMIT_MAX, LIMIT_DEFAULT);
    }

    /**
     * Normalises the "limit" option: a missing or non-positive limit becomes {@code limitDefault},
     * and a limit above {@code limitMax} is truncated to {@code limitMax}.
     *
     * @param queryOptions options to update
     * @param limitMax     maximum accepted limit
     * @param limitDefault limit used when none (or a non-positive one) is given
     * @return the limit written into {@code queryOptions}
     */
    protected int addDefaultLimit(QueryOptions queryOptions, int limitMax, int limitDefault) {
        // Add default limit
        int limit = queryOptions.getInt("limit", -1);
        if (limit > limitMax) {
            logger.info("Unable to return more than {} variants. Change limit from {} to {}", limitMax, limit, limitMax);
        }
        limit = (limit > 0) ? Math.min(limit, limitMax) : limitDefault;
        queryOptions.put("limit", limit);
        return limit;
    }

    /**
     * Splits the comma separated region filter of the query.
     *
     * @param query variant query
     * @return the regions, or an empty array if the query has no region filter
     */
    protected String[] getRegions(Query query) {
        String regionStr = query.getString(VariantDBAdaptor.VariantQueryParams.REGION.key());
        if (StringUtils.isEmpty(regionStr)) {
            return new String[0];
        }
        return regionStr.split(",");
    }

    /**
     * Resolves the study the query refers to, looking first at the studies filter and then at the
     * returned studies.
     *
     * @param query     variant query
     * @param sessionId user session
     * @return the study id; never null
     * @throws CatalogException         if Catalog fails resolving the study
     * @throws IllegalArgumentException if no study can be obtained from the query
     */
    public Long getMainStudyId(Query query, String sessionId) throws CatalogException {
        Long id = getMainStudyId(query, VariantDBAdaptor.VariantQueryParams.STUDIES.key(), sessionId);
        if (id == null) {
            id = getMainStudyId(query, VariantDBAdaptor.VariantQueryParams.RETURNED_STUDIES.key(), sessionId);
        }
        if (id == null) {
            throw new IllegalArgumentException("Missing StudyId. Unable to get any variant!");
        }
        return id;
    }

    /**
     * Resolves the first non-negated (not starting with "!") study listed under {@code key}.
     *
     * @param query     variant query
     * @param key       query parameter holding the list of studies
     * @param sessionId user session
     * @return the study id, or null if the key is missing, every study is negated, or Catalog
     *         returns a non-positive id for the first non-negated study
     * @throws CatalogException if Catalog fails resolving the study
     */
    private Long getMainStudyId(Query query, String key, String sessionId) throws CatalogException {
        if (query.containsKey(key)) {
            for (String id : query.getAsStringList(key)) {
                if (!id.startsWith("!")) {
                    long studyId = catalogManager.getStudyId(id, sessionId);
                    return studyId > 0 ? studyId : null;
                }
            }
        }
        return null;
    }

    /**
     * Obtains from Catalog the samples involved in the query, grouped by study id.
     *
     * <p>If the query defines the returned samples, every one of them must be readable by the user.
     * Otherwise, all the samples that Catalog returns for the returned studies are used, and they
     * are written into the query as the returned samples.
     *
     * @param query        variant query; may be extended with the returned samples
     * @param queryOptions query options
     * @param dbAdaptor    adaptor used to resolve the returned samples and studies
     * @param sessionId    user session
     * @return samples per study id
     * @throws CatalogException if Catalog fails, or (as {@link CatalogAuthorizationException}) if
     *                          Catalog returns fewer samples than requested
     */
    protected Map<Long, List<Sample>> checkSamplesPermissions(Query query, QueryOptions queryOptions, VariantDBAdaptor dbAdaptor, String sessionId) throws CatalogException {
        final Map<Long, List<Sample>> samplesMap = new HashMap<>();
        if (query.containsKey(VariantDBAdaptor.VariantQueryParams.RETURNED_SAMPLES.key())) {
            Map<Integer, List<Integer>> samplesToReturn = dbAdaptor.getReturnedSamples(query, queryOptions);
            for (Map.Entry<Integer, List<Integer>> entry : samplesToReturn.entrySet()) {
                if (!entry.getValue().isEmpty()) {
                    QueryResult<Sample> samplesQueryResult = catalogManager.getAllSamples(entry.getKey(),
                            new Query(SampleDBAdaptor.QueryParams.ID.key(), entry.getValue()),
                            new QueryOptions("exclude", Arrays.asList("projects.studies.samples.annotationSets",
                                    "projects.studies.samples.attributes"))
                            , sessionId);
                    // Fewer results than requested ids is treated as a permission problem.
                    if (samplesQueryResult.getNumResults() != entry.getValue().size()) {
                        throw new CatalogAuthorizationException("Permission denied. User " + catalogManager.getUserIdBySessionId(sessionId)
                                + " can't read all the requested samples");
                    }
                    samplesMap.put((long) entry.getKey(), samplesQueryResult.getResult());
                }
            }
        } else {
            logger.debug("Missing returned samples! Obtaining returned samples from catalog.");
            List<Integer> returnedStudies = dbAdaptor.getReturnedStudies(query, queryOptions);
            List<Study> studies = catalogManager.getAllStudies(new Query(StudyDBAdaptor.QueryParams.ID.key(), returnedStudies),
                    new QueryOptions("include", "projects.studies.id"), sessionId).getResult();
            List<Long> returnedSamples = new LinkedList<>();
            for (Study study : studies) {
                QueryResult<Sample> samplesQueryResult = catalogManager.getAllSamples(study.getId(),
                        new Query(),
                        new QueryOptions("exclude", Arrays.asList("projects.studies.samples.annotationSets",
                                "projects.studies.samples.attributes"))
                        , sessionId);
                samplesQueryResult.getResult().sort((o1, o2) -> Long.compare(o1.getId(), o2.getId()));
                samplesMap.put(study.getId(), samplesQueryResult.getResult());
                samplesQueryResult.getResult().stream().map(Sample::getId).forEach(returnedSamples::add);
            }
            query.append(VariantDBAdaptor.VariantQueryParams.RETURNED_SAMPLES.key(), returnedSamples);
        }
        return samplesMap;
    }

    /**
     * Closes and forgets every cached adaptor. A failure closing one adaptor is logged and does not
     * prevent closing the remaining ones.
     *
     * @throws Exception declared by {@link AutoCloseable}
     */
    @Override
    public void close() throws Exception {
        // Keep draining until empty so adaptors registered while closing are released too.
        while (!this.variantDBAdaptor.isEmpty()) {
            for (String key : this.variantDBAdaptor.keySet()) {
                VariantDBAdaptor adaptor = this.variantDBAdaptor.remove(key);
                if (adaptor != null) {
                    try {
                        adaptor.close();
                    } catch (Exception e) {
                        logger.error("Issue closing VariantDBadaptor", e);
                    }
                }
            }
        }
    }

    /**
     * Returns the adaptor for the variant storage of a study, creating and caching it on first use.
     * The returned adaptor is owned by this fetcher and closed by {@link #close()}.
     *
     * @param studyId   study id
     * @param sessionId user session; part of the cache key
     * @return the cached or newly created adaptor
     * @throws CatalogException       if the data store of the study cannot be read from Catalog
     * @throws StorageEngineException if the adaptor cannot be created
     */
    protected VariantDBAdaptor getVariantDBAdaptor(long studyId, String sessionId) throws CatalogException, StorageEngineException {
        String key = studyId + "_" + sessionId;
        VariantDBAdaptor cached = this.variantDBAdaptor.get(key);
        if (cached != null) {
            return cached;
        }
        // Set new key
        DataStore dataStore = StorageOperation.getDataStore(catalogManager, studyId, File.Bioformat.VARIANT, sessionId);
        String storageEngine = dataStore.getStorageEngine();
        String dbName = dataStore.getDbName();
        try {
            // Return the mapped value directly instead of a second lookup, which could miss the entry
            // if close() removes it in between.
            return this.variantDBAdaptor.computeIfAbsent(key, (str) -> {
                try {
                    return storageEngineFactory.getVariantStorageEngine(storageEngine).getDBAdaptor(dbName);
                } catch (ClassNotFoundException | IllegalAccessException | InstantiationException | StorageEngineException e) {
                    // Checked exceptions cannot cross the mapping function; unwrapped below.
                    throw new IllegalStateException("Unable to get VariantDBAdaptor", e);
                }
            });
        } catch (IllegalStateException e) {
            throw new StorageEngineException("Problems creating VariantDBAdaptor", e);
        }
    }

    /**
     * Converts a result of OpenCB variants into a result of GA4GH variants, keeping the result
     * metadata (id, times, counts and messages).
     *
     * @param result result to convert
     * @return equivalent result holding GA4GH variants
     */
    protected QueryResult<org.ga4gh.models.Variant> convertToGA4GH(QueryResult<Variant> result) {
        Ga4ghVariantConverter<org.ga4gh.models.Variant> converter = Ga4ghVariantConverter.newAvroConverter(false, null);
        List<org.ga4gh.models.Variant> gaVariants = converter.apply(result.getResult());
        return new QueryResult<>(result.getId(), result.getDbTime(), result.getNumResults(), result.getNumTotalResults(),
                result.getWarningMsg(), result.getErrorMsg(), gaVariants);
    }

    /**
     * Builds a variant {@link Query} with the entries of {@code queryOptions} whose key is one of
     * the {@link VariantDBAdaptor.VariantQueryParams}.
     *
     * @param queryOptions options holding variant filters
     * @return new query with the variant filters found
     */
    public static Query getVariantQuery(QueryOptions queryOptions) {
        Query query = new Query();
        for (VariantDBAdaptor.VariantQueryParams queryParams : VariantDBAdaptor.VariantQueryParams.values()) {
            String key = queryParams.key();
            if (queryOptions.containsKey(key)) {
                query.put(key, queryOptions.get(key));
            }
        }
        return query;
    }

    /**
     * Returns the variant source adaptor of a study, obtained from its cached variant adaptor.
     *
     * @param studyId   study id
     * @param sessionId user session
     * @return variant source adaptor
     * @throws CatalogException       if the data store of the study cannot be read from Catalog
     * @throws StorageEngineException if the variant adaptor cannot be created
     */
    public VariantSourceDBAdaptor getSourceDBAdaptor(int studyId, String sessionId) throws CatalogException, StorageEngineException {
        return getVariantDBAdaptor(studyId, sessionId).getVariantSourceDBAdaptor();
    }
}