/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.storage.core.manager.variant; import org.apache.commons.lang.StringUtils; import org.opencb.biodata.models.core.Region; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.tools.variant.converters.ga4gh.Ga4ghVariantConverter; import org.opencb.biodata.tools.variant.converters.ga4gh.factories.AvroGa4GhVariantFactory; import org.opencb.biodata.tools.variant.converters.ga4gh.factories.ProtoGa4GhVariantFactory; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.core.QueryResult; import org.opencb.opencga.catalog.db.api.SampleDBAdaptor; import org.opencb.opencga.catalog.db.api.StudyDBAdaptor; import org.opencb.opencga.catalog.exceptions.CatalogAuthorizationException; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.catalog.managers.CatalogManager; import org.opencb.opencga.catalog.models.DataStore; import org.opencb.opencga.catalog.models.File; import org.opencb.opencga.catalog.models.Sample; import org.opencb.opencga.catalog.models.Study; import org.opencb.opencga.core.results.VariantQueryResult; import org.opencb.opencga.storage.core.StorageEngineFactory; import org.opencb.opencga.storage.core.StoragePipelineResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.exceptions.VariantSearchException; import org.opencb.opencga.storage.core.manager.StorageManager; import org.opencb.opencga.storage.core.manager.models.StudyInfo; import org.opencb.opencga.storage.core.manager.variant.operations.*; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import org.opencb.opencga.storage.core.variant.adaptors.*; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor.VariantQueryParams; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory.VariantOutputFormat; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.util.*; import java.util.function.Function; public class VariantStorageManager extends StorageManager { public static final int LIMIT_DEFAULT = 1000; public static final int LIMIT_MAX = 5000; private final VariantCatalogQueryUtils catalogUtils; public VariantStorageManager(CatalogManager catalogManager, StorageEngineFactory storageEngineFactory) { super(catalogManager, storageEngineFactory); catalogUtils = new VariantCatalogQueryUtils(catalogManager); } public void clearCache(String studyId, String type, String sessionId) throws CatalogException { String userId = catalogManager.getUserManager().getId(sessionId); } // -------------------------// // Import/Export methods // // -------------------------// /** * Loads the given file into an empty study. * * The input file should have, in the same directory, a metadata file, with the same name ended with * {@link org.opencb.opencga.storage.core.variant.io.VariantExporter#METADATA_FILE_EXTENSION} * * * @param inputUri Variants input file in avro format. * @param study Study where to load the variants * @param sessionId User's session id * @throws CatalogException if there is any error with Catalog * @throws IOException if there is any I/O error * @throws StorageEngineException if there si any error loading the variants */ public void importData(URI inputUri, String study, String sessionId) throws CatalogException, IOException, StorageEngineException { VariantExportStorageOperation op = new VariantExportStorageOperation(catalogManager, storageConfiguration); StudyInfo studyInfo = getStudyInfo(study, Collections.emptyList(), sessionId); op.importData(studyInfo, inputUri, sessionId); } /** * Exports the result of the given query and the associated metadata. * @param outputFile Optional output file. If null or empty, will print into the Standard output. Won't export any metadata. * @param outputFormat Output format. * @param study Study to export * @param sessionId User's session id * @return List of generated files * @throws CatalogException if there is any error with Catalog * @throws IOException If there is any IO error * @throws StorageEngineException If there is any error exporting variants */ public List<URI> exportData(String outputFile, VariantOutputFormat outputFormat, String study, String sessionId) throws StorageEngineException, CatalogException, IOException { Query query = new Query(VariantQueryParams.RETURNED_STUDIES.key(), study) .append(VariantQueryParams.STUDIES.key(), study); return exportData(outputFile, outputFormat, query, new QueryOptions(), sessionId); } /** * Exports the result of the given query and the associated metadata. * @param outputFile Optional output file. If null or empty, will print into the Standard output. Won't export any metadata. * @param outputFormat Variant Output format. * @param query Query with the variants to export * @param queryOptions Query options * @param sessionId User's session id * @return List of generated files * @throws CatalogException if there is any error with Catalog * @throws IOException If there is any IO error * @throws StorageEngineException If there is any error exporting variants */ public List<URI> exportData(String outputFile, VariantOutputFormat outputFormat, Query query, QueryOptions queryOptions, String sessionId) throws CatalogException, IOException, StorageEngineException { if (query == null) { query = new Query(); } VariantExportStorageOperation op = new VariantExportStorageOperation(catalogManager, storageConfiguration); catalogUtils.parseQuery(query, sessionId); Set<Long> studies = checkSamplesPermissions(query, queryOptions, sessionId).keySet(); if (studies.isEmpty()) { studies = catalogUtils.getStudies(query, sessionId); } List<StudyInfo> studyInfos = new ArrayList<>(studies.size()); for (Long study : studies) { studyInfos.add(getStudyInfo(String.valueOf(study), Collections.emptyList(), sessionId)); } return op.exportData(studyInfos, query, outputFormat, outputFile, sessionId, queryOptions); } // --------------------------// // Data Operation methods // // --------------------------// public List<StoragePipelineResult> index(String study, String fileId, String outDir, ObjectMap config, String sessionId) throws CatalogException, StorageEngineException, IOException, URISyntaxException { return index(study, Arrays.asList(fileId.split(",")), outDir, config, sessionId); } public List<StoragePipelineResult> index(String study, List<String> files, String outDir, ObjectMap config, String sessionId) throws CatalogException, StorageEngineException, IOException, URISyntaxException { VariantFileIndexerStorageOperation indexOperation = new VariantFileIndexerStorageOperation(catalogManager, storageConfiguration); QueryOptions options = new QueryOptions(config); StudyInfo studyInfo = getStudyInfo(study, files, sessionId); return indexOperation.index(studyInfo, outDir, options, sessionId); } public void searchIndex(String study, String sessionId) throws StorageEngineException, IOException, VariantSearchException, IllegalAccessException, ClassNotFoundException, InstantiationException, CatalogException { searchIndex(study, new Query(), new QueryOptions(), sessionId); } public void searchIndex(String study, Query query, QueryOptions queryOptions, String sessionId) throws StorageEngineException, IOException, VariantSearchException, IllegalAccessException, InstantiationException, ClassNotFoundException, CatalogException { long studyId = catalogManager.getStudyId(study, sessionId); QueryResult<Study> studyObj = catalogManager.getStudyManager().get(studyId, QueryOptions.empty(), sessionId); VariantStorageEngine variantStorageEngine = storageEngineFactory.getVariantStorageEngine(storageConfiguration.getDefaultStorageEngineId()); variantStorageEngine.searchIndex(studyObj.first().getDataStores().get(File.Bioformat.VARIANT).getDbName(), query, queryOptions); } public void deleteStudy(String studyId, String sessionId) { throw new UnsupportedOperationException(); } public void deleteFile(String fileId, String studyId, String sessionId) { throw new UnsupportedOperationException(); } public List<File> annotate(String study, Query query, String outDir, ObjectMap config, String sessionId) throws StorageEngineException, URISyntaxException, CatalogException, IOException { return annotate(null, study, query, outDir, config, sessionId); } public List<File> annotate(String project, String studies, Query query, String outDir, ObjectMap config, String sessionId) throws CatalogException, StorageEngineException, IOException, URISyntaxException { VariantAnnotationStorageOperation annotOperation = new VariantAnnotationStorageOperation(catalogManager, storageConfiguration); List<Long> studyIds; if (StringUtils.isNotEmpty(studies) || StringUtils.isEmpty(project)) { // Only get specific studies if project is missing, or if some study is given studyIds = catalogManager.getStudyIds(studies, sessionId); } else { studyIds = Collections.emptyList(); } List<StudyInfo> studiesList = new ArrayList<>(studyIds.size()); for (Long studyId : studyIds) { studiesList.add(getStudyInfo(studyId.toString(), Collections.emptyList(), sessionId)); } return annotOperation.annotateVariants(project, studiesList, query, outDir, sessionId, config); } public void deleteAnnotation(String annotationId, String studyId, String sessionId) { throw new UnsupportedOperationException(); } public void stats(String study, List<String> cohorts, String outDir, ObjectMap config, String sessionId) throws CatalogException, StorageEngineException, IOException, URISyntaxException { VariantStatsStorageOperation statsOperation = new VariantStatsStorageOperation(catalogManager, storageConfiguration); long studyId = catalogManager.getStudyId(study, sessionId); statsOperation.calculateStats(studyId, cohorts, outDir, new QueryOptions(config), sessionId); } public void deleteStats(List<String> cohorts, String studyId, String sessionId) { throw new UnsupportedOperationException(); } // ---------------------// // Query methods // // ---------------------// public VariantQueryResult<Variant> get(Query query, QueryOptions queryOptions, String sessionId) throws CatalogException, StorageEngineException, IOException { return secure(query, queryOptions, sessionId, dbAdaptor -> { addDefaultLimit(queryOptions); logger.debug("getVariants {}, {}", query, queryOptions); VariantQueryResult<Variant> result = dbAdaptor.get(query, queryOptions); logger.debug("gotVariants {}, {}, in {}ms", result.getNumResults(), result.getNumTotalResults(), result.getDbTime()); return result; }); } @SuppressWarnings("unchecked") public <T> VariantQueryResult<T> get(Query query, QueryOptions queryOptions, String sessionId, Class<T> clazz) throws CatalogException, IOException, StorageEngineException { VariantQueryResult<Variant> result = get(query, queryOptions, sessionId); List<T> variants; if (clazz == Variant.class) { return (VariantQueryResult<T>) result; } else if (clazz == org.ga4gh.models.Variant.class) { Ga4ghVariantConverter<org.ga4gh.models.Variant> converter = new Ga4ghVariantConverter<>(new AvroGa4GhVariantFactory()); variants = (List<T>) converter.apply(result.getResult()); } else if (clazz == ga4gh.Variants.Variant.class) { Ga4ghVariantConverter<ga4gh.Variants.Variant> converter = new Ga4ghVariantConverter<>(new ProtoGa4GhVariantFactory()); variants = (List<T>) converter.apply(result.getResult()); } else { throw new IllegalArgumentException("Unknown variant format " + clazz); } return new VariantQueryResult<>( result.getId(), result.getDbTime(), result.getNumResults(), result.getNumTotalResults(), result.getWarningMsg(), result.getErrorMsg(), variants, result.getSamples()); } //TODO: GroupByFieldEnum public QueryResult groupBy(String field, Query query, QueryOptions queryOptions, String sessionId) throws CatalogException, StorageEngineException, IOException { return (QueryResult) secure(query, queryOptions, sessionId, dbAdaptor -> dbAdaptor.groupBy(query, field, queryOptions)); } public QueryResult rank(Query query, String field, int limit, boolean asc, String sessionId) throws StorageEngineException, CatalogException, IOException { getDefaultLimit(limit, 30, 10); return (QueryResult) secure(query, null, sessionId, dbAdaptor -> dbAdaptor.rank(query, field, limit, asc)); } public QueryResult<Long> count(Query query, String sessionId) throws CatalogException, StorageEngineException, IOException { return secure(query, new QueryOptions(QueryOptions.EXCLUDE, VariantField.STUDIES), sessionId, dbAdaptor -> dbAdaptor.count(query)); } public QueryResult distinct(Query query, String field, String sessionId) throws CatalogException, IOException, StorageEngineException { return (QueryResult) secure(query, new QueryOptions(QueryOptions.EXCLUDE, VariantField.STUDIES), sessionId, dbAdaptor -> dbAdaptor.distinct(query, field)); } public void facet() { throw new UnsupportedOperationException(); } public VariantQueryResult<Variant> getPhased(Variant variant, String study, String sample, String sessionId, QueryOptions options) throws CatalogException, IOException, StorageEngineException { return secure(new Query(VariantQueryParams.STUDIES.key(), study), options, sessionId, dbAdaptor -> dbAdaptor.getPhased(variant.toString(), study, sample, options, 5000)); } public QueryResult getFrequency(Query query, int interval, String sessionId) throws CatalogException, IOException, StorageEngineException { return (QueryResult) secure(query, null, sessionId, dbAdaptor -> { String[] regions = getRegions(query); if (regions.length != 1) { throw new IllegalArgumentException("Unable to calculate histogram with " + regions.length + " regions."); } return dbAdaptor.getFrequency(query, Region.parseRegion(regions[0]), interval); }); } public VariantIterable iterable(String sessionId) throws CatalogException, StorageEngineException { return (query, options) -> { try { return iterator(query, options, sessionId); } catch (CatalogException | StorageEngineException e) { throw new VariantQueryException("Error getting variant iterator", e); } }; } public VariantDBIterator iterator(String sessionId) throws CatalogException, StorageEngineException { return iterator(null, null, sessionId); } public VariantDBIterator iterator(Query query, QueryOptions queryOptions, String sessionId) throws CatalogException, StorageEngineException { long studyId = catalogUtils.getAnyStudyId(query, sessionId); VariantDBAdaptor dbAdaptor = getVariantDBAdaptor(studyId, sessionId); catalogUtils.parseQuery(query, sessionId); checkSamplesPermissions(query, queryOptions, dbAdaptor, sessionId); VariantDBIterator iterator = dbAdaptor.iterator(query, queryOptions); iterator.addCloseable(dbAdaptor); return iterator; } // public <T> VariantDBIterator<T> iterator(Query query, QueryOptions queryOptions, Class<T> clazz, String sessionId) { // return null; // } public VariantQueryResult<Variant> intersect(Query query, QueryOptions queryOptions, List<String> studyIds, String sessionId) throws CatalogException, IOException, StorageEngineException { Query intersectQuery = new Query(query); intersectQuery.put(VariantQueryParams.STUDIES.key(), String.join(VariantDBAdaptorUtils.AND, studyIds)); return get(intersectQuery, queryOptions, sessionId); } public Map<Long, List<Sample>> getSamplesMetadata(Query query, QueryOptions queryOptions, String sessionId) throws CatalogException, StorageEngineException, IOException { long studyId = catalogUtils.getAnyStudyId(query, sessionId); catalogUtils.parseQuery(query, sessionId); try (VariantDBAdaptor variantDBAdaptor = getVariantDBAdaptor(studyId, sessionId)) { return checkSamplesPermissions(query, queryOptions, variantDBAdaptor, sessionId); } } protected VariantDBAdaptor getVariantDBAdaptor(long studyId, String sessionId) throws CatalogException, StorageEngineException { DataStore dataStore = StorageOperation.getDataStore(catalogManager, studyId, File.Bioformat.VARIANT, sessionId); String storageEngine = dataStore.getStorageEngine(); String dbName = dataStore.getDbName(); try { return storageEngineFactory.getVariantStorageEngine(storageEngine).getDBAdaptor(dbName); } catch (ClassNotFoundException | IllegalAccessException | InstantiationException e) { throw new StorageEngineException("Unable to get VariantDBAdaptor", e); } } // Permission related methods private <R> R secure(Query query, QueryOptions queryOptions, String sessionId, Function<VariantDBAdaptor, R> supplier) throws CatalogException, StorageEngineException, IOException { long studyId = catalogUtils.getAnyStudyId(query, sessionId); catalogUtils.parseQuery(query, sessionId); try (VariantDBAdaptor dbAdaptor = getVariantDBAdaptor(studyId, sessionId)) { checkSamplesPermissions(query, queryOptions, dbAdaptor, sessionId); return supplier.apply(dbAdaptor); } } private Map<Long, List<Sample>> checkSamplesPermissions(Query query, QueryOptions queryOptions, String sessionId) throws CatalogException, StorageEngineException, IOException { long studyId = catalogUtils.getAnyStudyId(query, sessionId); try (VariantDBAdaptor dbAdaptor = getVariantDBAdaptor(studyId, sessionId)) { return checkSamplesPermissions(query, queryOptions, dbAdaptor, sessionId); } } // package protected for test visibility Map<Long, List<Sample>> checkSamplesPermissions(Query query, QueryOptions queryOptions, VariantDBAdaptor dbAdaptor, String sessionId) throws CatalogException { final Map<Long, List<Sample>> samplesMap = new HashMap<>(); Set<VariantField> returnedFields = VariantField.getReturnedFields(queryOptions); if (!returnedFields.contains(VariantField.STUDIES)) { return Collections.emptyMap(); } if (VariantDBAdaptorUtils.isReturnedSamplesDefined(query, returnedFields)) { Map<Integer, List<Integer>> samplesToReturn = dbAdaptor.getReturnedSamples(query, queryOptions); for (Map.Entry<Integer, List<Integer>> entry : samplesToReturn.entrySet()) { if (!entry.getValue().isEmpty()) { QueryResult<Sample> samplesQueryResult = catalogManager.getAllSamples(entry.getKey(), new Query(SampleDBAdaptor.QueryParams.ID.key(), entry.getValue()), new QueryOptions("exclude", Arrays.asList("projects.studies.samples.annotationSets", "projects.studies.samples.attributes")), sessionId); if (samplesQueryResult.getNumResults() != entry.getValue().size()) { throw new CatalogAuthorizationException("Permission denied. User " + catalogManager.getUserIdBySessionId(sessionId) + " can't read all the requested samples"); } samplesMap.put((long) entry.getKey(), samplesQueryResult.getResult()); } else { samplesMap.put((long) entry.getKey(), Collections.emptyList()); } } } else { logger.debug("Missing returned samples! Obtaining returned samples from catalog."); List<Integer> returnedStudies = dbAdaptor.getReturnedStudies(query, queryOptions); List<Study> studies = catalogManager.getAllStudies(new Query(StudyDBAdaptor.QueryParams.ID.key(), returnedStudies), new QueryOptions("include", "projects.studies.id"), sessionId).getResult(); if (!returnedFields.contains(VariantField.STUDIES_SAMPLES_DATA)) { for (Integer returnedStudy : returnedStudies) { samplesMap.put(returnedStudy.longValue(), Collections.emptyList()); } } else { List<Long> returnedSamples = new LinkedList<>(); for (Study study : studies) { QueryResult<Sample> samplesQueryResult = catalogManager.getAllSamples(study.getId(), new Query(), new QueryOptions("exclude", Arrays.asList("projects.studies.samples.annotationSets", "projects.studies.samples.attributes")), sessionId); samplesQueryResult.getResult().sort((o1, o2) -> Long.compare(o1.getId(), o2.getId())); samplesMap.put(study.getId(), samplesQueryResult.getResult()); samplesQueryResult.getResult().stream().map(Sample::getId).forEach(returnedSamples::add); } query.append(VariantQueryParams.RETURNED_SAMPLES.key(), returnedSamples); } } return samplesMap; } // Some aux methods private int addDefaultLimit(QueryOptions queryOptions) { return addDefaultLimit(queryOptions, LIMIT_MAX, LIMIT_DEFAULT); } private int addDefaultLimit(QueryOptions queryOptions, int limitMax, int limitDefault) { // Add default limit int limit = getDefaultLimit(queryOptions.getInt(QueryOptions.LIMIT, -1), limitMax, limitDefault); queryOptions.put(QueryOptions.LIMIT, limit); return limit; } private int getDefaultLimit(int limit, int limitMax, int limitDefault) { if (limit > limitMax) { logger.info("Unable to return more than {} variants. Change limit from {} to {}", limitMax, limit, limitMax); } limit = (limit > 0) ? Math.min(limit, limitMax) : limitDefault; return limit; } private String[] getRegions(Query query) { String[] regions; String regionStr = query.getString(VariantQueryParams.REGION.key()); if (!StringUtils.isEmpty(regionStr)) { regions = regionStr.split(","); } else { regions = new String[0]; } return regions; } public static <T extends ObjectMap> Query getVariantQuery(T queryOptions) { Query query = new Query(); for (VariantQueryParams queryParams : VariantQueryParams.values()) { if (queryOptions.containsKey(queryParams.key())) { query.put(queryParams.key(), queryOptions.get(queryParams.key())); } } if (queryOptions.containsKey(VariantCatalogQueryUtils.SAMPLE_FILTER.key())) { query.put(VariantCatalogQueryUtils.SAMPLE_FILTER.key(), queryOptions.get(VariantCatalogQueryUtils.SAMPLE_FILTER.key())); } return query; } @Override public void testConnection() throws StorageEngineException { } }