/*
 * Copyright 2015-2016 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.opencga.storage.mongodb.variant;

import org.apache.commons.lang3.time.StopWatch;
import org.apache.log4j.Level;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.mongodb.MongoDataStoreManager;
import org.opencb.opencga.core.auth.IllegalOpenCGACredentialsException;
import org.opencb.opencga.core.common.MemoryUsageMonitor;
import org.opencb.opencga.storage.core.StoragePipeline;
import org.opencb.opencga.storage.core.StoragePipelineResult;
import org.opencb.opencga.storage.core.config.DatabaseCredentials;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.exceptions.StoragePipelineException;
import org.opencb.opencga.storage.core.metadata.FileStudyConfigurationManager;
import org.opencb.opencga.storage.core.metadata.StudyConfigurationManager;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;
import org.opencb.opencga.storage.core.variant.annotation.DefaultVariantAnnotationManager;
import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager;
import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotator;
import org.opencb.opencga.storage.core.variant.io.VariantImporter;
import org.opencb.opencga.storage.core.variant.io.db.VariantAnnotationDBWriter;
import org.opencb.opencga.storage.mongodb.auth.MongoCredentials;
import org.opencb.opencga.storage.mongodb.metadata.MongoDBStudyConfigurationManager;
import org.opencb.opencga.storage.mongodb.variant.adaptors.VariantMongoDBAdaptor;
import org.opencb.opencga.storage.mongodb.variant.io.db.VariantMongoDBAnnotationDBWriter;
import org.opencb.opencga.storage.mongodb.variant.load.MongoVariantImporter;

import java.net.URI;
import java.net.UnknownHostException;
import java.util.*;
import java.util.concurrent.TimeUnit;

import static org.opencb.opencga.storage.mongodb.variant.MongoDBVariantStorageEngine.MongoDBVariantOptions.*;

/**
 * Created by imedina on 13/08/14.
 */
public class MongoDBVariantStorageEngine extends VariantStorageEngine {

    /*
     * This field defaultValue must be the same as the one at storage-configuration.yml
     */
    public static final String STORAGE_ENGINE_ID = "mongodb";

    // Connection to MongoDB.
    private MongoDataStoreManager mongoDataStoreManager = null;
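
    /**
     * Engine-specific configuration keys with their default values. The load is performed in two phases:
     * "stage" writes transformed variants into an intermediate collection, and "merge" folds the staged
     * files into the main variants collection (see the index() method below).
     */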
    public enum MongoDBVariantOptions {
        COLLECTION_VARIANTS("collection.variants", "variants"),
        COLLECTION_FILES("collection.files", "files"),
        COLLECTION_STUDIES("collection.studies", "studies"),
        COLLECTION_STAGE("collection.stage", "stage"),
        BULK_SIZE("bulkSize", 100),
        DEFAULT_GENOTYPE("defaultGenotype", Arrays.asList("0/0", "0|0")),
        ALREADY_LOADED_VARIANTS("alreadyLoadedVariants", 0),
        STAGE("stage", false),
        STAGE_RESUME("stage.resume", false),
        STAGE_PARALLEL_WRITE("stage.parallel.write", false),
        STAGE_CLEAN_WHILE_LOAD("stage.clean.while.load", true),
        MERGE("merge", false),
        MERGE_SKIP("merge.skip", false), // Internal use only
        MERGE_RESUME("merge.resume", false),
        MERGE_IGNORE_OVERLAPPING_VARIANTS("merge.ignore-overlapping-variants", false), // Do not look for overlapping variants
        MERGE_PARALLEL_WRITE("merge.parallel.write", false),
        MERGE_BATCH_SIZE("merge.batch.size", 10); // Number of files to merge directly from first to second collection

        private final String key;
        private final Object value;

        MongoDBVariantOptions(String key, Object value) {
            this.key = key;
            this.value = value;
        }

        public static boolean isResumeStage(ObjectMap options) {
            return options.getBoolean(Options.RESUME.key(), Options.RESUME.defaultValue())
                    || options.getBoolean(STAGE_RESUME.key(), false);
        }

        public static boolean isResumeMerge(ObjectMap options) {
            return options.getBoolean(Options.RESUME.key(), Options.RESUME.defaultValue())
                    || options.getBoolean(MERGE_RESUME.key(), false);
        }

        public String key() {
            return key;
        }

        @SuppressWarnings("unchecked")
        public <T> T defaultValue() {
            return (T) value;
        }
    }

    public MongoDBVariantStorageEngine() {
        // Disable useless MongoDB logging
        org.apache.log4j.Logger.getLogger("org.mongodb.driver.cluster").setLevel(Level.WARN);
        org.apache.log4j.Logger.getLogger("org.mongodb.driver.connection").setLevel(Level.WARN);
    }

    @Override
    public void testConnection() throws StorageEngineException {
        ObjectMap options = configuration.getStorageEngine(STORAGE_ENGINE_ID).getVariant().getOptions();
        String dbName = options.getString(VariantStorageEngine.Options.DB_NAME.key());
        MongoCredentials credentials = getMongoCredentials(dbName);

        if (!credentials.check()) {
            logger.error("Connection to database '{}' failed", dbName);
            throw new StorageEngineException("Database connection test failed");
        }
    }

    @Override
    protected VariantImporter newVariantImporter(VariantDBAdaptor dbAdaptor) {
        VariantMongoDBAdaptor mongoDBAdaptor = (VariantMongoDBAdaptor) dbAdaptor;
        return new MongoVariantImporter(mongoDBAdaptor);
    }
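
    /**
     * Creates the MongoDB-specific storage pipeline. When {@code connected} is true the pipeline is built
     * with a live {@link VariantMongoDBAdaptor}; otherwise the adaptor is null, e.g. for transform-only runs.
     */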
    @Override
    public MongoDBVariantStoragePipeline newStoragePipeline(boolean connected) throws StorageEngineException {
        VariantMongoDBAdaptor dbAdaptor = connected ? getDBAdaptor(null) : null;
        return new MongoDBVariantStoragePipeline(configuration, STORAGE_ENGINE_ID, dbAdaptor);
    }

    @Override
    protected VariantAnnotationManager newVariantAnnotationManager(VariantAnnotator annotator, VariantDBAdaptor dbAdaptor) {
        VariantMongoDBAdaptor mongoDBAdaptor = (VariantMongoDBAdaptor) dbAdaptor;
        return new DefaultVariantAnnotationManager(annotator, dbAdaptor) {
            @Override
            protected VariantAnnotationDBWriter newVariantAnnotationDBWriter(VariantDBAdaptor dbAdaptor, QueryOptions options) {
                return new VariantMongoDBAnnotationDBWriter(options, mongoDBAdaptor);
            }
        };
    }

    @Override
    public void dropFile(String study, int fileId) throws StorageEngineException {
        ObjectMap options = new ObjectMap(configuration.getStorageEngine(STORAGE_ENGINE_ID).getVariant().getOptions());
        getDBAdaptor().deleteFile(study, Integer.toString(fileId), new QueryOptions(options));
    }

    @Override
    public void dropStudy(String studyName) throws StorageEngineException {
        ObjectMap options = new ObjectMap(configuration.getStorageEngine(STORAGE_ENGINE_ID).getVariant().getOptions());
        getDBAdaptor().deleteStudy(studyName, new QueryOptions(options));
    }

    @Override
    public VariantMongoDBAdaptor getDBAdaptor() throws StorageEngineException {
        return getDBAdaptor(null);
    }
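
    /**
     * Runs extract/transform/load over a batch of input files. Loading is split into a per-file "stage" step
     * and a "merge" step executed every merge.batch.size files (or when no files remain); annotation and
     * statistics are only triggered after a merge.
     */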
    @Override
    public List<StoragePipelineResult> index(List<URI> inputFiles, URI outdirUri, boolean doExtract, boolean doTransform,
                                             boolean doLoad) throws StorageEngineException {
        Map<URI, MongoDBVariantStoragePipeline> storageResultMap = new LinkedHashMap<>();
        Map<URI, StoragePipelineResult> resultsMap = new LinkedHashMap<>();
        LinkedList<StoragePipelineResult> results = new LinkedList<>();

        MemoryUsageMonitor monitor = new MemoryUsageMonitor();
        monitor.setDelay(5000);
//        monitor.start();
        try {
            for (URI inputFile : inputFiles) {
                StoragePipelineResult storagePipelineResult = new StoragePipelineResult(inputFile);
                MongoDBVariantStoragePipeline storagePipeline = newStoragePipeline(doLoad);
                storagePipeline.getOptions().append(VariantStorageEngine.Options.ISOLATE_FILE_FROM_STUDY_CONFIGURATION.key(), true);
                storageResultMap.put(inputFile, storagePipeline);
                resultsMap.put(inputFile, storagePipelineResult);
                results.add(storagePipelineResult);
            }

            if (doExtract) {
                for (Map.Entry<URI, MongoDBVariantStoragePipeline> entry : storageResultMap.entrySet()) {
                    URI uri = entry.getValue().extract(entry.getKey(), outdirUri);
                    resultsMap.get(entry.getKey()).setExtractResult(uri);
                }
            }

            if (doTransform) {
                for (Map.Entry<URI, MongoDBVariantStoragePipeline> entry : storageResultMap.entrySet()) {
                    StoragePipelineResult result = resultsMap.get(entry.getKey());
                    URI input = result.getExtractResult() == null ? entry.getKey() : result.getExtractResult();
                    transformFile(entry.getValue(), result, results, input, outdirUri);
                }
            }

            boolean doStage = getOptions().getBoolean(STAGE.key());
            boolean doMerge = getOptions().getBoolean(MERGE.key());
            if (!doStage && !doMerge) {
                doStage = true;
                doMerge = true;
            }

            if (doLoad) {
                int batchLoad = getOptions().getInt(MERGE_BATCH_SIZE.key(), MERGE_BATCH_SIZE.defaultValue());
                // Files to merge
                List<Integer> filesToMerge = new ArrayList<>(batchLoad);
                List<StoragePipelineResult> resultsToMerge = new ArrayList<>(batchLoad);
                List<Integer> mergedFiles = new ArrayList<>();

                Iterator<Map.Entry<URI, MongoDBVariantStoragePipeline>> iterator = storageResultMap.entrySet().iterator();
                while (iterator.hasNext()) {
                    Map.Entry<URI, MongoDBVariantStoragePipeline> entry = iterator.next();
                    StoragePipelineResult result = resultsMap.get(entry.getKey());
                    URI input = result.getPostTransformResult() == null ? entry.getKey() : result.getPostTransformResult();
                    MongoDBVariantStoragePipeline storagePipeline = entry.getValue();

                    StopWatch loadWatch = StopWatch.createStarted();
                    try {
                        storagePipeline.getOptions().put(STAGE.key(), doStage);
                        storagePipeline.getOptions().put(MERGE.key(), doMerge);

                        logger.info("PreLoad '{}'", input);
                        input = storagePipeline.preLoad(input, outdirUri);
                        result.setPreLoadResult(input);

                        if (doStage) {
                            logger.info("Load - Stage '{}'", input);
                            storagePipeline.stage(input);
                            result.setLoadResult(input);
                            result.setLoadStats(storagePipeline.getLoadStats());
                            result.getLoadStats().put(STAGE.key(), true);
                            result.setLoadTimeMillis(loadWatch.getTime(TimeUnit.MILLISECONDS));
                        }

                        if (doMerge) {
                            logger.info("Load - Merge '{}'", input);
                            filesToMerge.add(storagePipeline.getOptions().getInt(Options.FILE_ID.key()));
                            resultsToMerge.add(result);

                            if (filesToMerge.size() == batchLoad || !iterator.hasNext()) {
                                StopWatch mergeWatch = StopWatch.createStarted();
                                try {
                                    storagePipeline.merge(new ArrayList<>(filesToMerge));
                                } catch (Exception e) {
                                    for (StoragePipelineResult storagePipelineResult : resultsToMerge) {
                                        storagePipelineResult.setLoadError(e);
                                    }
                                    throw new StoragePipelineException("Exception executing merge.", e, results);
                                } finally {
                                    long mergeTime = mergeWatch.getTime(TimeUnit.MILLISECONDS);
                                    for (StoragePipelineResult storagePipelineResult : resultsToMerge) {
                                        storagePipelineResult.setLoadTimeMillis(storagePipelineResult.getLoadTimeMillis() + mergeTime);
                                        for (Map.Entry<String, Object> statsEntry : storagePipeline.getLoadStats().entrySet()) {
                                            storagePipelineResult.getLoadStats().putIfAbsent(statsEntry.getKey(), statsEntry.getValue());
                                        }
                                        storagePipelineResult.setLoadExecuted(true);
                                    }
                                    mergedFiles.addAll(filesToMerge);
                                    filesToMerge.clear();
                                    resultsToMerge.clear();
                                }
                            } else {
                                // We don't execute merge for this file
                                storagePipeline.getOptions().put(MERGE.key(), false);
                            }
                        }

                        logger.info("PostLoad '{}'", input);
                        input = storagePipeline.postLoad(input, outdirUri);
                        result.setPostLoadResult(input);
                    } catch (Exception e) {
                        if (result.getLoadError() == null) {
                            result.setLoadError(e);
                        }
                        if (!(e instanceof StoragePipelineException)) {
                            throw new StoragePipelineException("Exception executing load: " + e.getMessage(), e, results);
                        } else {
                            throw e;
                        }
                    } finally {
                        if (result.getLoadTimeMillis() == 0) {
                            result.setLoadTimeMillis(loadWatch.getTime(TimeUnit.MILLISECONDS));
                        }
                        if (result.getLoadStats() == null) {
                            result.setLoadStats(storagePipeline.getLoadStats());
                        }
                    }
                }

                if (doMerge) {
                    annotateLoadedFiles(outdirUri, inputFiles, results, getOptions());
                    calculateStatsForLoadedFiles(outdirUri, inputFiles, results, getOptions());
                }
            }
        } finally {
//            monitor.interrupt();
            for (StoragePipeline storagePipeline : storageResultMap.values()) {
                storagePipeline.close();
            }
        }

        return results;
    }
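
    /**
     * Builds a {@link VariantMongoDBAdaptor} against the given database. A null or empty {@code dbName}
     * falls back to the database name defined in the storage configuration (see getMongoCredentials()).
     */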
    @Override
    public VariantMongoDBAdaptor getDBAdaptor(String dbName) throws StorageEngineException {
        MongoCredentials credentials = getMongoCredentials(dbName);
        VariantMongoDBAdaptor variantMongoDBAdaptor;
        ObjectMap options = new ObjectMap(configuration.getStorageEngine(STORAGE_ENGINE_ID).getVariant().getOptions());
        if (dbName != null && !dbName.isEmpty()) {
            options.append(VariantStorageEngine.Options.DB_NAME.key(), dbName);
        }

        String variantsCollection = options.getString(COLLECTION_VARIANTS.key(), COLLECTION_VARIANTS.defaultValue());
        String filesCollection = options.getString(COLLECTION_FILES.key(), COLLECTION_FILES.defaultValue());
        MongoDataStoreManager mongoDataStoreManager = getMongoDataStoreManager();
        try {
            StudyConfigurationManager studyConfigurationManager = getStudyConfigurationManager(options);
            variantMongoDBAdaptor = new VariantMongoDBAdaptor(mongoDataStoreManager, credentials, variantsCollection, filesCollection,
                    studyConfigurationManager, configuration);
        } catch (UnknownHostException e) {
            throw new IllegalArgumentException(e);
        }
        logger.debug("getting DBAdaptor to db: {}", credentials.getMongoDbName());
        return variantMongoDBAdaptor;
    }

    MongoCredentials getMongoCredentials(String dbName) {
        ObjectMap options = configuration.getStorageEngine(STORAGE_ENGINE_ID).getVariant().getOptions();

        // If no database name is provided, read from the configuration file
        if (dbName == null || dbName.isEmpty()) {
            dbName = options.getString(VariantStorageEngine.Options.DB_NAME.key(), VariantStorageEngine.Options.DB_NAME.defaultValue());
        }

        DatabaseCredentials database = configuration.getStorageEngine(STORAGE_ENGINE_ID).getVariant().getDatabase();

        try {
            return new MongoCredentials(database, dbName);
        } catch (IllegalOpenCGACredentialsException e) {
            e.printStackTrace();
            return null;
        }
    }

    @Override
    protected StudyConfigurationManager buildStudyConfigurationManager(ObjectMap options) throws StorageEngineException {
        if (options != null && !options.getString(FileStudyConfigurationManager.STUDY_CONFIGURATION_PATH, "").isEmpty()) {
            return super.buildStudyConfigurationManager(options);
        } else {
            String dbName = options == null ? null : options.getString(VariantStorageEngine.Options.DB_NAME.key());
            String collectionName = options == null ? null : options.getString(COLLECTION_STUDIES.key(), COLLECTION_STUDIES.defaultValue());
            try {
                return new MongoDBStudyConfigurationManager(getMongoDataStoreManager(), getMongoCredentials(dbName), collectionName);
//                return getDBAdaptor(dbName).getStudyConfigurationManager();
            } catch (UnknownHostException e) {
                throw new StorageEngineException("Unable to build MongoStorageConfigurationManager", e);
            }
        }
    }

    private synchronized MongoDataStoreManager getMongoDataStoreManager() {
        if (mongoDataStoreManager == null) {
            mongoDataStoreManager = new MongoDataStoreManager(getMongoCredentials(null).getDataStoreServerAddresses());
        }
        return mongoDataStoreManager;
    }

    public synchronized void close() {
        if (mongoDataStoreManager != null) {
            mongoDataStoreManager.close();
            mongoDataStoreManager = null;
        }
    }
}