/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.storage.app.cli.client.executors; import com.beust.jcommander.ParameterException; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.writer.Options; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFHeader; import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.formats.io.FileFormatException; import org.opencb.biodata.formats.variant.vcf4.VcfUtils; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.tools.variant.converters.VariantContextToAvroVariantConverter; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.core.QueryResult; import org.opencb.commons.utils.FileUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.common.UriUtils; import org.opencb.opencga.storage.app.cli.CommandExecutor; import org.opencb.opencga.storage.app.cli.GeneralCliOptions; import org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions; import org.opencb.opencga.storage.core.StorageEngineFactory; import org.opencb.opencga.storage.core.config.StorageEngineConfiguration; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.exceptions.VariantSearchException; import org.opencb.opencga.storage.core.metadata.FileStudyConfigurationManager; import org.opencb.opencga.storage.core.metadata.StudyConfiguration; import org.opencb.opencga.storage.core.search.VariantSearchManager; import org.opencb.opencga.storage.core.search.VariantSearchModel; import org.opencb.opencga.storage.core.search.solr.SolrVariantSearchIterator; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBIterator; import org.opencb.opencga.storage.core.variant.annotation.DefaultVariantAnnotationManager; import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager; import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotatorException; import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotator; import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotatorFactory; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.core.variant.stats.DefaultVariantStatisticsManager; import java.io.*; import java.net.URI; import java.net.URISyntaxException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.*; import java.util.function.Function; import java.util.stream.Collectors; /** * Created by imedina on 02/03/15. */ public class VariantCommandExecutor extends CommandExecutor { private StorageEngineConfiguration storageConfiguration; private VariantStorageEngine variantStorageEngine; private StorageVariantCommandOptions variantCommandOptions; public VariantCommandExecutor(StorageVariantCommandOptions variantCommandOptions) { super(variantCommandOptions.commonCommandOptions); this.variantCommandOptions = variantCommandOptions; } private void configure(GeneralCliOptions.CommonOptions commonOptions) throws Exception { this.logFile = commonOptions.logFile; /** * Getting VariantStorageEngine * We need to find out the Storage Engine Id to be used * If not storage engine is passed then the default is taken from storage-configuration.yml file **/ this.storageEngine = (storageEngine != null && !storageEngine.isEmpty()) ? storageEngine : configuration.getDefaultStorageEngineId(); logger.debug("Storage Engine set to '{}'", this.storageEngine); this.storageConfiguration = configuration.getStorageEngine(storageEngine); // TODO: Start passing catalogManager StorageEngineFactory storageEngineFactory = StorageEngineFactory.get(configuration); if (storageEngine == null || storageEngine.isEmpty()) { this.variantStorageEngine = storageEngineFactory.getVariantStorageEngine(); } else { this.variantStorageEngine = storageEngineFactory.getVariantStorageEngine(storageEngine); } } @Override public void execute() throws Exception { logger.debug("Executing variant command line"); // String subCommandString = variantCommandOptions.getParsedSubCommand(); String subCommandString = getParsedSubCommand(variantCommandOptions.jCommander); switch (subCommandString) { case "index": configure(variantCommandOptions.indexVariantsCommandOptions.commonOptions); index(); break; case "query": configure(variantCommandOptions.variantQueryCommandOptions.commonOptions); query(); break; // case "query-grpc": // configure(variantCommandOptions.queryVariantsCommandOptions.commonOptions); // queryGrpc(); // break; case "import": configure(variantCommandOptions.importVariantsCommandOptions.commonOptions); importData(); break; case "annotate": configure(variantCommandOptions.annotateVariantsCommandOptions.commonOptions); annotation(); break; case "stats": configure(variantCommandOptions.statsVariantsCommandOptions.commonOptions); stats(); break; case "export": configure(variantCommandOptions.exportVariantsCommandOptions.queryOptions.commonOptions); export(); break; case "search": configure(variantCommandOptions.searchVariantsCommandOptions.commonOptions); search(); break; // case "benchmark": // configure(variantCommandOptions.statsVariantsCommandOptions.commonOptions); // benchmark(); // break; default: logger.error("Subcommand not valid"); break; } } private void index() throws URISyntaxException, IOException, StorageEngineException, FileFormatException { StorageVariantCommandOptions.VariantIndexCommandOptions indexVariantsCommandOptions = variantCommandOptions.indexVariantsCommandOptions; List<URI> inputUris = new LinkedList<>(); String inputs[] = indexVariantsCommandOptions.commonIndexOptions.input.split(","); for (String uri: inputs) { URI variantsUri = UriUtils.createUri(uri); if (variantsUri.getScheme().startsWith("file") || variantsUri.getScheme().isEmpty()) { FileUtils.checkFile(Paths.get(variantsUri)); } inputUris.add(variantsUri); } // URI pedigreeUri = (indexVariantsCommandOptions.pedigree != null && !indexVariantsCommandOptions.pedigree.isEmpty()) // ? UriUtils.createUri(indexVariantsCommandOptions.pedigree) // : null; // if (pedigreeUri != null) { // FileUtils.checkFile(Paths.get(pedigreeUri)); // } URI outdirUri = (indexVariantsCommandOptions.commonIndexOptions.outdir != null && !indexVariantsCommandOptions.commonIndexOptions.outdir.isEmpty()) ? UriUtils.createDirectoryUri(indexVariantsCommandOptions.commonIndexOptions.outdir) // Get parent folder from input file : inputUris.get(0).resolve("."); if (outdirUri.getScheme().startsWith("file") || outdirUri.getScheme().isEmpty()) { FileUtils.checkDirectory(Paths.get(outdirUri), true); } logger.debug("All files and directories exist"); // VariantSource source = new VariantSource(fileName, indexVariantsCommandOptions.fileId, // indexVariantsCommandOptions.studyId, indexVariantsCommandOptions.study, indexVariantsCommandOptions.studyType, // indexVariantsCommandOptions.aggregated); /** Add CLi options to the variant options **/ ObjectMap params = storageConfiguration.getVariant().getOptions(); params.put(VariantStorageEngine.Options.STUDY_NAME.key(), indexVariantsCommandOptions.studyName); params.put(VariantStorageEngine.Options.STUDY_ID.key(), indexVariantsCommandOptions.studyId); params.put(VariantStorageEngine.Options.FILE_ID.key(), indexVariantsCommandOptions.fileId); params.put(VariantStorageEngine.Options.SAMPLE_IDS.key(), indexVariantsCommandOptions.sampleIds); params.put(VariantStorageEngine.Options.CALCULATE_STATS.key(), indexVariantsCommandOptions.calculateStats); params.put(VariantStorageEngine.Options.INCLUDE_STATS.key(), indexVariantsCommandOptions.includeStats); params.put(VariantStorageEngine.Options.EXCLUDE_GENOTYPES.key(), indexVariantsCommandOptions.excludeGenotype); params.put(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), indexVariantsCommandOptions.extraFields); // variantOptions.put(VariantStorageEngine.Options.INCLUDE_SRC.key(), indexVariantsCommandOptions.includeSrc); // variantOptions.put(VariantStorageEngine.Options.COMPRESS_GENOTYPES.key(), indexVariantsCommandOptions.compressGenotypes); params.put(VariantStorageEngine.Options.AGGREGATED_TYPE.key(), indexVariantsCommandOptions.aggregated); params.putIfNotEmpty(VariantStorageEngine.Options.DB_NAME.key(), indexVariantsCommandOptions.commonIndexOptions.dbName); params.put(VariantStorageEngine.Options.ANNOTATE.key(), indexVariantsCommandOptions.annotate); if (indexVariantsCommandOptions.annotator != null) { params.put(VariantAnnotationManager.ANNOTATION_SOURCE, indexVariantsCommandOptions.annotator); } params.put(VariantAnnotationManager.OVERWRITE_ANNOTATIONS, indexVariantsCommandOptions.overwriteAnnotations); if (indexVariantsCommandOptions.studyConfigurationFile != null && !indexVariantsCommandOptions.studyConfigurationFile.isEmpty()) { params.put(FileStudyConfigurationManager.STUDY_CONFIGURATION_PATH, indexVariantsCommandOptions.studyConfigurationFile); } params.put(VariantStorageEngine.Options.RESUME.key(), indexVariantsCommandOptions.resume); if (indexVariantsCommandOptions.aggregationMappingFile != null) { // TODO move this options to new configuration.yml Properties aggregationMappingProperties = new Properties(); try { aggregationMappingProperties.load(new FileInputStream(indexVariantsCommandOptions.aggregationMappingFile)); params.put(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), aggregationMappingProperties); } catch (FileNotFoundException e) { logger.error("Aggregation mapping file {} not found. Population stats won't be parsed.", indexVariantsCommandOptions .aggregationMappingFile); } } if (indexVariantsCommandOptions.commonOptions.params != null) { params.putAll(indexVariantsCommandOptions.commonOptions.params); } logger.debug("Configuration options: {}", params.toJson()); /** Execute ETL steps **/ boolean doExtract, doTransform, doLoad; if (!indexVariantsCommandOptions.indexSearch) { if (!indexVariantsCommandOptions.load && !indexVariantsCommandOptions.transform) { doExtract = true; doTransform = true; doLoad = true; } else { doExtract = indexVariantsCommandOptions.transform; doTransform = indexVariantsCommandOptions.transform; doLoad = indexVariantsCommandOptions.load; } variantStorageEngine.index(inputUris, outdirUri, doExtract, doTransform, doLoad); } else { try { variantStorageEngine.searchIndex(indexVariantsCommandOptions.commonIndexOptions.dbName); } catch (VariantSearchException e) { e.printStackTrace(); } } } private void query() throws Exception { StorageVariantCommandOptions.VariantQueryCommandOptions variantQueryCommandOptions = variantCommandOptions.variantQueryCommandOptions; // if (true) { //// System.out.println(variantCommandOptions.queryVariantsCommandOptions.toString()); // System.out.println(new ObjectMapper().writer().withDefaultPrettyPrinter().writeValueAsString(variantCommandOptions // .variantQueryCommandOptions)); // return; // } storageConfiguration.getVariant().getOptions().putAll(variantQueryCommandOptions.commonOptions.params); VariantDBAdaptor variantDBAdaptor = variantStorageEngine.getDBAdaptor(variantQueryCommandOptions.commonQueryOptions.dbName); List<String> studyNames = variantDBAdaptor.getStudyConfigurationManager().getStudyNames(new QueryOptions()); Query query = VariantQueryCommandUtils.parseQuery(variantQueryCommandOptions, studyNames); QueryOptions options = VariantQueryCommandUtils.parseQueryOptions(variantQueryCommandOptions); options.put("summary", variantQueryCommandOptions.summary); if (variantQueryCommandOptions.commonQueryOptions.count) { QueryResult<Long> result = variantDBAdaptor.count(query); System.out.println("Num. results\t" + result.getResult().get(0)); } else if (StringUtils.isNotEmpty(variantQueryCommandOptions.rank)) { executeRank(query, variantDBAdaptor, variantQueryCommandOptions); } else if (StringUtils.isNotEmpty(variantQueryCommandOptions.groupBy)) { ObjectMapper objectMapper = new ObjectMapper(); QueryResult groupBy = variantDBAdaptor.groupBy(query, variantQueryCommandOptions.groupBy, options); System.out.println("groupBy = " + objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(groupBy)); } else { URI uri = StringUtils.isEmpty(variantQueryCommandOptions.commonQueryOptions.output) ? null : UriUtils.createUri(variantQueryCommandOptions.commonQueryOptions.output); if (variantQueryCommandOptions.annotations != null) { options.add("annotations", variantQueryCommandOptions.annotations); } VariantWriterFactory.VariantOutputFormat of = VariantWriterFactory .toOutputFormat(variantQueryCommandOptions.outputFormat, variantQueryCommandOptions.commonQueryOptions.output); variantStorageEngine.exportData(uri, of, variantQueryCommandOptions.commonQueryOptions.dbName, query, options); } } private void importData() throws URISyntaxException, StorageEngineException, IOException { StorageVariantCommandOptions.ImportVariantsCommandOptions importVariantsOptions = variantCommandOptions.importVariantsCommandOptions; URI uri = UriUtils.createUri(importVariantsOptions.input); ObjectMap options = new ObjectMap(); options.putAll(importVariantsOptions.commonOptions.params); variantStorageEngine.importData(uri, importVariantsOptions.dbName, options); } private void annotation() throws StorageEngineException, IOException, URISyntaxException, VariantAnnotatorException { StorageVariantCommandOptions.VariantAnnotateCommandOptions annotateVariantsCommandOptions = variantCommandOptions.annotateVariantsCommandOptions; VariantDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor(annotateVariantsCommandOptions.dbName); /* * Create Annotator */ ObjectMap options = configuration.getStorageEngine(storageEngine).getVariant().getOptions(); if (annotateVariantsCommandOptions.annotator != null) { options.put(VariantAnnotationManager.ANNOTATION_SOURCE, annotateVariantsCommandOptions.annotator); } if (annotateVariantsCommandOptions.customAnnotationKey != null) { options.put(VariantAnnotationManager.CUSTOM_ANNOTATION_KEY, annotateVariantsCommandOptions.customAnnotationKey); } if (annotateVariantsCommandOptions.species != null) { options.put(VariantAnnotationManager.SPECIES, annotateVariantsCommandOptions.species); } if (annotateVariantsCommandOptions.assembly != null) { options.put(VariantAnnotationManager.ASSEMBLY, annotateVariantsCommandOptions.assembly); } options.putAll(annotateVariantsCommandOptions.commonOptions.params); VariantAnnotator annotator = VariantAnnotatorFactory.buildVariantAnnotator(configuration, storageEngine, options); // VariantAnnotator annotator = VariantAnnotationManager.buildVariantAnnotator(annotatorSource, annotatorProperties, // annotateVariantsCommandOptions.species, annotateVariantsCommandOptions.assembly); DefaultVariantAnnotationManager variantAnnotationManager = new DefaultVariantAnnotationManager(annotator, dbAdaptor); /* * Annotation options */ Query query = new Query(); if (annotateVariantsCommandOptions.filterRegion != null) { query.put(VariantDBAdaptor.VariantQueryParams.REGION.key(), annotateVariantsCommandOptions.filterRegion); } if (annotateVariantsCommandOptions.filterChromosome != null) { query.put(VariantDBAdaptor.VariantQueryParams.CHROMOSOME.key(), annotateVariantsCommandOptions.filterChromosome); } if (annotateVariantsCommandOptions.filterGene != null) { query.put(VariantDBAdaptor.VariantQueryParams.GENE.key(), annotateVariantsCommandOptions.filterGene); } if (annotateVariantsCommandOptions.filterAnnotConsequenceType != null) { query.put(VariantDBAdaptor.VariantQueryParams.ANNOT_CONSEQUENCE_TYPE.key(), annotateVariantsCommandOptions.filterAnnotConsequenceType); } if (!annotateVariantsCommandOptions.overwriteAnnotations) { query.put(VariantDBAdaptor.VariantQueryParams.ANNOTATION_EXISTS.key(), false); } URI outputUri = UriUtils.createUri(annotateVariantsCommandOptions.outdir == null ? "." : annotateVariantsCommandOptions.outdir); Path outDir = Paths.get(outputUri.resolve(".").getPath()); /* * Create and load annotations */ boolean doCreate = annotateVariantsCommandOptions.create, doLoad = annotateVariantsCommandOptions.load != null; if (!annotateVariantsCommandOptions.create && annotateVariantsCommandOptions.load == null) { doCreate = true; doLoad = true; } URI annotationFile = null; if (doCreate) { long start = System.currentTimeMillis(); logger.info("Starting annotation creation "); annotationFile = variantAnnotationManager.createAnnotation(outDir, annotateVariantsCommandOptions.fileName == null ? annotateVariantsCommandOptions.dbName : annotateVariantsCommandOptions.fileName, query, new QueryOptions(options)); logger.info("Finished annotation creation {}ms", System.currentTimeMillis() - start); } if (doLoad) { long start = System.currentTimeMillis(); logger.info("Starting annotation load"); if (annotationFile == null) { // annotationFile = new URI(null, c.load, null); annotationFile = Paths.get(annotateVariantsCommandOptions.load).toUri(); } variantAnnotationManager.loadAnnotation(annotationFile, new QueryOptions(options)); logger.info("Finished annotation load {}ms", System.currentTimeMillis() - start); } } private void stats() throws IOException, URISyntaxException, StorageEngineException, IllegalAccessException, InstantiationException, ClassNotFoundException { StorageVariantCommandOptions.VariantStatsCommandOptions statsVariantsCommandOptions = variantCommandOptions.statsVariantsCommandOptions; ObjectMap options = storageConfiguration.getVariant().getOptions(); if (statsVariantsCommandOptions.dbName != null && !statsVariantsCommandOptions.dbName.isEmpty()) { options.put(VariantStorageEngine.Options.DB_NAME.key(), statsVariantsCommandOptions.dbName); } options.put(VariantStorageEngine.Options.OVERWRITE_STATS.key(), statsVariantsCommandOptions.overwriteStats); options.put(VariantStorageEngine.Options.UPDATE_STATS.key(), statsVariantsCommandOptions.updateStats); options.putIfNotEmpty(VariantStorageEngine.Options.FILE_ID.key(), statsVariantsCommandOptions.fileId); options.put(VariantStorageEngine.Options.STUDY_ID.key(), statsVariantsCommandOptions.studyId); if (statsVariantsCommandOptions.studyConfigurationFile != null && !statsVariantsCommandOptions.studyConfigurationFile.isEmpty()) { options.put(FileStudyConfigurationManager.STUDY_CONFIGURATION_PATH, statsVariantsCommandOptions.studyConfigurationFile); } options.put(VariantStorageEngine.Options.RESUME.key(), statsVariantsCommandOptions.resume); if (statsVariantsCommandOptions.commonOptions.params != null) { options.putAll(statsVariantsCommandOptions.commonOptions.params); } Map<String, Set<String>> cohorts = null; if (statsVariantsCommandOptions.cohort != null && !statsVariantsCommandOptions.cohort.isEmpty()) { cohorts = new LinkedHashMap<>(statsVariantsCommandOptions.cohort.size()); for (Map.Entry<String, String> entry : statsVariantsCommandOptions.cohort.entrySet()) { List<String> samples = Arrays.asList(entry.getValue().split(",")); if (samples.size() == 1 && samples.get(0).isEmpty()) { samples = new ArrayList<>(); } cohorts.put(entry.getKey(), new HashSet<>(samples)); } } options.put(VariantStorageEngine.Options.AGGREGATED_TYPE.key(), statsVariantsCommandOptions.aggregated); if (statsVariantsCommandOptions.aggregationMappingFile != null) { Properties aggregationMappingProperties = new Properties(); try { aggregationMappingProperties.load(new FileInputStream(statsVariantsCommandOptions.aggregationMappingFile)); options.put(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), aggregationMappingProperties); } catch (FileNotFoundException e) { logger.error("Aggregation mapping file {} not found. Population stats won't be parsed.", statsVariantsCommandOptions .aggregationMappingFile); } } /** * Create DBAdaptor */ VariantDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor(options.getString(VariantStorageEngine.Options.DB_NAME.key())); // dbAdaptor.setConstantSamples(Integer.toString(statsVariantsCommandOptions.fileId)); // TODO jmmut: change to studyId when we // remove fileId StudyConfiguration studyConfiguration = dbAdaptor.getStudyConfigurationManager() .getStudyConfiguration(statsVariantsCommandOptions.studyId, new QueryOptions(options)).first(); if (studyConfiguration == null) { studyConfiguration = new StudyConfiguration(Integer.parseInt(statsVariantsCommandOptions.studyId), statsVariantsCommandOptions.dbName); } /** * Create and load stats */ URI outputUri = UriUtils.createUri(statsVariantsCommandOptions.fileName == null ? "" : statsVariantsCommandOptions.fileName); URI directoryUri = outputUri.resolve("."); String filename = outputUri.equals(directoryUri) ? VariantStorageEngine.buildFilename(studyConfiguration.getStudyName(), Integer.parseInt(statsVariantsCommandOptions.fileId)) : Paths.get(outputUri.getPath()).getFileName().toString(); // assertDirectoryExists(directoryUri); DefaultVariantStatisticsManager variantStatisticsManager = new DefaultVariantStatisticsManager(dbAdaptor); boolean doCreate = true; boolean doLoad = true; // doCreate = statsVariantsCommandOptions.create; // doLoad = statsVariantsCommandOptions.load != null; // if (!statsVariantsCommandOptions.create && statsVariantsCommandOptions.load == null) { // doCreate = doLoad = true; // } else if (statsVariantsCommandOptions.load != null) { // filename = statsVariantsCommandOptions.load; // } try { Map<String, Integer> cohortIds = statsVariantsCommandOptions.cohortIds.entrySet().stream() .collect(Collectors.toMap(Map.Entry::getKey, e -> Integer.parseInt(e.getValue()))); QueryOptions queryOptions = new QueryOptions(options); if (doCreate) { filename += "." + TimeUtils.getTime(); outputUri = outputUri.resolve(filename); outputUri = variantStatisticsManager.createStats(dbAdaptor, outputUri, cohorts, cohortIds, studyConfiguration, queryOptions); } if (doLoad) { outputUri = outputUri.resolve(filename); variantStatisticsManager.loadStats(dbAdaptor, outputUri, studyConfiguration, queryOptions); } } catch (Exception e) { // file not found? wrong file id or study id? bad parameters to ParallelTaskRunner? e.printStackTrace(); logger.error(e.getMessage()); } } private void export() throws URISyntaxException, StorageEngineException, IOException { StorageVariantCommandOptions.VariantExportCommandOptions exportVariantsCommandOptions = variantCommandOptions.exportVariantsCommandOptions; // // ObjectMap options = storageConfiguration.getVariant().getOptions(); // if (exportVariantsCommandOptions.dbName != null && !exportVariantsCommandOptions.dbName.isEmpty()) { // options.put(VariantStorageEngine.Options.DB_NAME.key(), exportVariantsCommandOptions.dbName); // } // options.putIfNotEmpty(VariantStorageEngine.Options.FILE_ID.key(), exportVariantsCommandOptions.fileId); // options.put(VariantStorageEngine.Options.STUDY_ID.key(), exportVariantsCommandOptions.studyId); // if (exportVariantsCommandOptions.studyConfigurationFile != null && !exportVariantsCommandOptions.studyConfigurationFile.isEmpty()) { // options.put(FileStudyConfigurationManager.STUDY_CONFIGURATION_PATH, exportVariantsCommandOptions.studyConfigurationFile); // } // // if (exportVariantsCommandOptions.commonOptions.params != null) { // options.putAll(exportVariantsCommandOptions.commonOptions.params); // } // // // VariantDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor(exportVariantsCommandOptions.dbName); // // URI outputUri = UriUtils.createUri(exportVariantsCommandOptions.outFilename == null ? "" : exportVariantsCommandOptions.outFilename); // URI directoryUri = outputUri.resolve("."); // StudyConfiguration studyConfiguration = dbAdaptor.getStudyConfigurationManager() // .getStudyConfiguration(exportVariantsCommandOptions.studyId, new QueryOptions(options)).first(); // if (studyConfiguration == null) { // studyConfiguration = new StudyConfiguration(Integer.parseInt(exportVariantsCommandOptions.studyId), // exportVariantsCommandOptions.dbName); // } // String filename = outputUri.equals(directoryUri) ? VariantStorageEngine.buildFilename(studyConfiguration.getStudyName(), // Integer.parseInt(exportVariantsCommandOptions.fileId)) // : Paths.get(outputUri.getPath()).getFileName().toString(); // // URI outputFile = Paths.get(exportVariantsCommandOptions.outFilename).toUri(); // VariantWriterFactory.VariantOutputFormat outputFormat = VariantWriterFactory.toOutputFormat(null, // outputFile.getPath()); // // Query query = new Query(); // QueryOptions queryOptions = new QueryOptions(); // // variantStorageEngine.exportData(outputFile, outputFormat, exportVariantsCommandOptions.dbName, // query, queryOptions); // storageConfiguration.getVariant().getOptions().putAll(exportVariantsCommandOptions.commonOptions.params); VariantDBAdaptor variantDBAdaptor = variantStorageEngine.getDBAdaptor(exportVariantsCommandOptions.queryOptions.commonQueryOptions.dbName); List<String> studyNames = variantDBAdaptor.getStudyConfigurationManager().getStudyNames(new QueryOptions()); // TODO: JT try { Query query = VariantQueryCommandUtils.parseQuery(exportVariantsCommandOptions.queryOptions, studyNames); QueryOptions options = VariantQueryCommandUtils.parseQueryOptions(exportVariantsCommandOptions.queryOptions); // create VCF header by getting information from metadata or study configuration List<String> cohortNames = null; List<String> annotations = null; List<String> formatFields = null; List<String> formatFieldsType = null; List<String> formatFieldsDescr = null; List<String> sampleNames = null; Function<String, String> converter = null; VCFHeader vcfHeader = VcfUtils.createVCFHeader(cohortNames, annotations, formatFields, formatFieldsType, formatFieldsDescr, sampleNames, converter); // create the variant context writer OutputStream outputStream = new FileOutputStream(exportVariantsCommandOptions.outFilename); Options writerOptions = null; VariantContextWriter writer = VcfUtils.createVariantContextWriter(outputStream, vcfHeader.getSequenceDictionary(), writerOptions); // write VCF header writer.writeHeader(vcfHeader); // TODO: get study id/name VariantContextToAvroVariantConverter variantContextToAvroVariantConverter = new VariantContextToAvroVariantConverter(0, Collections.emptyList(), Collections.emptyList()); VariantDBIterator iterator = variantDBAdaptor.iterator(query, options); while (iterator.hasNext()) { Variant variant = iterator.next(); VariantContext variantContext = variantContextToAvroVariantConverter.from(variant); System.out.println(variantContext.toString()); writer.add(variantContext); } // close writer.close(); outputStream.close(); } catch (Exception e) { e.printStackTrace(); } } /** * search command */ private void search() throws Exception { StorageVariantCommandOptions.VariantSearchCommandOptions searchOptions = variantCommandOptions.searchVariantsCommandOptions; //VariantDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor(exportVariantsCommandOptions.dbName); // variantStorageEngine.getConfiguration().getSearch() // TODO: initialize solrUrl and database (i.e.: core/collection name) from the configuration file String solrUrl = (searchOptions.solrUrl == null ? "http://localhost:8983/solr/" : searchOptions.solrUrl); String dbName = (searchOptions.dbName == null ? "variants" : searchOptions.dbName); VariantSearchManager variantSearchManager = new VariantSearchManager(solrUrl, dbName); boolean querying = true; String mode = searchOptions.mode; // create the core or collection if (searchOptions.create) { querying = false; switch (mode.toLowerCase()) { case "core": { if (variantSearchManager.existCore(dbName)) { throw new IllegalArgumentException("Core '" + dbName + "' already exists"); } variantSearchManager.createCore(searchOptions.dbName, searchOptions.solrConfig); break; } case "collection": { if (variantSearchManager.existCollection(dbName)) { throw new IllegalArgumentException("Collection '" + dbName + "' already exists"); } variantSearchManager.createCollection(searchOptions.dbName, searchOptions.solrConfig, searchOptions.numShards, searchOptions.numReplicas); break; } default: { throw new IllegalArgumentException("Invalid value '" + searchOptions.create + "' for the --create parameter. Valid values are 'core' or 'collection'"); } } } // index if (searchOptions.index) { // if (!variantSearchManager.existCore(dbName)) { // throw new IllegalArgumentException("Search " + mode + " '" + dbName + "' does not exists"); // } // querying = false; // Path path = Paths.get(searchOptions.inputFilename); // variantSearchManager.load(dbName, path); VariantStorageEngine variantStorageEngine = StorageEngineFactory.get(configuration).getVariantStorageEngine(); variantStorageEngine.searchIndex(dbName); } // // query // if (querying) { // if (!variantSearchManager.existCore(dbName)) { // throw new IllegalArgumentException("Search " + mode + " '" + dbName + "' does not exists"); // } // int count = 0; // try { // Query query = new Query(); // query = VariantQueryCommandUtils.parseQuery(searchOptions, query); // QueryOptions queryOptions = new QueryOptions(); // VariantQueryCommandUtils.parseQueryOptions(searchOptions); // SolrVariantSearchIterator iterator = variantSearchManager.iterator(dbName, query, queryOptions); // while (iterator.hasNext()) { // VariantSearchModel variantSearch = iterator.next(); // System.out.println("Variant #" + count); // System.out.println(variantSearch.toString()); // count++; // } // } catch (Exception e) { // e.printStackTrace(); // } // System.out.println("Num. variants: " + count); // } } private void executeRank(Query query, VariantDBAdaptor variantDBAdaptor, StorageVariantCommandOptions.VariantQueryCommandOptions variantQueryCommandOptions) throws JsonProcessingException { ObjectMapper objectMapper = new ObjectMapper(); String field = variantQueryCommandOptions.rank; boolean asc = false; if (variantQueryCommandOptions.rank.contains(":")) { // eg. gene:-1 String[] arr = variantQueryCommandOptions.rank.split(":"); field = arr[0]; if (arr[1].endsWith("-1")) { asc = true; } } int limit = 10; if (variantQueryCommandOptions.commonQueryOptions.limit > 0) { limit = variantQueryCommandOptions.commonQueryOptions.limit; } QueryResult rank = variantDBAdaptor.rank(query, field, limit, asc); System.out.println("rank = " + objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(rank)); } private void printJsonResult(VariantDBIterator variantDBIterator, OutputStream outputStream) throws IOException { while (variantDBIterator.hasNext()) { Variant variant = variantDBIterator.next(); outputStream.write(variant.toJson().getBytes()); outputStream.write('\n'); } } // private void benchmark() throws StorageEngineException, InterruptedException, ExecutionException, InstantiationException, // IllegalAccessException, ClassNotFoundException { // StorageVariantCommandOptions.BenchmarkCommandOptions benchmarkCommandOptions = variantCommandOptions.benchmarkCommandOptions; // //// Overwrite default options from configuration.yaml with CLI parameters // if (benchmarkCommandOptions.commonOptions.storageEngine != null && !benchmarkCommandOptions.commonOptions.storageEngine.isEmpty()) { // configuration.getBenchmark().setStorageEngine(benchmarkCommandOptions.commonOptions.storageEngine); // } else { // configuration.getBenchmark().setStorageEngine(configuration.getDefaultStorageEngineId()); // logger.debug("Storage Engine for benchmarking set to '{}'", configuration.getDefaultStorageEngineId()); // } // // if (benchmarkCommandOptions.repetition > 0) { // configuration.getBenchmark().setNumRepetitions(benchmarkCommandOptions.repetition); // } // // if (benchmarkCommandOptions.database != null && !benchmarkCommandOptions.database.isEmpty()) { // configuration.getBenchmark().setDatabaseName(benchmarkCommandOptions.database); // } // // if (benchmarkCommandOptions.table != null && !benchmarkCommandOptions.table.isEmpty()) { // configuration.getBenchmark().setTable(benchmarkCommandOptions.table); // } // // if (benchmarkCommandOptions.queries != null) { // configuration.getBenchmark().setQueries(Arrays.asList(benchmarkCommandOptions.queries.split(","))); // } // // DatabaseCredentials databaseCredentials = configuration.getBenchmark().getDatabase(); // if (benchmarkCommandOptions.host != null && !benchmarkCommandOptions.host.isEmpty()) { // databaseCredentials.setHosts(Arrays.asList(benchmarkCommandOptions.host.split(","))); // } // // if (benchmarkCommandOptions.concurrency > 0) { // configuration.getBenchmark().setConcurrency(benchmarkCommandOptions.concurrency); // } // // logger.debug("Benchmark configuration: {}", configuration.getBenchmark()); // // // validate // checkParams(); // //// VariantDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor(benchmarkCommandOptions.storageEngine); // BenchmarkManager benchmarkManager = new BenchmarkManager(configuration); // benchmarkManager.variantBenchmark(); // } private void checkParams() { if (configuration.getBenchmark().getDatabaseName() == null || configuration.getBenchmark().getDatabaseName().isEmpty()) { throw new ParameterException("Database name is null or empty. Please provide database name."); } if (configuration.getBenchmark().getTable() == null || configuration.getBenchmark().getTable().isEmpty()) { throw new ParameterException("Table name is null or empty. Please provide table name."); } if (configuration.getBenchmark().getDatabase().getHosts() == null || configuration.getBenchmark().getDatabase().getHosts().isEmpty()) { throw new ParameterException("Database name is null or empty. Please provide Database name."); } } }