/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.storage.app.cli.client.executors; import org.opencb.biodata.formats.feature.gff.Gff; import org.opencb.biodata.formats.feature.gff.io.GffReader; import org.opencb.biodata.formats.io.FileFormatException; import org.opencb.biodata.models.core.Region; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.core.QueryResult; import org.opencb.opencga.core.common.UriUtils; import org.opencb.opencga.storage.app.cli.CommandExecutor; import org.opencb.opencga.storage.app.cli.client.CliOptionsParser; import org.opencb.opencga.storage.app.cli.client.options.StorageAlignmentCommandOptions; import org.opencb.opencga.storage.core.StorageEngineFactory; import org.opencb.opencga.storage.core.StoragePipeline; import org.opencb.opencga.storage.core.alignment.AlignmentDBAdaptor; import org.opencb.opencga.storage.core.alignment.AlignmentStorageEngine; import org.opencb.opencga.storage.core.alignment.AlignmentStorageEngineOld; import org.opencb.opencga.storage.core.config.StorageEngineConfiguration; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import java.io.IOException; import java.net.URI; import java.util.ArrayList; import java.util.List; /** * Created by imedina on 22/05/15. */ public class AlignmentCommandExecutor extends CommandExecutor { private StorageEngineConfiguration storageConfiguration; private AlignmentStorageEngine alignmentStorageManager; private StorageAlignmentCommandOptions alignmentCommandOptions; public AlignmentCommandExecutor(StorageAlignmentCommandOptions alignmentCommandOptions) { super(alignmentCommandOptions.commonCommandOptions); this.alignmentCommandOptions = alignmentCommandOptions; } private void configure(CliOptionsParser.CommonOptions commonOptions) throws Exception { this.logFile = commonOptions.logFile; /** * Getting VariantStorageEngine * We need to find out the Storage Engine Id to be used * If not storage engine is passed then the default is taken from storage-configuration.yml file **/ this.storageEngine = (storageEngine != null && !storageEngine.isEmpty()) ? storageEngine : configuration.getDefaultStorageEngineId(); logger.debug("Storage Engine set to '{}'", this.storageEngine); this.storageConfiguration = configuration.getStorageEngine(storageEngine); // TODO: Start passing catalogManager StorageEngineFactory storageEngineFactory = StorageEngineFactory.get(configuration); if (storageEngine == null || storageEngine.isEmpty()) { this.alignmentStorageManager = storageEngineFactory.getAlignmentStorageEngine(); } else { this.alignmentStorageManager = storageEngineFactory.getAlignmentStorageEngine(storageEngine); } } @Override public void execute() throws Exception { logger.debug("Executing alignment command line"); // String subCommandString = alignmentCommandOptions.getParsedSubCommand(); String subCommandString = getParsedSubCommand(alignmentCommandOptions.jCommander); switch (subCommandString) { case "index": configure(alignmentCommandOptions.indexAlignmentsCommandOptions.commonOptions); index(); break; case "query": configure(alignmentCommandOptions.queryAlignmentsCommandOptions.commonOptions); query(); break; default: logger.error("Subcommand not valid"); break; } } private void index() throws Exception { StorageAlignmentCommandOptions.IndexAlignmentsCommandOptions indexAlignmentsCommandOptions = alignmentCommandOptions.indexAlignmentsCommandOptions; String inputs[] = indexAlignmentsCommandOptions.commonIndexOptions.input.split(","); URI inputUri = UriUtils.createUri(inputs[0]); // FileUtils.checkFile(Paths.get(inputUri.getPath())); URI outdirUri = (indexAlignmentsCommandOptions.commonIndexOptions.outdir != null && !indexAlignmentsCommandOptions.commonIndexOptions.outdir.isEmpty()) ? UriUtils.createDirectoryUri(indexAlignmentsCommandOptions.commonIndexOptions.outdir) // Get parent folder from input file : inputUri.resolve("."); // FileUtils.checkDirectory(Paths.get(outdirUri.getPath())); logger.debug("All files and directories exist"); /* * Add CLI options to the alignmentOptions */ ObjectMap alignmentOptions = storageConfiguration.getAlignment().getOptions(); if (Integer.parseInt(indexAlignmentsCommandOptions.fileId) != 0) { alignmentOptions.put(AlignmentStorageEngineOld.Options.FILE_ID.key(), indexAlignmentsCommandOptions.fileId); } if (indexAlignmentsCommandOptions.commonIndexOptions.dbName != null && !indexAlignmentsCommandOptions.commonIndexOptions.dbName.isEmpty()) { alignmentOptions.put(AlignmentStorageEngineOld.Options.DB_NAME.key(), indexAlignmentsCommandOptions.commonIndexOptions.dbName); } if (indexAlignmentsCommandOptions.commonOptions.params != null) { alignmentOptions.putAll(indexAlignmentsCommandOptions.commonOptions.params); } alignmentOptions.put(AlignmentStorageEngineOld.Options.PLAIN.key(), false); alignmentOptions.put(AlignmentStorageEngineOld.Options.INCLUDE_COVERAGE.key(), indexAlignmentsCommandOptions.calculateCoverage); if (indexAlignmentsCommandOptions.meanCoverage != null && !indexAlignmentsCommandOptions.meanCoverage.isEmpty()) { alignmentOptions.put(AlignmentStorageEngineOld.Options.MEAN_COVERAGE_SIZE_LIST.key(), indexAlignmentsCommandOptions.meanCoverage); } alignmentOptions.put(AlignmentStorageEngineOld.Options.COPY_FILE.key(), false); alignmentOptions.put(AlignmentStorageEngineOld.Options.ENCRYPT.key(), "null"); logger.debug("Configuration options: {}", alignmentOptions.toJson()); boolean extract, transform, load; URI nextFileUri = inputUri; if (!indexAlignmentsCommandOptions.load && !indexAlignmentsCommandOptions.transform) { // if not present --transform nor --load, // do both extract = true; transform = true; load = true; } else { extract = indexAlignmentsCommandOptions.transform; transform = indexAlignmentsCommandOptions.transform; load = indexAlignmentsCommandOptions.load; } StoragePipeline storagePipeline = alignmentStorageManager.newStoragePipeline(true); if (extract) { logger.info("-- Extract alignments -- {}", inputUri); nextFileUri = storagePipeline.extract(inputUri, outdirUri); } if (transform) { logger.info("-- PreTransform alignments -- {}", nextFileUri); nextFileUri = storagePipeline.preTransform(nextFileUri); logger.info("-- Transform alignments -- {}", nextFileUri); nextFileUri = storagePipeline.transform(nextFileUri, null, outdirUri); logger.info("-- PostTransform alignments -- {}", nextFileUri); nextFileUri = storagePipeline.postTransform(nextFileUri); } if (load) { logger.info("-- PreLoad alignments -- {}", nextFileUri); nextFileUri = storagePipeline.preLoad(nextFileUri, outdirUri); logger.info("-- Load alignments -- {}", nextFileUri); nextFileUri = storagePipeline.load(nextFileUri); logger.info("-- PostLoad alignments -- {}", nextFileUri); nextFileUri = storagePipeline.postLoad(nextFileUri, outdirUri); } } private void query() throws StorageEngineException, FileFormatException { StorageAlignmentCommandOptions.QueryAlignmentsCommandOptions queryAlignmentsCommandOptions = alignmentCommandOptions.queryAlignmentsCommandOptions; AlignmentDBAdaptor dbAdaptor = alignmentStorageManager.getDBAdaptor(queryAlignmentsCommandOptions.commonQueryOptions.dbName); /** * Parse Regions */ GffReader gffReader = null; List<Region> regions = null; if (queryAlignmentsCommandOptions.region != null && !queryAlignmentsCommandOptions.region.isEmpty()) { regions = Region.parseRegions(queryAlignmentsCommandOptions.region); logger.debug("Processed regions: '{}'", regions); // regions = new LinkedList<>(); // for (String csvRegion : queryAlignmentsCommandOptions.regions) { // for (String strRegion : csvRegion.split(",")) { // Region region = new Region(strRegion); // regions.add(region); // logger.info("Parsed region: {}", region); // } // } } else if (queryAlignmentsCommandOptions.regionFile != null && !queryAlignmentsCommandOptions.regionFile.isEmpty()) { try { gffReader = new GffReader(queryAlignmentsCommandOptions.regionFile); } catch (NoSuchMethodException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } //throw new UnsupportedOperationException("Unsuppoted GFF file"); } /** * Parse QueryOptions */ QueryOptions options = new QueryOptions(); if (queryAlignmentsCommandOptions.fileId != null && !queryAlignmentsCommandOptions.fileId.isEmpty()) { options.add(AlignmentDBAdaptor.QO_FILE_ID, queryAlignmentsCommandOptions.fileId); } options.add(AlignmentDBAdaptor.QO_INCLUDE_COVERAGE, queryAlignmentsCommandOptions.coverage); options.add(AlignmentDBAdaptor.QO_VIEW_AS_PAIRS, queryAlignmentsCommandOptions.asPairs); options.add(AlignmentDBAdaptor.QO_PROCESS_DIFFERENCES, queryAlignmentsCommandOptions.processDifferences); if (queryAlignmentsCommandOptions.histogram) { options.add(AlignmentDBAdaptor.QO_INCLUDE_COVERAGE, true); options.add(AlignmentDBAdaptor.QO_HISTOGRAM, true); options.add(AlignmentDBAdaptor.QO_INTERVAL_SIZE, queryAlignmentsCommandOptions.histogram); } if (queryAlignmentsCommandOptions.filePath != null && !queryAlignmentsCommandOptions.filePath.isEmpty()) { options.add(AlignmentDBAdaptor.QO_BAM_PATH, queryAlignmentsCommandOptions.filePath); } if (queryAlignmentsCommandOptions.stats != null && !queryAlignmentsCommandOptions.stats.isEmpty()) { for (String csvStat : queryAlignmentsCommandOptions.stats) { for (String stat : csvStat.split(",")) { int index = stat.indexOf("<"); index = index >= 0 ? index : stat.indexOf("!"); index = index >= 0 ? index : stat.indexOf("~"); index = index >= 0 ? index : stat.indexOf("<"); index = index >= 0 ? index : stat.indexOf(">"); index = index >= 0 ? index : stat.indexOf("="); if (index < 0) { throw new UnsupportedOperationException("Unknown stat filter operation: " + stat); } String name = stat.substring(0, index); String cond = stat.substring(index); if (name.matches("")) { options.put(name, cond); } else { throw new UnsupportedOperationException("Unknown stat filter name: " + name); } logger.info("Parsed stat filter: {} {}", name, cond); } } } /** * Run query */ int subListSize = 20; logger.info("options = {}", options.toJson()); if (queryAlignmentsCommandOptions.histogram) { for (Region region : regions) { System.out.println(dbAdaptor.getAllIntervalFrequencies(region, options)); } } else if (regions != null && !regions.isEmpty()) { for (int i = 0; i < (regions.size() + subListSize - 1) / subListSize; i++) { List<Region> subRegions = regions.subList( i * subListSize, Math.min((i + 1) * subListSize, regions.size())); logger.info("subRegions = " + subRegions); QueryResult queryResult = dbAdaptor.getAllAlignmentsByRegion(subRegions, options); logger.info("{}", queryResult); System.out.println(new ObjectMap("queryResult", queryResult).toJson()); } } else if (gffReader != null) { List<Gff> gffList; List<Region> subRegions; while ((gffList = gffReader.read(subListSize)) != null) { subRegions = new ArrayList<>(subListSize); for (Gff gff : gffList) { subRegions.add(new Region(gff.getSequenceName(), gff.getStart(), gff.getEnd())); } logger.info("subRegions = " + subRegions); QueryResult queryResult = dbAdaptor.getAllAlignmentsByRegion(subRegions, options); logger.info("{}", queryResult); System.out.println(new ObjectMap("queryResult", queryResult).toJson()); } } else { throw new UnsupportedOperationException("Unable to fetch over all the genome"); // System.out.println(dbAdaptor.getAllAlignments(options)); } } }