/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package com.act.biointerpretation; import act.server.NoSQLAPI; import chemaxon.license.LicenseProcessingException; import chemaxon.reaction.ReactionException; import com.act.biointerpretation.cofactorremoval.CofactorRemover; import com.act.biointerpretation.desalting.ReactionDesalter; import com.act.biointerpretation.mechanisminspection.MechanisticValidator; import com.act.biointerpretation.reactionmerging.ReactionMerger; import com.act.biointerpretation.sequencemerging.SequenceMerger; import com.act.lcms.db.io.LoadPlateCompositionIntoDB; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.List; import java.util.regex.Pattern; public class BiointerpretationDriver { private static final Logger LOGGER = LogManager.getFormatterLogger(BiointerpretationDriver.class); public static final String OPTION_CONFIGURATION_FILE = "c"; public static final String OPTION_SINGLE_OPERATION = "o"; public static final String OPTION_SINGLE_READ_DB = "r"; public static final String OPTION_SINGLE_WRITE_DB = "w"; private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public enum BiointerpretationOperation { MERGE_REACTIONS, DESALT, REMOVE_COFACTORS, VALIDATE, MERGE_DUPLICATE_SEQUENCES, } public static final String HELP_MESSAGE = StringUtils.join(new String[]{ "This class drives one or more biointerpretation steps. A single operation can be specified on the ", "command line, or a series of operations and databases can be specified in a JSON configuration file." }, ""); public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<Option.Builder>() {{ add(Option.builder(OPTION_CONFIGURATION_FILE) .argName("config file") .desc("JSON configuration file of steps to run in sequence") .hasArg() .longOpt("config") ); add(Option.builder(OPTION_SINGLE_OPERATION) .argName("operation") .desc("Single operation to run on one read/write DB pair (requires db names), options are: " + StringUtils.join(BiointerpretationOperation.values(), ", ")) .hasArg() .longOpt("op") ); add(Option.builder(OPTION_SINGLE_READ_DB) .argName("db name") .desc("DB from which to read when performing a single operation") .hasArg() .longOpt("read") ); add(Option.builder(OPTION_SINGLE_WRITE_DB) .argName("db name") .desc("DB to which to write when performing a single operation") .hasArg() .longOpt("write") ); add(Option.builder("h") .argName("help") .desc("Prints this help message") .longOpt("help") ); }}; public static final HelpFormatter HELP_FORMATTER = new HelpFormatter(); static { HELP_FORMATTER.setWidth(100); } public static void main(String[] args) throws Exception { Options opts = new Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build()); } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (cl.hasOption("help")) { HELP_FORMATTER.printHelp(ReactionDesalter.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } if (cl.hasOption(OPTION_CONFIGURATION_FILE)) { List<BiointerpretationStep> steps; File configFile = new File(cl.getOptionValue(OPTION_CONFIGURATION_FILE)); if (!configFile.exists()) { String msg = String.format("Cannot find configuration file at %s", configFile.getAbsolutePath()); LOGGER.error(msg); throw new RuntimeException(msg); } // Read the whole config file. try (InputStream is = new FileInputStream(configFile)) { steps = OBJECT_MAPPER.readValue(is, new TypeReference<List<BiointerpretationStep>>() {}); } catch (IOException e) { LOGGER.error("Caught IO exception when attempting to read configuration file: %s", e.getMessage()); throw e; // Crash after logging if the config file can't be read. } // Ask for explicit confirmation before dropping databases. LOGGER.info("Biointerpretation plan:"); for (BiointerpretationStep step : steps) { crashIfInvalidDBName(step.getReadDBName()); crashIfInvalidDBName(step.getWriteDBName()); LOGGER.info("%s: %s -> %s", step.getOperation(), step.getReadDBName(), step.getWriteDBName()); } LOGGER.warn("WARNING: each DB to be written will be dropped before the writing step commences"); LOGGER.info("Proceed? [y/n]"); String readLine; try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) { readLine = reader.readLine(); } readLine.trim(); if ("y".equalsIgnoreCase(readLine) || "yes".equalsIgnoreCase(readLine)) { LOGGER.info("Biointerpretation plan confirmed, commencing"); for (BiointerpretationStep step : steps) { performOperation(step, true); } LOGGER.info("Biointerpretation plan completed"); } else { LOGGER.info("Biointerpretation plan not confirmed, exiting"); } } else if (cl.hasOption(OPTION_SINGLE_OPERATION)) { if (!cl.hasOption(OPTION_SINGLE_READ_DB) || !cl.hasOption(OPTION_SINGLE_WRITE_DB)) { String msg = "Must specify read and write DB names when performing a single operation"; LOGGER.error(msg); throw new RuntimeException(msg); } BiointerpretationOperation operation; try { operation = BiointerpretationOperation.valueOf(cl.getOptionValue(OPTION_SINGLE_OPERATION)); } catch (IllegalArgumentException e) { LOGGER.error("Caught IllegalArgumentException when trying to parse operation '%s': %s", cl.getOptionValue(OPTION_SINGLE_OPERATION), e.getMessage()); throw e; // Crash if we can't interpret the operation. } String readDB = crashIfInvalidDBName(cl.getOptionValue(OPTION_SINGLE_READ_DB)); String writeDB = crashIfInvalidDBName(cl.getOptionValue(OPTION_SINGLE_WRITE_DB)); performOperation(new BiointerpretationStep(operation, readDB, writeDB), false); } else { String msg = "Must specify either a config file or a single operation to perform."; LOGGER.error(msg); throw new RuntimeException(msg); } } public static final Pattern VALID_DB_NAME_REGEX = Pattern.compile("[a-zA-Z][\\w-]+"); public static String crashIfInvalidDBName(String dbName) { if (!VALID_DB_NAME_REGEX.matcher(dbName).matches()) { String msg = String.format("Invalid database name: %s", dbName); LOGGER.error(msg); throw new RuntimeException(msg); } return dbName; } public static void performOperation(BiointerpretationStep step, boolean forceDrop) throws IOException, LicenseProcessingException, ReactionException { // Drop the write DB and create a NoSQLAPI object that can be used by any step. NoSQLAPI.dropDB(step.writeDBName, forceDrop); // Note that this constructor call initializes the write DB collections and indices, so it must happen after dropDB. NoSQLAPI noSQLAPI = new NoSQLAPI(step.getReadDBName(), step.getWriteDBName()); switch (step.getOperation()) { case MERGE_REACTIONS: LOGGER.info("Reaction merger starting (%s -> %s)", step.getReadDBName(), step.getWriteDBName()); ReactionMerger reactionMerger = new ReactionMerger(noSQLAPI); reactionMerger.init(); reactionMerger.run(); LOGGER.info("Reaction merger complete (%s -> %s)", step.getReadDBName(), step.getWriteDBName()); break; case DESALT: LOGGER.info("Desalter starting (%s -> %s)", step.getReadDBName(), step.getWriteDBName()); ReactionDesalter reactionDesalter = new ReactionDesalter(noSQLAPI); reactionDesalter.init(); reactionDesalter.run(); LOGGER.info("Reaction merger complete (%s -> %s)", step.getReadDBName(), step.getWriteDBName()); break; case REMOVE_COFACTORS: LOGGER.info("Cofactor remover starting (%s -> %s)", step.getReadDBName(), step.getWriteDBName()); CofactorRemover cofactorRemover = new CofactorRemover(noSQLAPI); cofactorRemover.init(); cofactorRemover.run(); LOGGER.info("Cofactor remover complete (%s -> %s)", step.getReadDBName(), step.getWriteDBName()); break; case VALIDATE: LOGGER.info("Mechanistic validator starting (%s -> %s)", step.getReadDBName(), step.getWriteDBName()); MechanisticValidator validator = new MechanisticValidator(noSQLAPI); validator.init(); validator.run(); LOGGER.info("Mechanistic validator complete (%s -> %s)", step.getReadDBName(), step.getWriteDBName()); break; case MERGE_DUPLICATE_SEQUENCES: LOGGER.info("Sequence merger starting (%s -> %s)", step.getReadDBName(), step.getWriteDBName()); SequenceMerger sequenceMerger = new SequenceMerger(noSQLAPI); sequenceMerger.init(); sequenceMerger.run(); LOGGER.info("Sequence merger complete (%s -> %s)", step.getReadDBName(), step.getWriteDBName()); break; // No default is necessary since deserialization will ensure there is a corresponding operation in the enum. } // TODO: returning timing data and other stats for a final step-by-step report. } public static class BiointerpretationStep { @JsonProperty("operation") BiointerpretationOperation operation; @JsonProperty("read") String readDBName; @JsonProperty("write") String writeDBName; // Required for deserialization. public BiointerpretationStep() { } public BiointerpretationStep(BiointerpretationOperation operation, String readDBName, String writeDBName) { this.operation = operation; this.readDBName = readDBName; this.writeDBName = writeDBName; } public BiointerpretationOperation getOperation() { return operation; } public void setOperation(BiointerpretationOperation operation) { this.operation = operation; } public String getReadDBName() { return readDBName; } public void setReadDBName(String readDBName) { this.readDBName = readDBName; } public String getWriteDBName() { return writeDBName; } public void setWriteDBName(String writeDBName) { this.writeDBName = writeDBName; } } }