/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.biointerpretation;
import act.server.NoSQLAPI;
import chemaxon.license.LicenseProcessingException;
import chemaxon.reaction.ReactionException;
import com.act.biointerpretation.cofactorremoval.CofactorRemover;
import com.act.biointerpretation.desalting.ReactionDesalter;
import com.act.biointerpretation.mechanisminspection.MechanisticValidator;
import com.act.biointerpretation.reactionmerging.ReactionMerger;
import com.act.biointerpretation.sequencemerging.SequenceMerger;
import com.act.lcms.db.io.LoadPlateCompositionIntoDB;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
public class BiointerpretationDriver {
// Formatter logger: the log calls in this class use printf-style (%s) placeholders.
private static final Logger LOGGER = LogManager.getFormatterLogger(BiointerpretationDriver.class);
// Short names of the command line options; the long names are assigned where the options are built.
// Path to a JSON configuration file describing a sequence of steps.
public static final String OPTION_CONFIGURATION_FILE = "c";
// Name of a single operation to run (must match a BiointerpretationOperation constant).
public static final String OPTION_SINGLE_OPERATION = "o";
// DB to read from when running a single operation.
public static final String OPTION_SINGLE_READ_DB = "r";
// DB to write to when running a single operation.
public static final String OPTION_SINGLE_WRITE_DB = "w";
// Jackson mapper used to deserialize the JSON configuration file into a list of steps.
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
/**
 * The operations this driver can run. The constant names are what users type on the command line
 * (parsed with {@code valueOf}) and what appears in the "operation" field of a configuration step.
 */
public enum BiointerpretationOperation {
// Dispatched to ReactionMerger.
MERGE_REACTIONS,
// Dispatched to ReactionDesalter.
DESALT,
// Dispatched to CofactorRemover.
REMOVE_COFACTORS,
// Dispatched to MechanisticValidator.
VALIDATE,
// Dispatched to SequenceMerger.
MERGE_DUPLICATE_SEQUENCES,
}
/** Usage blurb printed above the option list whenever help is shown. */
public static final String HELP_MESSAGE =
    "This class drives one or more biointerpretation steps. A single operation can be specified on the "
        + "command line, or a series of operations and databases can be specified in a JSON configuration file.";
public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<Option.Builder>() {{
add(Option.builder(OPTION_CONFIGURATION_FILE)
.argName("config file")
.desc("JSON configuration file of steps to run in sequence")
.hasArg()
.longOpt("config")
);
add(Option.builder(OPTION_SINGLE_OPERATION)
.argName("operation")
.desc("Single operation to run on one read/write DB pair (requires db names), options are: " +
StringUtils.join(BiointerpretationOperation.values(), ", "))
.hasArg()
.longOpt("op")
);
add(Option.builder(OPTION_SINGLE_READ_DB)
.argName("db name")
.desc("DB from which to read when performing a single operation")
.hasArg()
.longOpt("read")
);
add(Option.builder(OPTION_SINGLE_WRITE_DB)
.argName("db name")
.desc("DB to which to write when performing a single operation")
.hasArg()
.longOpt("write")
);
add(Option.builder("h")
.argName("help")
.desc("Prints this help message")
.longOpt("help")
);
}};
// Shared formatter for usage output; the static initializer below sets its width to 100 columns.
public static final HelpFormatter HELP_FORMATTER = new HelpFormatter();
static {
HELP_FORMATTER.setWidth(100);
}
/**
 * Command line entry point. Runs either a sequence of steps read from a JSON configuration file
 * (after interactive confirmation) or a single operation on one read/write DB pair.
 *
 * @param args command line arguments; see {@link #OPTION_BUILDERS} for the accepted options
 * @throws RuntimeException if the arguments are incomplete, the configuration file is missing or
 *     malformed, or a DB name is rejected by {@link #crashIfInvalidDBName(String)}
 * @throws Exception for any failure propagated from reading the configuration or running a step
 */
public static void main(String[] args) throws Exception {
  Options opts = new Options();
  for (Option.Builder b : OPTION_BUILDERS) {
    opts.addOption(b.build());
  }

  CommandLine cl = null;
  try {
    CommandLineParser parser = new DefaultParser();
    cl = parser.parse(opts, args);
  } catch (ParseException e) {
    System.err.format("Argument parsing failed: %s\n", e.getMessage());
    // Usage must be reported under this class's own name, not that of an unrelated tool.
    HELP_FORMATTER.printHelp(BiointerpretationDriver.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
    System.exit(1);
  }

  if (cl.hasOption("help")) {
    HELP_FORMATTER.printHelp(BiointerpretationDriver.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
    return;
  }

  if (cl.hasOption(OPTION_CONFIGURATION_FILE)) {
    runConfiguredPlan(new File(cl.getOptionValue(OPTION_CONFIGURATION_FILE)));
  } else if (cl.hasOption(OPTION_SINGLE_OPERATION)) {
    runSingleOperation(cl);
  } else {
    String msg = "Must specify either a config file or a single operation to perform.";
    LOGGER.error(msg);
    throw new RuntimeException(msg);
  }
}

// Loads the steps in a JSON configuration file, validates and prints the plan, and runs it once confirmed.
private static void runConfiguredPlan(File configFile)
    throws IOException, LicenseProcessingException, ReactionException {
  if (!configFile.exists()) {
    String msg = String.format("Cannot find configuration file at %s", configFile.getAbsolutePath());
    LOGGER.error(msg);
    throw new RuntimeException(msg);
  }

  // Read the whole config file.
  List<BiointerpretationStep> steps;
  try (InputStream is = new FileInputStream(configFile)) {
    steps = OBJECT_MAPPER.readValue(is, new TypeReference<List<BiointerpretationStep>>() {});
  } catch (IOException e) {
    LOGGER.error("Caught IO exception when attempting to read configuration file: %s", e.getMessage());
    throw e; // Crash after logging if the config file can't be read.
  }
  if (steps == null) {
    // A file whose whole content is the JSON literal null deserializes to a null list.
    String msg = String.format("Configuration file %s does not contain a list of steps",
        configFile.getAbsolutePath());
    LOGGER.error(msg);
    throw new RuntimeException(msg);
  }

  // Validate and print the entire plan before asking for confirmation, so that a malformed step is
  // reported before any database has been dropped.
  LOGGER.info("Biointerpretation plan:");
  for (BiointerpretationStep step : steps) {
    if (step == null || step.getOperation() == null) {
      String msg = "Every step in the configuration file must specify an operation";
      LOGGER.error(msg);
      throw new RuntimeException(msg);
    }
    crashIfInvalidDBName(step.getReadDBName());
    crashIfInvalidDBName(step.getWriteDBName());
    LOGGER.info("%s: %s -> %s", step.getOperation(), step.getReadDBName(), step.getWriteDBName());
  }

  // Ask for explicit confirmation before dropping databases.
  LOGGER.warn("WARNING: each DB to be written will be dropped before the writing step commences");
  LOGGER.info("Proceed? [y/n]");
  String answer;
  try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) {
    answer = reader.readLine();
  }
  // readLine() returns null at end of input, which is treated as "not confirmed". Strings are
  // immutable, so the trimmed value has to be assigned back for the comparison below to see it.
  answer = (answer == null) ? "" : answer.trim();

  if ("y".equalsIgnoreCase(answer) || "yes".equalsIgnoreCase(answer)) {
    LOGGER.info("Biointerpretation plan confirmed, commencing");
    for (BiointerpretationStep step : steps) {
      performOperation(step, true);
    }
    LOGGER.info("Biointerpretation plan completed");
  } else {
    LOGGER.info("Biointerpretation plan not confirmed, exiting");
  }
}

// Runs the one operation named on the command line against the read/write DBs given there.
private static void runSingleOperation(CommandLine cl)
    throws IOException, LicenseProcessingException, ReactionException {
  if (!cl.hasOption(OPTION_SINGLE_READ_DB) || !cl.hasOption(OPTION_SINGLE_WRITE_DB)) {
    String msg = "Must specify read and write DB names when performing a single operation";
    LOGGER.error(msg);
    throw new RuntimeException(msg);
  }

  BiointerpretationOperation operation;
  try {
    operation = BiointerpretationOperation.valueOf(cl.getOptionValue(OPTION_SINGLE_OPERATION));
  } catch (IllegalArgumentException e) {
    LOGGER.error("Caught IllegalArgumentException when trying to parse operation '%s': %s",
        cl.getOptionValue(OPTION_SINGLE_OPERATION), e.getMessage());
    throw e; // Crash if we can't interpret the operation.
  }

  String readDB = crashIfInvalidDBName(cl.getOptionValue(OPTION_SINGLE_READ_DB));
  String writeDB = crashIfInvalidDBName(cl.getOptionValue(OPTION_SINGLE_WRITE_DB));
  performOperation(new BiointerpretationStep(operation, readDB, writeDB), false);
}
/** A valid DB name is an ASCII letter followed by one or more word characters or hyphens. */
public static final Pattern VALID_DB_NAME_REGEX = Pattern.compile("[a-zA-Z][\\w-]+");

/**
 * Checks a database name against {@link #VALID_DB_NAME_REGEX}.
 *
 * @param dbName the candidate name; may be null (for example when a configuration step omits it)
 * @return {@code dbName} unchanged when it is valid, so the call can be used inline
 * @throws RuntimeException if {@code dbName} is null or does not match the pattern
 */
public static String crashIfInvalidDBName(String dbName) {
  // Reject null explicitly: Pattern.matcher(null) would otherwise fail with a bare
  // NullPointerException instead of the logged, descriptive error below.
  if (dbName == null || !VALID_DB_NAME_REGEX.matcher(dbName).matches()) {
    String msg = String.format("Invalid database name: %s", dbName);
    LOGGER.error(msg);
    throw new RuntimeException(msg);
  }
  return dbName;
}
/**
 * Runs one biointerpretation step: drops the step's write DB, opens the read/write DB pair, and
 * runs the class that implements the step's operation (init, then run).
 *
 * @param step the operation to run and the names of the DBs to read from and write to
 * @param forceDrop passed straight through to {@code NoSQLAPI.dropDB}; {@code main} passes true for
 *     steps of an already-confirmed configuration plan and false for a single command line operation
 * @throws IllegalArgumentException if {@code step} or its operation is null
 * @throws IOException declared for failures in the DB layer or the operation being run
 * @throws LicenseProcessingException declared for failures in the operation being run
 * @throws ReactionException declared for failures in the operation being run
 */
public static void performOperation(BiointerpretationStep step, boolean forceDrop)
    throws IOException, LicenseProcessingException, ReactionException {
  // Refuse a step with no operation before anything destructive happens. A step deserialized from
  // JSON without an "operation" field has a null operation, and the switch below would only fail
  // on it after the write DB had already been dropped.
  if (step == null || step.getOperation() == null) {
    throw new IllegalArgumentException("Biointerpretation step must specify an operation");
  }

  // Drop the write DB and create a NoSQLAPI object that can be used by any step.
  NoSQLAPI.dropDB(step.getWriteDBName(), forceDrop);
  // Note that this constructor call initializes the write DB collections and indices, so it must happen after dropDB.
  NoSQLAPI noSQLAPI = new NoSQLAPI(step.getReadDBName(), step.getWriteDBName());

  switch (step.getOperation()) {
    case MERGE_REACTIONS:
      LOGGER.info("Reaction merger starting (%s -> %s)", step.getReadDBName(), step.getWriteDBName());
      ReactionMerger reactionMerger = new ReactionMerger(noSQLAPI);
      reactionMerger.init();
      reactionMerger.run();
      LOGGER.info("Reaction merger complete (%s -> %s)", step.getReadDBName(), step.getWriteDBName());
      break;
    case DESALT:
      LOGGER.info("Desalter starting (%s -> %s)", step.getReadDBName(), step.getWriteDBName());
      ReactionDesalter reactionDesalter = new ReactionDesalter(noSQLAPI);
      reactionDesalter.init();
      reactionDesalter.run();
      // The completion message names the desalter, matching the start message for this branch.
      LOGGER.info("Desalter complete (%s -> %s)", step.getReadDBName(), step.getWriteDBName());
      break;
    case REMOVE_COFACTORS:
      LOGGER.info("Cofactor remover starting (%s -> %s)", step.getReadDBName(), step.getWriteDBName());
      CofactorRemover cofactorRemover = new CofactorRemover(noSQLAPI);
      cofactorRemover.init();
      cofactorRemover.run();
      LOGGER.info("Cofactor remover complete (%s -> %s)", step.getReadDBName(), step.getWriteDBName());
      break;
    case VALIDATE:
      LOGGER.info("Mechanistic validator starting (%s -> %s)", step.getReadDBName(), step.getWriteDBName());
      MechanisticValidator validator = new MechanisticValidator(noSQLAPI);
      validator.init();
      validator.run();
      LOGGER.info("Mechanistic validator complete (%s -> %s)", step.getReadDBName(), step.getWriteDBName());
      break;
    case MERGE_DUPLICATE_SEQUENCES:
      LOGGER.info("Sequence merger starting (%s -> %s)", step.getReadDBName(), step.getWriteDBName());
      SequenceMerger sequenceMerger = new SequenceMerger(noSQLAPI);
      sequenceMerger.init();
      sequenceMerger.run();
      LOGGER.info("Sequence merger complete (%s -> %s)", step.getReadDBName(), step.getWriteDBName());
      break;
    // No default is necessary: every enum constant is handled above and a null operation was rejected on entry.
  }

  // TODO: returning timing data and other stats for a final step-by-step report.
}
/**
 * One step of a biointerpretation plan: an operation plus the DB to read from and the DB to write to.
 * Instances are either deserialized from the JSON configuration file (keys "operation", "read" and
 * "write") or built directly for a single command line operation.
 */
public static class BiointerpretationStep {
// The operation to run; bound to the JSON key "operation".
@JsonProperty("operation")
BiointerpretationOperation operation;
// Name of the DB to read from; bound to the JSON key "read".
@JsonProperty("read")
String readDBName;
// Name of the DB to write to; bound to the JSON key "write".
@JsonProperty("write")
String writeDBName;
// Required for deserialization.
public BiointerpretationStep() {
}
// Builds a fully specified step without going through JSON.
public BiointerpretationStep(BiointerpretationOperation operation, String readDBName, String writeDBName) {
this.operation = operation;
this.readDBName = readDBName;
this.writeDBName = writeDBName;
}
public BiointerpretationOperation getOperation() {
return operation;
}
public void setOperation(BiointerpretationOperation operation) {
this.operation = operation;
}
public String getReadDBName() {
return readDBName;
}
public void setReadDBName(String readDBName) {
this.readDBName = readDBName;
}
public String getWriteDBName() {
return writeDBName;
}
public void setWriteDBName(String writeDBName) {
this.writeDBName = writeDBName;
}
}
}