/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.biointerpretation.sars;
import act.server.MongoDB;
import com.act.biointerpretation.Utils.ReactionProjector;
import com.act.biointerpretation.mechanisminspection.ErosCorpus;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class SarGenerationDriver {
private static final Logger LOGGER = LogManager.getFormatterLogger(SarGenerationDriver.class);
private static final String OPTION_DB = "db";
private static final String OPTION_OUTPUT_PATH = "o";
private static final String OPTION_HELP = "h";
private static final String OPTION_REACTION_LIST = "r";
private static final String OPTION_REACTIONS_FILE = "f";
public static final String HELP_MESSAGE =
"This class is used to generate SARs from a set of reactions or chemicals. It has several modes of operation, " +
"which are described in more detail along with the operations that run them.";
public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<Option.Builder>() {{
add(Option.builder(OPTION_DB)
.argName("db name")
.desc("The name of the mongo DB to use.")
.hasArg()
.longOpt("db-name")
.type(String.class)
.required(true)
);
add(Option.builder(OPTION_OUTPUT_PATH)
.argName("output file path")
.desc("The absolute path to the file to which to write the json file of the sar corpus.")
.hasArg()
.longOpt("output-file-path")
.required(true)
);
add(Option.builder(OPTION_REACTION_LIST)
.argName("specific reactions")
.desc("A list of reaction IDs to build a SAR from.")
.hasArgs()
.valueSeparator(',')
.longOpt("specific reactions")
);
add(Option.builder(OPTION_REACTIONS_FILE)
.argName("reactions file")
.desc("Absolute path to file from which to read reaction groups. File should either be a ReactionGroupCorpus " +
"in json format, or a file with one reaction group per line, where each line has comma separate values, " +
"with the first value being the name of the group, and the subsequent values being reaction ids.")
.hasArg()
.longOpt("reactions-file")
);
add(Option.builder(OPTION_HELP)
.argName("help")
.desc("Prints this help message.")
.longOpt("help")
);
}};
public static final HelpFormatter HELP_FORMATTER = new HelpFormatter();
static {
HELP_FORMATTER.setWidth(100);
}
private static final String LOCAL_HOST = "localhost";
private static final Integer MONGO_PORT = 27017;
public static void main(String[] args) throws Exception {
// Build command line parser.
Options opts = new Options();
for (Option.Builder b : OPTION_BUILDERS) {
opts.addOption(b.build());
}
CommandLine cl = null;
try {
CommandLineParser parser = new DefaultParser();
cl = parser.parse(opts, args);
} catch (ParseException e) {
LOGGER.error("Argument parsing failed: %s", e.getMessage());
HELP_FORMATTER.printHelp(SarGenerationDriver.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
System.exit(1);
}
// Print help.
if (cl.hasOption(OPTION_HELP)) {
HELP_FORMATTER.printHelp(SarGenerationDriver.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
return;
}
// Create DB and DbAPI
MongoDB mongoDB = new MongoDB(LOCAL_HOST, MONGO_PORT, cl.getOptionValue(OPTION_DB));
DbAPI dbApi = new DbAPI(mongoDB);
// Handle output file
File outputFile = new File(cl.getOptionValue(OPTION_OUTPUT_PATH));
if (outputFile.isDirectory() || outputFile.exists()) {
LOGGER.error("Supplied output file is a directory or already exists.");
HELP_FORMATTER.printHelp(SarGenerationDriver.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
System.exit(1);
}
outputFile.createNewFile();
// Check that there is exactly one reaction group input option
if (cl.hasOption(OPTION_REACTION_LIST) && cl.hasOption(OPTION_REACTIONS_FILE)) {
LOGGER.error("Cannot process both a reaction list and a reactions file as input.");
HELP_FORMATTER.printHelp(SarGenerationDriver.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
System.exit(1);
}
if (!cl.hasOption(OPTION_REACTION_LIST) && !cl.hasOption(OPTION_REACTIONS_FILE)) {
LOGGER.error("Must supply either a reaction list or a reactions file as input.");
HELP_FORMATTER.printHelp(SarGenerationDriver.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
System.exit(1);
}
// Build input reaction group corpus.
Iterable<ReactionGroup> groups = null;
if (cl.hasOption(OPTION_REACTION_LIST)) {
LOGGER.info("Using specific input reactions.");
ReactionGroup group = new ReactionGroup("ONLY_GROUP", "NO_DB");
for (String idString : cl.getOptionValues(OPTION_REACTION_LIST)) {
group.addReactionId(Long.parseLong(idString));
}
groups = Arrays.asList(group);
}
if (cl.hasOption(OPTION_REACTIONS_FILE)) {
LOGGER.info("Using reactions file.");
File inputFile = new File(cl.getOptionValue(OPTION_REACTIONS_FILE));
try {
groups = ReactionGroupCorpus.loadFromJsonFile(inputFile);
LOGGER.info("Successfully parsed input as json file.");
} catch (IOException e) {
LOGGER.info("Input file not json file. Trying txt format.");
try {
groups = ReactionGroupCorpus.loadFromTextFile(inputFile);
LOGGER.info("Successfully parsed input as text file.");
} catch (IOException f) {
LOGGER.error("Reactions input file not parseable. %s", f.getMessage());
throw f;
}
}
}
// Build all pieces of SAR generator
ReactionProjector projector = new ReactionProjector();
ExpandedReactionSearcher generalizer = new ExpandedReactionSearcher(projector);
McsCalculator reactionMcsCalculator = new McsCalculator(McsCalculator.REACTION_BUILDING_OPTIONS);
McsCalculator sarMcsCalculator = new McsCalculator(McsCalculator.SAR_OPTIONS);
FullReactionBuilder reactionBuilder = new FullReactionBuilder(reactionMcsCalculator, generalizer, projector);
SarFactory substructureSarFactory = new OneSubstrateSubstructureSar.Factory(sarMcsCalculator);
SarFactory carbonCountSarFactory = new OneSubstrateCarbonCountSar.Factory();
List<SarFactory> sarFactories = Arrays.asList(carbonCountSarFactory, substructureSarFactory);
ErosCorpus roCorpus = new ErosCorpus();
roCorpus.loadValidationCorpus();
ReactionGroupCharacterizer reactionGroupCharacterizer =
new OneSubstrateOneRoCharacterizer(dbApi, sarFactories, reactionBuilder, roCorpus);
SarCorpusBuilder corpusBuilder = new SarCorpusBuilder(groups, reactionGroupCharacterizer);
LOGGER.info("Parsed arguments and constructed SAR corpus builder. Building corpus.");
SarCorpus sarCorpus = corpusBuilder.build();
LOGGER.info("Built sar corpus. Printing to file in json format.");
sarCorpus.printToJsonFile(outputFile);
LOGGER.info("Complete!");
}
}