/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.reachables;
import act.server.NoSQLAPI;
import act.shared.Reaction;
import chemaxon.license.LicenseException;
import chemaxon.license.LicenseProcessingException;
import chemaxon.reaction.ReactionException;
import com.act.biointerpretation.mechanisminspection.Ero;
import com.act.biointerpretation.mechanisminspection.MechanisticValidator;
import com.act.biointerpretation.mechanisminspection.ReactionRenderer;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class CladeTraversal {
public static final String OPTION_TARGET_INCHI = "i";
public static final String OPTION_OUTPUT_INCHI_FILE_NAME = "o";
public static final String OPTION_OUTPUT_REACTION_FILE_NAME = "r";
public static final String OPTION_OUTPUT_FAILED_REACTIONS_DIR_NAME = "d";
public static final String OPTION_ACT_DATA_FILE = "a";
public static final String PABA_INCHI = "InChI=1S/C7H7NO2/c8-6-3-1-5(2-4-6)7(9)10/h1-4H,8H2,(H,9,10)";
public static final String DEFAULT_INCHI_FILE = "Inchis.txt";
public static final String DEFAULT_REACTIONS_FILE = "Reactions.txt";
public static final String DEFAULT_ACTDATA_FILE = "result.actdata";
public static final String HELP_MESSAGE = StringUtils.join(new String[]{
"This class traverses the reachables tree from a target start point to find all chemicals derived from" +
"the start point chemical."
}, "");
public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<Option.Builder>() {{
add(Option.builder(OPTION_TARGET_INCHI)
.argName("target inchi")
.desc("The target inchi to start the clade search from.")
.hasArg()
.longOpt("target-inchi")
);
add(Option.builder(OPTION_OUTPUT_INCHI_FILE_NAME)
.argName("inchi output file")
.desc("A file containing a list of InChIs the correspond to the clade of the target molecule.")
.hasArg()
.longOpt("output-inchis")
);
add(Option.builder(OPTION_OUTPUT_REACTION_FILE_NAME)
.argName("reaction output file")
.desc("A file containing a list of reaction pathways corresponding to all paths from start point to a particular" +
"reachable.")
.hasArg()
.longOpt("output-reactions")
);
add(Option.builder(OPTION_OUTPUT_FAILED_REACTIONS_DIR_NAME)
.argName("failed reaction output directory")
.desc("A directory containing reaction drawing of all the reaction that failed the mechanistic validator.")
.hasArg()
.longOpt("output-reaction-rendering")
);
add(Option.builder(OPTION_ACT_DATA_FILE)
.argName("act data file")
.desc("The act data file to read the reachables tree from.")
.hasArg()
.longOpt("act-data-file")
);
add(Option.builder("h")
.argName("help")
.desc("Prints this help message.")
.longOpt("help")
);
}};
public static final HelpFormatter HELP_FORMATTER = new HelpFormatter();
static {
HELP_FORMATTER.setWidth(100);
}
private static final Logger LOGGER = LogManager.getFormatterLogger(CladeTraversal.class);
private static final NoSQLAPI db = new NoSQLAPI("marvin_v2", "marvin_v2");
private ActData actData;
private Map<Long, Set<Long>> parentToChildren;
private MechanisticValidator validator;
public CladeTraversal(MechanisticValidator validator, ActData actData) {
this.actData = actData;
this.validator = validator;
this.parentToChildren = constructParentToChildrenAssociations();
}
public static void main(String[] args) throws Exception {
Options opts = new Options();
for (Option.Builder b : OPTION_BUILDERS) {
opts.addOption(b.build());
}
CommandLine cl = null;
try {
CommandLineParser parser = new DefaultParser();
cl = parser.parse(opts, args);
} catch (ParseException e) {
System.err.format("Argument parsing failed: %s\n", e.getMessage());
HELP_FORMATTER.printHelp(CladeTraversal.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
System.exit(1);
}
if (cl.hasOption("help")) {
HELP_FORMATTER.printHelp(CladeTraversal.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
return;
}
String targetInchi = cl.getOptionValue(OPTION_TARGET_INCHI, PABA_INCHI);
String inchiFileName = cl.getOptionValue(OPTION_OUTPUT_INCHI_FILE_NAME, DEFAULT_INCHI_FILE);
String reactionsFileName = cl.getOptionValue(OPTION_OUTPUT_REACTION_FILE_NAME, DEFAULT_REACTIONS_FILE);
String reactionDirectory = cl.getOptionValue(OPTION_OUTPUT_FAILED_REACTIONS_DIR_NAME, "/");
String actDataFile = cl.getOptionValue(OPTION_ACT_DATA_FILE, DEFAULT_ACTDATA_FILE);
runCladeExpansion(actDataFile, targetInchi, inchiFileName, reactionsFileName, reactionDirectory);
}
public static void runCladeExpansion(String actDataFile, String targetInchi, String inchiFileName,
String reactionsFileName, String reactionDirectory)
throws IOException, ReactionException, LicenseProcessingException {
MechanisticValidator validator = new MechanisticValidator(db);
validator.init();
ActData.instance().deserialize(actDataFile);
CladeTraversal cladeTraversal = new CladeTraversal(validator, ActData.instance());
Long idFromInchi = cladeTraversal.findNodeIdFromInchi(targetInchi);
if (idFromInchi == null) {
LOGGER.error("Could not find target inchi %s in the reachables tree", targetInchi);
return;
}
cladeTraversal.traverseTreeFromStartPoint(idFromInchi, inchiFileName, reactionsFileName, reactionDirectory);
}
/**
* This function constructs a map of parent -> list of children associations of chemical ids based on the reachables tree.
*
* @return Returns a map of parent to list of children associations.
*/
private Map<Long, Set<Long>> constructParentToChildrenAssociations() {
Map<Long, Set<Long>> parentToChildrenAssociations = new HashMap<>();
for (Map.Entry<Long, Long> childIdToParentId : this.actData.getActTree().parents.entrySet()) {
Long parentId = childIdToParentId.getValue();
Long childId = childIdToParentId.getKey();
Set<Long> childIds = parentToChildrenAssociations.get(parentId);
if (childIds == null) {
childIds = new HashSet<>();
parentToChildrenAssociations.put(parentId, childIds);
}
childIds.add(childId);
}
return parentToChildrenAssociations;
}
/**
* This function finds the node id from an inchi, which is useful since the reachable tree structure is referenced
* by node ids.
*
* @param inchi - The inchi to find the node id from.
* @return The node id of the inchi.
*/
private Long findNodeIdFromInchi(String inchi) {
return this.actData.chemInchis.get(inchi);
}
/**
* This function traverses the reachables tree from the given start point using BFS, adds all the chemical's derivatives
* to a file based on if they pass the mechanistic validator, and the derivatives' reaction pathway from the target
* is also logged. Finally, for all the reactions that did not pass the mechanistic validator, we render those reactions
* for furthur analysis into a directory.
*
* @param startPointId - The start point node id to traverse from
* @param validatedInchisFileName - The file containing all the derivative inchis that pass the validator.
* @param reactionPathwayFileName - The file containing the reaction pathway information from source to target.
* @param renderedReactionDirName - The directory containing all the rendered chemical reactions that failed the
* mechanistic validator.
* @throws IOException
*/
private void traverseTreeFromStartPoint(Long startPointId, String validatedInchisFileName, String reactionPathwayFileName,
String renderedReactionDirName) throws IOException {
ReactionRenderer render = new ReactionRenderer();
PrintWriter validatedInchisWriter = new PrintWriter(validatedInchisFileName, "UTF-8");
PrintWriter reactionPathwayWriter = new PrintWriter(reactionPathwayFileName, "UTF-8");
LinkedList<Long> queue = new LinkedList<>();
queue.addAll(this.parentToChildren.get(startPointId));
while (!queue.isEmpty()) {
Long candidateId = queue.pop();
validatedInchisWriter.println(db.readChemicalFromInKnowledgeGraph(candidateId).getInChI());
reactionPathwayWriter.println(formatPathFromSrcToDerivativeOfSrc(startPointId, candidateId));
Set<Long> children = this.parentToChildren.get(candidateId);
if (children != null) {
for (Long child : children) {
for (Long rxnId : rxnIdsForEdge(candidateId, child)) {
// In the case of a negative rxn id, this signifies the reaction is happening in reverse to what is
// referenced in the DB. In order to get the correct db index, one has to transform this negative reaction
// into its actual id.
if (rxnId < 0) {
rxnId = Reaction.reverseNegativeId(rxnId);
}
// Validate the reaction and only add its children to the queue if the reaction makes sense to our internal
// ros and the child is not in the queue already.
Map<Integer, List<Ero>> validatorResults = this.validator.validateOneReaction(rxnId);
if (validatorResults != null && validatorResults.size() > 0 && !queue.contains(child)) {
queue.add(child);
} else {
try {
render.drawReaction(db.getReadDB(), rxnId, renderedReactionDirName, true);
} catch (Exception e) {
LOGGER.error("Error caught when trying to draw and save reaction %d with error message: %s", rxnId, e.getMessage());
}
}
}
}
}
}
reactionPathwayWriter.close();
validatedInchisWriter.close();
}
/**
* The function creates a ordered list of chemicals from src to dst.
*
* @param src - The src id
* @param dst - The dst id
* @return Returns a list of ids from src to dst.
*/
public LinkedList<Long> pathFromSrcToDerivativeOfSrc(Long src, Long dst) {
LinkedList<Long> result = new LinkedList<>();
Long id = dst;
result.add(id);
while (!id.equals(src)) {
Long newId = this.actData.getActTree().parents.get(id);
result.add(newId);
id = newId;
}
Collections.reverse(result);
return result;
}
/**
* This function finds all reactions that explain the given combination of src and dst chemicals.
*
* @param src - The src node id.
* @param dst - The dst node id.
* @return Returns a set of rxn ids where src in the substrates and dst in the products.
*/
public Set<Long> rxnIdsForEdge(Long src, Long dst) {
Set<Long> rxnsThatProduceChem = GlobalParams.USE_RXN_CLASSES ? ActData.instance().rxnClassesThatProduceChem.get(dst) :
ActData.instance().rxnsThatProduceChem.get(dst);
Set<Long> rxnsThatConsumeChem = GlobalParams.USE_RXN_CLASSES ? ActData.instance().rxnClassesThatConsumeChem.get(src) :
ActData.instance().rxnsThatConsumeChem.get(src);
Set<Long> intersection = new HashSet<>(rxnsThatProduceChem);
intersection.retainAll(rxnsThatConsumeChem);
return intersection;
}
/**
* This function pretty prints a string that explains the reaction pathway from src to dst.
*
* @param src - The src chemical id
* @param dst - The dst chemical id
* @return This function returns a string format of the reaction pathway.
*/
public String formatPathFromSrcToDerivativeOfSrc(Long src, Long dst) {
String result = "";
List<Long> path = pathFromSrcToDerivativeOfSrc(src, dst);
for (int i = 0; i < path.size() - 1; i++) {
result += db.readChemicalFromInKnowledgeGraph(path.get(i)).getInChI();
result += " --- ";
Set<Long> rxnIds = rxnIdsForEdge(path.get(i), path.get(i + 1));
result += StringUtils.join(rxnIds, ",");
result += " ---> ";
}
result += db.readChemicalFromInKnowledgeGraph(path.get(path.size() - 1)).getInChI();
return result;
}
}