/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package com.act.reachables; import act.server.NoSQLAPI; import act.shared.Reaction; import chemaxon.license.LicenseException; import chemaxon.license.LicenseProcessingException; import chemaxon.reaction.ReactionException; import com.act.biointerpretation.mechanisminspection.Ero; import com.act.biointerpretation.mechanisminspection.MechanisticValidator; import com.act.biointerpretation.mechanisminspection.ReactionRenderer; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; public class CladeTraversal { public static final String OPTION_TARGET_INCHI = "i"; public static final String OPTION_OUTPUT_INCHI_FILE_NAME = "o"; public static final String OPTION_OUTPUT_REACTION_FILE_NAME = "r"; public static final String OPTION_OUTPUT_FAILED_REACTIONS_DIR_NAME = "d"; public static final String OPTION_ACT_DATA_FILE = "a"; public static final String PABA_INCHI = "InChI=1S/C7H7NO2/c8-6-3-1-5(2-4-6)7(9)10/h1-4H,8H2,(H,9,10)"; public static final String DEFAULT_INCHI_FILE = "Inchis.txt"; public static final String DEFAULT_REACTIONS_FILE = "Reactions.txt"; public static final String DEFAULT_ACTDATA_FILE = "result.actdata"; public static final String HELP_MESSAGE = StringUtils.join(new String[]{ "This class traverses the reachables tree from a target start point to find all chemicals derived from" + "the start point chemical." }, ""); public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<Option.Builder>() {{ add(Option.builder(OPTION_TARGET_INCHI) .argName("target inchi") .desc("The target inchi to start the clade search from.") .hasArg() .longOpt("target-inchi") ); add(Option.builder(OPTION_OUTPUT_INCHI_FILE_NAME) .argName("inchi output file") .desc("A file containing a list of InChIs the correspond to the clade of the target molecule.") .hasArg() .longOpt("output-inchis") ); add(Option.builder(OPTION_OUTPUT_REACTION_FILE_NAME) .argName("reaction output file") .desc("A file containing a list of reaction pathways corresponding to all paths from start point to a particular" + "reachable.") .hasArg() .longOpt("output-reactions") ); add(Option.builder(OPTION_OUTPUT_FAILED_REACTIONS_DIR_NAME) .argName("failed reaction output directory") .desc("A directory containing reaction drawing of all the reaction that failed the mechanistic validator.") .hasArg() .longOpt("output-reaction-rendering") ); add(Option.builder(OPTION_ACT_DATA_FILE) .argName("act data file") .desc("The act data file to read the reachables tree from.") .hasArg() .longOpt("act-data-file") ); add(Option.builder("h") .argName("help") .desc("Prints this help message.") .longOpt("help") ); }}; public static final HelpFormatter HELP_FORMATTER = new HelpFormatter(); static { HELP_FORMATTER.setWidth(100); } private static final Logger LOGGER = LogManager.getFormatterLogger(CladeTraversal.class); private static final NoSQLAPI db = new NoSQLAPI("marvin_v2", "marvin_v2"); private ActData actData; private Map<Long, Set<Long>> parentToChildren; private MechanisticValidator validator; public CladeTraversal(MechanisticValidator validator, ActData actData) { this.actData = actData; this.validator = validator; this.parentToChildren = constructParentToChildrenAssociations(); } public static void main(String[] args) throws Exception { Options opts = new Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build()); } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { System.err.format("Argument parsing failed: %s\n", e.getMessage()); HELP_FORMATTER.printHelp(CladeTraversal.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (cl.hasOption("help")) { HELP_FORMATTER.printHelp(CladeTraversal.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } String targetInchi = cl.getOptionValue(OPTION_TARGET_INCHI, PABA_INCHI); String inchiFileName = cl.getOptionValue(OPTION_OUTPUT_INCHI_FILE_NAME, DEFAULT_INCHI_FILE); String reactionsFileName = cl.getOptionValue(OPTION_OUTPUT_REACTION_FILE_NAME, DEFAULT_REACTIONS_FILE); String reactionDirectory = cl.getOptionValue(OPTION_OUTPUT_FAILED_REACTIONS_DIR_NAME, "/"); String actDataFile = cl.getOptionValue(OPTION_ACT_DATA_FILE, DEFAULT_ACTDATA_FILE); runCladeExpansion(actDataFile, targetInchi, inchiFileName, reactionsFileName, reactionDirectory); } public static void runCladeExpansion(String actDataFile, String targetInchi, String inchiFileName, String reactionsFileName, String reactionDirectory) throws IOException, ReactionException, LicenseProcessingException { MechanisticValidator validator = new MechanisticValidator(db); validator.init(); ActData.instance().deserialize(actDataFile); CladeTraversal cladeTraversal = new CladeTraversal(validator, ActData.instance()); Long idFromInchi = cladeTraversal.findNodeIdFromInchi(targetInchi); if (idFromInchi == null) { LOGGER.error("Could not find target inchi %s in the reachables tree", targetInchi); return; } cladeTraversal.traverseTreeFromStartPoint(idFromInchi, inchiFileName, reactionsFileName, reactionDirectory); } /** * This function constructs a map of parent -> list of children associations of chemical ids based on the reachables tree. * * @return Returns a map of parent to list of children associations. */ private Map<Long, Set<Long>> constructParentToChildrenAssociations() { Map<Long, Set<Long>> parentToChildrenAssociations = new HashMap<>(); for (Map.Entry<Long, Long> childIdToParentId : this.actData.getActTree().parents.entrySet()) { Long parentId = childIdToParentId.getValue(); Long childId = childIdToParentId.getKey(); Set<Long> childIds = parentToChildrenAssociations.get(parentId); if (childIds == null) { childIds = new HashSet<>(); parentToChildrenAssociations.put(parentId, childIds); } childIds.add(childId); } return parentToChildrenAssociations; } /** * This function finds the node id from an inchi, which is useful since the reachable tree structure is referenced * by node ids. * * @param inchi - The inchi to find the node id from. * @return The node id of the inchi. */ private Long findNodeIdFromInchi(String inchi) { return this.actData.chemInchis.get(inchi); } /** * This function traverses the reachables tree from the given start point using BFS, adds all the chemical's derivatives * to a file based on if they pass the mechanistic validator, and the derivatives' reaction pathway from the target * is also logged. Finally, for all the reactions that did not pass the mechanistic validator, we render those reactions * for furthur analysis into a directory. * * @param startPointId - The start point node id to traverse from * @param validatedInchisFileName - The file containing all the derivative inchis that pass the validator. * @param reactionPathwayFileName - The file containing the reaction pathway information from source to target. * @param renderedReactionDirName - The directory containing all the rendered chemical reactions that failed the * mechanistic validator. * @throws IOException */ private void traverseTreeFromStartPoint(Long startPointId, String validatedInchisFileName, String reactionPathwayFileName, String renderedReactionDirName) throws IOException { ReactionRenderer render = new ReactionRenderer(); PrintWriter validatedInchisWriter = new PrintWriter(validatedInchisFileName, "UTF-8"); PrintWriter reactionPathwayWriter = new PrintWriter(reactionPathwayFileName, "UTF-8"); LinkedList<Long> queue = new LinkedList<>(); queue.addAll(this.parentToChildren.get(startPointId)); while (!queue.isEmpty()) { Long candidateId = queue.pop(); validatedInchisWriter.println(db.readChemicalFromInKnowledgeGraph(candidateId).getInChI()); reactionPathwayWriter.println(formatPathFromSrcToDerivativeOfSrc(startPointId, candidateId)); Set<Long> children = this.parentToChildren.get(candidateId); if (children != null) { for (Long child : children) { for (Long rxnId : rxnIdsForEdge(candidateId, child)) { // In the case of a negative rxn id, this signifies the reaction is happening in reverse to what is // referenced in the DB. In order to get the correct db index, one has to transform this negative reaction // into its actual id. if (rxnId < 0) { rxnId = Reaction.reverseNegativeId(rxnId); } // Validate the reaction and only add its children to the queue if the reaction makes sense to our internal // ros and the child is not in the queue already. Map<Integer, List<Ero>> validatorResults = this.validator.validateOneReaction(rxnId); if (validatorResults != null && validatorResults.size() > 0 && !queue.contains(child)) { queue.add(child); } else { try { render.drawReaction(db.getReadDB(), rxnId, renderedReactionDirName, true); } catch (Exception e) { LOGGER.error("Error caught when trying to draw and save reaction %d with error message: %s", rxnId, e.getMessage()); } } } } } } reactionPathwayWriter.close(); validatedInchisWriter.close(); } /** * The function creates a ordered list of chemicals from src to dst. * * @param src - The src id * @param dst - The dst id * @return Returns a list of ids from src to dst. */ public LinkedList<Long> pathFromSrcToDerivativeOfSrc(Long src, Long dst) { LinkedList<Long> result = new LinkedList<>(); Long id = dst; result.add(id); while (!id.equals(src)) { Long newId = this.actData.getActTree().parents.get(id); result.add(newId); id = newId; } Collections.reverse(result); return result; } /** * This function finds all reactions that explain the given combination of src and dst chemicals. * * @param src - The src node id. * @param dst - The dst node id. * @return Returns a set of rxn ids where src in the substrates and dst in the products. */ public Set<Long> rxnIdsForEdge(Long src, Long dst) { Set<Long> rxnsThatProduceChem = GlobalParams.USE_RXN_CLASSES ? ActData.instance().rxnClassesThatProduceChem.get(dst) : ActData.instance().rxnsThatProduceChem.get(dst); Set<Long> rxnsThatConsumeChem = GlobalParams.USE_RXN_CLASSES ? ActData.instance().rxnClassesThatConsumeChem.get(src) : ActData.instance().rxnsThatConsumeChem.get(src); Set<Long> intersection = new HashSet<>(rxnsThatProduceChem); intersection.retainAll(rxnsThatConsumeChem); return intersection; } /** * This function pretty prints a string that explains the reaction pathway from src to dst. * * @param src - The src chemical id * @param dst - The dst chemical id * @return This function returns a string format of the reaction pathway. */ public String formatPathFromSrcToDerivativeOfSrc(Long src, Long dst) { String result = ""; List<Long> path = pathFromSrcToDerivativeOfSrc(src, dst); for (int i = 0; i < path.size() - 1; i++) { result += db.readChemicalFromInKnowledgeGraph(path.get(i)).getInChI(); result += " --- "; Set<Long> rxnIds = rxnIdsForEdge(path.get(i), path.get(i + 1)); result += StringUtils.join(rxnIds, ","); result += " ---> "; } result += db.readChemicalFromInKnowledgeGraph(path.get(path.size() - 1)).getInChI(); return result; } }