/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package com.act.biointerpretation.l2expansion; import act.shared.Chemical; import chemaxon.formats.MolFormatException; import chemaxon.reaction.ReactionException; import chemaxon.struc.Molecule; import com.act.analysis.chemicals.molecules.MoleculeImporter; import com.act.biointerpretation.mechanisminspection.Ero; import com.act.biointerpretation.mechanisminspection.ErosCorpus; import com.act.biointerpretation.sars.SerializableReactor; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; // TODO: write tests for this class. public class TwoSubstrateRoExpander extends L2Expander { private static final Logger LOGGER = LogManager.getFormatterLogger(TwoSubstrateRoExpander.class); private static final Integer TWO_SUBSTRATES = 2; private final List<Chemical> chemicalsA; private final List<Chemical> chemicalsB; private final ErosCorpus roCorpus; public TwoSubstrateRoExpander(List<Chemical> chemicalsA, List<Chemical> chemicalsB, ErosCorpus roCorpus, PredictionGenerator generator) { super(generator); this.chemicalsA = chemicalsA; this.chemicalsB = chemicalsB; this.roCorpus = roCorpus; } /** * This function performs pairwise L2 expansion on two sets of substrates. The function is optimized for only * computing RO expansions on chemical combinations where both chemicals have passed the RO substructure matching. * This is why this class requires chemicals rather than just inchis - we can't run the optimizations if the chemicals * aren't in our DB. * * @return The L2PredictionCorpus of all products generated. * @throws IOException * @throws ReactionException */ @Override public Iterable<PredictionSeed> getPredictionSeeds() { roCorpus.filterCorpusBySubstrateCount(TWO_SUBSTRATES); LOGGER.info("The number of ROs to apply is %d", roCorpus.getRos().size()); LOGGER.info("Constructing ro to molecule structures for metabolite list and chemicals of interest list."); Map<Integer, Set<Molecule>> roIdToMoleculesA = constructRoToMolecules(chemicalsA); Map<Integer, Set<Molecule>> roIdToMoleculesB = constructRoToMolecules(chemicalsB); LOGGER.info("Perform L2 expansion for each ro in the list"); List<PredictionSeed> result = new ArrayList<>(); int roProcessedCounter = 0; for (Ero ro : roCorpus.getRos()) { SerializableReactor reactor; try { reactor = new SerializableReactor(ro.getReactor(), ro.getId()); } catch (ReactionException e) { LOGGER.info("Skipping ro %d, couldn't get Reactor.", ro.getId()); continue; } roProcessedCounter++; LOGGER.info("Processing the %d indexed ro out of %s ros", roProcessedCounter, roCorpus.getRos().size()); Set<Molecule> roMoleculesA = roIdToMoleculesB.get(ro.getId()); Set<Molecule> roMoleculesB = roIdToMoleculesA.get(ro.getId()); if (roMoleculesA == null || roMoleculesB == null) { continue; } for (Molecule moleculeA : roMoleculesA) { for (Molecule moleculeB : roMoleculesB) { List<Molecule> substrates = Arrays.asList(moleculeA, moleculeB); result.add(new PredictionSeed(ro.getId().toString(), substrates, reactor, NO_SAR)); } } } return result; } /** * This function constructs a ro to set of molecules map * * @param chemicals List of chemicals to process * @return A map of ro to set of molecules that match the ro's substructure */ private Map<Integer, Set<Molecule>> constructRoToMolecules(List<Chemical> chemicals) { Map<Integer, Set<Molecule>> result = new HashMap<>(); for (Chemical chemical : chemicals) { try { Molecule mol = MoleculeImporter.importMolecule(chemical); for (Integer roId : chemical.getSubstructureRoIds()) { Set<Molecule> molecules = result.get(roId); if (molecules == null) { molecules = new HashSet<>(); result.put(roId, molecules); } molecules.add(mol); } } catch (MolFormatException e) { LOGGER.error("MolFormatException on metabolite %s. %s", chemical.getInChI(), e.getMessage()); } } return result; } }