/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.biointerpretation.l2expansion;
import act.shared.Chemical;
import chemaxon.formats.MolFormatException;
import chemaxon.reaction.ReactionException;
import chemaxon.struc.Molecule;
import com.act.analysis.chemicals.molecules.MoleculeImporter;
import com.act.biointerpretation.mechanisminspection.Ero;
import com.act.biointerpretation.mechanisminspection.ErosCorpus;
import com.act.biointerpretation.sars.SerializableReactor;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
// TODO: write tests for this class.
/**
 * An {@link L2Expander} that builds prediction seeds for two-substrate ROs by pairing every molecule from
 * one chemical list with every molecule from a second chemical list, restricted to the ROs that both
 * chemicals list among their substructure RO ids.
 */
public class TwoSubstrateRoExpander extends L2Expander {
  private static final Logger LOGGER = LogManager.getFormatterLogger(TwoSubstrateRoExpander.class);
  // Substrate count passed to the corpus filter in getPredictionSeeds().
  private static final Integer TWO_SUBSTRATES = 2;

  // Chemicals supplying the first substrate of every generated pair.
  private final List<Chemical> chemicalsA;
  // Chemicals supplying the second substrate of every generated pair.
  private final List<Chemical> chemicalsB;
  private final ErosCorpus roCorpus;

  /**
   * @param chemicalsA Chemicals used as the first substrate of each pair.
   * @param chemicalsB Chemicals used as the second substrate of each pair.
   * @param roCorpus   The ROs to expand on; it is filtered by substrate count when seeds are requested.
   * @param generator  Passed through to the {@link L2Expander} superclass constructor.
   */
  public TwoSubstrateRoExpander(List<Chemical> chemicalsA,
                                List<Chemical> chemicalsB,
                                ErosCorpus roCorpus,
                                PredictionGenerator generator) {
    super(generator);
    this.chemicalsA = chemicalsA;
    this.chemicalsB = chemicalsB;
    this.roCorpus = roCorpus;
  }

  /**
   * This function builds the seeds for pairwise L2 expansion on two sets of substrates. The function is optimized
   * for only producing RO expansions on chemical combinations where both chemicals have passed the RO substructure
   * matching. This is why this class requires chemicals rather than just inchis - we can't run the optimizations if
   * the chemicals aren't in our DB.
   *
   * <p>For every RO whose reactor can be obtained, one seed is produced per (moleculeA, moleculeB) pair, where
   * moleculeA comes from {@code chemicalsA} and moleculeB from {@code chemicalsB}; the substrates are always
   * listed in that order. ROs whose reactor cannot be obtained, or that have no matching molecule on either side,
   * are skipped.
   *
   * <p>NOTE(review): this calls {@code roCorpus.filterCorpusBySubstrateCount(2)} and ignores any return value, so
   * it presumably filters the supplied corpus in place - confirm against ErosCorpus.
   *
   * @return The prediction seeds for all qualifying (RO, substrate pair) combinations.
   */
  @Override
  public Iterable<PredictionSeed> getPredictionSeeds() {
    roCorpus.filterCorpusBySubstrateCount(TWO_SUBSTRATES);
    LOGGER.info("The number of ROs to apply is %d", roCorpus.getRos().size());

    LOGGER.info("Constructing ro to molecule structures for metabolite list and chemicals of interest list.");
    Map<Integer, Set<Molecule>> roIdToMoleculesA = constructRoToMolecules(chemicalsA);
    Map<Integer, Set<Molecule>> roIdToMoleculesB = constructRoToMolecules(chemicalsB);

    LOGGER.info("Perform L2 expansion for each ro in the list");
    List<PredictionSeed> result = new ArrayList<>();

    int roProcessedCounter = 0;
    for (Ero ro : roCorpus.getRos()) {
      SerializableReactor reactor;
      try {
        reactor = new SerializableReactor(ro.getReactor(), ro.getId());
      } catch (ReactionException e) {
        LOGGER.info("Skipping ro %d, couldn't get Reactor.", ro.getId());
        continue;
      }

      // Only ROs with a usable reactor are counted as processed.
      roProcessedCounter++;
      LOGGER.info("Processing the %d indexed ro out of %s ros", roProcessedCounter, roCorpus.getRos().size());

      // Each side must be looked up in its own map so that substrate order follows the constructor's
      // (chemicalsA, chemicalsB) order.
      Set<Molecule> roMoleculesA = roIdToMoleculesA.get(ro.getId());
      Set<Molecule> roMoleculesB = roIdToMoleculesB.get(ro.getId());

      // No pairs can be formed unless both sides have at least one molecule registered for this RO.
      if (roMoleculesA == null || roMoleculesB == null) {
        continue;
      }

      for (Molecule moleculeA : roMoleculesA) {
        for (Molecule moleculeB : roMoleculesB) {
          List<Molecule> substrates = Arrays.asList(moleculeA, moleculeB);
          result.add(new PredictionSeed(ro.getId().toString(), substrates, reactor, NO_SAR));
        }
      }
    }

    return result;
  }

  /**
   * This function constructs a ro to set of molecules map. Each chemical is imported as a molecule once and
   * registered under every RO id returned by {@code chemical.getSubstructureRoIds()}. Chemicals whose import
   * raises a MolFormatException are logged and left out of the map.
   *
   * @param chemicals List of chemicals to process
   * @return A map of ro id to the set of molecules registered for that ro; ROs with no molecules have no entry
   */
  private Map<Integer, Set<Molecule>> constructRoToMolecules(List<Chemical> chemicals) {
    Map<Integer, Set<Molecule>> result = new HashMap<>();
    for (Chemical chemical : chemicals) {
      try {
        Molecule mol = MoleculeImporter.importMolecule(chemical);
        for (Integer roId : chemical.getSubstructureRoIds()) {
          Set<Molecule> molecules = result.get(roId);
          if (molecules == null) {
            molecules = new HashSet<>();
            result.put(roId, molecules);
          }
          molecules.add(mol);
        }
      } catch (MolFormatException e) {
        LOGGER.error("MolFormatException on metabolite %s. %s", chemical.getInChI(), e.getMessage());
      }
    }
    return result;
  }
}