/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.biointerpretation.sars;
import chemaxon.core.ChemConst;
import chemaxon.struc.Molecule;
import chemaxon.struc.RxnMolecule;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
* This class is used to filter the potential substrates of an enzyme based on their number of carbons.
* This should generally not be used alone, but as an extra filter on top of a substructure based SAR.
*
* This is useful primarily to avoid matching very large, complicated molecules, to SARs for which all
* of our evidence points to relatively small substrates. For example, if a substructure SAR was constructed on 5
* substrates, with Carbon counts between 10 and 20, we wouldn't necessarily want to assume that the corresponding
* enzyme would also act on a molecule with 100 Carbon atoms, even if that molecule matched the substructure SAR.
*/
public class OneSubstrateCarbonCountSar implements Sar {
private static final Logger LOGGER = LogManager.getFormatterLogger(OneSubstrateCarbonCountSar.class);
@JsonProperty("min_carbon_count")
private int minCount;
@JsonProperty("max_carbon_count")
private int maxCount;
/**
* For JSON.
*/
private OneSubstrateCarbonCountSar() {
}
public OneSubstrateCarbonCountSar(int minCount, int maxCount) {
this.minCount = minCount;
this.maxCount = maxCount;
}
public int getMinCount() {
return minCount;
}
/**
* For JSON.
*/
private void setMinCount(int minCount) {
this.minCount = minCount;
}
public int getMaxCount() {
return maxCount;
}
/**
* For JSON.
*/
private void setMaxCount(int maxCount) {
this.maxCount = maxCount;
}
@Override
public boolean test(List<Molecule> substrates) {
// This class of SARs is only valid on single-substrate reactions.
if (substrates.size() != 1) {
return false;
}
Molecule oneSubstrate = substrates.get(0);
return oneSubstrate.getAtomCount(ChemConst.C) >= minCount && oneSubstrate.getAtomCount(ChemConst.C) <= maxCount;
}
/**
* TODO: Add a configurable fuzziness to the builder.
* This would allow it to build a SAR to accept atoms with carbon counts within some range of the seen reactions'
* counts, rather than only those strictly within the observed bounds.
*/
public static class Factory implements SarFactory {
@Override
public Sar buildSar(List<RxnMolecule> reactions) {
if (!DbAPI.areAllOneSubstrate(reactions)) {
throw new IllegalArgumentException("Reactions are not all one substrate.");
}
List<Integer> carbonCounts = getSubstrateCarbonCounts(reactions);
return new OneSubstrateCarbonCountSar(Collections.min(carbonCounts), Collections.max(carbonCounts));
}
private List<Integer> getSubstrateCarbonCounts(List<RxnMolecule> reactions) {
return reactions.stream()
.map(rxn -> rxn.getReactants()[0])
.map(molecule -> molecule.getAtomCount(ChemConst.C))
.collect(Collectors.toList());
}
}
}