// OneSubstrateCarbonCountSar.java example

// Explorer
// act-master
/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.biointerpretation.sars;

import chemaxon.core.ChemConst;
import chemaxon.struc.Molecule;
import chemaxon.struc.RxnMolecule;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

/**
 * A {@link Sar} that filters the potential substrates of an enzyme based on their number of carbon atoms.
 * This should generally not be used alone, but as an extra filter on top of a substructure based SAR.
 *
 * This is useful primarily to avoid matching very large, complicated molecules to SARs for which all
 * of our evidence points to relatively small substrates. For example, if a substructure SAR was constructed on 5
 * substrates with carbon counts between 10 and 20, we wouldn't necessarily want to assume that the corresponding
 * enzyme would also act on a molecule with 100 carbon atoms, even if that molecule matched the substructure SAR.
 *
 * The accepted range is inclusive on both ends: a substrate passes when
 * {@code minCount <= carbonCount <= maxCount}.
 */
public class OneSubstrateCarbonCountSar implements Sar {

  private static final Logger LOGGER = LogManager.getFormatterLogger(OneSubstrateCarbonCountSar.class);

  /** Smallest carbon count (inclusive) that this SAR accepts. */
  @JsonProperty("min_carbon_count")
  private int minCount;

  /** Largest carbon count (inclusive) that this SAR accepts. */
  @JsonProperty("max_carbon_count")
  private int maxCount;

  /**
   * For JSON.
   */
  private OneSubstrateCarbonCountSar() {
  }

  /**
   * Builds a SAR accepting single substrates whose carbon count lies in {@code [minCount, maxCount]}.
   *
   * @param minCount The smallest accepted carbon count, inclusive.
   * @param maxCount The largest accepted carbon count, inclusive.
   */
  public OneSubstrateCarbonCountSar(int minCount, int maxCount) {
    this.minCount = minCount;
    this.maxCount = maxCount;
  }

  /**
   * @return The smallest accepted carbon count, inclusive.
   */
  public int getMinCount() {
    return minCount;
  }

  /**
   * For JSON.
   */
  private void setMinCount(int minCount) {
    this.minCount = minCount;
  }

  /**
   * @return The largest accepted carbon count, inclusive.
   */
  public int getMaxCount() {
    return maxCount;
  }

  /**
   * For JSON.
   */
  private void setMaxCount(int maxCount) {
    this.maxCount = maxCount;
  }

  /**
   * Tests whether the given substrates satisfy this SAR.
   *
   * @param substrates The substrates of a candidate reaction.
   * @return True iff there is exactly one substrate and its carbon count lies within the inclusive
   * range {@code [minCount, maxCount]}.
   */
  @Override
  public boolean test(List<Molecule> substrates) {
    // This class of SARs is only valid on single-substrate reactions.
    if (substrates.size() != 1) {
      return false;
    }

    // Count the carbons once and compare against both bounds, rather than querying the molecule twice.
    int carbonCount = substrates.get(0).getAtomCount(ChemConst.C);
    return carbonCount >= minCount && carbonCount <= maxCount;
  }

  /**
   * Builds a {@link OneSubstrateCarbonCountSar} whose bounds are the smallest and largest substrate carbon
   * counts seen in a set of single-substrate reactions.
   *
   * TODO: Add a configurable fuzziness to the builder.
   * This would allow it to build a SAR to accept molecules with carbon counts within some range of the seen
   * reactions' counts, rather than only those strictly within the observed bounds.
   */
  public static class Factory implements SarFactory {

    /**
     * Builds a carbon count SAR spanning the observed substrate carbon counts of the supplied reactions.
     *
     * @param reactions The reactions to build the SAR from; each must have exactly one substrate.
     * @return A SAR accepting carbon counts between the observed minimum and maximum, inclusive.
     * @throws IllegalArgumentException If the reactions are not all one-substrate reactions, or if the list
     * is empty so that no bounds can be derived.
     */
    @Override
    public Sar buildSar(List<RxnMolecule> reactions) {
      if (!DbAPI.areAllOneSubstrate(reactions)) {
        throw new IllegalArgumentException("Reactions are not all one substrate.");
      }

      // With no reactions there are no observed counts to bound the SAR with; fail with a descriptive error
      // instead of letting Collections.min() throw a message-less NoSuchElementException.
      if (reactions.isEmpty()) {
        throw new IllegalArgumentException("Cannot build a carbon count SAR from an empty list of reactions.");
      }

      List<Integer> carbonCounts = getSubstrateCarbonCounts(reactions);
      return new OneSubstrateCarbonCountSar(Collections.min(carbonCounts), Collections.max(carbonCounts));
    }

    /**
     * Extracts the carbon count of the first reactant of every reaction.
     *
     * @param reactions The reactions to inspect; each is expected to have at least one reactant.
     * @return The carbon counts, in the same order as the input reactions.
     */
    private List<Integer> getSubstrateCarbonCounts(List<RxnMolecule> reactions) {
      return reactions.stream()
          .map(rxn -> rxn.getReactants()[0])
          .map(molecule -> molecule.getAtomCount(ChemConst.C))
          .collect(Collectors.toList());
    }
  }
}