/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.biointerpretation.sars;
import chemaxon.struc.Molecule;
import com.chemaxon.search.mcs.MaxCommonSubstructure;
import com.chemaxon.search.mcs.McsSearchOptions;
import com.chemaxon.search.mcs.RingHandlingMode;
import java.util.List;
public class McsCalculator {
/**
* Bond matching = true --> only bonds of same order will match
* connectedMode = true --> only return one fragment
* ringHandlingMode = KEEP_RINGS --> don't allow a ring to be only partially matched by substructure; all or nothing
* We do not match bond type because it throws off a lot of matches on benzene rings with bonds shifted.
* TODO: further investigate bond type regarding aromatization and rings.
*/
public static final McsSearchOptions REACTION_BUILDING_OPTIONS =
new McsSearchOptions.Builder()
.bondTypeMatching(true)
.connectedMode(false)
.ringHandlingMode(RingHandlingMode.KEEP_RINGS)
.build();
public static final McsSearchOptions SAR_OPTIONS =
new McsSearchOptions.Builder()
.bondTypeMatching(false)
.connectedMode(false)
.ringHandlingMode(RingHandlingMode.KEEP_RINGS)
.build();
private final MaxCommonSubstructure mcs;
public McsCalculator(McsSearchOptions mcsOptions) {
this.mcs = MaxCommonSubstructure.newInstance(mcsOptions);
}
/**
* Gets MCS of any number of molecules by iteratively applying Chemaxon's MCS search to all substrates.
* For an array of n molecules, this will use n-1 MCS operations.
* TODO: experiment with LibraryMcs instead of MaxCommonSubstructure here; it may find a better overall match.
*
* @param molecules The molecules to get the MCS of.
* @return The MCS of all input molecules.
*/
public Molecule getMCS(List<Molecule> molecules) {
if (molecules.isEmpty()) {
throw new IllegalArgumentException("Cannot get MCS of empty list of molecules.");
}
Molecule substructure = molecules.get(0);
for (Molecule mol : molecules.subList(1, molecules.size())) {
substructure = getMcsOfPair(substructure, mol);
}
return substructure;
}
/**
* Helper method to find MCS of exactly two molecules.
*/
private Molecule getMcsOfPair(Molecule moleculeA, Molecule moleculeB) {
mcs.setMolecules(moleculeA, moleculeB);
return mcs.nextResult().getAsMolecule();
}
}