/* $Revision$ $Author$ $Date$
*
* Copyright (C) 2005-2007 Christian Hoppe <chhoppe@users.sf.net>
*
* Contact: cdk-devel@lists.sourceforge.net
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
* All we ask is that proper credit is given for our work, which includes
* - but is not limited to - adding the above copyright notice to the beginning
* of your source code files, and to any copyright notice that you may distribute
* with programs based on this work.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
package org.openscience.cdk.tools;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;
import org.openscience.cdk.AtomContainer;
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.Molecule;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.graph.ConnectivityChecker;
import org.openscience.cdk.graph.PathTools;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IMolecule;
import org.openscience.cdk.interfaces.IRingSet;
import org.openscience.cdk.ringsearch.RingPartitioner;
import org.openscience.cdk.ringsearch.SSSRFinder;
import org.openscience.cdk.smiles.SmilesGenerator;
/**
* Generate ring and Murcko-like fragments.
* <ul>
* <li>ring fragments (largest ringsystems)</li>
* <li>Murcko fragments described by Murcko et al. {@cdk.cite MURCKO96}</li>
* </ul>
*
* Linkers include the ring atoms. In CDK notion aromatic atoms are flagged as ISAROMATIC and have small letters in smiles.
* If you want to create molecules out of these smiles you have to change all the letters to upper ones.
* For this please use isSmilesToUpperCase() and setSmilesToUpperCase()
*
* <p>Due to some problems with SaturationChecker the SMILES generation might be a problem.
* When you want to use the get..SmileArray methods please do (that seems to work, refer test 13+14):
* <pre>
* HydrogenAdder ha= new HydrogenAdder();
* ha.addExplicitHydrogensToSatisfyValency(molecule);
* GenerateFragments gf=new GenerateFragments();
* gf.generateMurckoFragments(molecule,booelan,booelan);
* String[] smiles=gf.getMurckoFrameworksAsSmileArray();
* </pre>
*
* @author chhoppe from EUROSCREEN
* @cdk.created 2006-3-23
* @cdk.module extra
* @cdk.githash
* @cdk.keyword Murcko fragments
* @cdk.bug 1848591
**/
public class GenerateFragments {
private static ILoggingTool logger =
LoggingToolFactory.createLoggingTool(GenerateFragments.class);
private List murckoFragments =null;
private List ringFragments=null;
private List linkerFragments=null;
private IRingSet ringSetsMolecule=null;
private boolean sidechainHetatoms=true;
private boolean exocyclicDoubleBonds=true;
private boolean smilesToUpperCase=false;
//private IChemObjectBuilder builder;
/**
* generates ring fragments from SSSR and RingPartitioner method
* @param molecule
* void
*/
public void generateRingFragments(IMolecule molecule){
this.ringFragments= new ArrayList();
this.ringSetsMolecule = new SSSRFinder(molecule).findSSSR();
if (this.ringSetsMolecule.getAtomContainerCount() > 0) {
this.ringFragments=RingPartitioner.partitionRings(ringSetsMolecule);
}
}
/**
* generates Murcko fragments takes two parameters
* @param molecule the molecule
* @param sidechainHetatoms boolean if sidchain hetero atoms should be included (true)
* @param exocyclicDoubleBonds boolean if bonds with order >1 should be included on ring systems and linkers
* @param minimumRingSize int indicates the minimum ring size as to considered as fragment (ringSize<minimimRingSize)
* return void
*/
public void generateMurckoFragments(IMolecule molecule, boolean sidechainHetatoms,boolean exocyclicDoubleBonds, int minimumRingSize) throws CDKException {
//logger.debug("****** generatemurckoFragments *******");
//VARIABLES
this.murckoFragments =new Vector();
this.linkerFragments=new Vector();
this.sidechainHetatoms=sidechainHetatoms;
this.exocyclicDoubleBonds=exocyclicDoubleBonds;
IAtom firstRingAtom = null;
IAtom secondRingAtom = null;
IAtomContainer firstRingAtomContainer=null;
IAtomContainer secondRingAtomContainer=null;
IAtomContainer firstRingSubstituents=null;
IAtomContainer secondRingSubstituents=null;
IAtomContainer path=null;
IMolecule murckoFragment=null;
IMolecule linkerFragment=null;
List tmpRingFragments=new Vector();
//Initialize
generateRingFragments(molecule);
for (int i = 0; i < molecule.getAtomCount(); i++) {
molecule.getAtom(i).setFlag(CDKConstants.ISINRING, false);
}
//logger.debug("Number of RingSystems:"+this.ringFragments.size());
for (int f = 0; f < this.ringFragments.size(); f++) {
firstRingAtomContainer = molecule.getBuilder().newAtomContainer();
IRingSet ringSet = (IRingSet)this.ringFragments.get(f);
for (int i=0;i<ringSet.getAtomContainerCount();i++) {
firstRingAtomContainer.add(ringSet.getAtomContainer(i));
}
if (firstRingAtomContainer.getAtomCount()>=minimumRingSize){
tmpRingFragments.add(firstRingAtomContainer);
for (int g = 0; g < firstRingAtomContainer.getAtomCount(); g++) {
molecule.getAtom(molecule.getAtomNumber(firstRingAtomContainer.getAtom(g))).setFlag(CDKConstants.ISINRING, true);
}
}
}
// START
//logger.debug("Number of RingSystems:"+tmpRingFragments.size());
if (tmpRingFragments.size() > 1) {
//go through all ringsystems
//for (int f = 0; f < this.ringFragments.size()-1; f++) {
for (int f = 0; f < tmpRingFragments.size()-1; f++) {
//firstRingAtomContainer = RingSetManipulator.getAllInOneContainer((IRingSet) this.ringFragments.get(f));
firstRingAtomContainer = (IAtomContainer)tmpRingFragments.get(f);
//for (int g = f+1; g < this.ringFragments.size(); g++) {
//secondRingAtomContainer = RingSetManipulator.getAllInOneContainer((IRingSet) this.ringFragments.get(g));
for (int g = f+1; g < tmpRingFragments.size(); g++) {
secondRingAtomContainer = (IAtomContainer)tmpRingFragments.get(g);
for (int h = 0; h < firstRingAtomContainer.getAtomCount(); h++){
firstRingAtom=firstRingAtomContainer.getAtom(h);
firstRingSubstituents=getPossibleLinkerSubstituents(firstRingAtom,molecule,firstRingAtomContainer);
if (firstRingSubstituents.getAtomCount()>0){
//go through substituents of first ring
for (int i = 0; i < firstRingSubstituents.getAtomCount(); i++){
// logger.debug("First Ring Sub is in RING");
// //check for ring-ring system
//Is a zero-atom linker
if (firstRingSubstituents.getAtom(i).getFlag(CDKConstants.ISINRING) && secondRingAtomContainer.contains(firstRingSubstituents.getAtom(i))){
//logger.debug("\tFound a ring-ring System");
murckoFragment=new Molecule();
murckoFragment=addFragments(firstRingAtomContainer,murckoFragment,molecule);
murckoFragment=addFragments(secondRingAtomContainer,murckoFragment,molecule);
murckoFragment=addFragmentBonds(murckoFragment,molecule);
linkerFragment=new Molecule();
linkerFragment.addAtom(firstRingAtom);
linkerFragment.addAtom(firstRingSubstituents.getAtom(i));
linkerFragment=addFragmentBonds(linkerFragment, molecule);
this.linkerFragments.add(linkerFragment);
this.murckoFragments.add(murckoFragment);
//logger.debug("MFragment:"+murckoFragment.getAtomCount()+" CC:"+ConnectivityChecker.isConnected(murckoFragment));
//logger.debug(murckoFragment.toString());
//logger.debug("\tADD MURCKOFRAGMENT");
break;
}
// compare to substituents of second ring
for (int j = 0; j < secondRingAtomContainer.getAtomCount(); j++){
secondRingAtom=secondRingAtomContainer.getAtom(j);
secondRingSubstituents=getPossibleLinkerSubstituents(secondRingAtom,molecule,secondRingAtomContainer);
if (secondRingSubstituents.getAtomCount()>0){
//go through substituents of second ring
for (int k = 0; k < secondRingSubstituents.getAtomCount(); k++){//For-k
//logger.debug("First Ring Size:"+firstRingAtomContainer.getAtomCount()+" 2.Ring Size:"+secondRingAtomContainer.getAtomCount());
//logger.debug(f+".ringSub:"+molecule.getAtomNumber(firstRingSubstituents.getAtomAt(i))+" Sym:"+firstRingSubstituents.getAtomAt(i).getSymbol()+" "+g+".ringSub:"+molecule.getAtomNumber(secondRingSubstituents.getAtomAt(k)));
path=new AtomContainer();
resetFlags(molecule);
PathTools.depthFirstTargetSearch(molecule,firstRingSubstituents.getAtom(i),secondRingSubstituents.getAtom(k),path);
/*logger.debug("\tPATHSIZE:"+path.getAtomCount());
logger.debug("\tFIRST PATHATOM:"+molecule.getAtomNumber(path.getAtomAt(0)));
try{
logger.debug("\tLAST PATHATOM:"+molecule.getAtomNumber(path.getAtomAt(path.getAtomCount()-1)));
}catch(Exception eS){
logger.debug("\tNO LAST PATHATOM");
}*/
if (firstRingSubstituents.getAtom(i)==secondRingSubstituents.getAtom(k)){
//logger.debug("\tSubstituents are equal");
path.addAtom(firstRingSubstituents.getAtom(i));
}
//Check Path, direct connection between the substituents ->linker
if ((checkPath(firstRingAtom, secondRingAtom, path) && path.getAtomCount()>0)){
murckoFragment=new Molecule();
//add both root atoms to path
if (!path.contains(firstRingSubstituents.getAtom(i))){
path.addAtom(firstRingSubstituents.getAtom(i));
}
if (!path.contains(secondRingSubstituents.getAtom(k))){
path.addAtom(secondRingSubstituents.getAtom(k));
}
// //add both ring atoms to path
//if (!path.contains(firstRingAtom)){
// path.addAtom(firstRingAtom);
//}
//if (!path.contains(secondRingAtom)){
// path.addAtom(secondRingAtom);
//}
//1. add path
//2. add rings
//3. connect ring atoms to path
murckoFragment=addPathFragments(path,murckoFragment,molecule);
linkerFragment=new Molecule(murckoFragment);
if (!linkerFragment.contains(firstRingAtom)){
linkerFragment.addAtom(firstRingAtom);
}
if (!linkerFragment.contains(secondRingAtom)){
linkerFragment.addAtom(secondRingAtom);
}
linkerFragment=addFragmentBonds(linkerFragment, molecule);
murckoFragment=addFragments(firstRingAtomContainer,murckoFragment,molecule);
murckoFragment=addFragments(secondRingAtomContainer,murckoFragment,molecule);
murckoFragment=addFragmentBonds(murckoFragment,molecule);
this.linkerFragments.add(linkerFragment);
this.murckoFragments.add(murckoFragment);
//logger.debug("\tADD MURCKOFRAGMENT");
}else{
//logger.debug("\tEND PATH");
}
}//For-k
}//if 2.ring sub
}//For-j
}//For-i
}//if 1.ring sub
}//For-h
}//For-g
}//For-f
}else if (tmpRingFragments.size() ==1){
//logger.debug("Number of RingSystems is 1");
murckoFragment=new Molecule();
murckoFragment=addFragments((IRingSet) this.ringFragments.get(0),murckoFragment,molecule);
murckoFragment=addFragmentBonds(murckoFragment,molecule);
this.murckoFragments.add(murckoFragment);
}
}
/**
* add the atoms on the shortest path to the murcko fragment
* Care about ring atoms in the path and sidechainHetatoms
* @param addAtomContainer path
* @param targetMolecule murcko fragment storage
* @param mainMolecule original molecule
* @return IMolecule murcko fragment
*/
private IMolecule addPathFragments(IAtomContainer addAtomContainer,IMolecule targetMolecule, IMolecule mainMolecule){
IAtomContainer ringAtomContainer=null;
List atoms=null;
//1. check if linker atom is member of a ring system
//2. check if heteroatoms bonded to a non ring linker atom should be included
//3. check if exocyclic double or triple bonded atoms to linker schould be included
for (int i=0;i<addAtomContainer.getAtomCount();i++){
if (addAtomContainer.getAtom(i).getFlag(CDKConstants.ISINRING)&& !targetMolecule.contains(addAtomContainer.getAtom(i))){
//Find all Ring atoms and add them
for (int j = 0; j < this.ringFragments.size(); j++) {
ringAtomContainer = addAtomContainer.getBuilder().newAtomContainer();
IRingSet ringSet = (IRingSet)this.ringFragments.get(j);
for (int k=0; k<ringSet.getAtomContainerCount(); k++) {
ringAtomContainer.add(ringSet.getAtomContainer(k));
}
if (ringAtomContainer.contains(addAtomContainer.getAtom(i))){
targetMolecule=addFragments(ringAtomContainer, targetMolecule,mainMolecule);
break;
}
}
}else if((this.sidechainHetatoms || this.exocyclicDoubleBonds) && !targetMolecule.contains(addAtomContainer.getAtom(i))){
atoms=mainMolecule.getConnectedAtomsList(addAtomContainer.getAtom(i));
targetMolecule.addAtom(addAtomContainer.getAtom(i));
for (int j = 0; j < atoms.size(); j++) {
IAtom atom = (IAtom)atoms.get(j);
//logger.debug("HETATOM:"+atoms[j].getSymbol());
if (this.sidechainHetatoms && !addAtomContainer.getAtom(i).getFlag(CDKConstants.ISINRING) && !(atom.getSymbol()).equals("C") && !(atom.getSymbol()).equals("H") && !targetMolecule.contains(atom)){
//logger.debug("HETATOM TRUE");
targetMolecule.addAtom(atom);
}
if (this.exocyclicDoubleBonds &&
mainMolecule.getBond(atom,addAtomContainer.getAtom(i)).getOrder() != IBond.Order.SINGLE &&
!targetMolecule.contains(atom)){
//logger.debug("EXOCYCLIC DB TRUE");
targetMolecule.addAtom(atom);
}
}
}else{
if (!targetMolecule.contains(addAtomContainer.getAtom(i))){
targetMolecule.addAtom(addAtomContainer.getAtom(i));
}
}
}
return targetMolecule;
}
/**
* add bonds to the murcko fragments
* @param targetMolecule murcko fragment storage
* @param mainMolecule original molecule
* @return IMolecule murcko fragment
*/
private IMolecule addFragmentBonds(IMolecule targetMolecule, IMolecule mainMolecule){
int firstAtomNumber=0;
int secondAtomNumber=0;
for (int i=0;i<targetMolecule.getAtomCount()-1;i++){
for (int j = i+1; j < targetMolecule.getAtomCount(); j++) {
if (mainMolecule.getBond(targetMolecule.getAtom(i),targetMolecule.getAtom(j)) !=null){
firstAtomNumber=targetMolecule.getAtomNumber(targetMolecule.getAtom(i));
secondAtomNumber=targetMolecule.getAtomNumber(targetMolecule.getAtom(j));
targetMolecule.addBond(firstAtomNumber,secondAtomNumber,mainMolecule.getBond(targetMolecule.getAtom(i),targetMolecule.getAtom(j)).getOrder());
if (mainMolecule.getBond(targetMolecule.getAtom(i),targetMolecule.getAtom(j)).getFlag(CDKConstants.ISAROMATIC) == true){
targetMolecule.getBond(targetMolecule.getAtom(firstAtomNumber),targetMolecule.getAtom(secondAtomNumber)).setFlag(CDKConstants.ISAROMATIC, true);
}
}
}
}
return targetMolecule;
}
private IMolecule addFragments(IRingSet ringSet, IMolecule targetMolecule, IMolecule mainMolecule){
for (int i=0;i<ringSet.getAtomContainerCount();i++) {
addFragments(ringSet.getAtomContainer(i), targetMolecule, mainMolecule);
}
return targetMolecule;
}
/**
* add the rings to the murcko fragment
* @param addAtomContainer IAtomContainer with the ring atoms
* @param targetMolecule murcko fragment
* @return IMolecule murcko fragment
*/
private IMolecule addFragments(IAtomContainer addAtomContainer, IMolecule targetMolecule, IMolecule mainMolecule){
List<IAtom> atoms;
for (int i=0;i<addAtomContainer.getAtomCount();i++){
targetMolecule.addAtom(addAtomContainer.getAtom(i));
targetMolecule.addAtom(addAtomContainer.getAtom(i));
//Check for double bonds
atoms=mainMolecule.getConnectedAtomsList(addAtomContainer.getAtom(i));
for (IAtom atom : atoms) {
if (this.exocyclicDoubleBonds &&
mainMolecule.getBond(atom, addAtomContainer.getAtom(i)).getOrder() != IBond.Order.SINGLE &&
!targetMolecule.contains(atom)) {
targetMolecule.addAtom(atom);
}
}
}
return targetMolecule;
}
/**
* checks if the starting point and the end point of the shortest path are in the path
* if true path is rejected
* @param firstRingAtom IAtom start point
* @param secondRingAtom IAtom end point
* @param path IAtomContainer path
* @return boolean true if path is reasonable
*/
private boolean checkPath(IAtom firstRingAtom, IAtom secondRingAtom, IAtomContainer path){
//logger.debug("CHECK PATH");
return !(path.contains(firstRingAtom) || path.contains(secondRingAtom));
}
/**
* get starting points (IAtom) of possible linkers
* @param ringAtom IAtom the ring atom
* @param molecule IMolecule original molecule
* @param ringSystem IAtomContainer the ring system
* @return IAtomContainer possible starting points of linkers
*/
private IAtomContainer getPossibleLinkerSubstituents(IAtom ringAtom,IMolecule molecule, IAtomContainer ringSystem){
List<IAtom> atoms = molecule.getConnectedAtomsList(ringAtom);
IAtomContainer substituents=new AtomContainer();
for (IAtom atom : atoms) {
if (!ringSystem.contains(atom) && !atom.getSymbol().equals("H")) {
substituents.addAtom(atom);
}
}
return substituents;
}
/**
* check for zero-Atom linkers
* @param firstRingAtom IAtom of the first root atom
* @param secondRingAtom IAtom of the second root atom
* @param molecule IMolecule original molecule
* @return boolean true for zero atom linker, eg in biphenyl systems
*/
private boolean zeroAtomLinker(IAtom firstRingAtom, IAtom secondRingAtom, IMolecule molecule){
List atoms= molecule.getConnectedAtomsList(firstRingAtom);
return atoms.contains(secondRingAtom);
}
/**
* @return String[] smiles of the murcko fragments
*/
public String[] getMurckoFrameworksAsSmileArray(){
SmilesGenerator sg;
String[] murckoFragmentsmiles={};
if (this.murckoFragments !=null){
murckoFragmentsmiles=new String[this.murckoFragments.size()];
//logger.debug("SIZE OF MURCKO VECTOR:"+this.murckoFragments.size());
//logger.debug("SIZE OF SMILES[]:"+murckoFragmentsmiles.length);
for (int i =0;i<this.murckoFragments.size();i++){
try{
IMolecule mol=(IMolecule)this.murckoFragments.get(i);
if (ConnectivityChecker.isConnected(mol)){
sg = new SmilesGenerator();
if (smilesToUpperCase){
murckoFragmentsmiles[i]=sg.createSMILES(mol).toUpperCase();
}else{
murckoFragmentsmiles[i]=sg.createSMILES(mol);
}
}else{
logger.debug("ERROR in getMurckoFrameworksAsSmileArray due to:Molecule is not connected");
}
} catch (Exception e){
logger.error("ERROR in getMurckoFrameworksAsSmileArray due to:"+e.toString());
logger.debug(e);
}
}
}
return murckoFragmentsmiles;
}
/**
* @return String[] smiles of the ring fragments NOT WORKING
*/
public String[] getRingFragmentsAsSmileArray(){
SmilesGenerator sg;
String[] ringFragmentSmiles={};
if (this.ringFragments !=null){
ringFragmentSmiles=new String[this.ringFragments.size()];
//logger.debug("SIZE OF MURCKO VECTOR:"+this.ringFragments.size());
//logger.debug("SIZE OF SMILES[]:"+ringFragmentSmiles.length);
for (int i =0;i<this.ringFragments.size();i++){
try{
IMolecule mol=(IMolecule)this.ringFragments.get(i);
sg = new SmilesGenerator();
if (smilesToUpperCase){
ringFragmentSmiles[i]=sg.createSMILES(mol).toUpperCase();
}else{
ringFragmentSmiles[i]=sg.createSMILES(mol);
}
} catch (Exception e){
logger.error("ERROR in smile generation due to:"+e.toString());
}
}
}
return ringFragmentSmiles;
}
/**
* @return String[] smiles of the linker fragments
*/
public String[] getLinkerFragmentsAsSmileArray(){
SmilesGenerator sg;
String[] linkerFragmentSmiles={};
if (this.linkerFragments !=null){
linkerFragmentSmiles=new String[this.linkerFragments.size()];
//logger.debug("SIZE OF MURCKO VECTOR:"+this.ringFragments.size());
//logger.debug("SIZE OF SMILES[]:"+ringFragmentSmiles.length);
for (int i =0;i<this.linkerFragments.size();i++){
try{
IMolecule mol=(IMolecule)this.linkerFragments.get(i);
sg = new SmilesGenerator();
if (smilesToUpperCase){
linkerFragmentSmiles[i]=sg.createSMILES(mol).toUpperCase();
}else{
linkerFragmentSmiles[i]=sg.createSMILES(mol);
}
} catch (Exception e){
logger.error("ERROR in smile generation due to:"+e.toString());
}
}
}
return linkerFragmentSmiles;
}
private void resetFlags(IMolecule molecule){
for (int i=0;i<molecule.getAtomCount();i++){
molecule.getAtom(i).setFlag(CDKConstants.VISITED, false);
}
}
/**
* @return Vector murckoFragments
*/
public List getMurckoFrameworks() {
return this.murckoFragments;
}
/**
* @return Vector ringFragments
*/
public List getRingFragments() {
return this.ringFragments;
}
/**
* @return Vector linkerFragments
*/
public List getLinkerFragments() {
return this.linkerFragments;
}
public boolean isSmilesToUpperCase() {
return smilesToUpperCase;
}
public void setSmilesToUpperCase(boolean smilesToUpperCase) {
this.smilesToUpperCase = smilesToUpperCase;
}
}