/* $RCSfile$ * $Author$ * $Date$ * $Revision$ * * Copyright (C) 2004-2008 Rajarshi Guha <rajarshi.guha@gmail.com> * * Contact: cdk-devel@lists.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */ package org.openscience.cdk.pharmacophore; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import javax.vecmath.Point3d; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.DefaultChemObjectBuilder; import org.openscience.cdk.annotations.TestClass; import org.openscience.cdk.annotations.TestMethod; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.geometry.GeometryTools; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.isomorphism.UniversalIsomorphismTester; import org.openscience.cdk.isomorphism.matchers.IQueryAtom; import org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer; import org.openscience.cdk.isomorphism.mcss.RMap; import org.openscience.cdk.smiles.smarts.SMARTSQueryTool; import org.openscience.cdk.tools.ILoggingTool; import org.openscience.cdk.tools.LoggingToolFactory; /** * Identifies atoms whose 3D arrangement matches a specified pharmacophore query. * <p/> * A pharmacophore is defined by a set of atoms and distances between them. More generically * we can restate this as a set of pharmacophore groups and the distances between them. Note * that a pharmacophore group may consist of one or more atoms and the distances can be * specified as a distance range rather than an exact distance. * <p/> * The goal of a pharmacophore query is to identify atom in a molecule whose 3D arrangement * match a specified query. * <p/> * To perform a query one must first create a set of pharmacophore groups and specify the * distances between them. Each pharmacophore group is represented by a {@link org.openscience.cdk.pharmacophore.PharmacophoreAtom} * and the distances between them are represented by a {@link org.openscience.cdk.pharmacophore.PharmacophoreBond}. * These are collected in a {@link org.openscience.cdk.isomorphism.matchers.QueryAtomContainer}. * <p/> * Given the query pharmacophore one can use this class to check with it occurs in a specified molecule. * Note that for full generality pharmacophore searches are performed using conformations of molecules. * This can easily be accomplished using this class together with the {@link org.openscience.cdk.ConformerContainer} * class. See the example below. * <p/> * Currently this class will allow you to perform pharmacophore searches using triads, quads or any number * of pharmacophore groups. However, only distances and angles between pharmacophore groups are considered, so * alternative constraints such as torsions and so on cannot be considered at this point. * <p/> * After a query has been performed one can retrieve the matching groups (as opposed to the matching atoms * of the target molecule). However since a pharmacophore group (which is an object of class {@link PharmacophoreAtom}) * allows you to access the indices of the corresponding atoms in the target molecule, this is not very * difficult. * Example usage: * <pre> * QueryAtomContainer query = new QueryAtomContainer(); * <p/> * PharmacophoreQueryAtom o = new PharmacophoreQueryAtom("D", "[OX1]"); * PharmacophoreQueryAtom n1 = new PharmacophoreQueryAtom("A", "[N]"); * PharmacophoreQueryAtom n2 = new PharmacophoreQueryAtom("A", "[N]"); * <p/> * query.addAtom(o); * query.addAtom(n1); * query.addAtom(n2); * <p/> * PharmacophoreQueryBond b1 = new PharmacophoreQueryBond(o, n1, 4.0, 4.5); * PharmacophoreQueryBond b2 = new PharmacophoreQueryBond(o, n2, 4.0, 5.0); * PharmacophoreQueryBond b3 = new PharmacophoreQueryBond(n1, n2, 5.4, 5.8); * <p/> * query.addBond(b1); * query.addBond(b2); * query.addBond(b3); * <p/> * String filename = "/Users/rguha/pcore1.sdf"; * IteratingMDLConformerReader reader = new IteratingMDLConformerReader( * new FileReader(new File(filename)), DefaultChemObjectBuilder.getInstance()); * <p/> * ConformerContainer conformers; * if (reader.hasNext()) conformers = (ConformerContainer) reader.next(); * <p/> * boolean firstTime = true; * for (IAtomContainer conf : conformers) { * boolean status; * if (firstTime) { * status = matcher.matches(conf, true); * firstTime = false; * } else status = matcher.matches(conf, false); * if (status) { * // OK, matched. Do something * } * } * </pre> * * <h3>Extensions to SMARTS</h3> * * The pharmacophore supports some extentions to the SMARTS language that lead * to flexible pharmacophore definitions Note that these extensions are specific to * pharmacophore usage and are not generally provided by the SMARTS parser itself. * <p> * <ul> * <li> | - this allows one to perform a logical OR between two or more SMARTS patterns. An example might * be a pharmacophore group that is meant to match a 5 membered ring or a 6 membered ring. This cannot be * written in a single ordinary SMARTS pattern. However using this one extension one can write * <pre>A1AAAA1|A1AAAAA1</pre> * </ul> * * @author Rajarshi Guha * @cdk.module pcore * @cdk.githash * @cdk.keyword pharmacophore * @cdk.keyword 3D isomorphism * @see org.openscience.cdk.pharmacophore.PharmacophoreAtom * @see org.openscience.cdk.pharmacophore.PharmacophoreBond * @see org.openscience.cdk.pharmacophore.PharmacophoreQueryAtom * @see org.openscience.cdk.pharmacophore.PharmacophoreQueryBond */ @TestClass("org.openscience.cdk.pharmacophore.PharmacophoreMatcherTest") public class PharmacophoreMatcher { private ILoggingTool logger = LoggingToolFactory.createLoggingTool(PharmacophoreMatcher.class); private PharmacophoreQuery pharmacophoreQuery = null; private List<List<PharmacophoreAtom>> matchingPAtoms = null; private List<List<IBond>> matchingPBonds = null; private List<List<RMap>> bondMapping; private IAtomContainer pharmacophoreMolecule = null; private List<HashMap<IBond, IBond>> bondMapHash = null; /** * An empty constructor. * <p/> * You should set the query before performing a match */ public PharmacophoreMatcher() { } /** * Initialize the matcher with a query. * * @param pharmacophoreQuery The query pharmacophore * @see org.openscience.cdk.pharmacophore.PharmacophoreQueryAtom * @see org.openscience.cdk.pharmacophore.PharmacophoreQueryBond */ public PharmacophoreMatcher(PharmacophoreQuery pharmacophoreQuery) { this.pharmacophoreQuery = pharmacophoreQuery; } /** * Performs the pharmacophore matching. * <p/> * This method will analyze the specified target molecule to identify pharmacophore * groups. If dealing with conformer data it is probably more efficient to use * the other form of this method which allows one to skip the pharmacophore group * identification step after the first conformer. * * @param atomContainer The target molecule. Must have 3D coordinates * @return true is the target molecule contains the query pharmacophore * @throws org.openscience.cdk.exception.CDKException * if the query pharmacophore was not set or the query is invalid or if the molecule * does not have 3D coordinates * @see #matches(org.openscience.cdk.interfaces.IAtomContainer, boolean) */ @TestMethod("testCNSPcore") public boolean matches(IAtomContainer atomContainer) throws CDKException { return matches(atomContainer, true); } /** * Performs the pharmacophore matching. * * @param atomContainer The target molecule. Must have 3D coordinates * @param initializeTarget If <i>true</i>, the target molecule specified in the * first argument will be analyzed to identify matching pharmacophore groups. If <i>false</i> * this is not performed. The latter case is only useful when dealing with conformers * since for a given molecule, all conformers will have the same pharmacophore groups * and only the constraints will change from one conformer to another. * @return true is the target molecule contains the query pharmacophore * @throws org.openscience.cdk.exception.CDKException * if the query pharmacophore was not set or the query is invalid or if the molecule * does not have 3D coordinates */ @TestMethod("testMatcherQuery1") public boolean matches(IAtomContainer atomContainer, boolean initializeTarget) throws CDKException { if (!GeometryTools.has3DCoordinates(atomContainer)) throw new CDKException("Molecule must have 3D coordinates"); if (pharmacophoreQuery == null) throw new CDKException("Must set the query pharmacophore before matching"); if (!checkQuery(pharmacophoreQuery)) throw new CDKException("A problem in the query. Make sure all pharmacophore groups of the same symbol have the same same SMARTS"); String title = (String) atomContainer.getProperty(CDKConstants.TITLE); if (initializeTarget) pharmacophoreMolecule = getPharmacophoreMolecule(atomContainer); else { // even though the atoms comprising the pcore groups are // constant, their coords will differ, so we need to make // sure we get the latest set of effective coordinates for (IAtom iAtom : pharmacophoreMolecule.atoms()) { PharmacophoreAtom patom = (PharmacophoreAtom) iAtom; List<Integer> tmpList = new ArrayList<Integer>(); for (int idx : patom.getMatchingAtoms()) tmpList.add(idx); Point3d coords = getEffectiveCoordinates(atomContainer, tmpList); patom.setPoint3d(coords); } } if (pharmacophoreMolecule.getAtomCount() < pharmacophoreQuery.getAtomCount()) { logger.debug("Target [" + title + "] did not match the query SMARTS. Skipping constraints"); return false; } bondMapping = UniversalIsomorphismTester.getSubgraphMaps(pharmacophoreMolecule, pharmacophoreQuery); logger.debug(" Got " + bondMapping.size() + " hits"); return bondMapping.size() > 0; } /** * Get the matching pharmacophore constraints. * <p/> * The method should be called after performing the match, otherwise the return value is null. * The method returns a List of List's. Each List represents the pharmacophore constraints in the * target molecule that matched the query. Since constraints are conceptually modeled on bonds * the result is a list of list of IBond. You should coerce these to the appropriate pharmacophore * bond to get at the underlying grops. * * @return a List of a List of pharmacophore constraints in the target molecule that match the query * @see org.openscience.cdk.pharmacophore.PharmacophoreBond * @see org.openscience.cdk.pharmacophore.PharmacophoreAngleBond */ @TestMethod("testMatchedBonds") public List<List<IBond>> getMatchingPharmacophoreBonds() { if (bondMapping == null) return null; matchingPBonds = new ArrayList<List<IBond>>(); bondMapHash = new ArrayList<HashMap<IBond, IBond>>(); for (Object aBondMapping : bondMapping) { List list = (List) aBondMapping; List<IBond> bondList = new ArrayList<IBond>(); HashMap<IBond, IBond> tmphash = new HashMap<IBond, IBond>(); for (Object aList : list) { RMap map = (RMap) aList; int bondID = map.getId1(); bondList.add(pharmacophoreMolecule.getBond(bondID)); tmphash.put(pharmacophoreMolecule.getBond(map.getId1()), pharmacophoreQuery.getBond(map.getId2())); } bondMapHash.add(tmphash); matchingPBonds.add(bondList); } return matchingPBonds; } /** * Return a list of HashMap's that allows one to get the query constraint for a given pharmacophore bond. * <p/> * This should be called after calling {@link #getMatchingPharmacophoreBonds()}, otherwise the * return value is null. If the matching is successfull, the return value is a List of HashMaps, each * HashMap corresponding to a seperate match. Each HashMap is keyed on the {@link org.openscience.cdk.pharmacophore.PharmacophoreBond} * in the target molecule that matched a contstraint ({@link org.openscience.cdk.pharmacophore.PharmacophoreQueryBond} or * {@link org.openscience.cdk.pharmacophore.PharmacophoreQueryAngleBond}. The value is the corresponding query bond. * * @return A List of HashMaps, identifying the query constraint corresponding to a matched constraint in the target * molecule. */ @TestMethod("testMatchedBonds") public List<HashMap<IBond, IBond>> getTargetQueryBondMappings() { return bondMapHash; } /** * Get the matching pharmacophore groups. * <p/> * The method should be called after performing the match, otherwise the return value is null. * The method returns a List of List's. Each List represents the pharmacophore groups in the * target molecule that matched the query. Each pharmacophore group contains the indices of the * atoms (in the target molecule) that correspond to the group. * * @return a List of a List of pharmacophore groups in the target molecule that match the query * @see org.openscience.cdk.pharmacophore.PharmacophoreAtom */ @TestMethod("testMatchedAtoms") public List<List<PharmacophoreAtom>> getMatchingPharmacophoreAtoms() { if (pharmacophoreMolecule == null || bondMapping == null) return null; matchingPAtoms = getAtomMappings(bondMapping, pharmacophoreMolecule); return matchingPAtoms; } /** * Get the uniue matching pharmacophore groups. * <p/> * The method should be called after performing the match, otherwise the return value is null. * The method returns a List of List's. Each List represents the pharmacophore groups in the * target molecule that matched the query. Each pharmacophore group contains the indices of the * atoms (in the target molecule) that correspond to the group. * <p/> * This is analogous to the USA form of return value from a SMARTS match. * * @return a List of a List of pharmacophore groups in the target molecule that match the query * @see org.openscience.cdk.pharmacophore.PharmacophoreAtom */ @TestMethod("testMatchedAtoms") public List<List<PharmacophoreAtom>> getUniqueMatchingPharmacophoreAtoms() { getMatchingPharmacophoreAtoms(); List<List<PharmacophoreAtom>> ret = new ArrayList<List<PharmacophoreAtom>>(); List<String> tmp = new ArrayList<String>(); for (List<PharmacophoreAtom> pmatch : matchingPAtoms) { List<Integer> ilist = new ArrayList<Integer>(); for (PharmacophoreAtom patom : pmatch) { int[] indices = patom.getMatchingAtoms(); for (int i : indices) { if (!ilist.contains(i)) ilist.add(i); } } Collections.sort(ilist); // convert the list of ints to a string String s = ""; for (int i : ilist) s += i; tmp.add(s); } // now we go through our integer list and see if we can get rid of duplicates List<String> utmp = new ArrayList<String>(); for (int i = 0; i < tmp.size(); i++) { String ilist = tmp.get(i); if (!utmp.contains(ilist)) { utmp.add(ilist); ret.add(matchingPAtoms.get(i)); } } return ret; } /** * Get the query pharmacophore * * @return The query */ @TestMethod("testGetterSetter") public PharmacophoreQuery getPharmacophoreQuery() { return pharmacophoreQuery; } /** * Set a pharmacophore query * * @param query The query */ @TestMethod("testGetterSetter") public void setPharmacophoreQuery(PharmacophoreQuery query) { pharmacophoreQuery = query; } private IAtomContainer getPharmacophoreMolecule(IAtomContainer atomContainer) throws CDKException { SMARTSQueryTool sqt = new SMARTSQueryTool("C"); IAtomContainer pharmacophoreMolecule = DefaultChemObjectBuilder.getInstance().newAtomContainer(); // lets loop over each pcore query atom HashMap<String, String> map = new HashMap<String, String>(); logger.debug("Converting [" + atomContainer.getProperty(CDKConstants.TITLE) + "] to a pcore molecule"); for (IAtom atom : pharmacophoreQuery.atoms()) { PharmacophoreQueryAtom qatom = (PharmacophoreQueryAtom) atom; String smarts = qatom.getSmarts(); // a pcore query might have multiple instances of a given pcore atom (say // 2 hydrophobic groups separated by X unit). In such a case we want to find // the atoms matching the pgroup SMARTS just once, rather than redoing the // matching for each instance of the pcore query atom. if (!map.containsKey(qatom.getSymbol())) map.put(qatom.getSymbol(), smarts); else if (map.get(qatom.getSymbol()).equals(smarts)) { continue; } // see if the smarts for this pcore query atom gets any matches // in our query molecule. If so, then cllect each set of // matching atoms and for each set make a new pcore atom and // add it to the pcore atom container object // Note that we allow a special form of SMARTS where the | operator // represents logical or of multi-atom groups (as opposed to ',' // which is for single atom matches) String[] subSmarts = smarts.split("\\|"); for (String subSmart : subSmarts) { sqt.setSmarts(subSmart); if (sqt.matches(atomContainer)) { List<List<Integer>> mappings = sqt.getUniqueMatchingAtoms(); for (List<Integer> atomIndices : mappings) { Point3d coords = getEffectiveCoordinates(atomContainer, atomIndices); PharmacophoreAtom patom = new PharmacophoreAtom(smarts, qatom.getSymbol(), coords); patom.setMatchingAtoms(intIndices(atomIndices)); if (!pharmacophoreMolecule.contains(patom)) pharmacophoreMolecule.addAtom(patom); } } } logger.debug("\tFound " + sqt.getUniqueMatchingAtoms().size() + " unique matches for " + smarts); } // now that we have added all the pcore atoms to the container // we need to join all atoms with pcore bonds (i.e. distance constraints) if (hasDistanceConstraints(pharmacophoreQuery)) { int npatom = pharmacophoreMolecule.getAtomCount(); for (int i = 0; i < npatom - 1; i++) { for (int j = i + 1; j < npatom; j++) { PharmacophoreAtom atom1 = (PharmacophoreAtom) pharmacophoreMolecule.getAtom(i); PharmacophoreAtom atom2 = (PharmacophoreAtom) pharmacophoreMolecule.getAtom(j); PharmacophoreBond bond = new PharmacophoreBond(atom1, atom2); pharmacophoreMolecule.addBond(bond); } } } // if we have angle constraints, generate only the valid // possible angle relationships, rather than all possible if (hasAngleConstraints(pharmacophoreQuery)) { int nangleDefs = 0; for (IBond bond : pharmacophoreQuery.bonds()) { if (!(bond instanceof PharmacophoreQueryAngleBond)) continue; IAtom startQAtom = bond.getAtom(0); IAtom middleQAtom = bond.getAtom(1); IAtom endQAtom = bond.getAtom(2); // make a list of the patoms in the target that match // each type of angle atom List<IAtom> startl = new ArrayList<IAtom>(); List<IAtom> middlel = new ArrayList<IAtom>(); List<IAtom> endl = new ArrayList<IAtom>(); for (IAtom tatom : pharmacophoreMolecule.atoms()) { if (tatom.getSymbol().equals(startQAtom.getSymbol())) startl.add(tatom); if (tatom.getSymbol().equals(middleQAtom.getSymbol())) middlel.add(tatom); if (tatom.getSymbol().equals(endQAtom.getSymbol())) endl.add(tatom); } // now we form the relevant angles, but we will // have reversed repeats List<IAtom[]> tmpl = new ArrayList<IAtom[]>(); for (IAtom middle : middlel) { for (IAtom start : startl) { if (middle.equals(start)) continue; for (IAtom end : endl) { if (start.equals(end) || middle.equals(end)) continue; tmpl.add(new IAtom[]{start, middle, end}); } } } // now clean up reversed repeats List<IAtom[]> unique = new ArrayList<IAtom[]>(); for (int i = 0; i < tmpl.size(); i++) { IAtom[] seq1 = tmpl.get(i); boolean isRepeat = false; for (int j = 0; j < unique.size(); j++) { if (i == j) continue; IAtom[] seq2 = unique.get(j); if (seq1[1] == seq2[1] && seq1[0] == seq2[2] && seq1[2] == seq2[0]) { isRepeat = true; } } if (!isRepeat) unique.add(seq1); } // finally we can add the unique angle to the target for (IAtom[] seq : unique) { PharmacophoreAngleBond pbond = new PharmacophoreAngleBond( (PharmacophoreAtom) seq[0], (PharmacophoreAtom) seq[1], (PharmacophoreAtom) seq[2]); pharmacophoreMolecule.addBond(pbond); nangleDefs++; } } logger.debug("Added " + nangleDefs + " defs to the target pcore molecule"); } return pharmacophoreMolecule; } private boolean hasDistanceConstraints(IQueryAtomContainer query) { for (IBond bond : query.bonds()) { if (bond instanceof PharmacophoreQueryBond) return true; } return false; } private boolean hasAngleConstraints(IQueryAtomContainer query) { for (IBond bond : query.bonds()) { if (bond instanceof PharmacophoreQueryAngleBond) return true; } return false; } private int[] intIndices(List<Integer> atomIndices) { int[] ret = new int[atomIndices.size()]; for (int i = 0; i < atomIndices.size(); i++) ret[i] = atomIndices.get(i); return ret; } private Point3d getEffectiveCoordinates(IAtomContainer atomContainer, List<Integer> atomIndices) { Point3d ret = new Point3d(0, 0, 0); for (Object atomIndice : atomIndices) { int atomIndex = (Integer) atomIndice; Point3d coord = atomContainer.getAtom(atomIndex).getPoint3d(); ret.x += coord.x; ret.y += coord.y; ret.z += coord.z; } ret.x /= atomIndices.size(); ret.y /= atomIndices.size(); ret.z /= atomIndices.size(); return ret; } private List<List<PharmacophoreAtom>> getAtomMappings(List bondMapping, IAtomContainer atomContainer) { List<List<PharmacophoreAtom>> atomMapping = new ArrayList<List<PharmacophoreAtom>>(); // loop over each mapping for (Object aBondMapping : bondMapping) { List list = (List) aBondMapping; List<Integer> tmp = new ArrayList<Integer>(); List<PharmacophoreAtom> atomList = new ArrayList<PharmacophoreAtom>(); // loop over this mapping for (Object aList : list) { RMap map = (RMap) aList; int bondID = map.getId1(); // get the atoms in this bond IBond bond = atomContainer.getBond(bondID); IAtom atom1 = bond.getAtom(0); IAtom atom2 = bond.getAtom(1); Integer idx1 = atomContainer.getAtomNumber(atom1); Integer idx2 = atomContainer.getAtomNumber(atom2); if (!tmp.contains(idx1)) { tmp.add(idx1); atomList.add(new PharmacophoreAtom((PharmacophoreAtom) atom1)); } if (!tmp.contains(idx2)) { tmp.add(idx2); atomList.add(new PharmacophoreAtom((PharmacophoreAtom) atom2)); } } if (tmp.size() > 0) atomMapping.add(atomList); } return atomMapping; } private boolean checkQuery(IQueryAtomContainer query) { if (!(query instanceof PharmacophoreQuery)) return false; HashMap<String, String> map = new HashMap<String, String>(); for (int i = 0; i < query.getAtomCount(); i++) { IQueryAtom atom = (IQueryAtom) query.getAtom(i); if (!(atom instanceof PharmacophoreQueryAtom)) return false; PharmacophoreQueryAtom pqatom = (PharmacophoreQueryAtom) atom; String label = pqatom.getSymbol(); String smarts = pqatom.getSmarts(); if (!map.containsKey(label)) map.put(label, smarts); else { if (!map.get(label).equals(smarts)) return false; } } return true; } }