/* * Copyright (C) 2010 Mark Rijnbeek <mark_rynbeek@users.sf.net> * * Contact: cdk-devel@lists.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * All we ask is that proper credit is given for our work, which includes * - but is not limited to - adding the above copyright notice to the beginning * of your source code files, and to any copyright notice that you may * distribute with programs based on this work. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * */ package org.openscience.cdk.isomorphism.matchers; import java.io.Serializable; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.vecmath.Point2d; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.ChemObject; import org.openscience.cdk.PseudoAtom; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.interfaces.IChemObject; import org.openscience.cdk.tools.ILoggingTool; import org.openscience.cdk.tools.LoggingToolFactory; /** * Represents information contained in a Symyx RGfile (R-group query file).<br> * It contains a root structure (the scaffold if you like), a map with * R-group definitions (each of which can contain multiple substitutes) and * a map with attachment points. The attachment points define a connection * order for the substitutes, which is relevant when an Rgroup is connected * to the scaffold with more than one bond. * <P> * This class can also be used to produce all the valid configurations * for the combination of its root,definitions and conditions. * <P> * This Javadoc does not contain a code sample how to create a new RGroupQuery * from scratch, because a sensible RGroupQuery has quite a few attributes to be set * including a root plus a bunch of substituents, which are all atom containers. * So that would be a lot of sample code here. <br> * The best way to get a feel for the way the RGroup objects are populated is to * run the {@link org.openscience.cdk.io.RGroupQueryReaderTest} and look at the sample * input RGroup query files contained in the CDK and how they translate into * RGroupXX objects. The JChempaint application can visualize the input files for you. * * @cdk.module isomorphism * @cdk.githash * @cdk.keyword Rgroup * @cdk.keyword R group * @cdk.keyword R-group * @author Mark Rijnbeek */ public class RGroupQuery extends ChemObject implements IChemObject, Serializable, IRGroupQuery { private static final long serialVersionUID = -1656116487614720605L; private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(RGroupQuery.class); /** * The root structure (or scaffold) to which R-groups r attached. */ private IAtomContainer rootStructure; /** * Rgroup definitions, each a list of possible substitutes for the * given R number. */ private Map<Integer, RGroupList> rGroupDefinitions; /** * For each Rgroup Atom there may be a map containing (number,bond), * being the attachment order (1,2) and the bond to attach to. */ private Map<IAtom, Map<Integer, IBond>> rootAttachmentPoints; /** * Returns all R# type atoms (pseudo atoms) found in the root structure * for a certain provided RGgroup number.<p> * @param rgroupNumber R# number, 1..32 * @return list of (pseudo) atoms with the provided rgroupNumber as label */ public List<IAtom> getRgroupQueryAtoms(Integer rgroupNumber) { List<IAtom> rGroupQueryAtoms = null; if (rootStructure != null) { rGroupQueryAtoms = new ArrayList<IAtom>(); for (int i = 0; i < rootStructure.getAtomCount(); i++) { IAtom atom = rootStructure.getAtom(i); if (atom instanceof PseudoAtom) { PseudoAtom rGroup = (PseudoAtom)atom; if (!rGroup.getLabel().equals("R") && // just "R" is not a proper query atom rGroup.getLabel().startsWith("R") && (rgroupNumber == null || new Integer(rGroup.getLabel().substring(1)).equals(rgroupNumber))) rGroupQueryAtoms.add(atom); } } } return rGroupQueryAtoms; } /** * Returns all R# type atoms (pseudo atoms) found in the root structure. * @return list of (pseudo) R# atoms */ public List<IAtom> getAllRgroupQueryAtoms() { return getRgroupQueryAtoms(null); } private static Pattern validLabelPattern = Pattern.compile("^R\\d+$"); /** * Validates a Pseudo atom's label to be valid RGroup query label (R1..R32). * @param Rxx R-group label like R1 or R10 * @return true if R1..R32, otherwise false */ public static boolean isValidRgroupQueryLabel(String Rxx) { Matcher matcher = validLabelPattern.matcher(Rxx); if (matcher.find()) { int groupNumber = new Integer(Rxx.substring(1)); if (groupNumber >= 1 && groupNumber <= 32) { return true; } } return false; } public boolean areSubstituentsDefined() { List<IAtom> allRgroupAtoms = getAllRgroupQueryAtoms(); if (allRgroupAtoms == null) return false; for (IAtom rgp : allRgroupAtoms) { if (RGroupQuery.isValidRgroupQueryLabel(((PseudoAtom)rgp).getLabel())) { int groupNum = new Integer(((PseudoAtom)rgp).getLabel().substring(1)); if (rGroupDefinitions == null || rGroupDefinitions.get(groupNum) == null || rGroupDefinitions.get(groupNum).getRGroups() == null || rGroupDefinitions.get(groupNum).getRGroups().size() == 0) { return false; } } } return true; } public boolean areRootAtomsDefined() { for (Integer rgpNum : rGroupDefinitions.keySet()) { boolean represented=false; rootLoop: for (IAtom rootAtom : this.getRootStructure().atoms()) { if (rootAtom instanceof PseudoAtom && rootAtom.getSymbol().startsWith("R")) { PseudoAtom pseudo = (PseudoAtom) rootAtom; if(pseudo.getLabel().length()>1) { int rootAtomRgrpNumber = new Integer(pseudo.getLabel().substring(1)); if (rootAtomRgrpNumber==rgpNum) { represented=true; break rootLoop; } } } } if(!represented) { return false; } } return true; } public List<IAtomContainer> getAllConfigurations() throws CDKException { if (!areSubstituentsDefined()) { throw new CDKException("Can not configure molecules: missing R# group definitions."); } //result = a list of concrete atom containers that are valid interpretations of the RGroup query List<IAtomContainer> result = new ArrayList<IAtomContainer>(); //rGroupNumbers = list holding each R# number for this RGroup query List<Integer> rGroupNumbers = new ArrayList<Integer>(); //distributions = a list of valid distributions, that is a one/zero representation // indicating which atom in an atom series belonging to a particular // R# group is present (1) or absent (0). List<Integer[]> distributions = new ArrayList<Integer[]>(); List<List<RGroup>> substitutes = new ArrayList<List<RGroup>>(); //Valid occurrences for each R# group List<List<Integer>> occurrences = new ArrayList<List<Integer>>(); List<Integer> occurIndexes = new ArrayList<Integer>(); //Build up each R# group data before recursively finding configurations. Iterator<Integer> rGroupNumItr = rGroupDefinitions.keySet().iterator(); if (rGroupNumItr.hasNext()) { while (rGroupNumItr.hasNext()) { int r = rGroupNumItr.next(); rGroupNumbers.add(r); List<Integer> validOcc = rGroupDefinitions.get(r).matchOccurence(getRgroupQueryAtoms(r).size()); if (validOcc.size() == 0) { throw new CDKException("Occurrence '" + rGroupDefinitions.get(r).getOccurrence() + "' defined for Rgroup " + r + " results in no subsititute options for this R-group."); } occurrences.add(validOcc); occurIndexes.add(0); } //Init distributions: empty and with the right list size for (int i = 0; i < rGroupNumbers.size(); i++) { distributions.add(null); substitutes.add(null); } //Start finding valid configurations using recursion, output will be put in 'result'. findConfigurationsRecursively(rGroupNumbers, occurrences, occurIndexes, distributions, substitutes, 0, result); } return result; } /** * Recursive function to produce valid configurations * for {@link #getAllConfigurations()}. */ private void findConfigurationsRecursively(List<Integer> rGroupNumbers, List<List<Integer>> occurrences, List<Integer> occurIndexes, List<Integer[]> distributions, List<List<RGroup>> substitutes, int level, List<IAtomContainer> result) throws CDKException { if (level == rGroupNumbers.size()) { if (!checkIfThenConditionsMet(rGroupNumbers, distributions)) return; // Clone the root to get a scaffold to plug the substitutes into. IAtomContainer root = this.getRootStructure(); IAtomContainer rootClone = null; try { rootClone = (IAtomContainer)root.clone(); } catch (CloneNotSupportedException e) { //Abort with CDK exception throw new CDKException("clone() failed; could not perform R-group substitution."); } for (int rgpIdx = 0; rgpIdx < rGroupNumbers.size(); rgpIdx++) { int rNum = rGroupNumbers.get(rgpIdx); int pos = 0; List<RGroup> mapped = substitutes.get(rgpIdx); for (RGroup substitute : mapped) { IAtom rAtom = this.getRgroupQueryAtoms(rNum).get(pos); if (substitute !=null) { IAtomContainer rgrpClone = null; try { rgrpClone = (IAtomContainer)(substitute.getGroup().clone()); } catch (CloneNotSupportedException e) { throw new CDKException("clone() failed; could not perform R-group substitution."); } //root cloned, substitute cloned. These now need to be attached to each other.. rootClone.add(rgrpClone); Map<Integer, IBond> rAttachmentPoints = this.getRootAttachmentPoints().get(rAtom); if (rAttachmentPoints != null) { // Loop over attachment points of the R# atom for (int apo = 0; apo < rAttachmentPoints.size(); apo++) { IBond bond = rAttachmentPoints.get(apo + 1); //Check how R# is attached to bond int whichAtomInBond = 0; if (bond.getAtom(1).equals(rAtom)) whichAtomInBond = 1; IAtom subsAt = null; if (apo == 0) subsAt = substitute.getFirstAttachmentPoint(); else subsAt = substitute.getSecondAttachmentPoint(); //Do substitution with the clones IBond cloneBond = rootClone.getBond(getBondPosition(bond, root)); if (subsAt != null) { IAtom subsCloneAtom = rgrpClone.getAtom(getAtomPosition(subsAt, substitute.getGroup())); cloneBond.setAtom(subsCloneAtom, whichAtomInBond); } } } //Optional: shift substitutes 2D for easier visual checking if (rAtom.getPoint2d() != null && substitute != null && substitute.getFirstAttachmentPoint() != null && substitute.getFirstAttachmentPoint().getPoint2d() != null) { Point2d pointR = rAtom.getPoint2d(); Point2d pointC = substitute.getFirstAttachmentPoint().getPoint2d(); double xDiff = pointC.x - pointR.x; double yDiff = pointC.y - pointR.y; for (IAtom subAt : rgrpClone.atoms()) { if (subAt.getPoint2d() != null) { subAt.getPoint2d().x -= xDiff; subAt.getPoint2d().y -= yDiff; } } } } else { //Distribution flag is 0, this means the R# group will not be substituted. //Any atom connected to this group should be given the defined RestH value. IAtom discarded = rootClone.getAtom(getAtomPosition(rAtom, root)); for (IBond r0Bond : rootClone.bonds()) { if (r0Bond.contains(discarded)) { for (IAtom atInBond : r0Bond.atoms()) { atInBond.setProperty(CDKConstants.REST_H, this.getRGroupDefinitions().get(rNum).isRestH()); } } } } pos++; } } //Remove R# remnants from the clone, bonds and atoms that may linger. boolean confHasRGroupBonds = true; while (confHasRGroupBonds) { for (IBond cloneBond : rootClone.bonds()) { boolean removeBond = false; if (cloneBond.getAtom(0) instanceof PseudoAtom && isValidRgroupQueryLabel(((PseudoAtom)cloneBond.getAtom(0)).getLabel())) removeBond = true; else if (cloneBond.getAtom(1) instanceof PseudoAtom && isValidRgroupQueryLabel(((PseudoAtom)cloneBond.getAtom(1)).getLabel())) removeBond = true; if (removeBond) { rootClone.removeBond(cloneBond); confHasRGroupBonds = true; break; } confHasRGroupBonds = false; } } boolean confHasRGroupAtoms = true; while (confHasRGroupAtoms) { for (IAtom cloneAt : rootClone.atoms()) { if (cloneAt instanceof PseudoAtom) if (isValidRgroupQueryLabel(((PseudoAtom)cloneAt).getLabel())) { rootClone.removeAtom(cloneAt); confHasRGroupAtoms = true; break; } confHasRGroupAtoms = false; } } //Add to result list result.add(rootClone); } else { for (int idx = 0; idx < occurrences.get(level).size(); idx++) { occurIndexes.set(level, idx); //With an occurrence picked 0..n for this level's R-group, now find //all possible distributions (positional alternatives). int occurrence = occurrences.get(level).get(idx); int positions = this.getRgroupQueryAtoms(rGroupNumbers.get(level)).size(); Integer[] candidate = new Integer[positions]; for (int j = 0; j < candidate.length; j++) { candidate[j] = 0; } List<Integer[]> rgrpDistributions = new ArrayList<Integer[]>(); findDistributions(occurrence, candidate, rgrpDistributions, 0); for (Integer[] distribution : rgrpDistributions) { distributions.set(level, distribution); RGroup[] mapping = new RGroup[distribution.length]; List<List<RGroup>> mappedSubstitutes = new ArrayList<List<RGroup>>(); mapSubstitutes(this.getRGroupDefinitions().get(rGroupNumbers.get(level)),0, distribution, mapping, mappedSubstitutes); for (List<RGroup> mappings : mappedSubstitutes) { substitutes.set(level,mappings); findConfigurationsRecursively(rGroupNumbers, occurrences, occurIndexes, distributions, substitutes, level + 1, result); } } } } } /** * Finds valid distributions for a given R# group and it occurrence * condition taken from the LOG line.<br> * For example: if we have three Rn group atoms, and ">2" for * the occurrence, then there are fours possible ways to make a * distribution: 3 ways to put in two atoms, and one way * to put in all 3 atoms. Etc. * @param occur * @param candidate * @param distributions * @param level */ private void findDistributions(int occur, Integer[] candidate, List<Integer[]> distributions, int level) { if (level != candidate.length) { for (int i = 0; i < 2; i++) { candidate[level] = i; int sum = 0; for (int x = 0; x < candidate.length; x++) sum += candidate[x]; if (sum == occur) { distributions.add(candidate.clone()); } else { findDistributions(occur, candidate, distributions, level + 1); } } } } /** * Maps the distribution of an R-group to all possible substitute combinations. * This is best illustrated by an example.<br> * Say R2 occurs twice in the root, and has condition >0. So a valid * output configuration can have either one or two substitutes. * The distributions will have been calculated to be the following * solutions: [0,1], [1,0], [1,1] <br> * To start with [1,1], assume two possible substitutes have been * defined for R2, namely *C=O and *C-N. Then the distribution [1,1] * should lead to four mappings: <br> * [*C=O,*C=O], [*C-N,*C-N], [*C=O,*C-N], [*C-N,*C=O]. <br> * These mappings are generated in this function, as well as the other valid mappings * for [0,1] and [1,0]: <br> * [*C=O,null], [*C-N,null], [null,*C=O], [null,*C-N]. <br> * So the example would have this function produce eight mappings (result list size==8). * * @param rgpList * @param listOffset * @param distribution * @param mapping * @param result */ private void mapSubstitutes(RGroupList rgpList, int listOffset, Integer[] distribution, RGroup[] mapping, List<List<RGroup>> result) { if(listOffset==distribution.length) { List<RGroup> mapped= new ArrayList<RGroup>(); for(RGroup rgrp : mapping) mapped.add(rgrp); result.add(mapped); } else { if (distribution[listOffset]==0) { mapping[listOffset]=null; mapSubstitutes(rgpList, listOffset+1, distribution, mapping, result); } else { for (RGroup rgrp :rgpList.getRGroups()) { mapping[listOffset]=rgrp; mapSubstitutes(rgpList, listOffset+1, distribution, mapping, result); } } } } /** * Helper method, used to help construct a configuration. * @param atom * @param container * @return the array position of atom in container */ private int getAtomPosition(IAtom atom, IAtomContainer container) { for (int i = 0; i < container.getAtomCount(); i++) { if (atom.equals(container.getAtom(i))) { return i; } } return -1; } /** * Helper method, used to help construct a configuration. * @param bond * @param container * @return the array position of the bond in the container */ private int getBondPosition(IBond bond, IAtomContainer container) { for (int i = 0; i < container.getBondCount(); i++) { if (bond.equals(container.getBond(i))) { return i; } } return -1; } /** * Helper method to see if an array is all zeroes or not. * Used to check if the distribution of substitutes over an R-group * is all zeroes, meaning there will be no substitution done. * @param arr * @return true if arr's values are all zero. */ private boolean allZeroArray(Integer[] arr) { for (int flag : arr) if (flag != 0) return false; return true; } /** * Checks whether IF..THEN conditions that can be set for the R-groups are met. * It is used to filter away invalid configurations in {@link #findConfigurationsRecursively}. * <P> * Scenario: suppose R1 is substituted 0 times, whereas R2 is substituted. * Also suppose there is a condition IF R2 THEN R1. Because R1 does not * occur but R2 does, the IF..THEN condition is not met: this function * will return false, the configuration should be discarded. * @param rGroupNumbers * @param distributions * @return true if all IF..THEN RGroup conditions are met. */ private boolean checkIfThenConditionsMet(List<Integer> rGroupNumbers, List<Integer[]> distributions) { for (int outer = 0; outer < rGroupNumbers.size(); outer++) { int rgroupNum = rGroupNumbers.get(outer); if (allZeroArray(distributions.get(outer))) { for (int inner = 0; inner < rGroupNumbers.size(); inner++) { int rgroupNum2 = rGroupNumbers.get(inner); if (!allZeroArray(distributions.get(inner))) { RGroupList rgrpList = rGroupDefinitions.get(rgroupNum2); if (rgrpList.getRequiredRGroupNumber() == rgroupNum) { logger.info(" Rejecting >> all 0 for " + rgroupNum + " but requirement found from " + rgrpList.getRGroupNumber()); return false; } } } } } return true; } public int getAtomContainerCount() { int retVal=0; if(this.rootStructure!=null) retVal++; for(Integer r: rGroupDefinitions.keySet()) { for (RGroup rgrp : rGroupDefinitions.get(r).getRGroups()) { if (rgrp.getGroup()!=null) { retVal++; } } } return retVal; } public List<IAtomContainer> getSubstituents() { List<IAtomContainer> substitutes = new ArrayList<IAtomContainer>(); for(Integer r : rGroupDefinitions.keySet()) { for (RGroup rgrp : rGroupDefinitions.get(r).getRGroups()) { IAtomContainer subst =rgrp.getGroup(); if (subst!=null) substitutes.add(subst); } } return substitutes; } public void setRootStructure(IAtomContainer rootStructure) { this.rootStructure = rootStructure; } public IAtomContainer getRootStructure() { return rootStructure; } public void setRootAttachmentPoints(Map<IAtom, Map<Integer, IBond>> rootAttachmentPoints) { this.rootAttachmentPoints = rootAttachmentPoints; } public Map<IAtom, Map<Integer, IBond>> getRootAttachmentPoints() { return rootAttachmentPoints; } public void setRGroupDefinitions(Map<Integer, RGroupList> rGroupDefinitions) { this.rGroupDefinitions = rGroupDefinitions; } public Map<Integer, RGroupList> getRGroupDefinitions() { return rGroupDefinitions; } }