/* $Revision$ $Author$ $Date$
*
* Copyright (C) 2002-2007 Stephane Werner <mail@ixelis.net>
*
* This code has been kindly provided by Stephane Werner
* and Thierry Hanser from IXELIS mail@ixelis.net
*
* IXELIS sarl - Semantic Information Systems
* 17 rue des C???res 67200 Strasbourg, France
* Tel/Fax : +33(0)3 88 27 81 39 Email: mail@ixelis.net
*
* CDK Contact: cdk-devel@lists.sf.net
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
package org.openscience.cdk.isomorphism;
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.isomorphism.matchers.IQueryAtom;
import org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer;
import org.openscience.cdk.isomorphism.matchers.IQueryBond;
import org.openscience.cdk.isomorphism.mcss.RGraph;
import org.openscience.cdk.isomorphism.mcss.RMap;
import org.openscience.cdk.isomorphism.mcss.RNode;
import org.openscience.cdk.tools.manipulator.BondManipulator;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
* This class implements a multipurpose structure comparison tool.
* It allows to find maximal common substructure, find the
* mapping of a substructure in another structure, and the mapping of
* two isomorphic structures.
*
* <p>Structure comparison may be associated to bond constraints
* (mandatory bonds, e.g. scaffolds, reaction cores,...) on each source graph.
* The constraint flexibility allows a number of interesting queries.
* The substructure analysis relies on the RGraph generic class (see: RGraph)
* This class implements the link between the RGraph model and the
* the CDK model in this way the {@link RGraph} remains independent and may be used
* in other contexts.
*
* <p>This algorithm derives from the algorithm described in
* {@cdk.cite HAN90} and modified in the thesis of T. Hanser {@cdk.cite HAN93}.
*
* <p>With the {@link #isSubgraph(IAtomContainer, IAtomContainer)} method,
* the second, and only the second argument <b>may</b> be a {@link IQueryAtomContainer},
* which allows one to do SMARTS or MQL like queries.
* The first {@link IAtomContainer} must never be an {@link IQueryAtomContainer}.
* An example:<pre>
* SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance());
* IAtomContainer atomContainer = sp.parseSmiles("CC(=O)OC(=O)C"); // acetic acid anhydride
* IAtomContainer SMILESquery = sp.parseSmiles("CC"); // acetic acid anhydride
* IQueryAtomContainer query = IQueryAtomContainerCreator.createBasicQueryContainer(SMILESquery);
* boolean isSubstructure = UniversalIsomorphismTester.isSubgraph(atomContainer, query);
* </pre>
*
* <p><font color="#FF0000">WARNING</font>:
* As a result of the adjacency perception used in this algorithm
* there is a single limitation: cyclopropane and isobutane are seen as isomorph.
* This is due to the fact that these two compounds are the only ones where
* each bond is connected two each other bond (bonds are fully connected)
* with the same number of bonds and still they have different structures
* The algorithm could be easily enhanced with a simple atom mapping manager
* to provide an atom level overlap definition that would reveal this case.
* We decided not to penalize the whole procedure because of one single
* exception query. Furthermore isomorphism may be discarded since the number of atoms are
* not the same (3 != 4) and in most case this will be already
* screened out by a fingerprint based filtering.
* It is possible to add a special treatment for this special query.
* Be reminded that this algorithm matches bonds only.
* </p>
* <p>
* <b>Note</b>While most isomorphism queries involve a multi-atom query structure
* there may be cases in which the query atom is a single atom. In such a case
* a mapping of target bonds to query bonds is not feasible. In such a case, the RMap objects
* correspond to atom indices rather than bond indices. In general, this will not affect user
* code and the same sequence of method calls for matching multi-atom query structures will
* work for single atom query structures as well.
* </p>
*
* @author Stephane Werner from IXELIS mail@ixelis.net
* @cdk.created 2002-07-17
* @cdk.require java1.4+
* @cdk.module standard
* @cdk.githash
*/
public class UniversalIsomorphismTester {
final static int ID1 = 0;
final static int ID2 = 1;
private static long start;
public static long timeout=-1;
///////////////////////////////////////////////////////////////////////////
// Query Methods
//
// This methods are simple applications of the RGraph model on atom containers
// using different constrains and search options. They give an exemple of the
// most common queries but of course it is possible to define other type of
// queries exploiting the constrain and option combinations
//
////
// Isomorphism search
/**
* Tests if g1 and g2 are isomorph.
*
* @param g1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param g2 second molecule. May be an {@link IQueryAtomContainer}.
* @return true if the 2 molecule are isomorph
* @throws CDKException if the first molecule is an instance of IQueryAtomContainer
*/
public static boolean isIsomorph(IAtomContainer g1, IAtomContainer g2) throws CDKException{
if (g1 instanceof IQueryAtomContainer)
throw new CDKException(
"The first IAtomContainer must not be an IQueryAtomContainer"
);
if (g2.getAtomCount() != g1.getAtomCount()) return false;
// check single atom case
if (g2.getAtomCount() == 1) {
IAtom atom = g1.getAtom(0);
IAtom atom2 = g2.getAtom(0);
if (atom instanceof IQueryAtom) {
IQueryAtom qAtom = (IQueryAtom)atom;
return qAtom.matches(g2.getAtom(0));
} else if (atom2 instanceof IQueryAtom) {
IQueryAtom qAtom = (IQueryAtom)atom2;
return qAtom.matches(g1.getAtom(0));
} else {
String atomSymbol = atom.getSymbol();
return g1.getAtom(0).getSymbol().equals(atomSymbol);
}
}
return (getIsomorphMap(g1, g2) != null);
}
/**
* Returns the first isomorph mapping found or null.
*
* @param g1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param g2 second molecule. May be an {@link IQueryAtomContainer}.
* @return the first isomorph mapping found projected of g1. This is a List of RMap objects containing Ids of matching bonds.
*/
public static List<RMap> getIsomorphMap(IAtomContainer g1, IAtomContainer g2) throws CDKException{
if (g1 instanceof IQueryAtomContainer)
throw new CDKException(
"The first IAtomContainer must not be an IQueryAtomContainer"
);
List<RMap> result = null;
List<List<RMap>> rMapsList = search(g1, g2, getBitSet(g1), getBitSet(g2), false, false);
if (!rMapsList.isEmpty()) {
result = rMapsList.get(0);
}
return result;
}
/**
* Returns the first isomorph 'atom mapping' found for g2 in g1.
*
* @param g1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param g2 second molecule. May be an {@link IQueryAtomContainer}.
* @return the first isomorph atom mapping found projected on g1.
* This is a List of RMap objects containing Ids of matching atoms.
* @throws CDKException if the first molecules is not an instance of {@link IQueryAtomContainer}
*/
public static List<RMap> getIsomorphAtomsMap(IAtomContainer g1, IAtomContainer g2) throws CDKException {
if (g1 instanceof IQueryAtomContainer)
throw new CDKException(
"The first IAtomContainer must not be an IQueryAtomContainer"
);
List<RMap> list = checkSingleAtomCases(g1, g2);
if (list == null) {
return makeAtomsMapOfBondsMap(UniversalIsomorphismTester.getIsomorphMap(g1, g2), g1, g2);
} else if (list.isEmpty()) {
return null;
} else {
return list;
}
}
/**
* Returns all the isomorph 'mappings' found between two
* atom containers.
*
* @param g1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param g2 second molecule. May be an {@link IQueryAtomContainer}.
* @return the list of all the 'mappings'
*/
public static List<List<RMap>> getIsomorphMaps(IAtomContainer g1, IAtomContainer g2) throws CDKException{
return search(g1, g2, getBitSet(g1), getBitSet(g2), true, true);
}
/////
// Subgraph search
/**
* Returns all the subgraph 'bond mappings' found for g2 in g1.
* This is an {@link List} of {@link List}s of {@link RMap} objects.
*
* Note that if the query molecule is a single atom, then bond mappings
* cannot be defined. In such a case, the {@link RMap} object refers directly to
* atom - atom mappings. Thus RMap.id1 is the index of the target atom
* and RMap.id2 is the index of the matching query atom (in this case,
* it will always be 0). Note that in such a case, there is no need
* to call {@link #makeAtomsMapsOfBondsMaps(List, IAtomContainer, IAtomContainer)},
* though if it is called, then the
* return value is simply the same as the return value of this method.
*
* @param g1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param g2 second molecule. May be an {@link IQueryAtomContainer}.
* @return the list of all the 'mappings' found projected of g1
*
* @see #makeAtomsMapsOfBondsMaps(List, IAtomContainer, IAtomContainer)
*/
public static List<List<RMap>> getSubgraphMaps(IAtomContainer g1, IAtomContainer g2) throws CDKException{
return search(g1, g2, new BitSet(), getBitSet(g2), true, true);
}
/**
* Returns the first subgraph 'bond mapping' found for g2 in g1.
*
* @param g1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param g2 second molecule. May be an {@link IQueryAtomContainer}.
* @return the first subgraph bond mapping found projected on g1. This is a {@link List} of
* {@link RMap} objects containing Ids of matching bonds.
*/
public static List<RMap> getSubgraphMap(IAtomContainer g1, IAtomContainer g2) throws CDKException{
List<RMap> result = null;
List<List<RMap>> rMapsList = search(g1, g2, new BitSet(), getBitSet(g2), false, false);
if (!rMapsList.isEmpty()) {
result = rMapsList.get(0);
}
return result;
}
/**
* Returns all subgraph 'atom mappings' found for g2 in g1, where g2 must be a substructure
* of g1. If it is not a substructure, null will be returned.
* This is an {@link List} of {@link List}s of {@link RMap} objects.
*
* @param g1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param g2 substructure to be mapped. May be an {@link IQueryAtomContainer}.
* @return all subgraph atom mappings found projected on g1. This is a
* {@link List} of {@link RMap} objects containing Ids of matching atoms.
*/
public static List<List<RMap>> getSubgraphAtomsMaps(IAtomContainer g1,
IAtomContainer g2)
throws CDKException {
List<RMap> list = checkSingleAtomCases(g1, g2);
if (list == null) {
return makeAtomsMapsOfBondsMaps(
UniversalIsomorphismTester.getSubgraphMaps(g1, g2), g1, g2
);
} else {
List<List<RMap>> atomsMap = new ArrayList<List<RMap>>();
atomsMap.add(list);
return atomsMap;
}
}
/**
* Returns the first subgraph 'atom mapping' found for g2 in g1, where g2 must be a substructure
* of g1. If it is not a substructure, null will be returned.
*
* @param g1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param g2 substructure to be mapped. May be an {@link IQueryAtomContainer}.
* @return the first subgraph atom mapping found projected on g1.
* This is a {@link List} of {@link RMap} objects containing Ids of matching atoms.
*/
public static List<RMap> getSubgraphAtomsMap(IAtomContainer g1,
IAtomContainer g2)
throws CDKException {
List<RMap> list = checkSingleAtomCases(g1, g2);
if (list == null) {
return makeAtomsMapOfBondsMap(UniversalIsomorphismTester.getSubgraphMap(g1, g2), g1, g2);
} else if (list.isEmpty()) {
return null;
} else {
return list;
}
}
/**
* Tests if g2 a subgraph of g1.
*
* @param g1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param g2 second molecule. May be an {@link IQueryAtomContainer}.
* @return true if g2 a subgraph on g1
*/
public static boolean isSubgraph(IAtomContainer g1, IAtomContainer g2) throws CDKException{
if (g1 instanceof IQueryAtomContainer)
throw new CDKException(
"The first IAtomContainer must not be an IQueryAtomContainer"
);
if (g2.getAtomCount() > g1.getAtomCount()) return false;
// test for single atom case
if (g2.getAtomCount() == 1) {
IAtom atom = g2.getAtom(0);
for (int i=0; i<g1.getAtomCount(); i++) {
IAtom atom2 = g1.getAtom(i);
if (atom instanceof IQueryAtom) {
IQueryAtom qAtom = (IQueryAtom)atom;
if (qAtom.matches(atom2)) return true;
} else if (atom2 instanceof IQueryAtom) {
IQueryAtom qAtom = (IQueryAtom)atom2;
if (qAtom.matches(atom)) return true;
} else {
if (atom2.getSymbol().equals(atom.getSymbol())) return true;
}
}
return false;
}
if (!testSubgraphHeuristics(g1, g2)) return false;
return (getSubgraphMap(g1, g2) != null);
}
////
// Maximum common substructure search
/**
* Returns all the maximal common substructure between twp atom containers.
*
* @param g1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param g2 second molecule. May be an {@link IQueryAtomContainer}.
* @return the list of all the maximal common substructure
* found projected of g1 (list of AtomContainer )
*/
public static List<IAtomContainer> getOverlaps(IAtomContainer g1, IAtomContainer g2) throws CDKException{
start=System.currentTimeMillis();
List<List<RMap>> rMapsList = search(g1, g2, new BitSet(), new BitSet(), true, false);
// projection on G1
List<IAtomContainer> graphList = projectList(rMapsList, g1, ID1);
// reduction of set of solution (isomorphism and substructure
// with different 'mappings'
return getMaximum(graphList);
}
/**
* Transforms an AtomContainer into a {@link BitSet} (which's size = number of bond
* in the atomContainer, all the bit are set to true).
*
* @param ac {@link IAtomContainer} to transform
* @return The bitSet
*/
public static BitSet getBitSet(IAtomContainer ac) {
BitSet bs;
int n = ac.getBondCount();
if (n != 0) {
bs = new BitSet(n);
for (int i = 0; i < n; i++) { bs.set(i); }
} else {
bs = new BitSet();
}
return bs;
}
//////////////////////////////////////////////////
// Internal methods
/**
* Builds the {@link RGraph} ( resolution graph ), from two atomContainer
* (description of the two molecules to compare)
* This is the interface point between the CDK model and
* the generic MCSS algorithm based on the RGRaph.
*
* @param g1 Description of the first molecule
* @param g2 Description of the second molecule
* @return the rGraph
*/
public static RGraph buildRGraph(IAtomContainer g1, IAtomContainer g2) throws CDKException{
RGraph rGraph = new RGraph();
nodeConstructor(rGraph, g1, g2);
arcConstructor(rGraph, g1, g2);
return rGraph;
}
/**
* General {@link RGraph} parsing method (usually not used directly)
* This method is the entry point for the recursive search
* adapted to the atom container input.
*
* @param g1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param g2 second molecule. May be an {@link IQueryAtomContainer}.
* @param c1 initial condition ( bonds from g1 that
* must be contains in the solution )
* @param c2 initial condition ( bonds from g2 that
* must be contains in the solution )
* @param findAllStructure if false stop at the first structure found
* @param findAllMap if true search all the 'mappings' for one same
* structure
* @return a List of Lists of {@link RMap} objects that represent the search solutions
*/
public static List<List<RMap>> search(IAtomContainer g1, IAtomContainer g2, BitSet c1,
BitSet c2, boolean findAllStructure, boolean findAllMap) throws CDKException{
// handle single query atom case separately
if (g2.getAtomCount() == 1) {
List<List<RMap>> matches = new ArrayList<List<RMap>>();
IAtom queryAtom = g2.getAtom(0);
// we can have a IQueryAtomContainer *or* an IAtomContainer
if (queryAtom instanceof IQueryAtom) {
IQueryAtom qAtom = (IQueryAtom) queryAtom;
for (IAtom atom : g1.atoms()) {
if (qAtom.matches(atom)) {
List<RMap> lmap = new ArrayList<RMap>();
lmap.add(new RMap(g1.getAtomNumber(atom), 0));
matches.add(lmap);
}
}
} else {
for (IAtom atom : g1.atoms()) {
if (queryAtom.getSymbol().equals(atom.getSymbol())) {
List<RMap> lmap = new ArrayList<RMap>();
lmap.add(new RMap(g1.getAtomNumber(atom), 0));
matches.add(lmap);
}
}
}
return matches;
}
// reset result
List<List<RMap>> rMapsList = new ArrayList<List<RMap>>();
// build the RGraph corresponding to this problem
RGraph rGraph = buildRGraph(g1, g2);
// parse the RGraph with the given constrains and options
rGraph.parse(c1, c2, findAllStructure, findAllMap);
List<BitSet> solutionList = rGraph.getSolutions();
// conversions of RGraph's internal solutions to G1/G2 mappings
for (BitSet set : solutionList) {
rMapsList.add(rGraph.bitSetToRMap(set));
}
return rMapsList;
}
//////////////////////////////////////
// Manipulation tools
/**
* Projects a list of {@link RMap} on a molecule.
*
* @param rMapList the list to project
* @param g the molecule on which project
* @param id the id in the {@link RMap} of the molecule g
* @return an AtomContainer
*/
public static IAtomContainer project(List<RMap> rMapList, IAtomContainer g, int id) {
IAtomContainer ac = g.getBuilder().newAtomContainer();
Map<IAtom,IAtom> table = new HashMap<IAtom,IAtom>();
IAtom a1;
IAtom a2;
IAtom a;
IBond bond;
for (Iterator<RMap> i = rMapList.iterator(); i.hasNext(); ) {
RMap rMap = i.next();
if (id == UniversalIsomorphismTester.ID1) {
bond = g.getBond(rMap.getId1());
} else {
bond = g.getBond(rMap.getId2());
}
a = bond.getAtom(0);
a1 = (IAtom) table.get(a);
if (a1 == null) {
try {
a1 = (IAtom)a.clone();
} catch (CloneNotSupportedException e) {
e.printStackTrace();
}
ac.addAtom(a1);
table.put(a, a1);
}
a = bond.getAtom(1);
a2 = table.get(a);
if (a2 == null) {
try {
a2 = (IAtom)a.clone();
} catch (CloneNotSupportedException e) {
e.printStackTrace();
}
ac.addAtom(a2);
table.put(a, a2);
}
IBond newBond = g.getBuilder().newBond(a1, a2, bond.getOrder());
newBond.setFlag(
CDKConstants.ISAROMATIC,
bond.getFlag(CDKConstants.ISAROMATIC)
);
ac.addBond(newBond);
}
return ac;
}
/**
* Projects a list of RMapsList on a molecule.
*
* @param rMapsList list of RMapsList to project
* @param g the molecule on which project
* @param id the id in the RMap of the molecule g
* @return a list of AtomContainer
*/
public static List<IAtomContainer> projectList(List<List<RMap>> rMapsList, IAtomContainer g, int id) {
List<IAtomContainer> graphList = new ArrayList<IAtomContainer>();
for (List<RMap> rMapList : rMapsList) {
IAtomContainer ac = project(rMapList, g, id);
graphList.add(ac);
}
return graphList;
}
/**
* Removes all redundant solution.
*
* @param graphList the list of structure to clean
* @return the list cleaned
* @throws CDKException if there is a problem in obtaining subgraphs
*/
private static List<IAtomContainer> getMaximum(List<IAtomContainer> graphList) throws CDKException {
List<IAtomContainer> reducedGraphList = new ArrayList<IAtomContainer>();
reducedGraphList.addAll(graphList);
for (int i = 0; i < graphList.size(); i++) {
IAtomContainer gi = graphList.get(i);
for (int j = i + 1; j < graphList.size(); j++) {
IAtomContainer gj = graphList.get(j);
// Gi included in Gj or Gj included in Gi then
// reduce the irrelevant solution
if (isSubgraph(gj, gi)) {
reducedGraphList.remove(gi);
} else if (isSubgraph(gi, gj)) {
reducedGraphList.remove(gj);
}
}
}
return reducedGraphList;
}
/**
* Checks for single atom cases before doing subgraph/isomorphism search.
*
* @param g1 AtomContainer to match on. Must not be an {@link IQueryAtomContainer}.
* @param g2 AtomContainer as query. May be an {@link IQueryAtomContainer}.
* @return {@link List} of {@link List} of {@link RMap} objects for the Atoms (not Bonds!), null if no single atom case
* @throws CDKException if the first molecule is an instance of IQueryAtomContainer
*/
public static List<RMap> checkSingleAtomCases(IAtomContainer g1, IAtomContainer g2) throws CDKException {
if (g1 instanceof IQueryAtomContainer)
throw new CDKException(
"The first IAtomContainer must not be an IQueryAtomContainer"
);
if (g2.getAtomCount() == 1) {
List<RMap> arrayList = new ArrayList<RMap>();
IAtom atom = g2.getAtom(0);
if (atom instanceof IQueryAtom) {
IQueryAtom qAtom = (IQueryAtom)atom;
for (int i=0; i<g1.getAtomCount(); i++){
if(qAtom.matches(g1.getAtom(i)))
arrayList.add(new RMap(i,0));
}
} else {
String atomSymbol = atom.getSymbol();
for(int i=0; i<g1.getAtomCount(); i++){
if(g1.getAtom(i).getSymbol().equals(atomSymbol))
arrayList.add(new RMap(i,0));
}
}
return arrayList;
} else if (g1.getAtomCount() == 1) {
List<RMap> arrayList = new ArrayList<RMap>();
IAtom atom = g1.getAtom(0);
for (int i=0; i<g2.getAtomCount(); i++) {
IAtom atom2 = g2.getAtom(i);
if (atom2 instanceof IQueryAtom) {
IQueryAtom qAtom = (IQueryAtom)atom2;
if (qAtom.matches(atom))
arrayList.add(new RMap(0,i));
} else {
if(atom2.getSymbol().equals(atom.getSymbol()))
arrayList.add(new RMap(0,i));
}
}
return arrayList;
} else {
return null;
}
}
/**
* This makes maps of matching atoms out of a maps of matching bonds as produced by the
* get(Subgraph|Ismorphism)Maps methods.
*
* @param l The list produced by the getMap method.
* @param g1 The first atom container. Must not be a {@link IQueryAtomContainer}.
* @param g2 The second one (first and second as in getMap). May be an {@link QueryAtomContainer}.
* @return A List of {@link List}s of {@link RMap} objects of matching Atoms.
*/
public static List<List<RMap>> makeAtomsMapsOfBondsMaps(List<List<RMap>> l, IAtomContainer g1, IAtomContainer g2) {
if (l == null) {
return l;
}
if (g2.getAtomCount() == 1) return l; // since the RMap is already an atom-atom mapping
List<List<RMap>> result = new ArrayList<List<RMap>>();
for (List<RMap> l2 : l) {
result.add(makeAtomsMapOfBondsMap(l2, g1, g2));
}
return result;
}
/**
* This makes a map of matching atoms out of a map of matching bonds as produced by the
* get(Subgraph|Ismorphism)Map methods.
*
* @param l The list produced by the getMap method.
* @param g1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param g2 second molecule. May be an {@link IQueryAtomContainer}.
* @return The mapping found projected on g1. This is a {@link List} of {@link RMap} objects
* containing Ids of matching atoms.
*/
public static List<RMap> makeAtomsMapOfBondsMap(List<RMap> l, IAtomContainer g1, IAtomContainer g2) {
if(l==null)
return(l);
List<RMap> result = new ArrayList<RMap>();
for (int i = 0; i < l.size(); i++) {
IBond bond1 = g1.getBond(l.get(i).getId1());
IBond bond2 = g2.getBond(l.get(i).getId2());
IAtom[] atom1 = BondManipulator.getAtomArray(bond1);
IAtom[] atom2 = BondManipulator.getAtomArray(bond2);
for (int j = 0; j < 2; j++) {
List<IBond> bondsConnectedToAtom1j = g1.getConnectedBondsList(atom1[j]);
for (int k = 0; k < bondsConnectedToAtom1j.size(); k++) {
if (bondsConnectedToAtom1j.get(k) != bond1) {
IBond testBond = (IBond)bondsConnectedToAtom1j.get(k);
for (int m = 0; m < l.size(); m++) {
IBond testBond2;
if (((RMap) l.get(m)).getId1() == g1.getBondNumber(testBond)) {
testBond2 = g2.getBond(((RMap) l.get(m)).getId2());
for (int n = 0; n < 2; n++) {
List<IBond> bondsToTest = g2.getConnectedBondsList(atom2[n]);
if (bondsToTest.contains(testBond2)) {
RMap map;
if (j == n) {
map = new RMap(g1.getAtomNumber(atom1[0]), g2.getAtomNumber(atom2[0]));
} else {
map = new RMap(g1.getAtomNumber(atom1[1]), g2.getAtomNumber(atom2[0]));
}
if (!result.contains(map)) {
result.add(map);
}
RMap map2;
if (j == n) {
map2 = new RMap(g1.getAtomNumber(atom1[1]), g2.getAtomNumber(atom2[1]));
} else {
map2 = new RMap(g1.getAtomNumber(atom1[0]), g2.getAtomNumber(atom2[1]));
}
if (!result.contains(map2)) {
result.add(map2);
}
}
}
}
}
}
}
}
}
return result;
}
/**
* Builds the nodes of the {@link RGraph} ( resolution graph ), from
* two atom containers (description of the two molecules to compare)
*
* @param gr the target RGraph
* @param ac1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param ac2 second molecule. May be an {@link IQueryAtomContainer}.
* @throws CDKException if it takes too long to identify overlaps
*/
private static void nodeConstructor(RGraph gr, IAtomContainer ac1, IAtomContainer ac2) throws CDKException {
if (ac1 instanceof IQueryAtomContainer)
throw new CDKException(
"The first IAtomContainer must not be an IQueryAtomContainer"
);
// resets the target graph.
gr.clear();
// compares each bond of G1 to each bond of G2
for (int i = 0; i < ac1.getBondCount(); i++) {
for (int j = 0; j < ac2.getBondCount(); j++) {
if(timeout>-1 && (System.currentTimeMillis()-start)>timeout)
throw new CDKException("Timeout exceeded in getOverlaps");
IBond bondA2 = ac2.getBond(j);
if (bondA2 instanceof IQueryBond) {
IQueryBond queryBond = (IQueryBond)bondA2;
IQueryAtom atom1 = (IQueryAtom)(bondA2.getAtom(0));
IQueryAtom atom2 = (IQueryAtom)(bondA2.getAtom(1));
IBond bond = ac1.getBond(i);
if (queryBond.matches(bond)) {
// ok, bonds match
if (atom1.matches(bond.getAtom(0)) && atom2.matches(bond.getAtom(1)) ||
atom1.matches(bond.getAtom(1)) && atom2.matches(bond.getAtom(0))) {
// ok, atoms match in either order
gr.addNode(new RNode(i,j));
}
}
} else {
// if both bonds are compatible then create an association node
// in the resolution graph
if (
( // bond type conditions
( // same bond order and same aromaticity flag (either both on or off)
ac1.getBond(i).getOrder() == ac2.getBond(j).getOrder() &&
ac1.getBond(i).getFlag(CDKConstants.ISAROMATIC) ==
ac2.getBond(j).getFlag(CDKConstants.ISAROMATIC)
)
||
( // both bond are aromatic
ac1.getBond(i).getFlag(CDKConstants.ISAROMATIC) &&
ac2.getBond(j).getFlag(CDKConstants.ISAROMATIC)
)
)
&&
( // atome type conditions
( // a1 = a2 && b1 = b2
ac1.getBond(i).getAtom(0).getSymbol().equals(ac2.getBond(j).getAtom(0).getSymbol()) &&
ac1.getBond(i).getAtom(1).getSymbol().equals(ac2.getBond(j).getAtom(1).getSymbol())
)
||
( // a1 = b2 && b1 = a2
ac1.getBond(i).getAtom(0).getSymbol().equals(ac2.getBond(j).getAtom(1).getSymbol()) &&
ac1.getBond(i).getAtom(1).getSymbol().equals(ac2.getBond(j).getAtom(0).getSymbol())
)
)
) {
gr.addNode(new RNode(i, j));
}
}
}
}
}
/**
* Build edges of the {@link RGraph}s.
* This method create the edge of the RGraph and
* calculates the incompatibility and neighborhood
* relationships between RGraph nodes.
*
* @param gr the rGraph
* @param ac1 first molecule. Must not be an {@link IQueryAtomContainer}.
* @param ac2 second molecule. May be an {@link IQueryAtomContainer}.
* @throws CDKException if it takes too long to get the overlaps
*/
private static void arcConstructor(RGraph gr, IAtomContainer ac1, IAtomContainer ac2) throws CDKException{
// each node is incompatible with himself
for (int i = 0; i < gr.getGraph().size(); i++) {
RNode x = (RNode) gr.getGraph().get(i);
x.getForbidden().set(i);
}
IBond a1;
IBond a2;
IBond b1;
IBond b2;
gr.setFirstGraphSize(ac1.getBondCount());
gr.setSecondGraphSize(ac2.getBondCount());
for (int i = 0; i < gr.getGraph().size(); i++) {
RNode x = gr.getGraph().get(i);
// two nodes are neighbors if their adjacency
// relationship in are equivalent in G1 and G2
// else they are incompatible.
for (int j = i + 1; j < gr.getGraph().size(); j++) {
if(timeout>-1 && (System.currentTimeMillis()-start)>timeout)
throw new CDKException("Timeout exceeded in getOverlaps");
RNode y = gr.getGraph().get(j);
a1 = ac1.getBond(gr.getGraph().get(i).getRMap().getId1());
a2 = ac2.getBond(gr.getGraph().get(i).getRMap().getId2());
b1 = ac1.getBond(gr.getGraph().get(j).getRMap().getId1());
b2 = ac2.getBond(gr.getGraph().get(j).getRMap().getId2());
if (a2 instanceof IQueryBond) {
if (a1.equals(b1) || a2.equals(b2) ||
!queryAdjacencyAndOrder(a1, b1, a2, b2)) {
x.getForbidden().set(j);
y.getForbidden().set(i);
} else if (hasCommonAtom(a1, b1)) {
x.getExtension().set(j);
y.getExtension().set(i);
}
} else {
if (a1.equals(b1) || a2.equals(b2) ||
(!getCommonSymbol(a1, b1).equals(getCommonSymbol(a2, b2)))) {
x.getForbidden().set(j);
y.getForbidden().set(i);
} else if (hasCommonAtom(a1, b1)) {
x.getExtension().set(j);
y.getExtension().set(i);
}
}
}
}
}
/**
* Determines if two bonds have at least one atom in common.
*
* @param a first bond
* @param b second bond
* @return the symbol of the common atom or "" if
* the 2 bonds have no common atom
*/
private static boolean hasCommonAtom(IBond a, IBond b) {
return a.contains(b.getAtom(0)) || a.contains(b.getAtom(1));
}
/**
* Determines if 2 bond have 1 atom in common and returns the common symbol.
*
* @param a first bond
* @param b second bond
* @return the symbol of the common atom or "" if
* the 2 bonds have no common atom
*/
private static String getCommonSymbol(IBond a, IBond b) {
String symbol = "";
if (a.contains(b.getAtom(0))) {
symbol = b.getAtom(0).getSymbol();
} else if (a.contains(b.getAtom(1))) {
symbol = b.getAtom(1).getSymbol();
}
return symbol;
}
/**
* Determines if 2 bond have 1 atom in common if second is a query AtomContainer.
*
* @param a1 first bond
* @param b1 second bond
* @return the symbol of the common atom or "" if
* the 2 bonds have no common atom
*/
private static boolean queryAdjacency(IBond a1, IBond b1, IBond a2, IBond b2) {
IAtom atom1 = null;
IAtom atom2 = null;
if (a1.contains(b1.getAtom(0))) {
atom1 = b1.getAtom(0);
} else if (a1.contains(b1.getAtom(1))) {
atom1 = b1.getAtom(1);
}
if (a2.contains(b2.getAtom(0))) {
atom2 = b2.getAtom(0);
} else if (a2.contains(b2.getAtom(1))) {
atom2 = b2.getAtom(1);
}
if (atom1 != null && atom2 != null){
// well, this looks fishy: the atom2 is not always a IQueryAtom !
return ((IQueryAtom)atom2).matches(atom1);
} else return atom1 == null && atom2 == null;
}
/**
* Determines if 2 bond have 1 atom in common if second is a query AtomContainer
* and whether the order of the atoms is correct (atoms match).
*
* @param bond1 first bond
* @param bond2 second bond
* @param queryBond1 first query bond
* @param queryBond2 second query bond
* @return the symbol of the common atom or "" if the 2 bonds have no common atom
*/
private static boolean queryAdjacencyAndOrder(IBond bond1, IBond bond2, IBond queryBond1, IBond queryBond2) {
IAtom centralAtom = null;
IAtom centralQueryAtom = null;
if (bond1.contains(bond2.getAtom(0))) {
centralAtom = bond2.getAtom(0);
} else if (bond1.contains(bond2.getAtom(1))) {
centralAtom = bond2.getAtom(1);
}
if (queryBond1.contains(queryBond2.getAtom(0))) {
centralQueryAtom = queryBond2.getAtom(0);
} else if (queryBond1.contains(queryBond2.getAtom(1))) {
centralQueryAtom = queryBond2.getAtom(1);
}
if (centralAtom != null && centralQueryAtom != null
&& ((IQueryAtom)centralQueryAtom).matches(centralAtom)) {
IQueryAtom queryAtom1 = (IQueryAtom)queryBond1.getConnectedAtom(centralQueryAtom);
IQueryAtom queryAtom2 = (IQueryAtom)queryBond2.getConnectedAtom(centralQueryAtom);
IAtom atom1 = bond1.getConnectedAtom(centralAtom);
IAtom atom2 = bond2.getConnectedAtom(centralAtom);
if (queryAtom1.matches(atom1) && queryAtom2.matches(atom2) ||
queryAtom1.matches(atom2) && queryAtom2.matches(atom1)) {
return true;
} else return false;
} else return centralAtom == null && centralQueryAtom == null;
}
/**
* Checks some simple heuristics for whether the subgraph query can
* realistically be a subgraph of the supergraph. If, for example, the
* number of nitrogen atoms in the query is larger than that of the supergraph
* it cannot be part of it.
*
* @param ac1 the supergraph to be checked. Must not be an {@link IQueryAtomContainer}.
* @param ac2 the subgraph to be tested for. May be an {@link IQueryAtomContainer}.
* @return true if the subgraph ac2 has a chance to be a subgraph of ac1
* @throws CDKException if the first molecule is an instance of {@link IQueryAtomContainer}
*/
private static boolean testSubgraphHeuristics(IAtomContainer ac1, IAtomContainer ac2)
throws CDKException {
if (ac1 instanceof IQueryAtomContainer)
throw new CDKException(
"The first IAtomContainer must not be an IQueryAtomContainer"
);
int ac1SingleBondCount = 0;
int ac1DoubleBondCount = 0;
int ac1TripleBondCount = 0;
int ac1AromaticBondCount = 0;
int ac2SingleBondCount = 0;
int ac2DoubleBondCount = 0;
int ac2TripleBondCount = 0;
int ac2AromaticBondCount = 0;
int ac1SCount = 0;
int ac1OCount = 0;
int ac1NCount = 0;
int ac1FCount = 0;
int ac1ClCount = 0;
int ac1BrCount = 0;
int ac1ICount = 0;
int ac1CCount = 0;
int ac2SCount = 0;
int ac2OCount = 0;
int ac2NCount = 0;
int ac2FCount = 0;
int ac2ClCount = 0;
int ac2BrCount = 0;
int ac2ICount = 0;
int ac2CCount = 0;
IBond bond;
IAtom atom;
for (int i = 0; i < ac1.getBondCount(); i++)
{
bond = ac1.getBond(i);
if (bond.getFlag(CDKConstants.ISAROMATIC)) ac1AromaticBondCount ++;
else if (bond.getOrder() == IBond.Order.SINGLE) ac1SingleBondCount ++;
else if (bond.getOrder() == IBond.Order.DOUBLE) ac1DoubleBondCount ++;
else if (bond.getOrder() == IBond.Order.TRIPLE) ac1TripleBondCount ++;
}
for (int i = 0; i < ac2.getBondCount(); i++)
{
bond = ac2.getBond(i);
if (bond instanceof IQueryBond) continue;
if (bond.getFlag(CDKConstants.ISAROMATIC)) ac2AromaticBondCount ++;
else if (bond.getOrder() == IBond.Order.SINGLE) ac2SingleBondCount ++;
else if (bond.getOrder() == IBond.Order.DOUBLE) ac2DoubleBondCount ++;
else if (bond.getOrder() == IBond.Order.TRIPLE) ac2TripleBondCount ++;
}
if (ac2SingleBondCount > ac1SingleBondCount) return false;
if (ac2AromaticBondCount > ac1AromaticBondCount) return false;
if (ac2DoubleBondCount > ac1DoubleBondCount) return false;
if (ac2TripleBondCount > ac1TripleBondCount) return false;
for (int i = 0; i < ac1.getAtomCount(); i++)
{
atom = ac1.getAtom(i);
if (atom.getSymbol().equals("S")) ac1SCount ++;
else if (atom.getSymbol().equals("N")) ac1NCount ++;
else if (atom.getSymbol().equals("O")) ac1OCount ++;
else if (atom.getSymbol().equals("F")) ac1FCount ++;
else if (atom.getSymbol().equals("Cl")) ac1ClCount ++;
else if (atom.getSymbol().equals("Br")) ac1BrCount ++;
else if (atom.getSymbol().equals("I")) ac1ICount ++;
else if (atom.getSymbol().equals("C")) ac1CCount ++;
}
for (int i = 0; i < ac2.getAtomCount(); i++)
{
atom = ac2.getAtom(i);
if (atom instanceof IQueryAtom) continue;
if (atom.getSymbol().equals("S")) ac2SCount ++;
else if (atom.getSymbol().equals("N")) ac2NCount ++;
else if (atom.getSymbol().equals("O")) ac2OCount ++;
else if (atom.getSymbol().equals("F")) ac2FCount ++;
else if (atom.getSymbol().equals("Cl")) ac2ClCount ++;
else if (atom.getSymbol().equals("Br")) ac2BrCount ++;
else if (atom.getSymbol().equals("I")) ac2ICount ++;
else if (atom.getSymbol().equals("C")) ac2CCount ++;
}
if (ac1SCount < ac2SCount) return false;
if (ac1NCount < ac2NCount) return false;
if (ac1OCount < ac2OCount) return false;
if (ac1FCount < ac2FCount) return false;
if (ac1ClCount < ac2ClCount) return false;
if (ac1BrCount < ac2BrCount) return false;
if (ac1ICount < ac2ICount) return false;
return ac1CCount >= ac2CCount;
}
}