package org.openscience.cdk.qsar.descriptors.molecular; import org.openscience.cdk.DefaultChemObjectBuilder; import org.openscience.cdk.config.IsotopeFactory; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.isomorphism.UniversalIsomorphismTester; import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer; import org.openscience.cdk.isomorphism.mcss.RMap; import org.openscience.cdk.qsar.AtomValenceTool; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; /** * Utility methods for chi index calculations. * <p/> * These methods are common to all the types of chi index calculations and can * be used to evaluate path, path-cluster, cluster and chain chi indices. * * @author Rajarshi Guha * @cdk.module qsarmolecular * @cdk.githash */ class ChiIndexUtils { /** * Gets the fragments from a target <code>AtomContainer</code> matching a set of query fragments. * <p/> * This method returns a list of lists. Each list contains the atoms of the target <code>AtomContainer</code> * that arise in the mapping of bonds in the target molecule to the bonds in the query fragment. * The query fragments should be constructed * using the <code>createAnyAtomAnyBondContainer</code> method of the <code>QueryAtomContainerCreator</code> * CDK class, since we are only interested in connectivity and not actual atom or bond type information. * * @param atomContainer The target <code>AtomContainer</code> * @param queries An array of query fragments * @return A list of lists, each list being the atoms that match the query fragments */ public static List<List<Integer>> getFragments(IAtomContainer atomContainer, QueryAtomContainer[] queries) { List<List<Integer>> uniqueSubgraphs = new ArrayList<List<Integer>>(); for (QueryAtomContainer query : queries) { List subgraphMaps = null; try { // we get the list of bond mappings subgraphMaps = UniversalIsomorphismTester.getSubgraphMaps(atomContainer, query); } catch (CDKException e) { e.printStackTrace(); } if (subgraphMaps == null) continue; if (subgraphMaps.size() == 0) continue; // get the atom paths in the unique set of bond maps uniqueSubgraphs.addAll(getUniqueBondSubgraphs(subgraphMaps, atomContainer)); } // lets run a check on the length of each returned fragment and delete // any that don't match the length of out query fragments. Note that since // sometimes a fragment might be a ring, it will have number of atoms // equal to the number of bonds, where as a fragment with no rings // will have number of atoms equal to the number of bonds+1. So we need to check // fragment size against all unique query sizes - I get lazy and don't check // unique query sizes, but the size of each query List<List<Integer>> retValue = new ArrayList<List<Integer>>(); for (List<Integer> fragment : uniqueSubgraphs) { for (QueryAtomContainer query : queries) { if (fragment.size() == query.getAtomCount()) { retValue.add(fragment); break; } } } return retValue; } /** * Evaluates the simple chi index for a set of fragments. * * @param atomContainer The target <code>AtomContainer</code> * @param fragLists A list of fragments * @return The simple chi index */ public static double evalSimpleIndex(IAtomContainer atomContainer, List<List<Integer>> fragLists) { double sum = 0; for (List<Integer> fragList : fragLists) { double prod = 1.0; for (Integer atomSerial : fragList) { IAtom atom = atomContainer.getAtom(atomSerial); int nconnected = atomContainer.getConnectedAtomsCount(atom); prod = prod * nconnected; } if (prod != 0) sum += 1.0 / Math.sqrt(prod); } return sum; } /** * Evaluates the valence corrected chi index for a set of fragments. * <p/> * This method takes into account the S and P atom types described in * Kier & Hall (1986), page 20 for which empirical delta V values are used. * * @param atomContainer The target <code>AtomContainer</code> * @param fragList A list of fragments * @return The valence corrected chi index * @throws CDKException if the <code>IsotopeFactory</code> cannot be created */ public static double evalValenceIndex(IAtomContainer atomContainer, List fragList) throws CDKException { try { IsotopeFactory ifac = IsotopeFactory.getInstance(DefaultChemObjectBuilder.getInstance()); ifac.configureAtoms(atomContainer); } catch (IOException e) { throw new CDKException("IO problem occured when using the CDK atom config\n"+e.getMessage(), e); } double sum = 0; for (Object aFragList : fragList) { ArrayList frag = (ArrayList) aFragList; double prod = 1.0; for (Object aFrag : frag) { int atomSerial = (Integer) aFrag; IAtom atom = atomContainer.getAtom(atomSerial); String sym = atom.getSymbol(); if (sym.equals("S")) { // check for some special S environments double tmp = deltavSulphur(atom, atomContainer); if (tmp != -1) { prod = prod * tmp; continue; } } if (sym.equals("P")) { // check for some special P environments double tmp = deltavPhosphorous(atom, atomContainer); if (tmp != -1) { prod = prod * tmp; continue; } } int z = atom.getAtomicNumber(); // TODO there should be a neater way to get the valence electron count int zv = getValenceElectronCount(atom); int hsupp = atom.getHydrogenCount(); double deltav = (double) (zv - hsupp) / (double) (z - zv - 1); prod = prod * deltav; } if (prod != 0) sum += 1.0 / Math.sqrt(prod); } return sum; } private static int getValenceElectronCount(IAtom atom) { int valency = AtomValenceTool.getValence(atom); return valency - atom.getFormalCharge(); } /** * Evaluates the empirical delt V for some S environments. * <p/> * The method checks to see whether a S atom is in a -S-S-, * -SO-, -SO2- group and returns the empirical values noted * in Kier & Hall (1986), page 20. * * @param atom The S atom in question * @param atomContainer The molecule containing the S * @return The empirical delta V if it is present in one of the above * environments, -1 otherwise */ private static double deltavSulphur(IAtom atom, IAtomContainer atomContainer) { if (!atom.getSymbol().equals("S")) return -1; // check whether it's a S in S-S List<IAtom> connected = atomContainer.getConnectedAtomsList(atom); for (IAtom connectedAtom : connected) { if (connectedAtom.getSymbol().equals("S") && atomContainer.getBond(atom, connectedAtom).getOrder() == IBond.Order.SINGLE) return .89; } // check whether it's a S in -SO- for (IAtom connectedAtom : connected) { if (connectedAtom.getSymbol().equals("O") && atomContainer.getBond(atom, connectedAtom).getOrder() == IBond.Order.DOUBLE) return 1.33; } // check whether it's a S in -SO2- int count = 0; for (IAtom connectedAtom : connected) { if (connectedAtom.getSymbol().equals("O") && atomContainer.getBond(atom, connectedAtom).getOrder() == IBond.Order.DOUBLE) count++; } if (count == 2) return 2.67; return -1; } /** * Checks whether the P atom is in a PO environment. * <p/> * This environment is noted in Kier & Hall (1986), page 20 * * @param atom The P atom in question * @param atomContainer The molecule containing the P atom * @return The empirical delta V if present in the above environment, * -1 otherwise */ private static double deltavPhosphorous(IAtom atom, IAtomContainer atomContainer) { if (!atom.getSymbol().equals("P")) return -1; List<IAtom> connected = atomContainer.getConnectedAtomsList(atom); int conditions = 0; if (connected.size() == 4) conditions++; for (IAtom connectedAtom : connected) { if (connectedAtom.getSymbol().equals("O") && atomContainer.getBond(atom, connectedAtom).getOrder() == IBond.Order.DOUBLE) conditions++; if (atomContainer.getBond(atom, connectedAtom).getOrder() == IBond.Order.SINGLE) conditions++; } if (conditions == 5) return 2.22; return -1; } /** * Converts a set of bond mappings to a unique set of atom paths. * <p/> * This method accepts a <code>List</code> of bond mappings. It first * reduces the set to a unique set of bond maps and then for each bond map * converts it to a series of atoms making up the bonds. * * @param subgraphs A <code>List</code> of bon mappings * @param ac The molecule we are examining * @return A unique <code>List</code> of atom paths */ private static List<List<Integer>> getUniqueBondSubgraphs(List subgraphs, IAtomContainer ac) { List<List<Integer>> bondList = new ArrayList<List<Integer>>(); for (Object subgraph : subgraphs) { List current = (List) subgraph; List<Integer> ids = new ArrayList<Integer>(); for (Object aCurrent : current) { RMap rmap = (RMap) aCurrent; ids.add(rmap.getId1()); } Collections.sort(ids); bondList.add(ids); } // get the unique set of bonds HashSet<List<Integer>> hs = new HashSet<List<Integer>>(bondList); bondList = new ArrayList<List<Integer>>(hs); List<List<Integer>> paths = new ArrayList<List<Integer>>(); for (Object aBondList1 : bondList) { List aBondList = (List) aBondList1; List<Integer> tmp = new ArrayList<Integer>(); for (Object anABondList : aBondList) { int bondNumber = (Integer) anABondList; for (IAtom atom : ac.getBond(bondNumber).atoms()) { Integer atomInt = ac.getAtomNumber(atom); if (!tmp.contains(atomInt)) tmp.add(atomInt); } } paths.add(tmp); } return paths; } }