/* * $RCSfile$ * $Author$ * $Date$ * $Revision$ * * Copyright (C) 2008 Rajarshi Guha <rajarshi@users.sourceforge.net> * * Contact: rajarshi@users.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */ package org.openscience.cdk.qsar.descriptors.molecular; import org.openscience.cdk.annotations.TestClass; import org.openscience.cdk.annotations.TestMethod; import org.openscience.cdk.config.fragments.EStateFragments; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.qsar.DescriptorSpecification; import org.openscience.cdk.qsar.DescriptorValue; import org.openscience.cdk.qsar.IMolecularDescriptor; import org.openscience.cdk.qsar.result.IDescriptorResult; import org.openscience.cdk.qsar.result.IntegerArrayResult; import org.openscience.cdk.smiles.smarts.SMARTSQueryTool; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; /** * A fragment count descriptor that uses e-state fragments. * <p/> * Traditionally the e-state descriptors identify the relevant fragments and * then evaluate the actual e-state value. However it has been * <a href="http://www.mdpi.org/molecules/papers/91201004.pdf">shown</a> in {@cdk.cite BUTINA2004} * that simply using the <i>counts</i> of the e-state fragments can lead to QSAR models * that exhibit similar performance to those built using the actual e-state indices. * <p/> * Atom typing and aromaticity perception should be performed prior to calling this * descriptor. The atom type definitions are taken from {@cdk.cite HALL1995}. * The SMARTS definitions were obtained from <a href="http://www.rdkit.org">RDKit</a>. * <p/> * The descriptor returns an integer array result of 79 values with the * following names (see <a href="http://www.edusoft-lc.com/molconn/manuals/350/appV.html"> * here</a> for the corresponding chemical groups). * <p/> * <p/> * <table border=1 cellpadding=5> * <thead> * <tr> * <th>Serial</th> * <th>Name</th> * <th>Pattern</th> * </tr> * <tbody> * <tr> * <td>0</td><td>khs.sLi</td><td>[LiD1]-*</td> * </tr> * <tr> * <td>1</td><td>khs.ssBe</td><td>[BeD2](-*)-*</td> * </tr> * <tr> * <td>2</td><td>khs.ssssBe</td><td>[BeD4](-*)(-*)(-*)-*</td> * </tr> * <tr> * <td>3</td><td>khs.ssBH</td><td>[BD2H](-*)-*</td> * </tr> * <tr> * <td>4</td><td>khs.sssB</td><td>[BD3](-*)(-*)-*</td> * </tr> * <tr> * <td>5</td><td>khs.ssssB</td><td>[BD4](-*)(-*)(-*)-*</td> * </tr> * <tr> * <td>6</td><td>khs.sCH3</td><td>[CD1H3]-*</td> * </tr> * <tr> * <td>7</td><td>khs.dCH2</td><td>[CD1H2]=*</td> * </tr> * <tr> * <td>8</td><td>khs.ssCH2</td><td>[CD2H2](-*)-*</td> * </tr> * <tr> * <td>9</td><td>khs.tCH</td><td>[CD1H]#*</td> * </tr> * <tr> * <td>10</td><td>khs.dsCH</td><td>[CD2H](=*)-*</td> * </tr> * <tr> * <td>11</td><td>khs.aaCH</td><td>[C,c;D2H](:*):*</td> * </tr> * <tr> * <td>12</td><td>khs.sssCH</td><td>[CD3H](-*)(-*)-*</td> * </tr> * <tr> * <td>13</td><td>khs.ddC</td><td>[CD2H0](=*)=*</td> * </tr> * <tr> * <td>14</td><td>khs.tsC</td><td>[CD2H0](#*)-*</td> * </tr> * <tr> * <td>15</td><td>khs.dssC</td><td>[CD3H0](=*)(-*)-*</td> * </tr> * <tr> * <td>16</td><td>khs.aasC</td><td>[C,c;D3H0](:*)(:*)-*</td> * </tr> * <tr> * <td>17</td><td>khs.aaaC</td><td>[C,c;D3H0](:*)(:*):*</td> * </tr> * <tr> * <td>18</td><td>khs.ssssC</td><td>[CD4H0](-*)(-*)(-*)-*</td> * </tr> * <tr> * <td>19</td><td>khs.sNH3</td><td>[ND1H3]-*</td> * </tr> * <tr> * <td>20</td><td>khs.sNH2</td><td>[ND1H2]-*</td> * </tr> * <tr> * <td>21</td><td>khs.ssNH2</td><td>[ND2H2](-*)-*</td> * </tr> * <tr> * <td>22</td><td>khs.dNH</td><td>[ND1H]=*</td> * </tr> * <tr> * <td>23</td><td>khs.ssNH</td><td>[ND2H](-*)-*</td> * </tr> * <tr> * <td>24</td><td>khs.aaNH</td><td>[N,nD2H](:*):*</td> * </tr> * <tr> * <td>25</td><td>khs.tN</td><td>[ND1H0]#*</td> * </tr> * <tr> * <td>26</td><td>khs.sssNH</td><td>[ND3H](-*)(-*)-*</td> * </tr> * <tr> * <td>27</td><td>khs.dsN</td><td>[ND2H0](=*)-*</td> * </tr> * <tr> * <td>28</td><td>khs.aaN</td><td>[N,nD2H0](:*):*</td> * </tr> * <tr> * <td>29</td><td>khs.sssN</td><td>[ND3H0](-*)(-*)-*</td> * </tr> * <tr> * <td>30</td><td>khs.ddsN</td><td>[ND3H0](~[OD1H0])(~[OD1H0])-,:*</td> * </tr> * <tr> * <td>31</td><td>khs.aasN</td><td>[N,nD3H0](:*)(:*)-,:*</td> * </tr> * <tr> * <td>32</td><td>khs.ssssN</td><td>[ND4H0](-*)(-*)(-*)-*</td> * </tr> * <tr> * <td>33</td><td>khs.sOH</td><td>[OD1H]-*</td> * </tr> * <tr> * <td>34</td><td>khs.dO</td><td>[OD1H0]=*</td> * </tr> * <tr> * <td>35</td><td>khs.ssO</td><td>[OD2H0](-*)-*</td> * </tr> * <tr> * <td>36</td><td>khs.aaO</td><td>[O,oD2H0](:*):*</td> * </tr> * <tr> * <td>37</td><td>khs.sF</td><td>[FD1]-*</td> * </tr> * <tr> * <td>38</td><td>khs.sSiH3</td><td>[SiD1H3]-*</td> * </tr> * <tr> * <td>39</td><td>khs.ssSiH2</td><td>[SiD2H2](-*)-*</td> * </tr> * <tr> * <td>40</td><td>khs.sssSiH</td><td>[SiD3H1](-*)(-*)-*</td> * </tr> * <tr> * <td>41</td><td>khs.ssssSi</td><td>[SiD4H0](-*)(-*)(-*)-*</td> * </tr> * <tr> * <td>42</td><td>khs.sPH2</td><td>[PD1H2]-*</td> * </tr> * <tr> * <td>43</td><td>khs.ssPH</td><td>[PD2H1](-*)-*</td> * </tr> * <tr> * <td>44</td><td>khs.sssP</td><td>[PD3H0](-*)(-*)-*</td> * </tr> * <tr> * <td>45</td><td>khs.dsssP</td><td>[PD4H0](=*)(-*)(-*)-*</td> * </tr> * <tr> * <td>46</td><td>khs.sssssP</td><td>[PD5H0](-*)(-*)(-*)(-*)-*</td> * </tr> * <tr> * <td>47</td><td>khs.sSH</td><td>[SD1H1]-*</td> * </tr> * <tr> * <td>48</td><td>khs.dS</td><td>[SD1H0]=*</td> * </tr> * <tr> * <td>49</td><td>khs.ssS</td><td>[SD2H0](-*)-*</td> * </tr> * <tr> * <td>50</td><td>khs.aaS</td><td>[S,sD2H0](:*):*</td> * </tr> * <tr> * <td>51</td><td>khs.dssS</td><td>[SD3H0](=*)(-*)-*</td> * </tr> * <tr> * <td>52</td><td>khs.ddssS</td><td>[SD4H0](~[OD1H0])(~[OD1H0])(-*)-*</td> * </tr> * <tr> * <td>53</td><td>khs.sCl</td><td>[ClD1]-*</td> * </tr> * <tr> * <td>54</td><td>khs.sGeH3</td><td>[GeD1H3](-*)</td> * </tr> * <tr> * <td>55</td><td>khs.ssGeH2</td><td>[GeD2H2](-*)-*</td> * </tr> * <tr> * <td>56</td><td>khs.sssGeH</td><td>[GeD3H1](-*)(-*)-*</td> * </tr> * <tr> * <td>57</td><td>khs.ssssGe</td><td>[GeD4H0](-*)(-*)(-*)-*</td> * </tr> * <tr> * <td>58</td><td>khs.sAsH2</td><td>[AsD1H2]-*</td> * </tr> * <tr> * <td>59</td><td>khs.ssAsH</td><td>[AsD2H1](-*)-*</td> * </tr> * <tr> * <td>60</td><td>khs.sssAs</td><td>[AsD3H0](-*)(-*)-*</td> * </tr> * <tr> * <td>61</td><td>khs.sssdAs</td><td>[AsD4H0](=*)(-*)(-*)-*</td> * </tr> * <tr> * <td>62</td><td>khs.sssssAs</td><td>[AsD5H0](-*)(-*)(-*)(-*)-*</td> * </tr> * <tr> * <td>63</td><td>khs.sSeH</td><td>[SeD1H1]-*</td> * </tr> * <tr> * <td>64</td><td>khs.dSe</td><td>[SeD1H0]=*</td> * </tr> * <tr> * <td>65</td><td>khs.ssSe</td><td>[SeD2H0](-*)-*</td> * </tr> * <tr> * <td>66</td><td>khs.aaSe</td><td>[SeD2H0](:*):*</td> * </tr> * <tr> * <td>67</td><td>khs.dssSe</td><td>[SeD3H0](=*)(-*)-*</td> * </tr> * <tr> * <td>68</td><td>khs.ddssSe</td><td>[SeD4H0](=*)(=*)(-*)-*</td> * </tr> * <tr> * <td>69</td><td>khs.sBr</td><td>[BrD1]-*</td> * </tr> * <tr> * <td>70</td><td>khs.sSnH3</td><td>[SnD1H3]-*</td> * </tr> * <tr> * <td>71</td><td>khs.ssSnH2</td><td>[SnD2H2](-*)-*</td> * </tr> * <tr> * <td>72</td><td>khs.sssSnH</td><td>[SnD3H1](-*)(-*)-*</td> * </tr> * <tr> * <td>73</td><td>khs.ssssSn</td><td>[SnD4H0](-*)(-*)(-*)-*</td> * </tr> * <tr> * <td>74</td><td>khs.sI</td><td>[ID1]-*</td> * </tr> * <tr> * <td>75</td><td>khs.sPbH3</td><td>[PbD1H3]-*</td> * </tr> * <tr> * <td>76</td><td>khs.ssPbH2</td><td>[PbD2H2](-*)-*</td> * </tr> * <tr> * <td>77</td><td>khs.sssPbH</td><td>[PbD3H1](-*)(-*)-*</td> * </tr> * <tr> * <td>78</td><td>khs.ssssPb</td><td>[PbD4H0](-*)(-*)(-*)-*</td> * </tr> * </tbody></table> * * @author Rajarshi Guha * @cdk.module qsarmolecular * @cdk.githash * @cdk.set qsar-descriptors * @cdk.dictref qsar-descriptors:kierHallSmarts */ @TestClass("org.openscience.cdk.qsar.descriptors.molecular.KierHallSmartsDescriptorTest") public class KierHallSmartsDescriptor implements IMolecularDescriptor { private static String[] names; private static final String[] smarts = EStateFragments.getSmarts(); public KierHallSmartsDescriptor() { String[] tmp = EStateFragments.getNames(); names = new String[tmp.length]; for (int i = 0; i < tmp.length; i++) names[i] = "khs." + tmp[i]; } /** * Returns a <code>Map</code> which specifies which descriptor * is implemented by this class. * <p/> * These fields are used in the map: * <ul> * <li>Specification-Reference: refers to an entry in a unique dictionary * <li>Implementation-Title: anything * <li>Implementation-Identifier: a unique identifier for this version of * this class * <li>Implementation-Vendor: CDK, JOELib, or anything else * </ul> * * @return An object containing the descriptor specification */ @TestMethod("testGetSpecification") public DescriptorSpecification getSpecification() { return new DescriptorSpecification( "http://www.blueobelisk.org/ontologies/chemoinformatics-algorithms/#kierHallSmarts", this.getClass().getName(), "$Id$", "The Chemistry Development Kit"); } /** * Sets the parameters attribute of the descriptor. * * @param params The new parameters value * @throws org.openscience.cdk.exception.CDKException * if any parameters are specified * @see #getParameters */ @TestMethod("testSetParameters_arrayObject") public void setParameters(Object[] params) throws CDKException { if (params != null) throw new CDKException("Must not supply any parameters"); } /** * Gets the parameters attribute of the descriptor. * * @return The parameters value * @see #setParameters */ @TestMethod("testGetParameters") public Object[] getParameters() { return null; } @TestMethod(value = "testNamesConsistency") public String[] getDescriptorNames() { return names; } private DescriptorValue getDummyDescriptorValue(Exception e) { IntegerArrayResult result = new IntegerArrayResult(); for (String smart : smarts) result.add((int) Double.NaN); return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(), result, getDescriptorNames(), e); } /** * This method calculates occurrences of the Kier & Hall E-state fragments. * * @param container The molecule for which this descriptor is to be calculated * @return Counts of the fragments */ @TestMethod("testCalculate_IAtomContainer") public DescriptorValue calculate(IAtomContainer container) { if (container == null || container.getAtomCount() == 0) { return getDummyDescriptorValue(new CDKException("Container was null or else had no atoms")); } IAtomContainer atomContainer; try { atomContainer = (IAtomContainer) container.clone(); atomContainer = AtomContainerManipulator.removeHydrogens(atomContainer); } catch (CloneNotSupportedException e) { return getDummyDescriptorValue(new CDKException("Error during clone")); } int[] counts = new int[smarts.length]; try { SMARTSQueryTool sqt = new SMARTSQueryTool("C"); for (int i = 0; i < smarts.length; i++) { sqt.setSmarts(smarts[i]); boolean status = sqt.matches(atomContainer); if (status) { counts[i] = sqt.getUniqueMatchingAtoms().size(); } else counts[i] = 0; } } catch (CDKException e) { return getDummyDescriptorValue(e); } IntegerArrayResult result = new IntegerArrayResult(); for (Integer i : counts) result.add(i); return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(), result, getDescriptorNames()); } /** * Returns the specific type of the DescriptorResult object. * <p/> * The return value from this method really indicates what type of result will * be obtained from the {@link org.openscience.cdk.qsar.DescriptorValue} object. Note that the same result * can be achieved by interrogating the {@link org.openscience.cdk.qsar.DescriptorValue} object; this method * allows you to do the same thing, without actually calculating the descriptor. * * @return an object that implements the {@link org.openscience.cdk.qsar.result.IDescriptorResult} interface indicating * the actual type of values returned by the descriptor in the {@link org.openscience.cdk.qsar.DescriptorValue} object */ @TestMethod("testGetDescriptorResultType") public IDescriptorResult getDescriptorResultType() { return new IntegerArrayResult(smarts.length); } /** * Gets the parameterNames attribute of the descriptor. * * @return The parameterNames value */ @TestMethod("testGetParameterNames") public String[] getParameterNames() { return null; } /** * Gets the parameterType attribute of the descriptor. * * @param name Description of the Parameter * @return An Object whose class is that of the parameter requested */ @TestMethod("testGetParameterType_String") public Object getParameterType(String name) { return null; } }