/* $RCSfile$
* $Author$
* $Date$
* $Revision$
*
* Copyright (C) 2004-2007 Rajarshi Guha <rajarshi@users.sourceforge.net>
*
* Contact: cdk-devel@lists.sourceforge.net
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.openscience.cdk.qsar.descriptors.molecular;
import java.util.HashMap;
import java.util.Map;
import javax.vecmath.Point3d;
import org.openscience.cdk.Molecule;
import org.openscience.cdk.annotations.TestClass;
import org.openscience.cdk.annotations.TestMethod;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.geometry.GeometryTools;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.qsar.DescriptorSpecification;
import org.openscience.cdk.qsar.DescriptorValue;
import org.openscience.cdk.qsar.IMolecularDescriptor;
import org.openscience.cdk.qsar.result.DoubleArrayResult;
import org.openscience.cdk.qsar.result.DoubleArrayResultType;
import org.openscience.cdk.qsar.result.IDescriptorResult;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
import Jama.EigenvalueDecomposition;
import Jama.Matrix;
/**
* Holistic descriptors described by Todeschini et al {@cdk.cite TOD98}.
* The descriptors are based on a number of atom weightings. There are 6 different
* possible weightings:
* <ol>
* <li>unit weights
* <li>atomic masses
* <li>van der Waals volumes
* <li>Mulliken atomic electronegativites
* <li>atomic polarizabilities
* <li>E-state values described by Kier & Hall
* </ol>
* Currently weighting schemes 1,2,3,4 & 5 are implemented. The weight values
* are taken from {@cdk.cite TOD98} and as a result 19 elements are considered.
* <p/>
* <p>For each weighting scheme we can obtain
* <ul>
* <li>11 directional WHIM descriptors (λ<sub>1 .. 3</sub>, ν<sub>1 .. 2</sub>, γ<sub>1 .. 3</sub>, η<sub>1 .. 3</sub>)
* <li>6 non-directional WHIM descriptors (T, A, V, K, G, D)
* </ul>
* <p/>
* <p>Though {@cdk.cite TOD98} mentions that for planar molecules only 8 directional WHIM
* descriptors are required the current code will return all 11.
* <p/>
* The descriptor returns 17 values for a given weighting scheme, named as follows:
* <ol>
* <li>Wlambda1
* <li>Wlambda2
* <li>wlambda3
* <li>Wnu1
* <li>Wnu2
* <li>Wgamma1
* <li>Wgamma2
* <li>Wgamma3
* <li>Weta1
* <li>Weta2
* <li>Weta3
* <li>WT
* <li>WA
* <li>WV
* <li>WK
* <li>WG
* <li>WD
* </ol>
* Each name will have a suffix of the form <i>.X</i> where <i>X</i> indicates
* the weighting scheme used. Possible values of <i>X</i> are
* <ul>
* <li>unity
* <li>mass
* <li>volume
* <li>eneg
* <li>polar
* </ul>
* <p/>
* <p/>
* <p>This descriptor uses these parameters:
* <table border="1">
* <tr>
* <td>Name</td>
* <td>Default</td>
* <td>Description</td>
* </tr>
* <tr>
* <td>type</td>
* <td>unity</td>
* <td>Type of weighting as described above</td>
* </tr>
* </table>
*
* @author Rajarshi Guha
* @cdk.created 2004-12-1
* @cdk.builddepends Jama-1.0.1.jar
* @cdk.depends Jama-1.0.1.jar
* @cdk.module qsarmolecular
* @cdk.githash
* @cdk.set qsar-descriptors
* @cdk.dictref qsar-descriptors:WHIM
* @cdk.keyword WHIM
* @cdk.keyword descriptor
*/
@TestClass("org.openscience.cdk.qsar.descriptors.molecular.WHIMDescriptorTest")
public class WHIMDescriptor implements IMolecularDescriptor {
static ILoggingTool logger =
LoggingToolFactory.createLoggingTool(WHIMDescriptor.class);
String type = "";
Map<String,Double> hashatwt, hashvdw, hasheneg, hashpol;
public WHIMDescriptor() {
this.type = "unity"; // default weighting scheme
// set up the values from TOD98
this.hashatwt = new HashMap<String,Double>();
this.hashvdw = new HashMap<String, Double>();
this.hasheneg = new HashMap<String, Double>();
this.hashpol = new HashMap<String, Double>();
this.hashatwt.put("H", new Double(0.084));
this.hashatwt.put("B", new Double(0.900));
this.hashatwt.put("C", new Double(1.000));
this.hashatwt.put("N", new Double(1.166));
this.hashatwt.put("O", new Double(1.332));
this.hashatwt.put("F", new Double(1.582));
this.hashatwt.put("Al", new Double(2.246));
this.hashatwt.put("Si", new Double(2.339));
this.hashatwt.put("P", new Double(2.579));
this.hashatwt.put("S", new Double(2.670));
this.hashatwt.put("Cl", new Double(2.952));
this.hashatwt.put("Fe", new Double(4.650));
this.hashatwt.put("Co", new Double(4.907));
this.hashatwt.put("Ni", new Double(4.887));
this.hashatwt.put("Cu", new Double(5.291));
this.hashatwt.put("Zn", new Double(5.445));
this.hashatwt.put("Br", new Double(6.653));
this.hashatwt.put("Sn", new Double(9.884));
this.hashatwt.put("I", new Double(10.566));
this.hashvdw.put("H", new Double(0.299));
this.hashvdw.put("B", new Double(0.796));
this.hashvdw.put("C", new Double(1.000));
this.hashvdw.put("N", new Double(0.695));
this.hashvdw.put("O", new Double(0.512));
this.hashvdw.put("F", new Double(0.410));
this.hashvdw.put("Al", new Double(1.626));
this.hashvdw.put("Si", new Double(1.424));
this.hashvdw.put("P", new Double(1.181));
this.hashvdw.put("S", new Double(1.088));
this.hashvdw.put("Cl", new Double(1.035));
this.hashvdw.put("Fe", new Double(1.829));
this.hashvdw.put("Co", new Double(1.561));
this.hashvdw.put("Ni", new Double(0.764));
this.hashvdw.put("Cu", new Double(0.512));
this.hashvdw.put("Zn", new Double(1.708));
this.hashvdw.put("Br", new Double(1.384));
this.hashvdw.put("Sn", new Double(2.042));
this.hashvdw.put("I", new Double(1.728));
this.hasheneg.put("H", new Double(0.944));
this.hasheneg.put("B", new Double(0.828));
this.hasheneg.put("C", new Double(1.000));
this.hasheneg.put("N", new Double(1.163));
this.hasheneg.put("O", new Double(1.331));
this.hasheneg.put("F", new Double(1.457));
this.hasheneg.put("Al", new Double(0.624));
this.hasheneg.put("Si", new Double(0.779));
this.hasheneg.put("P", new Double(0.916));
this.hasheneg.put("S", new Double(1.077));
this.hasheneg.put("Cl", new Double(1.265));
this.hasheneg.put("Fe", new Double(0.728));
this.hasheneg.put("Co", new Double(0.728));
this.hasheneg.put("Ni", new Double(0.728));
this.hasheneg.put("Cu", new Double(0.740));
this.hasheneg.put("Zn", new Double(0.810));
this.hasheneg.put("Br", new Double(1.172));
this.hasheneg.put("Sn", new Double(0.837));
this.hasheneg.put("I", new Double(1.012));
this.hashpol.put("H", new Double(0.379));
this.hashpol.put("B", new Double(1.722));
this.hashpol.put("C", new Double(1.000));
this.hashpol.put("N", new Double(0.625));
this.hashpol.put("O", new Double(0.456));
this.hashpol.put("F", new Double(0.316));
this.hashpol.put("Al", new Double(3.864));
this.hashpol.put("Si", new Double(3.057));
this.hashpol.put("P", new Double(2.063));
this.hashpol.put("S", new Double(1.648));
this.hashpol.put("Cl", new Double(1.239));
this.hashpol.put("Fe", new Double(4.773));
this.hashpol.put("Co", new Double(4.261));
this.hashpol.put("Ni", new Double(3.864));
this.hashpol.put("Cu", new Double(3.466));
this.hashpol.put("Zn", new Double(4.034));
this.hashpol.put("Br", new Double(1.733));
this.hashpol.put("Sn", new Double(4.375));
this.hashpol.put("I", new Double(3.040));
}
@TestMethod("testGetSpecification")
public DescriptorSpecification getSpecification() {
return new DescriptorSpecification(
"http://www.blueobelisk.org/ontologies/chemoinformatics-algorithms/#WHIM",
this.getClass().getName(),
"$Id$",
"The Chemistry Development Kit");
}
;
/**
* Sets the parameters attribute of the WHIMDescriptor object.
*
* @param params The new parameter values. The Object array should have a single element
* which should be a String. The possible values of this String are: unity,
* mass, volume, eneg, polar
* @throws CDKException if the parameters are of the wrong type
* @see #getParameters
*/
@TestMethod("testSetParameters_arrayObject")
public void setParameters(Object[] params) throws CDKException {
if (params.length != 1) {
throw new CDKException("WHIMDescriptor requires 1 parameter");
}
if (!(params[0] instanceof String)) {
throw new CDKException("Parameters must be of type String");
}
this.type = (String) params[0];
if (!this.type.equals("unity") &&
!this.type.equals("mass") &&
!this.type.equals("volume") &&
!this.type.equals("eneg") &&
!this.type.equals("polar"))
throw new CDKException("Weighting scheme must be one of those specified in the API");
}
/**
* Gets the parameters attribute of the WHIMDescriptor object.
*
* @return Two element array of Integer representing number of highest and lowest eigenvalues
* to return respectively
* @see #setParameters
*/
@TestMethod("testGetParameters")
public Object[] getParameters() {
Object[] o = new Object[1];
o[0] = this.type;
return (o);
}
@TestMethod(value="testNamesConsistency")
public String[] getDescriptorNames() {
String[] names = {
"Wlambda1", "Wlambda2", "Wlambda3",
"Wnu1", "Wnu2",
"Wgamma1", "Wgamma2", "Wgamma3",
"Weta1", "Weta2", "Weta3",
"WT", "WA", "WV", "WK", "WG", "WD"
};
for (int i = 0; i < names.length; i++) names[i] += "." + type;
return names;
}
/**
* Gets the parameterNames attribute of the WHIMDescriptor object.
*
* @return The parameterNames value
*/
@TestMethod("testGetParameterNames")
public String[] getParameterNames() {
String[] pname = new String[1];
pname[0] = "type";
return (pname);
}
/**
* Gets the parameterType attribute of the WHIMDescriptor object.
*
* @param name Description of the Parameter
* @return The parameterType value
*/
@TestMethod("testGetParameterType_String")
public Object getParameterType(String name) {
return ("");
}
private DescriptorValue getDummyDescriptorValue(Exception e) {
int ndesc = getDescriptorNames().length;
DoubleArrayResult results = new DoubleArrayResult(ndesc);
for (int i = 0; i < ndesc; i++) results.add(Double.NaN);
return new DescriptorValue(getSpecification(), getParameterNames(),
getParameters(), results, getDescriptorNames(), e);
}
/**
* Calculates 11 directional and 6 non-directional WHIM descriptors for.
* the specified weighting scheme
*
* @param container Parameter is the atom container.
* @return An ArrayList containing the descriptors in the order described above.
*/
@TestMethod("testCalculate_IAtomContainer")
public DescriptorValue calculate(IAtomContainer container) {
if (!GeometryTools.has3DCoordinates(container))
return getDummyDescriptorValue(new CDKException("Molecule must have 3D coordinates"));
double sum = 0.0;
Molecule ac;
try {
ac = (Molecule) container.clone();
} catch (CloneNotSupportedException e) {
return getDummyDescriptorValue(e);
}
// do aromaticity detecttion for calculating polarizability later on
//HueckelAromaticityDetector had = new HueckelAromaticityDetector();
//had.detectAromaticity(ac);
// get the coordinate matrix
double[][] cmat = new double[ac.getAtomCount()][3];
for (int i = 0; i < ac.getAtomCount(); i++) {
Point3d coords = ac.getAtom(i).getPoint3d();
cmat[i][0] = coords.x;
cmat[i][1] = coords.y;
cmat[i][2] = coords.z;
}
// set up the weight vector
Map<String,Double> hash = null;
double[] wt = new double[ac.getAtomCount()];
if (this.type.equals("unity")) {
for (int i = 0; i < ac.getAtomCount(); i++) wt[i] = 1.0;
} else {
if (this.type.equals("mass")) {
hash = this.hashatwt;
} else if (this.type.equals("volume")) {
hash = this.hashvdw;
} else if (this.type.equals("eneg")) {
hash = this.hasheneg;
} else if (this.type.equals("polar")) {
hash = this.hashpol;
}
for (int i = 0; i < ac.getAtomCount(); i++) {
String sym = ac.getAtom(i).getSymbol();
wt[i] = (Double) hash.get(sym);
}
}
PCA pcaobject = null;
try {
pcaobject = new PCA(cmat, wt);
} catch (CDKException cdke) {
logger.debug(cdke);
}
// directional WHIM's
double[] lambda = pcaobject.getEigenvalues();
double[] gamma = new double[3];
double[] nu = new double[3];
double[] eta = new double[3];
for (int i = 0; i < 3; i++) sum += lambda[i];
for (int i = 0; i < 3; i++) nu[i] = lambda[i] / sum;
double[][] scores = pcaobject.getScores();
for (int i = 0; i < 3; i++) {
sum = 0.0;
for (int j = 0; j < ac.getAtomCount(); j++)
sum += scores[j][i] * scores[j][i] * scores[j][i] * scores[j][i];
sum = sum / (lambda[i] * lambda[i] * ac.getAtomCount());
eta[i] = 1.0 / sum;
}
// look for symmetric & asymmetric atoms for the gamma descriptor
for (int i = 0; i < 3; i++) {
double ns = 0.0;
double na = 0.0;
for (int j = 0; j < ac.getAtomCount(); j++) {
boolean foundmatch = false;
for (int k = 0; k < ac.getAtomCount(); k++) {
if (k == j) continue;
if (scores[j][i] == -1 * scores[k][i]) {
ns++;
foundmatch = true;
break;
}
}
if (!foundmatch) na++;
}
double n = (double) ac.getAtomCount();
gamma[i] = -1.0 * ((ns / n) * Math.log(ns / n) / Math.log(2.0) + (na / n) * Math.log(1.0 / n) / Math.log(2.0));
gamma[i] = 1.0 / (1.0 + gamma[i]);
//logger.debug("ns = "+ns+" na = "+na+" gamma = "+gamma[i]);
}
// non directional WHIMS's
double t = lambda[0] + lambda[1] + lambda[2];
double a = lambda[0] * lambda[1] + lambda[0] * lambda[2] + lambda[1] * lambda[2];
double v = t + a + lambda[0] * lambda[1] * lambda[2];
double k = 0.0;
sum = 0.0;
for (int i = 0; i < 3; i++) sum += lambda[i];
for (int i = 0; i < 3; i++) k = (lambda[i] / sum) - (1.0 / 3.0);
k = k / (4.0 / 3.0);
double g = Math.pow(gamma[0] * gamma[1] * gamma[2], 1.0 / 3.0);
double d = eta[0] + eta[1] + eta[2];
// return all the stuff we calculated
DoubleArrayResult retval = new DoubleArrayResult(11 + 6);
retval.add(lambda[0]);
retval.add(lambda[1]);
retval.add(lambda[2]);
retval.add(nu[0]);
retval.add(nu[1]);
retval.add(gamma[0]);
retval.add(gamma[1]);
retval.add(gamma[2]);
retval.add(eta[0]);
retval.add(eta[1]);
retval.add(eta[2]);
retval.add(t);
retval.add(a);
retval.add(v);
retval.add(k);
retval.add(g);
retval.add(d);
return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(),
retval, getDescriptorNames());
}
/**
* Returns the specific type of the DescriptorResult object.
* <p/>
* The return value from this method really indicates what type of result will
* be obtained from the {@link org.openscience.cdk.qsar.DescriptorValue} object. Note that the same result
* can be achieved by interrogating the {@link org.openscience.cdk.qsar.DescriptorValue} object; this method
* allows you to do the same thing, without actually calculating the descriptor.
*
* @return an object that implements the {@link org.openscience.cdk.qsar.result.IDescriptorResult} interface indicating
* the actual type of values returned by the descriptor in the {@link org.openscience.cdk.qsar.DescriptorValue} object
*/
@TestMethod("testGetDescriptorResultType")
public IDescriptorResult getDescriptorResultType() {
return new DoubleArrayResultType(17);
}
class PCA {
Matrix evec;
Matrix t;
double[] eval;
public PCA(double[][] cmat, double[] wt) throws CDKException {
int ncol = 3;
int nrow = wt.length;
if (cmat.length != wt.length) {
throw new CDKException("WHIMDescriptor: number of weights should be equal to number of atoms");
}
// make a copy of the coordinate matrix
double[][] d = new double[nrow][ncol];
for (int i = 0; i < nrow; i++) {
for (int j = 0; j < ncol; j++)
d[i][j] = cmat[i][j];
}
// do mean centering - though the first paper used
// barymetric centering
for (int i = 0; i < ncol; i++) {
double mean = 0.0;
for (int j = 0; j < nrow; j++)
mean += d[j][i];
mean = mean / (double) nrow;
for (int j = 0; j < nrow; j++)
d[j][i] = (d[j][i] - mean);
}
// get the covariance matrix
double[][] covmat = new double[ncol][ncol];
double sumwt = 0.;
for (int i = 0; i < nrow; i++) sumwt += wt[i];
for (int i = 0; i < ncol; i++) {
double meanx = 0;
for (int k = 0; k < nrow; k++) meanx += d[k][i];
meanx = meanx / (double) nrow;
for (int j = 0; j < ncol; j++) {
double meany = 0.0;
for (int k = 0; k < nrow; k++) meany += d[k][j];
meany = meany / (double) nrow;
double sum = 0.;
for (int k = 0; k < nrow; k++) {
//double dd = wt[k] * (d[k][i] - meanx) * (d[k][j] - meany);
//logger.debug("("+i+","+j+") "+wts[k] + " * " + d[k][i] + "-" + meanx + " * " + d[k][j] + "-" + meany + "==" + dd);
sum += wt[k] * (d[k][i] - meanx) * (d[k][j] - meany);
}
//logger.debug(sum+" / "+sumwt+"="+sum/sumwt);
covmat[i][j] = sum / sumwt;
}
}
// get the loadings (ie eigenvector matrix)
Matrix m = new Matrix(covmat);
EigenvalueDecomposition ed = m.eig();
this.eval = ed.getRealEigenvalues();
this.evec = ed.getV();
Matrix x = new Matrix(d);
this.t = x.times(this.evec);
}
double[] getEigenvalues() {
return (this.eval);
}
double[][] getScores() {
return (t.getArray());
}
}
}