/* $RCSfile$
* $Author$
* $Date$
* $Revision$
*
* Copyright (C) 2004-2007 Rajarshi Guha <rajarshi@users.sourceforge.net>
*
* Contact: cdk-devel@lists.sourceforge.net
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.openscience.cdk.qsar.descriptors.molecular;
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.Molecule;
import org.openscience.cdk.annotations.TestClass;
import org.openscience.cdk.annotations.TestMethod;
import org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector;
import org.openscience.cdk.charges.GasteigerMarsiliPartialCharges;
import org.openscience.cdk.charges.GasteigerPEPEPartialCharges;
import org.openscience.cdk.charges.Polarizability;
import org.openscience.cdk.config.IsotopeFactory;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.graph.PathTools;
import org.openscience.cdk.graph.matrix.AdjacencyMatrix;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.qsar.DescriptorSpecification;
import org.openscience.cdk.qsar.DescriptorValue;
import org.openscience.cdk.qsar.IMolecularDescriptor;
import org.openscience.cdk.qsar.result.DoubleArrayResult;
import org.openscience.cdk.qsar.result.DoubleArrayResultType;
import org.openscience.cdk.qsar.result.IDescriptorResult;
import org.openscience.cdk.tools.CDKHydrogenAdder;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
import org.openscience.cdk.tools.LonePairElectronChecker;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import Jama.EigenvalueDecomposition;
import Jama.Matrix;
/**
* Eigenvalue based descriptor noted for its utility in chemical diversity.
* Described by Pearlman et al. {@cdk.cite PEA99}.
* <p/>
* <p>The descriptor is based on a weighted version of the Burden matrix {@cdk.cite BUR89, BUR97}
* which takes into account both the connectivity as well as atomic
* properties of a molecule. The weights are a variety of atom properties placed along the
* diagonal of the Burden matrix. Currently three weighting schemes are employed
* <ul>
* <li>atomic weight
* <li>partial charge (Gasteiger Marsilli)
* <li>polarizability {@cdk.cite KJ81}
* </ul>
* <p>By default, the descriptor will return the highest and lowest eigenvalues for the three
* classes of descriptor in a single ArrayList (in the order shown above). However it is also
* possible to supply a parameter list indicating how many of the highest and lowest eigenvalues
* (for each class of descriptor) are required. The descriptor works with the hydrogen depleted molecule.
* <p/>
* A side effect of specifying the number of highest and lowest eigenvalues is that it is possible
* to get two copies of all the eigenvalues. That is, if a molecule has 5 heavy atoms, then specifying
* the 5 highest eigenvalues returns all of them, and specifying the 5 lowest eigenvalues returns
* all of them, resulting in two copies of all the eigenvalues.
* <p/>
* <p> Note that it is possible to
* specify an arbitrarily large number of eigenvalues to be returned. However if the number
* (i.e., nhigh or nlow) is larger than the number of heavy atoms, the remaining eignevalues
* will be NaN.
* <p/>
* Given the above description, if the aim is to gt all the eigenvalues for a molecule, you should
* set nlow to 0 and specify the number of heavy atoms (or some large number) for nhigh (or vice versa).
* <p>This descriptor uses these parameters:
* <table border="1">
* <tr>
* <td>Name</td>
* <td>Default</td>
* <td>Description</td>
* </tr>
* <tr>
* <td>nhigh</td>
* <td>1</td>
* <td>The number of highest eigenvalue</td>
* </tr>
* <tr>
* <td>nlow</td>
* <td>1</td>
* <td>The number of lowest eigenvalue</td>
* </tr>
* <tr>
* <td>checkAromaticity</td>
* <td>true</td>
* <td>Whether aromaticity should be checked</td>
* </tr>
* </table>
* <p/>
* Returns an array of values in the following order
* <ol>
* <p/>
* <li>BCUTw-1l, BCUTw-2l ... - <i>nhigh</i> lowest atom weighted BCUTS
* <li>BCUTw-1h, BCUTw-2h ... - <i>nlow</i> highest atom weighted BCUTS
* <li>BCUTc-1l, BCUTc-2l ... - <i>nhigh</i> lowest partial charge weighted BCUTS
* <li>BCUTc-1h, BCUTc-2h ... - <i>nlow</i> highest partial charge weighted BCUTS
* <li>BCUTp-1l, BCUTp-2l ... - <i>nhigh</i> lowest polarizability weighted BCUTS
* <li>BCUTp-1h, BCUTp-2h ... - <i>nlow</i> highest polarizability weighted BCUTS
*
* @author Rajarshi Guha
* @cdk.created 2004-11-30
* @cdk.builddepends Jama-1.0.1.jar
* @cdk.depends Jama-1.0.1.jar
* @cdk.module qsarmolecular
* @cdk.githash
* @cdk.set qsar-descriptors
* @cdk.dictref qsar-descriptors:BCUT
* @cdk.keyword BCUT
* @cdk.keyword descriptor
*/
@TestClass("org.openscience.cdk.qsar.descriptors.molecular.BCUTDescriptorTest")
public class BCUTDescriptor implements IMolecularDescriptor {
private static ILoggingTool logger =
LoggingToolFactory.createLoggingTool(BCUTDescriptor.class);
// the number of negative & positive eigenvalues
// to return for each class of BCUT descriptor
private int nhigh;
private int nlow;
private boolean checkAromaticity;
public BCUTDescriptor() {
// set the default number of BCUT's
this.nhigh = 1;
this.nlow = 1;
this.checkAromaticity = true;
}
@TestMethod("testGetSpecification")
public DescriptorSpecification getSpecification() {
return new DescriptorSpecification(
"http://www.blueobelisk.org/ontologies/chemoinformatics-algorithms/#BCUT",
this.getClass().getName(),
"$Id$",
"The Chemistry Development Kit");
}
/**
* Sets the parameters attribute of the BCUTDescriptor object.
*
* @param params The new parameter values. This descriptor takes 3 parameters: number of highest
* eigenvalues and number of lowest eigenvalues. If 0 is specified for either (the default)
* then all calculated eigenvalues are returned. The third parameter checkAromaticity is a boolean.
* If checkAromaticity is true, the method check the aromaticity, if false, means that the aromaticity has
* already been checked.
* @throws CDKException if the parameters are of the wrong type
* @see #getParameters
*/
@TestMethod("testSetParameters_arrayObject")
public void setParameters(Object[] params) throws CDKException {
// we expect 3 parameters
if (params.length != 3) {
throw new CDKException("BCUTDescriptor requires 3 parameters");
}
if (!(params[0] instanceof Integer) || !(params[1] instanceof Integer)) {
throw new CDKException("Parameters must be of type Integer");
} else if (!(params[2] instanceof Boolean)) {
throw new CDKException("The third parameter must be of type Boolean");
}
// ok, all should be fine
this.nhigh = (Integer) params[0];
this.nlow = (Integer) params[1];
this.checkAromaticity = (Boolean) params[2];
if (this.nhigh < 0 || this.nlow < 0) {
throw new CDKException("Number of eigenvalues to return must be zero or more");
}
}
/**
* Gets the parameters attribute of the BCUTDescriptor object.
*
* @return Three element array of Integer and one boolean representing number of highest and lowest eigenvalues and the checkAromaticity flag
* to return respectively
* @see #setParameters
*/
@TestMethod("testGetParameters")
public Object[] getParameters() {
Object params[] = new Object[3];
params[0] = this.nhigh;
params[1] = this.nlow;
params[2] = this.checkAromaticity;
return (params);
}
@TestMethod(value="testNamesConsistency")
public String[] getDescriptorNames() {
String[] names;
String[] suffix = {"w", "c", "p"};
names = new String[3 * nhigh + 3 * nlow];
int counter = 0;
for (String aSuffix : suffix) {
for (int i = 0; i < nhigh; i++) {
names[counter++] = "BCUT" + aSuffix + "-" + (i + 1) + "l";
}
for (int i = 0; i < nlow; i++) {
names[counter++] = "BCUT" + aSuffix + "-" + (i + 1) + "h";
}
}
return names;
}
/**
* Gets the parameterNames attribute of the BCUTDescriptor object.
*
* @return The parameterNames value
*/
@TestMethod("testGetParameterNames")
public String[] getParameterNames() {
String[] params = new String[3];
params[0] = "nhigh";
params[1] = "nlow";
params[2] = "checkAromaticity";
return (params);
}
/**
* Gets the parameterType attribute of the BCUTDescriptor object.
*
* @param name Description of the Parameter (can be either 'nhigh' or 'nlow' or checkAromaticity)
* @return The parameterType value
*/
@TestMethod("testGetParameterType_String")
public Object getParameterType(String name) {
Object object = null;
if (name.equals("nhigh")) object = 1;
if (name.equals("nlow")) {
object = 1;
}
if (name.equals("checkAromaticity")) object = true;
return (object);
}
static private class BurdenMatrix {
static double[][] evalMatrix(IAtomContainer atomContainer, double[] vsd) {
IAtomContainer local = AtomContainerManipulator.removeHydrogens(atomContainer);
int natom = local.getAtomCount();
double[][] matrix = new double[natom][natom];
for (int i = 0; i < natom; i++) {
for (int j = 0; j < natom; j++) {
matrix[i][j] = 0.0;
}
}
/* set the off diagonal entries */
for (int i = 0; i < natom - 1; i++) {
for (int j = i + 1; j < natom; j++) {
for (int k = 0; k < local.getBondCount(); k++) {
IBond bond = local.getBond(k);
if (bond.contains(local.getAtom(i)) && bond.contains(local.getAtom(j))) {
if (bond.getFlag(CDKConstants.ISAROMATIC)) matrix[i][j] = 0.15;
else if (bond.getOrder() == CDKConstants.BONDORDER_SINGLE) matrix[i][j] = 0.1;
else if (bond.getOrder() == CDKConstants.BONDORDER_DOUBLE) matrix[i][j] = 0.2;
else if (bond.getOrder() == CDKConstants.BONDORDER_TRIPLE) matrix[i][j] = 0.3;
if (local.getConnectedBondsCount(i) == 1 || local.getConnectedBondsCount(j) == 1) {
matrix[i][j] += 0.01;
}
matrix[j][i] = matrix[i][j];
} else {
matrix[i][j] = 0.001;
matrix[j][i] = 0.001;
}
}
}
}
/* set the diagonal entries */
for (int i = 0; i < natom; i++) {
if (vsd != null) matrix[i][i] = vsd[i];
else matrix[i][i] = 0.0;
}
return (matrix);
}
}
/**
* Calculates the three classes of BCUT descriptors.
*
* @param container Parameter is the atom container.
* @return An ArrayList containing the descriptors. The default is to return
* all calculated eigenvalues of the Burden matrices in the order described
* above. If a parameter list was supplied, then only the specified number
* of highest and lowest eigenvalues (for each class of BCUT) will be returned.
*/
@TestMethod("testCalculate_IAtomContainer")
public DescriptorValue calculate(IAtomContainer container) {
int counter;
Molecule molecule;
try {
molecule = (Molecule) container.clone();
} catch (CloneNotSupportedException e) {
logger.debug("Error during clone");
return getDummyDescriptorValue(new CDKException("Error occured during clone " + e));
}
// add H's in case they're not present
try {
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
CDKHydrogenAdder hAdder = CDKHydrogenAdder.getInstance(molecule.getBuilder());
hAdder.addImplicitHydrogens(molecule);
AtomContainerManipulator.convertImplicitToExplicitHydrogens(molecule);
} catch (Exception e) {
return getDummyDescriptorValue(new CDKException("Could not add hydrogens: " + e.getMessage(), e));
}
// do aromaticity detecttion for calculating polarizability later on
if (this.checkAromaticity) {
try {
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
} catch (CDKException e) {
return getDummyDescriptorValue(new CDKException("Error in atom typing: " + e.getMessage(), e));
}
try {
CDKHueckelAromaticityDetector.detectAromaticity(molecule);
} catch (CDKException e) {
return getDummyDescriptorValue(new CDKException("Error in aromaticity perception: "+e.getMessage()));
}
}
// find number of heavy atoms
int nheavy = 0;
for (int i = 0; i < molecule.getAtomCount(); i++) {
if (!molecule.getAtom(i).getSymbol().equals("H")) nheavy++;
}
if (nheavy == 0) return getDummyDescriptorValue(new CDKException("No heavy atoms in the molecule"));
double[] diagvalue = new double[nheavy];
// get atomic mass weighted BCUT
counter = 0;
try {
for (int i = 0; i < molecule.getAtomCount(); i++) {
if (molecule.getAtom(i).getSymbol().equals("H")) continue;
diagvalue[counter] = IsotopeFactory.getInstance(molecule.getBuilder()).
getMajorIsotope(molecule.getAtom(i).getSymbol()).getExactMass();
counter++;
}
} catch (Exception e) {
return getDummyDescriptorValue(new CDKException("Could not calculate weight: " + e.getMessage(), e));
}
double[][] burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
Matrix matrix = new Matrix(burdenMatrix);
EigenvalueDecomposition eigenDecomposition = new EigenvalueDecomposition(matrix);
double[] eval1 = eigenDecomposition.getRealEigenvalues();
// get charge weighted BCUT
LonePairElectronChecker lpcheck = new LonePairElectronChecker();
GasteigerPEPEPartialCharges pepe;
GasteigerMarsiliPartialCharges peoe;
try {
lpcheck.saturate(molecule);
double[] charges = new double[molecule.getAtomCount()];
// pepe = new GasteigerPEPEPartialCharges();
// pepe.calculateCharges(molecule);
// for (int i = 0; i < molecule.getAtomCount(); i++) charges[i] = molecule.getAtom(i).getCharge();
peoe = new GasteigerMarsiliPartialCharges();
peoe.assignGasteigerMarsiliSigmaPartialCharges(molecule, true);
for (int i = 0; i < molecule.getAtomCount(); i++) charges[i] += molecule.getAtom(i).getCharge();
for (int i = 0; i < molecule.getAtomCount(); i++) {
molecule.getAtom(i).setCharge(charges[i]);
}
} catch (Exception e) {
return getDummyDescriptorValue(new CDKException("Could not calculate partial charges: " + e.getMessage(), e));
}
counter = 0;
for (int i = 0; i < molecule.getAtomCount(); i++) {
if (molecule.getAtom(i).getSymbol().equals("H")) continue;
diagvalue[counter] = 1.0; molecule.getAtom(i).getCharge();
counter++;
}
burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
matrix = new Matrix(burdenMatrix);
eigenDecomposition = new EigenvalueDecomposition(matrix);
double[] eval2 = eigenDecomposition.getRealEigenvalues();
int[][] topoDistance = PathTools.computeFloydAPSP(AdjacencyMatrix.getMatrix(molecule));
// get polarizability weighted BCUT
Polarizability pol = new Polarizability();
counter = 0;
for (int i = 0; i < molecule.getAtomCount(); i++) {
if (molecule.getAtom(i).getSymbol().equals("H")) continue;
diagvalue[counter] = pol.calculateGHEffectiveAtomPolarizability(molecule, molecule.getAtom(i), false, topoDistance);
counter++;
}
burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
matrix = new Matrix(burdenMatrix);
eigenDecomposition = new EigenvalueDecomposition(matrix);
double[] eval3 = eigenDecomposition.getRealEigenvalues();
String[] names;
String[] suffix = {"w", "c", "p"};
// return only the n highest & lowest eigenvalues
int lnlow, lnhigh, enlow, enhigh;
if (nlow > nheavy) {
lnlow = nheavy;
enlow = nlow - nheavy;
} else {
lnlow = nlow;
enlow = 0;
}
if (nhigh > nheavy) {
lnhigh = nheavy;
enhigh = nhigh - nheavy;
} else {
lnhigh = nhigh;
enhigh = 0;
}
DoubleArrayResult retval = new DoubleArrayResult( (lnlow+enlow+lnhigh+enhigh) * 3);
for (int i = 0; i < lnlow; i++) retval.add(eval1[i]);
for (int i = 0; i < enlow; i++) retval.add(Double.NaN);
for (int i = 0; i < lnhigh; i++) retval.add(eval1[eval1.length - i - 1]);
for (int i = 0; i < enhigh; i++) retval.add(Double.NaN);
for (int i = 0; i < lnlow; i++) retval.add(eval2[i]);
for (int i = 0; i < enlow; i++) retval.add(Double.NaN);
for (int i = 0; i < lnhigh; i++) retval.add(eval2[eval2.length - i - 1]);
for (int i = 0; i < enhigh; i++) retval.add(Double.NaN);
for (int i = 0; i < lnlow; i++) retval.add(eval3[i]);
for (int i = 0; i < enlow; i++) retval.add(Double.NaN);
for (int i = 0; i < lnhigh; i++) retval.add(eval3[eval3.length - i - 1]);
for (int i = 0; i < enhigh; i++) retval.add(Double.NaN);
names = new String[3 * nhigh + 3 * nlow];
counter = 0;
for (String aSuffix : suffix) {
for (int i = 0; i < nhigh; i++) {
names[counter++] = "BCUT" + aSuffix + "-" + (i + 1) + "l";
}
for (int i = 0; i < nlow; i++) {
names[counter++] = "BCUT" + aSuffix + "-" + (i + 1) + "h";
}
}
return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(),
retval, getDescriptorNames());
}
/**
* Returns the specific type of the DescriptorResult object.
* <p/>
* The return value from this method really indicates what type of result will
* be obtained from the {@link org.openscience.cdk.qsar.DescriptorValue} object. Note that the same result
* can be achieved by interrogating the {@link org.openscience.cdk.qsar.DescriptorValue} object; this method
* allows you to do the same thing, without actually calculating the descriptor.
*
* @return an object that implements the {@link org.openscience.cdk.qsar.result.IDescriptorResult} interface indicating
* the actual type of values returned by the descriptor in the {@link org.openscience.cdk.qsar.DescriptorValue} object
*/
@TestMethod("testGetDescriptorResultType")
public IDescriptorResult getDescriptorResultType() {
return new DoubleArrayResultType(6);
}
private DescriptorValue getDummyDescriptorValue(Exception e) {
DoubleArrayResult results = new DoubleArrayResult(6);
for (int i = 0; i < 6; i++) results.add(Double.NaN);
return new DescriptorValue(getSpecification(), getParameterNames(),
getParameters(), results, getDescriptorNames(), e);
}
}