/*
* Copyright (C) 2004-2007 Rajarshi Guha <rajarshi@users.sourceforge.net>
*
* Contact: cdk-devel@lists.sourceforge.net
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.openscience.cdk.qsar.descriptors.molecular;
import org.openscience.cdk.annotations.TestClass;
import org.openscience.cdk.annotations.TestMethod;
import org.openscience.cdk.charges.GasteigerMarsiliPartialCharges;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.geometry.GeometryTools;
import org.openscience.cdk.geometry.surface.NumericalSurface;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.qsar.DescriptorSpecification;
import org.openscience.cdk.qsar.DescriptorValue;
import org.openscience.cdk.qsar.IMolecularDescriptor;
import org.openscience.cdk.qsar.result.DoubleArrayResult;
import org.openscience.cdk.qsar.result.DoubleArrayResultType;
import org.openscience.cdk.qsar.result.IDescriptorResult;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
/**
* Calculates 29 Charged Partial Surface Area (CPSA) descriptors.
* <p/>
* The CPSA's were developed by Stanton et al. ({@cdk.cite STA90}) and
* are related to the Polar Surface Area descriptors. The original
* implementation was in the ADAPT software package and the definitions
* of the individual descriptors are presented in the following table. This class
* returns a <code>DoubleArrayResult</code> containing the 29 descriptors in the order
* described in the table.
* <table border=1 cellpadding=2>
* <caption><a name="cpsa">A Summary of the 29 CPSA Descriptors</a></caption>
* <thead>
* <tr>
* <th>IDescriptor</th><th>Meaning</th>
* </tr>
* </thead>
* <tbody>
* <tr>
* <td>PPSA-1</td><td> partial positive surface area -- sum of surface area on positive parts of molecule</td></tr><tr>
* <td>PPSA-2</td><td> partial positive surface area * total positive charge on the molecule </td></tr><tr>
* <td>PPSA-3</td><td> charge weighted partial positive surface area</td></tr><tr>
* <td>PNSA-1</td><td> partial negative surface area -- sum of surface area on negative parts of molecule</td></tr><tr>
* <td>PNSA-2</td><td> partial negative surface area * total negative charge on the molecule</td></tr><tr>
* <td>PNSA-3</td><td> charge weighted partial negative surface area</td></tr><tr>
* <td> DPSA-1</td><td> difference of PPSA-1 and PNSA-1</td></tr><tr>
* <td> DPSA-2</td><td> difference of PPSA-2 and PNSA-2</td></tr><tr>
* <td> DPSA-3</td><td> difference of PPSA-3 and PNSA-3</td></tr><tr>
* <td> FPSA-1</td><td> PPSA-1 / total molecular surface area</td></tr><tr>
* <td> FPSA-2 </td><td>PPSA-2 / total molecular surface area</td></tr><tr>
* <td> FPSA-3</td><td> PPSA-3 / total molecular surface area</td></tr><tr>
* <td> FNSA-1</td><td> PNSA-1 / total molecular surface area</td></tr><tr>
* <td> FNSA-2</td><td> PNSA-2 / total molecular surface area</td></tr><tr>
* <td> FNSA-3</td><td> PNSA-3 / total molecular surface area</td></tr><tr>
* <td> WPSA-1</td><td> PPSA-1 * total molecular surface area / 1000</td></tr><tr>
* <td>WPSA-2</td><td> PPSA-2 * total molecular surface area /1000</td></tr><tr>
* <td>WPSA-3</td><td> PPSA-3 * total molecular surface area / 1000</td></tr><tr>
* <td>WNSA-1</td><td> PNSA-1 * total molecular surface area /1000</td></tr><tr>
* <td>WNSA-2</td><td> PNSA-2 * total molecular surface area / 1000</td></tr><tr>
* <td>WNSA-3</td><td> PNSA-3 * total molecular surface area / 1000</td></tr><tr>
* <td>RPCG</td><td> relative positive charge -- most positive charge / total positive charge</td></tr><tr>
* <td> RNCG </td><td>relative negative charge -- most negative charge / total negative charge</td></tr><tr>
* <td> RPCS </td><td>relative positive charge surface area -- most positive surface area * RPCG</td></tr><tr>
* <td> RNCS </td><td>relative negative charge surface area -- most negative surface area * RNCG</td></tr>
* <tr>
* <td>THSA</td>
* <td>sum of solvent accessible surface areas of
* atoms with absolute value of partial charges
* less than 0.2
* </td>
* </tr>
* <tr>
* <td>TPSA</td>
* <td>sum of solvent accessible surface areas of
* atoms with absolute value of partial charges
* greater than or equal to 0.2
* </td>
* </tr>
* <tr>
* <td>RHSA</td>
* <td>THSA / total molecular surface area
* </td>
* </tr>
* <tr>
* <td>RPSA</td>
* <td>TPSA / total molecular surface area
* </td>
* </tr>
* </tbody>
* </table>
* <p/>
* <b>NOTE</b>: The values calculated by this implementation will differ from those
* calculated by the original ADAPT implementation of the CPSA descriptors. This
* is because the original implementation used an analytical surface area algorithm
* and used partial charges obtained from MOPAC using the AM1 Hamiltonian.
* This implementation uses a numerical
* algorithm to obtain surface areas (see {@link NumericalSurface}) and obtains partial
* charges using the Gasteiger-Marsili algorithm (see {@link GasteigerMarsiliPartialCharges}).
* <p/>
* However, a comparison of the values calculated by the two implementations indicates
* that they are qualitatively the same.
* <p/>
* <p>This descriptor uses these parameters:
* <table border="1">
* <tr>
* <td>Name</td>
* <td>Default</td>
* <td>Description</td>
* </tr>
* <tr>
* <td></td>
* <td></td>
* <td>no parameters</td>
* </tr>
* </table>
*
* @author Rajarshi Guha
* @cdk.created 2005-05-16
* @cdk.module qsarmolecular
* @cdk.githash
* @cdk.set qsar-descriptors
* @cdk.dictref qsar-descriptors:CPSA
*/
@TestClass("org.openscience.cdk.qsar.descriptors.molecular.CPSADescriptorTest")
public class CPSADescriptor implements IMolecularDescriptor {
private static final String[] names = {
"PPSA-1", "PPSA-2", "PPSA-3",
"PNSA-1", "PNSA-2", "PNSA-3",
"DPSA-1", "DPSA-2", "DPSA-3",
"FPSA-1", "FPSA-2", "FPSA-3",
"FNSA-1", "FNSA-2", "FNSA-3",
"WPSA-1", "WPSA-2", "WPSA-3",
"WNSA-1", "WNSA-2", "WNSA-3",
"RPCG", "RNCG", "RPCS", "RNCS",
"THSA", "TPSA", "RHSA", "RPSA"
};
private static ILoggingTool logger =
LoggingToolFactory.createLoggingTool(CPSADescriptor.class);
public CPSADescriptor() {
}
@TestMethod("testGetSpecification")
public DescriptorSpecification getSpecification() {
return new DescriptorSpecification(
"http://www.blueobelisk.org/ontologies/chemoinformatics-algorithms/#CPSA",
this.getClass().getName(),
"$Id$",
"The Chemistry Development Kit");
}
/**
* Sets the parameters attribute of the CPSADescriptor object.
*
* @param params The new parameters value
* @throws CDKException Description of the Exception
* @see #getParameters
*/
@TestMethod("testSetParameters_arrayObject")
public void setParameters(Object[] params) throws CDKException {
// no parameters for this descriptor
}
/**
* Gets the parameters attribute of the CPSADescriptor object.
*
* @return The parameters value
* @see #setParameters
*/
@TestMethod("testGetParameters")
public Object[] getParameters() {
// no parameters to return
return (null);
}
@TestMethod(value="testNamesConsistency")
public String[] getDescriptorNames() {
return names;
}
/**
* Gets the parameterNames attribute of the CPSADescriptor object.
*
* @return The parameterNames value
*/
@TestMethod("testGetParameterNames")
public String[] getParameterNames() {
// no param names to return
return (null);
}
/**
* Gets the parameterType attribute of the CPSADescriptor object.
*
* @param name Description of the Parameter
* @return The parameterType value
*/
@TestMethod("testGetParameterType_String")
public Object getParameterType(String name) {
return (null);
}
/**
* Evaluates the 29 CPSA descriptors using Gasteiger-Marsilli charges.
*
* @param atomContainer Parameter is the atom container.
* @return An ArrayList containing 29 elements in the order described above
*/
@TestMethod("testCalculate_IAtomContainer")
public DescriptorValue calculate(IAtomContainer atomContainer) {
DoubleArrayResult retval = new DoubleArrayResult();
if (!GeometryTools.has3DCoordinates(atomContainer)) {
for (int i = 0; i < 29; i++) retval.add(Double.NaN);
return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(),
retval, getDescriptorNames(), new CDKException("Molecule must have 3D coordinates"));
}
IAtomContainer container;
try {
container = (IAtomContainer) atomContainer.clone();
} catch (CloneNotSupportedException e) {
logger.debug("Error during clone");
for (int i = 0; i < 29; i++) retval.add(Double.NaN);
return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(),
retval, getDescriptorNames(), new CDKException("Error during clone"+e.getMessage()));
}
// IsotopeFactory factory = null;
// try {
// factory = IsotopeFactory.getInstance(container.getBuilder());
// } catch (Exception e) {
// logger.debug(e);
// }
GasteigerMarsiliPartialCharges peoe;
try {
peoe = new GasteigerMarsiliPartialCharges();
peoe.assignGasteigerMarsiliSigmaPartialCharges(container, true);
} catch (Exception e) {
logger.debug("Error in assigning Gasteiger-Marsilli charges");
for (int i = 0; i < 29; i++) retval.add(Double.NaN);
return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(),
retval, getDescriptorNames(), new CDKException("Error in getting G-M charges"));
}
NumericalSurface surface;
try {
surface = new NumericalSurface(container);
surface.calculateSurface();
} catch (NullPointerException npe) {
logger.debug("Error in surface area calculation");
for (int i = 0; i < 29; i++) retval.add(Double.NaN);
return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(),
retval, getDescriptorNames(),
new CDKException("Error in surface area calculation"));
}
//double molecularWeight = mfa.getMass();
double[] atomSurfaces = surface.getAllSurfaceAreas();
double totalSA = surface.getTotalSurfaceArea();
double ppsa1 = 0.0;
double ppsa3 = 0.0;
double pnsa1 = 0.0;
double pnsa3 = 0.0;
double totpcharge = 0.0;
double totncharge = 0.0;
for (int i = 0; i < container.getAtomCount(); i++) {
if (container.getAtom(i).getCharge() > 0) {
ppsa1 += atomSurfaces[i];
ppsa3 += container.getAtom(i).getCharge() * atomSurfaces[i];
totpcharge += container.getAtom(i).getCharge();
} else {
pnsa1 += atomSurfaces[i];
pnsa3 += container.getAtom(i).getCharge() * atomSurfaces[i];
totncharge += container.getAtom(i).getCharge();
}
}
double ppsa2 = ppsa1 * totpcharge;
double pnsa2 = pnsa1 * totncharge;
// fractional +ve & -ve SA
double fpsa1 = ppsa1 / totalSA;
double fpsa2 = ppsa2 / totalSA;
double fpsa3 = ppsa3 / totalSA;
double fnsa1 = pnsa1 / totalSA;
double fnsa2 = pnsa2 / totalSA;
double fnsa3 = pnsa3 / totalSA;
// surface wtd +ve & -ve SA
double wpsa1 = ppsa1 * totalSA / 1000;
double wpsa2 = ppsa2 * totalSA / 1000;
double wpsa3 = ppsa3 * totalSA / 1000;
double wnsa1 = pnsa1 * totalSA / 1000;
double wnsa2 = pnsa2 * totalSA / 1000;
double wnsa3 = pnsa3 * totalSA / 1000;
// hydrophobic and poalr surface area
double phobic = 0.0;
double polar = 0.0;
for (int i = 0; i < container.getAtomCount(); i++) {
if (Math.abs(container.getAtom(i).getCharge()) < 0.2) {
phobic += atomSurfaces[i];
} else {
polar += atomSurfaces[i];
}
}
double thsa = phobic;
double tpsa = polar;
double rhsa = phobic / totalSA;
double rpsa = polar / totalSA;
// differential +ve & -ve SA
double dpsa1 = ppsa1 - pnsa1;
double dpsa2 = ppsa2 - pnsa2;
double dpsa3 = ppsa3 - pnsa3;
double maxpcharge = 0.0;
double maxncharge = 0.0;
int pidx = 0;
int nidx = 0;
for (int i = 0; i < container.getAtomCount(); i++) {
double charge = container.getAtom(i).getCharge();
if (charge > maxpcharge) {
maxpcharge = charge;
pidx = i;
}
if (charge < maxncharge) {
maxncharge = charge;
nidx = i;
}
}
// relative descriptors
double rpcg = maxpcharge / totpcharge;
double rncg = maxncharge / totncharge;
double rpcs = atomSurfaces[pidx] * rpcg;
double rncs = atomSurfaces[nidx] * rncg;
// fill in the values
retval.add(ppsa1);
retval.add(ppsa2);
retval.add(ppsa3);
retval.add(pnsa1);
retval.add(pnsa2);
retval.add(pnsa3);
retval.add(dpsa1);
retval.add(dpsa2);
retval.add(dpsa3);
retval.add(fpsa1);
retval.add(fpsa2);
retval.add(fpsa3);
retval.add(fnsa1);
retval.add(fnsa2);
retval.add(fnsa3);
retval.add(wpsa1);
retval.add(wpsa2);
retval.add(wpsa3);
retval.add(wnsa1);
retval.add(wnsa2);
retval.add(wnsa3);
retval.add(rpcg);
retval.add(rncg);
retval.add(rpcs);
retval.add(rncs);
retval.add(thsa);
retval.add(tpsa);
retval.add(rhsa);
retval.add(rpsa);
return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(),
retval, getDescriptorNames());
}
/**
* Returns the specific type of the DescriptorResult object.
* <p/>
* The return value from this method really indicates what type of result will
* be obtained from the {@link org.openscience.cdk.qsar.DescriptorValue} object. Note that the same result
* can be achieved by interrogating the {@link org.openscience.cdk.qsar.DescriptorValue} object; this method
* allows you to do the same thing, without actually calculating the descriptor.
*
* @return an object that implements the {@link org.openscience.cdk.qsar.result.IDescriptorResult} interface indicating
* the actual type of values returned by the descriptor in the {@link org.openscience.cdk.qsar.DescriptorValue} object
*/
@TestMethod("testGetDescriptorResultType")
public IDescriptorResult getDescriptorResultType() {
return new DoubleArrayResultType(29);
}
}