package org.openscience.cdk.similarity;
import org.openscience.cdk.annotations.TestClass;
import org.openscience.cdk.annotations.TestMethod;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import javax.vecmath.Point3d;
import java.util.Iterator;
/**
* Fast similarity measure for 3D structures.
* <p/>
* This class implements a fast descriptor based 3D similarity measure described by Ballester et al
* ({@cdk.cite BALL2007}). The approach calculates the distances of each atom to four specific points: the
* centroid of the molecule, the atom that is closest to the centroid, the atom that is farthest from the
* centroid and the atom that is farthest from the previous atom. Thus we get 4 sets of distance distributions.
* The final descriptor set is generated by evaluating the first three moments of each distance distribution.
* <p/>
* The similarity between two molecules is then evaluated using the inverse of a normalized
* Manhattan type metric.
* <p/>
* This class allows you to evaluate the 3D similarity between two specified molecules as well as
* generate the 12 descriptors used to characterize the 3D structure which can then be used for a
* variety of purposes such as storing in a database.
*
* <b>Note</b>: The methods of this class do not perform hydrogen removal. If you want to
* do the calculations excluding hydrogens, you'll need to do it yourself. Also, if the molecule has
* disconnected components, you should consider one (usually the largest), otherwise all components
* are considered in the calculation.
*
* @author Rajarshi Guha
* @cdk.created 2007-03-11
* @cdk.keyword similarity, 3D, manhattan
* @cdk.githash
* @cdk.module fingerprint
*/
@TestClass("org.openscience.cdk.similarity.DistanceMomentTest")
public class DistanceMoment {
private static Point3d getGeometricCenter(IAtomContainer atomContainer) throws CDKException {
double x = 0;
double y = 0;
double z = 0;
for (IAtom atom : atomContainer.atoms()) {
Point3d p = atom.getPoint3d();
if (p == null) throw new CDKException("Molecule must have 3D coordinates");
x += p.x;
y += p.y;
z += p.z;
}
x /= atomContainer.getAtomCount();
y /= atomContainer.getAtomCount();
z /= atomContainer.getAtomCount();
return new Point3d(x, y, z);
}
private static float mu1(double[] x) {
float sum = 0;
for (double aX : x) {
sum += aX;
}
return sum / x.length;
}
private static float mu2(double[] x, double mean) {
float sum = 0;
for (double aX : x) {
sum += (aX - mean) * (aX - mean);
}
return sum / (x.length - 1);
}
private static float mu3(double[] x, double mean, double sigma) {
float sum = 0;
for (double aX : x) {
sum += ((aX - mean) / sigma) * ((aX - mean) / sigma) * ((aX - mean) / sigma);
}
return sum / x.length;
}
/**
* Evaluate the 12 descriptors used to characterize the 3D shape of a molecule.
*
* @param atomContainer The molecule to consider, should have 3D coordinates
* @return A 12 element array containing the descriptors.
* @throws CDKException if there are no 3D coordinates
*/
public static float[] generateMoments(IAtomContainer atomContainer) throws CDKException {
// lets check if we have 3D coordinates
Iterator<IAtom> atoms;
int natom = atomContainer.getAtomCount();
Point3d ctd = getGeometricCenter(atomContainer);
Point3d cst = new Point3d();
Point3d fct = new Point3d();
Point3d ftf = new Point3d();
double[] distCtd = new double[natom];
double[] distCst = new double[natom];
double[] distFct = new double[natom];
double[] distFtf = new double[natom];
atoms = atomContainer.atoms().iterator();
int counter = 0;
double min = Double.MAX_VALUE;
double max = Double.MIN_VALUE;
// eval dist to centroid
while (atoms.hasNext()) {
IAtom atom = atoms.next();
Point3d p = atom.getPoint3d();
double d = p.distance(ctd);
distCtd[counter++] = d;
if (d < min) {
cst.x = p.x;
cst.y = p.y;
cst.z = p.z;
min = d;
}
if (d > max) {
fct.x = p.x;
fct.y = p.y;
fct.z = p.z;
max = d;
}
}
// eval dist to cst
atoms = atomContainer.atoms().iterator();
counter = 0;
while (atoms.hasNext()) {
IAtom atom = atoms.next();
Point3d p = atom.getPoint3d();
double d = p.distance(cst);
distCst[counter++] = d;
}
// eval dist to fct
atoms = atomContainer.atoms().iterator();
counter = 0;
max = Double.MIN_VALUE;
while (atoms.hasNext()) {
IAtom atom = atoms.next();
Point3d p = atom.getPoint3d();
double d = p.distance(fct);
distFct[counter++] = d;
if (d > max) {
ftf.x = p.x;
ftf.y = p.y;
ftf.z = p.z;
max = d;
}
}
// eval dist to ftf
atoms = atomContainer.atoms().iterator();
counter = 0;
while (atoms.hasNext()) {
IAtom atom = atoms.next();
Point3d p = atom.getPoint3d();
double d = p.distance(ftf);
distFtf[counter++] = d;
}
float[] moments = new float[12];
float mean = mu1(distCtd);
float sigma2 = mu2(distCtd, mean);
float skewness = mu3(distCtd, mean, Math.sqrt(sigma2));
moments[0] = mean;
moments[1] = sigma2;
moments[2] = skewness;
mean = mu1(distCst);
sigma2 = mu2(distCst, mean);
skewness = mu3(distCst, mean, Math.sqrt(sigma2));
moments[3] = mean;
moments[4] = sigma2;
moments[5] = skewness;
mean = mu1(distFct);
sigma2 = mu2(distFct, mean);
skewness = mu3(distFct, mean, Math.sqrt(sigma2));
moments[6] = mean;
moments[7] = sigma2;
moments[8] = skewness;
mean = mu1(distFtf);
sigma2 = mu2(distFtf, mean);
skewness = mu3(distFtf, mean, Math.sqrt(sigma2));
moments[9] = mean;
moments[10] = sigma2;
moments[11] = skewness;
return moments;
}
/**
* Evaluate the 3D similarity between two molecules.
*
* The method does not remove hydrogens. If this is required, remove them from the
* molecules before passing them here.
*
* @param query The query molecule
* @param target The target molecule
* @return The similarity between the two molecules (ranging from 0 to 1)
* @throws CDKException if either molecule does not have 3D coordinates
*/
@TestMethod("test3DSim1,test3DSim2")
public static float calculate(IAtomContainer query, IAtomContainer target) throws CDKException {
float[] mom1 = generateMoments(query);
float[] mom2 = generateMoments(target);
float sum = 0;
for (int i = 0; i < mom1.length; i++) {
sum += Math.abs(mom1[i] - mom2[i]);
}
return (float) (1.0 / (1.0 + sum/12.0));
}
}