/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package org.biojava.nbio.structure.asa; import org.biojava.nbio.structure.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.vecmath.Point3d; import java.util.ArrayList; import java.util.TreeMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; /** * Class to calculate Accessible Surface Areas based on * the rolling ball algorithm by Shrake and Rupley. * * The code is adapted from a python implementation at http://boscoh.com/protein/asapy * (now source is available at https://github.com/boscoh/asa). * Thanks to Bosco K. Ho for a great piece of code and for his fantastic blog. * * See * Shrake, A., and J. A. Rupley. "Environment and Exposure to Solvent of Protein Atoms. * Lysozyme and Insulin." JMB (1973) 79:351-371. * Lee, B., and Richards, F.M. "The interpretation of Protein Structures: Estimation of * Static Accessibility" JMB (1971) 55:379-400 * @author duarte_j * */ public class AsaCalculator { private static final Logger logger = LoggerFactory.getLogger(AsaCalculator.class); // Bosco uses as default 960, Shrake and Rupley seem to use in their paper 92 (not sure if this is actually the same parameter) public static final int DEFAULT_N_SPHERE_POINTS = 960; public static final double DEFAULT_PROBE_SIZE = 1.4; public static final int DEFAULT_NTHREADS = 1; // Chothia's amino acid atoms vdw radii public static final double TRIGONAL_CARBON_VDW = 1.76; public static final double TETRAHEDRAL_CARBON_VDW = 1.87; public static final double TRIGONAL_NITROGEN_VDW = 1.65; public static final double TETRAHEDRAL_NITROGEN_VDW = 1.50; public static final double SULFUR_VDW = 1.85; public static final double OXIGEN_VDW = 1.40; // Chothia's nucleotide atoms vdw radii public static final double NUC_CARBON_VDW = 1.80; public static final double NUC_NITROGEN_VDW = 1.60; public static final double PHOSPHOROUS_VDW = 1.90; private class AsaCalcWorker implements Runnable { private int i; private double[] asas; public AsaCalcWorker(int i, double[] asas) { this.i = i; this.asas = asas; } @Override public void run() { asas[i] = calcSingleAsa(i); } } private Point3d[] atomCoords; private Atom[] atoms; private double[] radii; private double probe; private int nThreads; private Point3d[] spherePoints; private double cons; /** * Constructs a new AsaCalculator. Subsequently call {@link #calculateAsas()} * or {@link #getGroupAsas()} to calculate the ASAs * Only non-Hydrogen atoms are considered in the calculation. * @param structure * @param probe * @param nSpherePoints * @param nThreads * @param hetAtoms if true HET residues are considered, if false they aren't, equivalent to * NACCESS' -h option * @see StructureTools.getAllNonHAtomArray */ public AsaCalculator(Structure structure, double probe, int nSpherePoints, int nThreads, boolean hetAtoms) { this.atoms = StructureTools.getAllNonHAtomArray(structure, hetAtoms); this.atomCoords = Calc.atomsToPoints(atoms); this.probe = probe; this.nThreads = nThreads; // initialising the radii by looking them up through AtomRadii radii = new double[atomCoords.length]; for (int i=0;i<atomCoords.length;i++) { radii[i] = getRadius(atoms[i]); } // initialising the sphere points to sample spherePoints = generateSpherePoints(nSpherePoints); cons = 4.0 * Math.PI / nSpherePoints; } /** * Constructs a new AsaCalculator. Subsequently call {@link #calculateAsas()} * or {@link #getGroupAsas()} to calculate the ASAs. * @param atoms an array of atoms not containing Hydrogen atoms * @param probe the probe size * @param nSpherePoints the number of points to be used in generating the spherical * dot-density, the more points the more accurate (and slower) calculation * @param nThreads the number of parallel threads to use for the calculation * @throws IllegalArgumentException if any atom in the array is a Hydrogen atom */ public AsaCalculator(Atom[] atoms, double probe, int nSpherePoints, int nThreads) { this.atoms = atoms; this.atomCoords = Calc.atomsToPoints(atoms); this.probe = probe; this.nThreads = nThreads; for (Atom atom:atoms) { if (atom.getElement()==Element.H) throw new IllegalArgumentException("Can't calculate ASA for an array that contains Hydrogen atoms "); } // initialising the radii by looking them up through AtomRadii radii = new double[atoms.length]; for (int i=0;i<atoms.length;i++) { radii[i] = getRadius(atoms[i]); } // initialising the sphere points to sample spherePoints = generateSpherePoints(nSpherePoints); cons = 4.0 * Math.PI / nSpherePoints; } /** * Constructs a new AsaCalculator. Subsequently call {@link #calcSingleAsa(int)} * to calculate the atom ASAs. The given radius parameter will be taken as the radius for * all points given. No ASA calculation per group will be possible with this constructor, so * usage of {@link #getGroupAsas()} will result in a NullPointerException. * @param atomCoords * the coordinates representing the center of atoms * @param probe * the probe size * @param nSpherePoints * the number of points to be used in generating the spherical * dot-density, the more points the more accurate (and slower) calculation * @param nThreads * the number of parallel threads to use for the calculation * @param radius * the radius that will be assign to all given coordinates */ public AsaCalculator(Point3d[] atomCoords, double probe, int nSpherePoints, int nThreads, double radius) { this.atoms = null; this.atomCoords = atomCoords; this.probe = probe; this.nThreads = nThreads; // initialising the radii to the given radius for all atoms radii = new double[atomCoords.length]; for (int i=0;i<atomCoords.length;i++) { radii[i] = radius; } // initialising the sphere points to sample spherePoints = generateSpherePoints(nSpherePoints); cons = 4.0 * Math.PI / nSpherePoints; } /** * Calculates ASA for all atoms and return them as a GroupAsa * array (one element per residue in structure) containing ASAs per residue * and per atom. * The sorting of Groups in returned array is as specified by {@link org.biojava.nbio.structure.ResidueNumber} * @return */ public GroupAsa[] getGroupAsas() { TreeMap<ResidueNumber, GroupAsa> asas = new TreeMap<ResidueNumber, GroupAsa>(); double[] asasPerAtom = calculateAsas(); for (int i=0;i<atomCoords.length;i++) { Group g = atoms[i].getGroup(); if (!asas.containsKey(g.getResidueNumber())) { GroupAsa groupAsa = new GroupAsa(g); groupAsa.addAtomAsaU(asasPerAtom[i]); asas.put(g.getResidueNumber(), groupAsa); } else { GroupAsa groupAsa = asas.get(g.getResidueNumber()); groupAsa.addAtomAsaU(asasPerAtom[i]); } } return asas.values().toArray(new GroupAsa[asas.size()]); } /** * Calculates the Accessible Surface Areas for the atoms given in constructor and with parameters given. * Beware that the parallel implementation is quite memory hungry. It scales well as long as there is * enough memory available. * @return an array with asa values corresponding to each atom of the input array */ public double[] calculateAsas() { double[] asas = new double[atomCoords.length]; if (nThreads<=1) { // (i.e. it will also be 1 thread if 0 or negative number specified) for (int i=0;i<atomCoords.length;i++) { asas[i] = calcSingleAsa(i); } } else { // NOTE the multithreaded calculation does not scale up well in some systems, // why? I guess some memory/garbage collect problem? I tried increasing Xmx in pc8201 but didn't help // Following scaling tests are for 3hbx, calculating ASA of full asym unit (6 chains): // SCALING test done in merlinl01 (12 cores, Xeon X5670 @ 2.93GHz, 24GB RAM) //1 threads, time: 8.8s -- x1.0 //2 threads, time: 4.4s -- x2.0 //3 threads, time: 2.9s -- x3.0 //4 threads, time: 2.2s -- x3.9 //5 threads, time: 1.8s -- x4.9 //6 threads, time: 1.6s -- x5.5 //7 threads, time: 1.4s -- x6.5 //8 threads, time: 1.3s -- x6.9 // SCALING test done in pc8201 (4 cores, Core2 Quad Q9550 @ 2.83GHz, 8GB RAM) //1 threads, time: 17.2s -- x1.0 //2 threads, time: 9.7s -- x1.8 //3 threads, time: 7.7s -- x2.2 //4 threads, time: 7.9s -- x2.2 // SCALING test done in eppic01 (16 cores, Xeon E5-2650 0 @ 2.00GHz, 128GB RAM) //1 threads, time: 10.7s -- x1.0 //2 threads, time: 5.6s -- x1.9 //3 threads, time: 3.6s -- x3.0 //4 threads, time: 2.8s -- x3.9 //5 threads, time: 2.3s -- x4.8 //6 threads, time: 1.8s -- x6.0 //7 threads, time: 1.6s -- x6.8 //8 threads, time: 1.3s -- x8.0 //9 threads, time: 1.3s -- x8.5 //10 threads, time: 1.1s -- x10.0 //11 threads, time: 1.0s -- x10.9 //12 threads, time: 0.9s -- x11.4 ExecutorService threadPool = Executors.newFixedThreadPool(nThreads); for (int i=0;i<atomCoords.length;i++) { threadPool.submit(new AsaCalcWorker(i,asas)); } threadPool.shutdown(); while (!threadPool.isTerminated()); } return asas; } /** * Returns list of 3d coordinates of points on a sphere using the * Golden Section Spiral algorithm. * @param nSpherePoints the number of points to be used in generating the spherical dot-density * @return */ private Point3d[] generateSpherePoints(int nSpherePoints) { Point3d[] points = new Point3d[nSpherePoints]; double inc = Math.PI * (3.0 - Math.sqrt(5.0)); double offset = 2.0 / nSpherePoints; for (int k=0;k<nSpherePoints;k++) { double y = k * offset - 1.0 + (offset / 2.0); double r = Math.sqrt(1.0 - y*y); double phi = k * inc; points[k] = new Point3d(Math.cos(phi)*r, y, Math.sin(phi)*r); } return points; } /** * Returns list of indices of atoms within probe distance to atom k. * @param k index of atom for which we want neighbor indices */ private ArrayList<Integer> findNeighborIndices(int k) { // looking at a typical protein case, number of neighbours are from ~10 to ~50, with an average of ~30 // Thus 40 seems to be a good compromise for the starting capacity ArrayList<Integer> neighbor_indices = new ArrayList<Integer>(40); double radius = radii[k] + probe + probe; for (int i=0;i<atomCoords.length;i++) { if (i==k) continue; double dist = 0; dist = atomCoords[i].distance(atomCoords[k]); if (dist < radius + radii[i]) { neighbor_indices.add(i); } } return neighbor_indices; } private double calcSingleAsa(int i) { Point3d atom_i = atomCoords[i]; ArrayList<Integer> neighbor_indices = findNeighborIndices(i); int n_neighbor = neighbor_indices.size(); int j_closest_neighbor = 0; double radius = probe + radii[i]; int n_accessible_point = 0; for (Point3d point: spherePoints){ boolean is_accessible = true; Point3d test_point = new Point3d(point.x*radius + atom_i.x, point.y*radius + atom_i.y, point.z*radius + atom_i.z); int[] cycled_indices = new int[n_neighbor]; int arind = 0; for (int ind=j_closest_neighbor;ind<n_neighbor;ind++) { cycled_indices[arind] = ind; arind++; } for (int ind=0;ind<j_closest_neighbor;ind++){ cycled_indices[arind] = ind; arind++; } for (int j: cycled_indices) { Point3d atom_j = atomCoords[neighbor_indices.get(j)]; double r = radii[neighbor_indices.get(j)] + probe; double diff_sq = test_point.distanceSquared(atom_j); if (diff_sq < r*r) { j_closest_neighbor = j; is_accessible = false; break; } } if (is_accessible) { n_accessible_point++; } } return cons*n_accessible_point*radius*radius; } /** * Gets the radius for given amino acid and atom * @param aa * @param atom * @return */ private static double getRadiusForAmino(AminoAcid amino, Atom atom) { if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius(); // some unusual entries (e.g. 1tes) contain Deuterium atoms in standard aminoacids if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius(); String atomCode = atom.getName(); char aa = amino.getAminoType(); // here we use the values that Chothia gives in his paper (as NACCESS does) if (atom.getElement()==Element.O) { return OXIGEN_VDW; } else if (atom.getElement()==Element.S) { return SULFUR_VDW; } else if (atom.getElement()==Element.N) { if (atomCode.equals("NZ")) return TETRAHEDRAL_NITROGEN_VDW; // tetrahedral Nitrogen return TRIGONAL_NITROGEN_VDW; // trigonal Nitrogen } else if (atom.getElement()==Element.C) { // it must be a carbon if (atomCode.equals("C") || atomCode.equals("CE1") || atomCode.equals("CE2") || atomCode.equals("CE3") || atomCode.equals("CH2") || atomCode.equals("CZ") || atomCode.equals("CZ2") || atomCode.equals("CZ3")) { return TRIGONAL_CARBON_VDW; // trigonal Carbon } else if (atomCode.equals("CA") || atomCode.equals("CB") || atomCode.equals("CE") || atomCode.equals("CG1") || atomCode.equals("CG2")) { return TETRAHEDRAL_CARBON_VDW; // tetrahedral Carbon } // the rest of the cases (CD, CD1, CD2, CG) depend on amino acid else { switch (aa) { case 'F': case 'W': case 'Y': case 'H': case 'D': case 'N': return TRIGONAL_CARBON_VDW; case 'P': case 'K': case 'R': case 'M': case 'I': case 'L': return TETRAHEDRAL_CARBON_VDW; case 'Q': case 'E': if (atomCode.equals("CD")) return TRIGONAL_CARBON_VDW; else if (atomCode.equals("CG")) return TETRAHEDRAL_CARBON_VDW; default: logger.info("Unexpected carbon atom "+atomCode+" for aminoacid "+aa+", assigning its standard vdw radius"); return Element.C.getVDWRadius(); } } // not any of the expected atoms } else { // non standard aas, (e.g. MSE, LLP) will always have this problem, logger.info("Unexpected atom "+atomCode+" for aminoacid "+aa+ " ("+amino.getPDBName()+"), assigning its standard vdw radius"); return atom.getElement().getVDWRadius(); } } /** * Gets the radius for given nucleotide atom * @param atom * @return */ private static double getRadiusForNucl(NucleotideImpl nuc, Atom atom) { if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius(); if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius(); if (atom.getElement()==Element.C) return NUC_CARBON_VDW; if (atom.getElement()==Element.N) return NUC_NITROGEN_VDW; if (atom.getElement()==Element.P) return PHOSPHOROUS_VDW; if (atom.getElement()==Element.O) return OXIGEN_VDW; logger.info("Unexpected atom "+atom.getName()+" for nucleotide "+nuc.getPDBName()+", assigning its standard vdw radius"); return atom.getElement().getVDWRadius(); } /** * Gets the van der Waals radius of the given atom following the values defined by * Chothia (1976) J.Mol.Biol.105,1-14 * NOTE: the vdw values defined by the paper assume no Hydrogens and thus "inflates" * slightly the heavy atoms to account for Hydrogens. Thus this method cannot be used * in a structure that contains Hydrogens! * * If atom is neither part of a nucleotide nor of a standard aminoacid, * the default vdw radius for the element is returned. If atom is of * unknown type (element) the vdw radius of {@link #Element().N} is returned * * @param atom * @return */ public static double getRadius(Atom atom) { if (atom.getElement()==null) { logger.warn("Unrecognised atom "+atom.getName()+" with serial "+atom.getPDBserial()+ ", assigning the default vdw radius (Nitrogen vdw radius)."); return Element.N.getVDWRadius(); } Group res = atom.getGroup(); if (res==null) { logger.warn("Unknown parent residue for atom "+atom.getName()+" with serial "+ atom.getPDBserial()+", assigning its default vdw radius"); return atom.getElement().getVDWRadius(); } GroupType type = res.getType(); if (type == GroupType.AMINOACID) return getRadiusForAmino(((AminoAcid)res), atom); if (type == GroupType.NUCLEOTIDE) return getRadiusForNucl((NucleotideImpl)res,atom); return atom.getElement().getVDWRadius(); } }