001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.asa;
022
023import org.biojava.nbio.structure.*;
024import org.slf4j.Logger;
025import org.slf4j.LoggerFactory;
026
027import javax.vecmath.Point3d;
028import java.util.ArrayList;
029import java.util.TreeMap;
030import java.util.concurrent.ExecutorService;
031import java.util.concurrent.Executors;
032
033
034
035
036/**
037 * Class to calculate Accessible Surface Areas based on
038 * the rolling ball algorithm by Shrake and Rupley.
039 *
040 * The code is adapted from a python implementation at http://boscoh.com/protein/asapy
041 * (now source is available at https://github.com/boscoh/asa).
042 * Thanks to Bosco K. Ho for a great piece of code and for his fantastic blog.
043 *
044 * See
045 * Shrake, A., and J. A. Rupley. "Environment and Exposure to Solvent of Protein Atoms.
046 * Lysozyme and Insulin." JMB (1973) 79:351-371.
047 * Lee, B., and Richards, F.M. "The interpretation of Protein Structures: Estimation of
048 * Static Accessibility" JMB (1971) 55:379-400
049 * @author duarte_j
050 *
051 */
052public class AsaCalculator {
053
054        private static final Logger logger = LoggerFactory.getLogger(AsaCalculator.class);
055
056        // Bosco uses as default 960, Shrake and Rupley seem to use in their paper 92 (not sure if this is actually the same parameter)
057        public static final int DEFAULT_N_SPHERE_POINTS = 960;
058        public static final double DEFAULT_PROBE_SIZE = 1.4;
059        public static final int DEFAULT_NTHREADS = 1;
060
061
062
063        // Chothia's amino acid atoms vdw radii
064        public static final double TRIGONAL_CARBON_VDW = 1.76;
065        public static final double TETRAHEDRAL_CARBON_VDW = 1.87;
066        public static final double TRIGONAL_NITROGEN_VDW = 1.65;
067        public static final double TETRAHEDRAL_NITROGEN_VDW = 1.50;
068        public static final double SULFUR_VDW = 1.85;
069        public static final double OXIGEN_VDW = 1.40;
070
071        // Chothia's nucleotide atoms vdw radii
072        public static final double NUC_CARBON_VDW = 1.80;
073        public static final double NUC_NITROGEN_VDW = 1.60;
074        public static final double PHOSPHOROUS_VDW = 1.90;
075
076
077
078
079
080        private class AsaCalcWorker implements Runnable {
081
082                private int i;
083                private double[] asas;
084
085                public AsaCalcWorker(int i, double[] asas) {
086                        this.i = i;
087                        this.asas = asas;
088                }
089
090                @Override
091                public void run() {
092                        asas[i] = calcSingleAsa(i);
093                }
094        }
095
096
097        private Point3d[] atomCoords;
098        private Atom[] atoms;
099        private double[] radii;
100        private double probe;
101        private int nThreads;
102        private Point3d[] spherePoints;
103        private double cons;
104
105        /**
106         * Constructs a new AsaCalculator. Subsequently call {@link #calculateAsas()}
107         * or {@link #getGroupAsas()} to calculate the ASAs
108         * Only non-Hydrogen atoms are considered in the calculation.
109         * @param structure
110         * @param probe
111         * @param nSpherePoints
112         * @param nThreads
113         * @param hetAtoms if true HET residues are considered, if false they aren't, equivalent to
114         * NACCESS' -h option
115         * @see StructureTools.getAllNonHAtomArray
116         */
117        public AsaCalculator(Structure structure, double probe, int nSpherePoints, int nThreads, boolean hetAtoms) {
118                this.atoms = StructureTools.getAllNonHAtomArray(structure, hetAtoms);
119                this.atomCoords = Calc.atomsToPoints(atoms);
120                this.probe = probe;
121                this.nThreads = nThreads;
122
123                // initialising the radii by looking them up through AtomRadii
124                radii = new double[atomCoords.length];
125                for (int i=0;i<atomCoords.length;i++) {
126                        radii[i] = getRadius(atoms[i]);
127                }
128
129                // initialising the sphere points to sample
130                spherePoints = generateSpherePoints(nSpherePoints);
131
132                cons = 4.0 * Math.PI / nSpherePoints;
133        }
134
135        /**
136         * Constructs a new AsaCalculator. Subsequently call {@link #calculateAsas()}
137         * or {@link #getGroupAsas()} to calculate the ASAs.
138         * @param atoms an array of atoms not containing Hydrogen atoms
139         * @param probe the probe size
140         * @param nSpherePoints the number of points to be used in generating the spherical
141         * dot-density, the more points the more accurate (and slower) calculation
142         * @param nThreads the number of parallel threads to use for the calculation
143         * @throws IllegalArgumentException if any atom in the array is a Hydrogen atom
144         */
145        public AsaCalculator(Atom[] atoms, double probe, int nSpherePoints, int nThreads) {
146                this.atoms = atoms;
147                this.atomCoords = Calc.atomsToPoints(atoms);
148                this.probe = probe;
149                this.nThreads = nThreads;
150
151                for (Atom atom:atoms) {
152                        if (atom.getElement()==Element.H)
153                                throw new IllegalArgumentException("Can't calculate ASA for an array that contains Hydrogen atoms ");
154                }
155
156                // initialising the radii by looking them up through AtomRadii
157                radii = new double[atoms.length];
158                for (int i=0;i<atoms.length;i++) {
159                        radii[i] = getRadius(atoms[i]);
160                }
161
162                // initialising the sphere points to sample
163                spherePoints = generateSpherePoints(nSpherePoints);
164
165                cons = 4.0 * Math.PI / nSpherePoints;
166        }
167
168        /**
169         * Constructs a new AsaCalculator. Subsequently call {@link #calcSingleAsa(int)} 
170         * to calculate the atom ASAs. The given radius parameter will be taken as the radius for
171         * all points given. No ASA calculation per group will be possible with this constructor, so
172         * usage of {@link #getGroupAsas()} will result in a NullPointerException.
173         * @param atomCoords
174         *                              the coordinates representing the center of atoms
175         * @param probe 
176         *                              the probe size
177         * @param nSpherePoints 
178         *                              the number of points to be used in generating the spherical
179         *                              dot-density, the more points the more accurate (and slower) calculation
180         * @param nThreads 
181         *                              the number of parallel threads to use for the calculation
182         * @param radius 
183         *                              the radius that will be assign to all given coordinates
184         */
185        public AsaCalculator(Point3d[] atomCoords, double probe, int nSpherePoints, int nThreads, double radius) {
186                this.atoms = null;
187                this.atomCoords = atomCoords;
188                this.probe = probe;
189                this.nThreads = nThreads;
190
191                // initialising the radii to the given radius for all atoms
192                radii = new double[atomCoords.length];
193                for (int i=0;i<atomCoords.length;i++) {
194                        radii[i] = radius;
195                }
196
197                // initialising the sphere points to sample
198                spherePoints = generateSpherePoints(nSpherePoints);
199
200                cons = 4.0 * Math.PI / nSpherePoints;
201        }
202
203        /**
204         * Calculates ASA for all atoms and return them as a GroupAsa
205         * array (one element per residue in structure) containing ASAs per residue
206         * and per atom.
207         * The sorting of Groups in returned array is as specified by {@link org.biojava.nbio.structure.ResidueNumber}
208         * @return
209         */
210        public GroupAsa[] getGroupAsas() {
211
212                TreeMap<ResidueNumber, GroupAsa> asas = new TreeMap<ResidueNumber, GroupAsa>();
213
214                double[] asasPerAtom = calculateAsas();
215
216                for (int i=0;i<atomCoords.length;i++) {
217                        Group g = atoms[i].getGroup();
218                        if (!asas.containsKey(g.getResidueNumber())) {
219                                GroupAsa groupAsa = new GroupAsa(g);
220                                groupAsa.addAtomAsaU(asasPerAtom[i]);
221                                asas.put(g.getResidueNumber(), groupAsa);
222                        } else {
223                                GroupAsa groupAsa = asas.get(g.getResidueNumber());
224                                groupAsa.addAtomAsaU(asasPerAtom[i]);
225                        }
226                }
227
228                return asas.values().toArray(new GroupAsa[asas.size()]);
229        }
230
231        /**
232         * Calculates the Accessible Surface Areas for the atoms given in constructor and with parameters given.
233         * Beware that the parallel implementation is quite memory hungry. It scales well as long as there is
234         * enough memory available.
235         * @return an array with asa values corresponding to each atom of the input array
236         */
237        public double[] calculateAsas() {
238
239                double[] asas = new double[atomCoords.length];
240
241                if (nThreads<=1) { // (i.e. it will also be 1 thread if 0 or negative number specified)
242                        for (int i=0;i<atomCoords.length;i++) {
243                                asas[i] = calcSingleAsa(i);
244                        }
245
246                } else {
247                        // NOTE the multithreaded calculation does not scale up well in some systems,
248                        // why? I guess some memory/garbage collect problem? I tried increasing Xmx in pc8201 but didn't help
249
250                        // Following scaling tests are for 3hbx, calculating ASA of full asym unit (6 chains):
251
252                        // SCALING test done in merlinl01 (12 cores, Xeon X5670  @ 2.93GHz, 24GB RAM)
253                        //1 threads, time:  8.8s -- x1.0
254                        //2 threads, time:  4.4s -- x2.0
255                        //3 threads, time:  2.9s -- x3.0
256                        //4 threads, time:  2.2s -- x3.9
257                        //5 threads, time:  1.8s -- x4.9
258                        //6 threads, time:  1.6s -- x5.5
259                        //7 threads, time:  1.4s -- x6.5
260                        //8 threads, time:  1.3s -- x6.9
261
262                        // SCALING test done in pc8201 (4 cores, Core2 Quad Q9550  @ 2.83GHz, 8GB RAM)
263                        //1 threads, time: 17.2s -- x1.0
264                        //2 threads, time:  9.7s -- x1.8
265                        //3 threads, time:  7.7s -- x2.2
266                        //4 threads, time:  7.9s -- x2.2
267
268                        // SCALING test done in eppic01 (16 cores, Xeon E5-2650 0  @ 2.00GHz, 128GB RAM)
269                        //1 threads, time: 10.7s -- x1.0
270                        //2 threads, time:  5.6s -- x1.9
271                        //3 threads, time:  3.6s -- x3.0
272                        //4 threads, time:  2.8s -- x3.9
273                        //5 threads, time:  2.3s -- x4.8
274                        //6 threads, time:  1.8s -- x6.0
275                        //7 threads, time:  1.6s -- x6.8
276                        //8 threads, time:  1.3s -- x8.0
277                        //9 threads, time:  1.3s -- x8.5
278                        //10 threads, time:  1.1s -- x10.0
279                        //11 threads, time:  1.0s -- x10.9
280                        //12 threads, time:  0.9s -- x11.4
281
282
283
284                        ExecutorService threadPool = Executors.newFixedThreadPool(nThreads);
285
286
287                        for (int i=0;i<atomCoords.length;i++) {
288                                threadPool.submit(new AsaCalcWorker(i,asas));
289                        }
290
291                        threadPool.shutdown();
292
293                        while (!threadPool.isTerminated());
294
295                }
296
297                return asas;
298        }
299
300        /**
301         * Returns list of 3d coordinates of points on a sphere using the
302         * Golden Section Spiral algorithm.
303         * @param nSpherePoints the number of points to be used in generating the spherical dot-density
304         * @return
305         */
306        private Point3d[] generateSpherePoints(int nSpherePoints) {
307                Point3d[] points = new Point3d[nSpherePoints];
308                double inc = Math.PI * (3.0 - Math.sqrt(5.0));
309                double offset = 2.0 / nSpherePoints;
310                for (int k=0;k<nSpherePoints;k++) {
311                        double y = k * offset - 1.0 + (offset / 2.0);
312                        double r = Math.sqrt(1.0 - y*y);
313                        double phi = k * inc;
314                        points[k] = new Point3d(Math.cos(phi)*r, y, Math.sin(phi)*r);
315                }
316                return points;
317        }
318
319        /**
320         * Returns list of indices of atoms within probe distance to atom k.
321         * @param k index of atom for which we want neighbor indices
322         */
323        private ArrayList<Integer> findNeighborIndices(int k) {
324                // looking at a typical protein case, number of neighbours are from ~10 to ~50, with an average of ~30
325                // Thus 40 seems to be a good compromise for the starting capacity
326                ArrayList<Integer> neighbor_indices = new ArrayList<Integer>(40);
327
328                double radius = radii[k] + probe + probe;
329
330                for (int i=0;i<atomCoords.length;i++) {
331                        if (i==k) continue;
332
333                        double dist = 0;
334
335                        dist = atomCoords[i].distance(atomCoords[k]);
336
337                        if (dist < radius + radii[i]) {
338                                neighbor_indices.add(i);
339                        }
340
341                }
342
343                return neighbor_indices;
344        }
345
346        private double calcSingleAsa(int i) {
347                Point3d atom_i = atomCoords[i];
348                ArrayList<Integer> neighbor_indices = findNeighborIndices(i);
349                int n_neighbor = neighbor_indices.size();
350                int j_closest_neighbor = 0;
351                double radius = probe + radii[i];
352
353                int n_accessible_point = 0;
354
355                for (Point3d point: spherePoints){
356                        boolean is_accessible = true;
357                        Point3d test_point = new Point3d(point.x*radius + atom_i.x,
358                                        point.y*radius + atom_i.y,
359                                        point.z*radius + atom_i.z);
360
361                        int[] cycled_indices = new int[n_neighbor];
362                        int arind = 0;
363                        for (int ind=j_closest_neighbor;ind<n_neighbor;ind++) {
364                                cycled_indices[arind] = ind;
365                                arind++;
366                        }
367                        for (int ind=0;ind<j_closest_neighbor;ind++){
368                                cycled_indices[arind] = ind;
369                                arind++;
370                        }
371
372                        for (int j: cycled_indices) {
373                                Point3d atom_j = atomCoords[neighbor_indices.get(j)];
374                                double r = radii[neighbor_indices.get(j)] + probe;
375                                double diff_sq = test_point.distanceSquared(atom_j);
376                                if (diff_sq < r*r) {
377                                        j_closest_neighbor = j;
378                                        is_accessible = false;
379                                        break;
380                                }
381                        }
382                        if (is_accessible) {
383                                n_accessible_point++;
384                        }
385                }
386                return cons*n_accessible_point*radius*radius;
387        }
388
389        /**
390         * Gets the radius for given amino acid and atom
391         * @param aa
392         * @param atom
393         * @return
394         */
395        private static double getRadiusForAmino(AminoAcid amino, Atom atom) {
396
397                if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius();
398                // some unusual entries (e.g. 1tes) contain Deuterium atoms in standard aminoacids
399                if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius();
400
401                String atomCode = atom.getName();
402                char aa = amino.getAminoType();
403
404                // here we use the values that Chothia gives in his paper (as NACCESS does)
405                if (atom.getElement()==Element.O) {
406                        return OXIGEN_VDW;
407                }
408                else if (atom.getElement()==Element.S) {
409                        return SULFUR_VDW;
410                }
411                else if (atom.getElement()==Element.N) {
412                        if (atomCode.equals("NZ")) return TETRAHEDRAL_NITROGEN_VDW; // tetrahedral Nitrogen
413                        return TRIGONAL_NITROGEN_VDW;                                                           // trigonal Nitrogen
414                }
415                else if (atom.getElement()==Element.C) { // it must be a carbon
416                        if (atomCode.equals("C") ||
417                                        atomCode.equals("CE1") || atomCode.equals("CE2") || atomCode.equals("CE3") ||
418                                        atomCode.equals("CH2") ||
419                                        atomCode.equals("CZ") || atomCode.equals("CZ2") || atomCode.equals("CZ3")) {
420                                return TRIGONAL_CARBON_VDW;                                                     // trigonal Carbon
421                        }
422                        else if (atomCode.equals("CA") || atomCode.equals("CB") ||
423                                        atomCode.equals("CE") ||
424                                        atomCode.equals("CG1") || atomCode.equals("CG2")) {
425                                return TETRAHEDRAL_CARBON_VDW;                                                  // tetrahedral Carbon
426                        }
427                        // the rest of the cases (CD, CD1, CD2, CG) depend on amino acid
428                        else {
429                                switch (aa) {
430                                case 'F':
431                                case 'W':
432                                case 'Y':
433                                case 'H':
434                                case 'D':
435                                case 'N':
436                                        return TRIGONAL_CARBON_VDW;
437
438                                case 'P':
439                                case 'K':
440                                case 'R':
441                                case 'M':
442                                case 'I':
443                                case 'L':
444                                        return TETRAHEDRAL_CARBON_VDW;
445
446                                case 'Q':
447                                case 'E':
448                                        if (atomCode.equals("CD")) return TRIGONAL_CARBON_VDW;
449                                        else if (atomCode.equals("CG")) return TETRAHEDRAL_CARBON_VDW;
450
451                                default:
452                                        logger.info("Unexpected carbon atom "+atomCode+" for aminoacid "+aa+", assigning its standard vdw radius");
453                                        return Element.C.getVDWRadius();
454                                }
455                        }
456
457                        // not any of the expected atoms
458                } else {
459                        // non standard aas, (e.g. MSE, LLP) will always have this problem,
460                        logger.info("Unexpected atom "+atomCode+" for aminoacid "+aa+ " ("+amino.getPDBName()+"), assigning its standard vdw radius");
461                        
462
463                        return atom.getElement().getVDWRadius();
464                }
465        }
466
467
468        /**
469         * Gets the radius for given nucleotide atom
470         * @param atom
471         * @return
472         */
473        private static double getRadiusForNucl(NucleotideImpl nuc, Atom atom) {
474
475                if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius();
476                if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius();
477
478                if (atom.getElement()==Element.C) return NUC_CARBON_VDW;
479
480                if (atom.getElement()==Element.N) return NUC_NITROGEN_VDW;
481
482                if (atom.getElement()==Element.P) return PHOSPHOROUS_VDW;
483
484                if (atom.getElement()==Element.O) return OXIGEN_VDW;
485
486                logger.info("Unexpected atom "+atom.getName()+" for nucleotide "+nuc.getPDBName()+", assigning its standard vdw radius");
487                return atom.getElement().getVDWRadius();
488        }
489
490
491        /**
492         * Gets the van der Waals radius of the given atom following the values defined by
493         * Chothia (1976) J.Mol.Biol.105,1-14
494         * NOTE: the vdw values defined by the paper assume no Hydrogens and thus "inflates"
495         * slightly the heavy atoms to account for Hydrogens. Thus this method cannot be used
496         * in a structure that contains Hydrogens!
497         *
498         * If atom is neither part of a nucleotide nor of a standard aminoacid,
499         * the default vdw radius for the element is returned. If atom is of
500         * unknown type (element) the vdw radius of {@link #Element().N} is returned
501         *
502         * @param atom
503         * @return
504         */
505        public static double getRadius(Atom atom) {
506
507                if (atom.getElement()==null) {
508                        logger.warn("Unrecognised atom "+atom.getName()+" with serial "+atom.getPDBserial()+
509                                        ", assigning the default vdw radius (Nitrogen vdw radius).");
510                        return Element.N.getVDWRadius();
511                }
512
513                Group res = atom.getGroup();
514
515                if (res==null) {
516                        logger.warn("Unknown parent residue for atom "+atom.getName()+" with serial "+
517                                        atom.getPDBserial()+", assigning its default vdw radius");
518                        return atom.getElement().getVDWRadius();
519                }
520
521                GroupType type = res.getType();
522
523                if (type == GroupType.AMINOACID) return getRadiusForAmino(((AminoAcid)res), atom);
524
525                if (type == GroupType.NUCLEOTIDE) return getRadiusForNucl((NucleotideImpl)res,atom);
526
527
528                return atom.getElement().getVDWRadius();
529        }
530
531}