001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.asa;
022
023import org.biojava.nbio.structure.*;
024import org.slf4j.Logger;
025import org.slf4j.LoggerFactory;
026
027import javax.vecmath.Point3d;
028import java.util.ArrayList;
029import java.util.TreeMap;
030import java.util.concurrent.ExecutorService;
031import java.util.concurrent.Executors;
032
033
034
035
036/**
037 * Class to calculate Accessible Surface Areas based on
038 * the rolling ball algorithm by Shrake and Rupley.
039 *
040 * The code is adapted from a python implementation at http://boscoh.com/protein/asapy
041 * (now source is available at https://github.com/boscoh/asa).
042 * Thanks to Bosco K. Ho for a great piece of code and for his fantastic blog.
043 *
044 * See
045 * Shrake, A., and J. A. Rupley. "Environment and Exposure to Solvent of Protein Atoms.
046 * Lysozyme and Insulin." JMB (1973) 79:351-371.
047 * Lee, B., and Richards, F.M. "The interpretation of Protein Structures: Estimation of
048 * Static Accessibility" JMB (1971) 55:379-400
049 * @author duarte_j
050 *
051 */
052public class AsaCalculator {
053
054        private static final Logger logger = LoggerFactory.getLogger(AsaCalculator.class);
055
056        // Bosco uses as default 960, Shrake and Rupley seem to use in their paper 92 (not sure if this is actually the same parameter)
057        public static final int DEFAULT_N_SPHERE_POINTS = 960;
058        public static final double DEFAULT_PROBE_SIZE = 1.4;
059        public static final int DEFAULT_NTHREADS = 1;
060
061
062
063        // Chothia's amino acid atoms vdw radii
064        public static final double TRIGONAL_CARBON_VDW = 1.76;
065        public static final double TETRAHEDRAL_CARBON_VDW = 1.87;
066        public static final double TRIGONAL_NITROGEN_VDW = 1.65;
067        public static final double TETRAHEDRAL_NITROGEN_VDW = 1.50;
068        public static final double SULFUR_VDW = 1.85;
069        public static final double OXIGEN_VDW = 1.40;
070
071        // Chothia's nucleotide atoms vdw radii
072        public static final double NUC_CARBON_VDW = 1.80;
073        public static final double NUC_NITROGEN_VDW = 1.60;
074        public static final double PHOSPHOROUS_VDW = 1.90;
075
076
077
078
079
080        private class AsaCalcWorker implements Runnable {
081
082                private int i;
083                private double[] asas;
084
085                public AsaCalcWorker(int i, double[] asas) {
086                        this.i = i;
087                        this.asas = asas;
088                }
089
090                @Override
091                public void run() {
092                        asas[i] = calcSingleAsa(i);
093                }
094        }
095
096
097        private Atom[] atoms;
098        private double[] radii;
099        private double probe;
100        private int nThreads;
101        private Point3d[] spherePoints;
102        private double cons;
103
104        /**
105         * Constructs a new AsaCalculator. Subsequently call {@link #calculateAsas()}
106         * or {@link #getGroupAsas()} to calculate the ASAs
107         * Only non-Hydrogen atoms are considered in the calculation.
108         * @param structure
109         * @param probe
110         * @param nSpherePoints
111         * @param nThreads
112         * @param hetAtoms if true HET residues are considered, if false they aren't, equivalent to
113         * NACCESS' -h option
114         * @see StructureTools.getAllNonHAtomArray
115         */
116        public AsaCalculator(Structure structure, double probe, int nSpherePoints, int nThreads, boolean hetAtoms) {
117                this.atoms = StructureTools.getAllNonHAtomArray(structure, hetAtoms);
118                this.probe = probe;
119                this.nThreads = nThreads;
120
121                // initialising the radii by looking them up through AtomRadii
122                radii = new double[atoms.length];
123                for (int i=0;i<atoms.length;i++) {
124                        radii[i] = getRadius(atoms[i]);
125                }
126
127                // initialising the sphere points to sample
128                spherePoints = generateSpherePoints(nSpherePoints);
129
130                cons = 4.0 * Math.PI / nSpherePoints;
131        }
132
133        /**
134         * Constructs a new AsaCalculator. Subsequently call {@link #calculateAsas()}
135         * or {@link #getGroupAsas()} to calculate the ASAs.
136         * @param atoms an array of atoms not containing Hydrogen atoms
137         * @param probe the probe size
138         * @param nSpherePoints the number of points to be used in generating the spherical
139         * dot-density, the more points the more accurate (and slower) calculation
140         * @param nThreads the number of parallel threads to use for the calculation
141         * @throws IllegalArgumentException if any atom in the array is a Hydrogen atom
142         */
143        public AsaCalculator(Atom[] atoms, double probe, int nSpherePoints, int nThreads) {
144                this.atoms = atoms;
145                this.probe = probe;
146                this.nThreads = nThreads;
147
148                for (Atom atom:atoms) {
149                        if (atom.getElement()==Element.H)
150                                throw new IllegalArgumentException("Can't calculate ASA for an array that contains Hydrogen atoms ");
151                }
152
153                // initialising the radii by looking them up through AtomRadii
154                radii = new double[atoms.length];
155                for (int i=0;i<atoms.length;i++) {
156                        radii[i] = getRadius(atoms[i]);
157                }
158
159                // initialising the sphere points to sample
160                spherePoints = generateSpherePoints(nSpherePoints);
161
162                cons = 4.0 * Math.PI / nSpherePoints;
163        }
164
165        /**
166         * Calculates ASA for all atoms and return them as a GroupAsa
167         * array (one element per residue in structure) containing ASAs per residue
168         * and per atom.
169         * The sorting of Groups in returned array is as specified by {@link org.biojava.nbio.structure.ResidueNumber}
170         * @return
171         */
172        public GroupAsa[] getGroupAsas() {
173
174                TreeMap<ResidueNumber, GroupAsa> asas = new TreeMap<ResidueNumber, GroupAsa>();
175
176                double[] asasPerAtom = calculateAsas();
177
178                for (int i=0;i<atoms.length;i++) {
179                        Group g = atoms[i].getGroup();
180                        if (!asas.containsKey(g.getResidueNumber())) {
181                                GroupAsa groupAsa = new GroupAsa(g);
182                                groupAsa.addAtomAsaU(asasPerAtom[i]);
183                                asas.put(g.getResidueNumber(), groupAsa);
184                        } else {
185                                GroupAsa groupAsa = asas.get(g.getResidueNumber());
186                                groupAsa.addAtomAsaU(asasPerAtom[i]);
187                        }
188                }
189
190                return asas.values().toArray(new GroupAsa[asas.size()]);
191        }
192
193        /**
194         * Calculates the Accessible Surface Areas for the atoms given in constructor and with parameters given.
195         * Beware that the parallel implementation is quite memory hungry. It scales well as long as there is
196         * enough memory available.
197         * @return an array with asa values corresponding to each atom of the input array
198         */
199        public double[] calculateAsas() {
200
201                double[] asas = new double[atoms.length];
202
203                if (nThreads<=1) { // (i.e. it will also be 1 thread if 0 or negative number specified)
204                        for (int i=0;i<atoms.length;i++) {
205                                asas[i] = calcSingleAsa(i);
206                        }
207
208                } else {
209                        // NOTE the multithreaded calculation does not scale up well in some systems,
210                        // why? I guess some memory/garbage collect problem? I tried increasing Xmx in pc8201 but didn't help
211
212                        // Following scaling tests are for 3hbx, calculating ASA of full asym unit (6 chains):
213
214                        // SCALING test done in merlinl01 (12 cores, Xeon X5670  @ 2.93GHz, 24GB RAM)
215                        //1 threads, time:  8.8s -- x1.0
216                        //2 threads, time:  4.4s -- x2.0
217                        //3 threads, time:  2.9s -- x3.0
218                        //4 threads, time:  2.2s -- x3.9
219                        //5 threads, time:  1.8s -- x4.9
220                        //6 threads, time:  1.6s -- x5.5
221                        //7 threads, time:  1.4s -- x6.5
222                        //8 threads, time:  1.3s -- x6.9
223
224                        // SCALING test done in pc8201 (4 cores, Core2 Quad Q9550  @ 2.83GHz, 8GB RAM)
225                        //1 threads, time: 17.2s -- x1.0
226                        //2 threads, time:  9.7s -- x1.8
227                        //3 threads, time:  7.7s -- x2.2
228                        //4 threads, time:  7.9s -- x2.2
229
230                        // SCALING test done in eppic01 (16 cores, Xeon E5-2650 0  @ 2.00GHz, 128GB RAM)
231                        //1 threads, time: 10.7s -- x1.0
232                        //2 threads, time:  5.6s -- x1.9
233                        //3 threads, time:  3.6s -- x3.0
234                        //4 threads, time:  2.8s -- x3.9
235                        //5 threads, time:  2.3s -- x4.8
236                        //6 threads, time:  1.8s -- x6.0
237                        //7 threads, time:  1.6s -- x6.8
238                        //8 threads, time:  1.3s -- x8.0
239                        //9 threads, time:  1.3s -- x8.5
240                        //10 threads, time:  1.1s -- x10.0
241                        //11 threads, time:  1.0s -- x10.9
242                        //12 threads, time:  0.9s -- x11.4
243
244
245
246                        ExecutorService threadPool = Executors.newFixedThreadPool(nThreads);
247
248
249                        for (int i=0;i<atoms.length;i++) {
250                                threadPool.submit(new AsaCalcWorker(i,asas));
251                        }
252
253                        threadPool.shutdown();
254
255                        while (!threadPool.isTerminated());
256
257                }
258
259                return asas;
260        }
261
262        /**
263         * Returns list of 3d coordinates of points on a sphere using the
264         * Golden Section Spiral algorithm.
265         * @param nSpherePoints the number of points to be used in generating the spherical dot-density
266         * @return
267         */
268        private Point3d[] generateSpherePoints(int nSpherePoints) {
269                Point3d[] points = new Point3d[nSpherePoints];
270                double inc = Math.PI * (3.0 - Math.sqrt(5.0));
271                double offset = 2.0 / nSpherePoints;
272                for (int k=0;k<nSpherePoints;k++) {
273                        double y = k * offset - 1.0 + (offset / 2.0);
274                        double r = Math.sqrt(1.0 - y*y);
275                        double phi = k * inc;
276                        points[k] = new Point3d(Math.cos(phi)*r, y, Math.sin(phi)*r);
277                }
278                return points;
279        }
280
281        /**
282         * Returns list of indices of atoms within probe distance to atom k.
283         * @param k index of atom for which we want neighbor indices
284         */
285        private ArrayList<Integer> findNeighborIndices(int k) {
286                // looking at a typical protein case, number of neighbours are from ~10 to ~50, with an average of ~30
287                // Thus 40 seems to be a good compromise for the starting capacity
288                ArrayList<Integer> neighbor_indices = new ArrayList<Integer>(40);
289
290                double radius = radii[k] + probe + probe;
291
292                for (int i=0;i<atoms.length;i++) {
293                        if (i==k) continue;
294
295                        double dist = 0;
296
297                        dist = Calc.getDistance(atoms[i], atoms[k]);
298
299                        if (dist < radius + radii[i]) {
300                                neighbor_indices.add(i);
301                        }
302
303                }
304
305                return neighbor_indices;
306        }
307
308        private double calcSingleAsa(int i) {
309                Atom atom_i = atoms[i];
310                ArrayList<Integer> neighbor_indices = findNeighborIndices(i);
311                int n_neighbor = neighbor_indices.size();
312                int j_closest_neighbor = 0;
313                double radius = probe + radii[i];
314
315                int n_accessible_point = 0;
316
317                for (Point3d point: spherePoints){
318                        boolean is_accessible = true;
319                        Point3d test_point = new Point3d(point.x*radius + atom_i.getX(),
320                                        point.y*radius + atom_i.getY(),
321                                        point.z*radius + atom_i.getZ());
322
323                        int[] cycled_indices = new int[n_neighbor];
324                        int arind = 0;
325                        for (int ind=j_closest_neighbor;ind<n_neighbor;ind++) {
326                                cycled_indices[arind] = ind;
327                                arind++;
328                        }
329                        for (int ind=0;ind<j_closest_neighbor;ind++){
330                                cycled_indices[arind] = ind;
331                                arind++;
332                        }
333
334                        for (int j: cycled_indices) {
335                                Atom atom_j = atoms[neighbor_indices.get(j)];
336                                double r = radii[neighbor_indices.get(j)] + probe;
337                                double diff_sq = test_point.distanceSquared(new Point3d(atom_j.getCoords()));
338                                if (diff_sq < r*r) {
339                                        j_closest_neighbor = j;
340                                        is_accessible = false;
341                                        break;
342                                }
343                        }
344                        if (is_accessible) {
345                                n_accessible_point++;
346                        }
347                }
348                return cons*n_accessible_point*radius*radius;
349        }
350
351        /**
352         * Gets the radius for given amino acid and atom
353         * @param aa
354         * @param atom
355         * @return
356         */
357        private static double getRadiusForAmino(AminoAcid amino, Atom atom) {
358
359                if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius();
360                // some unusual entries (e.g. 1tes) contain Deuterium atoms in standard aminoacids
361                if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius();
362
363                String atomCode = atom.getName();
364                char aa = amino.getAminoType();
365
366                // here we use the values that Chothia gives in his paper (as NACCESS does)
367                if (atom.getElement()==Element.O) {
368                        return OXIGEN_VDW;
369                }
370                else if (atom.getElement()==Element.S) {
371                        return SULFUR_VDW;
372                }
373                else if (atom.getElement()==Element.N) {
374                        if (atomCode.equals("NZ")) return TETRAHEDRAL_NITROGEN_VDW; // tetrahedral Nitrogen
375                        return TRIGONAL_NITROGEN_VDW;                                                           // trigonal Nitrogen
376                }
377                else if (atom.getElement()==Element.C) { // it must be a carbon
378                        if (atomCode.equals("C") ||
379                                        atomCode.equals("CE1") || atomCode.equals("CE2") || atomCode.equals("CE3") ||
380                                        atomCode.equals("CH2") ||
381                                        atomCode.equals("CZ") || atomCode.equals("CZ2") || atomCode.equals("CZ3")) {
382                                return TRIGONAL_CARBON_VDW;                                                     // trigonal Carbon
383                        }
384                        else if (atomCode.equals("CA") || atomCode.equals("CB") ||
385                                        atomCode.equals("CE") ||
386                                        atomCode.equals("CG1") || atomCode.equals("CG2")) {
387                                return TETRAHEDRAL_CARBON_VDW;                                                  // tetrahedral Carbon
388                        }
389                        // the rest of the cases (CD, CD1, CD2, CG) depend on amino acid
390                        else {
391                                switch (aa) {
392                                case 'F':
393                                case 'W':
394                                case 'Y':
395                                case 'H':
396                                case 'D':
397                                case 'N':
398                                        return TRIGONAL_CARBON_VDW;
399
400                                case 'P':
401                                case 'K':
402                                case 'R':
403                                case 'M':
404                                case 'I':
405                                case 'L':
406                                        return TETRAHEDRAL_CARBON_VDW;
407
408                                case 'Q':
409                                case 'E':
410                                        if (atomCode.equals("CD")) return TRIGONAL_CARBON_VDW;
411                                        else if (atomCode.equals("CG")) return TETRAHEDRAL_CARBON_VDW;
412
413                                default:
414                                        logger.info("Unexpected carbon atom "+atomCode+" for aminoacid "+aa+", assigning its standard vdw radius");
415                                        return Element.C.getVDWRadius();
416                                }
417                        }
418
419                        // not any of the expected atoms
420                } else {
421                        // non standard aas, (e.g. MSE, LLP) will always have this problem,
422                        logger.info("Unexpected atom "+atomCode+" for aminoacid "+aa+ " ("+amino.getPDBName()+"), assigning its standard vdw radius");
423                        
424
425                        return atom.getElement().getVDWRadius();
426                }
427        }
428
429
430        /**
431         * Gets the radius for given nucleotide atom
432         * @param atom
433         * @return
434         */
435        private static double getRadiusForNucl(NucleotideImpl nuc, Atom atom) {
436
437                if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius();
438                if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius();
439
440                if (atom.getElement()==Element.C) return NUC_CARBON_VDW;
441
442                if (atom.getElement()==Element.N) return NUC_NITROGEN_VDW;
443
444                if (atom.getElement()==Element.P) return PHOSPHOROUS_VDW;
445
446                if (atom.getElement()==Element.O) return OXIGEN_VDW;
447
448                logger.info("Unexpected atom "+atom.getName()+" for nucleotide "+nuc.getPDBName()+", assigning its standard vdw radius");
449                return atom.getElement().getVDWRadius();
450        }
451
452
453        /**
454         * Gets the van der Waals radius of the given atom following the values defined by
455         * Chothia (1976) J.Mol.Biol.105,1-14
456         * NOTE: the vdw values defined by the paper assume no Hydrogens and thus "inflates"
457         * slightly the heavy atoms to account for Hydrogens. Thus this method cannot be used
458         * in a structure that contains Hydrogens!
459         *
460         * If atom is neither part of a nucleotide nor of a standard aminoacid,
461         * the default vdw radius for the element is returned. If atom is of
462         * unknown type (element) the vdw radius of {@link #Element().N} is returned
463         *
464         * @param atom
465         * @return
466         */
467        public static double getRadius(Atom atom) {
468
469                if (atom.getElement()==null) {
470                        logger.warn("Unrecognised atom "+atom.getName()+" with serial "+atom.getPDBserial()+
471                                        ", assigning the default vdw radius (Nitrogen vdw radius).");
472                        return Element.N.getVDWRadius();
473                }
474
475                Group res = atom.getGroup();
476
477                if (res==null) {
478                        logger.warn("Unknown parent residue for atom "+atom.getName()+" with serial "+
479                                        atom.getPDBserial()+", assigning its default vdw radius");
480                        return atom.getElement().getVDWRadius();
481                }
482
483                GroupType type = res.getType();
484
485                if (type == GroupType.AMINOACID) return getRadiusForAmino(((AminoAcid)res), atom);
486
487                if (type == GroupType.NUCLEOTIDE) return getRadiusForNucl((NucleotideImpl)res,atom);
488
489
490                return atom.getElement().getVDWRadius();
491        }
492
493}