001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.asa; 022 023import org.biojava.nbio.structure.*; 024import org.slf4j.Logger; 025import org.slf4j.LoggerFactory; 026 027import javax.vecmath.Point3d; 028import java.util.ArrayList; 029import java.util.TreeMap; 030import java.util.concurrent.ExecutorService; 031import java.util.concurrent.Executors; 032 033 034 035 036/** 037 * Class to calculate Accessible Surface Areas based on 038 * the rolling ball algorithm by Shrake and Rupley. 039 * 040 * The code is adapted from a python implementation at http://boscoh.com/protein/asapy 041 * (now source is available at https://github.com/boscoh/asa). 042 * Thanks to Bosco K. Ho for a great piece of code and for his fantastic blog. 043 * 044 * See 045 * Shrake, A., and J. A. Rupley. "Environment and Exposure to Solvent of Protein Atoms. 046 * Lysozyme and Insulin." JMB (1973) 79:351-371. 047 * Lee, B., and Richards, F.M. "The interpretation of Protein Structures: Estimation of 048 * Static Accessibility" JMB (1971) 55:379-400 049 * @author duarte_j 050 * 051 */ 052public class AsaCalculator { 053 054 private static final Logger logger = LoggerFactory.getLogger(AsaCalculator.class); 055 056 // Bosco uses as default 960, Shrake and Rupley seem to use in their paper 92 (not sure if this is actually the same parameter) 057 public static final int DEFAULT_N_SPHERE_POINTS = 960; 058 public static final double DEFAULT_PROBE_SIZE = 1.4; 059 public static final int DEFAULT_NTHREADS = 1; 060 061 062 063 // Chothia's amino acid atoms vdw radii 064 public static final double TRIGONAL_CARBON_VDW = 1.76; 065 public static final double TETRAHEDRAL_CARBON_VDW = 1.87; 066 public static final double TRIGONAL_NITROGEN_VDW = 1.65; 067 public static final double TETRAHEDRAL_NITROGEN_VDW = 1.50; 068 public static final double SULFUR_VDW = 1.85; 069 public static final double OXIGEN_VDW = 1.40; 070 071 // Chothia's nucleotide atoms vdw radii 072 public static final double NUC_CARBON_VDW = 1.80; 073 public static final double NUC_NITROGEN_VDW = 1.60; 074 public static final double PHOSPHOROUS_VDW = 1.90; 075 076 077 078 079 080 private class AsaCalcWorker implements Runnable { 081 082 private int i; 083 private double[] asas; 084 085 public AsaCalcWorker(int i, double[] asas) { 086 this.i = i; 087 this.asas = asas; 088 } 089 090 @Override 091 public void run() { 092 asas[i] = calcSingleAsa(i); 093 } 094 } 095 096 097 private Point3d[] atomCoords; 098 private Atom[] atoms; 099 private double[] radii; 100 private double probe; 101 private int nThreads; 102 private Point3d[] spherePoints; 103 private double cons; 104 105 /** 106 * Constructs a new AsaCalculator. Subsequently call {@link #calculateAsas()} 107 * or {@link #getGroupAsas()} to calculate the ASAs 108 * Only non-Hydrogen atoms are considered in the calculation. 109 * @param structure 110 * @param probe 111 * @param nSpherePoints 112 * @param nThreads 113 * @param hetAtoms if true HET residues are considered, if false they aren't, equivalent to 114 * NACCESS' -h option 115 * @see StructureTools.getAllNonHAtomArray 116 */ 117 public AsaCalculator(Structure structure, double probe, int nSpherePoints, int nThreads, boolean hetAtoms) { 118 this.atoms = StructureTools.getAllNonHAtomArray(structure, hetAtoms); 119 this.atomCoords = Calc.atomsToPoints(atoms); 120 this.probe = probe; 121 this.nThreads = nThreads; 122 123 // initialising the radii by looking them up through AtomRadii 124 radii = new double[atomCoords.length]; 125 for (int i=0;i<atomCoords.length;i++) { 126 radii[i] = getRadius(atoms[i]); 127 } 128 129 // initialising the sphere points to sample 130 spherePoints = generateSpherePoints(nSpherePoints); 131 132 cons = 4.0 * Math.PI / nSpherePoints; 133 } 134 135 /** 136 * Constructs a new AsaCalculator. Subsequently call {@link #calculateAsas()} 137 * or {@link #getGroupAsas()} to calculate the ASAs. 138 * @param atoms an array of atoms not containing Hydrogen atoms 139 * @param probe the probe size 140 * @param nSpherePoints the number of points to be used in generating the spherical 141 * dot-density, the more points the more accurate (and slower) calculation 142 * @param nThreads the number of parallel threads to use for the calculation 143 * @throws IllegalArgumentException if any atom in the array is a Hydrogen atom 144 */ 145 public AsaCalculator(Atom[] atoms, double probe, int nSpherePoints, int nThreads) { 146 this.atoms = atoms; 147 this.atomCoords = Calc.atomsToPoints(atoms); 148 this.probe = probe; 149 this.nThreads = nThreads; 150 151 for (Atom atom:atoms) { 152 if (atom.getElement()==Element.H) 153 throw new IllegalArgumentException("Can't calculate ASA for an array that contains Hydrogen atoms "); 154 } 155 156 // initialising the radii by looking them up through AtomRadii 157 radii = new double[atoms.length]; 158 for (int i=0;i<atoms.length;i++) { 159 radii[i] = getRadius(atoms[i]); 160 } 161 162 // initialising the sphere points to sample 163 spherePoints = generateSpherePoints(nSpherePoints); 164 165 cons = 4.0 * Math.PI / nSpherePoints; 166 } 167 168 /** 169 * Constructs a new AsaCalculator. Subsequently call {@link #calcSingleAsa(int)} 170 * to calculate the atom ASAs. The given radius parameter will be taken as the radius for 171 * all points given. No ASA calculation per group will be possible with this constructor, so 172 * usage of {@link #getGroupAsas()} will result in a NullPointerException. 173 * @param atomCoords 174 * the coordinates representing the center of atoms 175 * @param probe 176 * the probe size 177 * @param nSpherePoints 178 * the number of points to be used in generating the spherical 179 * dot-density, the more points the more accurate (and slower) calculation 180 * @param nThreads 181 * the number of parallel threads to use for the calculation 182 * @param radius 183 * the radius that will be assign to all given coordinates 184 */ 185 public AsaCalculator(Point3d[] atomCoords, double probe, int nSpherePoints, int nThreads, double radius) { 186 this.atoms = null; 187 this.atomCoords = atomCoords; 188 this.probe = probe; 189 this.nThreads = nThreads; 190 191 // initialising the radii to the given radius for all atoms 192 radii = new double[atomCoords.length]; 193 for (int i=0;i<atomCoords.length;i++) { 194 radii[i] = radius; 195 } 196 197 // initialising the sphere points to sample 198 spherePoints = generateSpherePoints(nSpherePoints); 199 200 cons = 4.0 * Math.PI / nSpherePoints; 201 } 202 203 /** 204 * Calculates ASA for all atoms and return them as a GroupAsa 205 * array (one element per residue in structure) containing ASAs per residue 206 * and per atom. 207 * The sorting of Groups in returned array is as specified by {@link org.biojava.nbio.structure.ResidueNumber} 208 * @return 209 */ 210 public GroupAsa[] getGroupAsas() { 211 212 TreeMap<ResidueNumber, GroupAsa> asas = new TreeMap<ResidueNumber, GroupAsa>(); 213 214 double[] asasPerAtom = calculateAsas(); 215 216 for (int i=0;i<atomCoords.length;i++) { 217 Group g = atoms[i].getGroup(); 218 if (!asas.containsKey(g.getResidueNumber())) { 219 GroupAsa groupAsa = new GroupAsa(g); 220 groupAsa.addAtomAsaU(asasPerAtom[i]); 221 asas.put(g.getResidueNumber(), groupAsa); 222 } else { 223 GroupAsa groupAsa = asas.get(g.getResidueNumber()); 224 groupAsa.addAtomAsaU(asasPerAtom[i]); 225 } 226 } 227 228 return asas.values().toArray(new GroupAsa[asas.size()]); 229 } 230 231 /** 232 * Calculates the Accessible Surface Areas for the atoms given in constructor and with parameters given. 233 * Beware that the parallel implementation is quite memory hungry. It scales well as long as there is 234 * enough memory available. 235 * @return an array with asa values corresponding to each atom of the input array 236 */ 237 public double[] calculateAsas() { 238 239 double[] asas = new double[atomCoords.length]; 240 241 if (nThreads<=1) { // (i.e. it will also be 1 thread if 0 or negative number specified) 242 for (int i=0;i<atomCoords.length;i++) { 243 asas[i] = calcSingleAsa(i); 244 } 245 246 } else { 247 // NOTE the multithreaded calculation does not scale up well in some systems, 248 // why? I guess some memory/garbage collect problem? I tried increasing Xmx in pc8201 but didn't help 249 250 // Following scaling tests are for 3hbx, calculating ASA of full asym unit (6 chains): 251 252 // SCALING test done in merlinl01 (12 cores, Xeon X5670 @ 2.93GHz, 24GB RAM) 253 //1 threads, time: 8.8s -- x1.0 254 //2 threads, time: 4.4s -- x2.0 255 //3 threads, time: 2.9s -- x3.0 256 //4 threads, time: 2.2s -- x3.9 257 //5 threads, time: 1.8s -- x4.9 258 //6 threads, time: 1.6s -- x5.5 259 //7 threads, time: 1.4s -- x6.5 260 //8 threads, time: 1.3s -- x6.9 261 262 // SCALING test done in pc8201 (4 cores, Core2 Quad Q9550 @ 2.83GHz, 8GB RAM) 263 //1 threads, time: 17.2s -- x1.0 264 //2 threads, time: 9.7s -- x1.8 265 //3 threads, time: 7.7s -- x2.2 266 //4 threads, time: 7.9s -- x2.2 267 268 // SCALING test done in eppic01 (16 cores, Xeon E5-2650 0 @ 2.00GHz, 128GB RAM) 269 //1 threads, time: 10.7s -- x1.0 270 //2 threads, time: 5.6s -- x1.9 271 //3 threads, time: 3.6s -- x3.0 272 //4 threads, time: 2.8s -- x3.9 273 //5 threads, time: 2.3s -- x4.8 274 //6 threads, time: 1.8s -- x6.0 275 //7 threads, time: 1.6s -- x6.8 276 //8 threads, time: 1.3s -- x8.0 277 //9 threads, time: 1.3s -- x8.5 278 //10 threads, time: 1.1s -- x10.0 279 //11 threads, time: 1.0s -- x10.9 280 //12 threads, time: 0.9s -- x11.4 281 282 283 284 ExecutorService threadPool = Executors.newFixedThreadPool(nThreads); 285 286 287 for (int i=0;i<atomCoords.length;i++) { 288 threadPool.submit(new AsaCalcWorker(i,asas)); 289 } 290 291 threadPool.shutdown(); 292 293 while (!threadPool.isTerminated()); 294 295 } 296 297 return asas; 298 } 299 300 /** 301 * Returns list of 3d coordinates of points on a sphere using the 302 * Golden Section Spiral algorithm. 303 * @param nSpherePoints the number of points to be used in generating the spherical dot-density 304 * @return 305 */ 306 private Point3d[] generateSpherePoints(int nSpherePoints) { 307 Point3d[] points = new Point3d[nSpherePoints]; 308 double inc = Math.PI * (3.0 - Math.sqrt(5.0)); 309 double offset = 2.0 / nSpherePoints; 310 for (int k=0;k<nSpherePoints;k++) { 311 double y = k * offset - 1.0 + (offset / 2.0); 312 double r = Math.sqrt(1.0 - y*y); 313 double phi = k * inc; 314 points[k] = new Point3d(Math.cos(phi)*r, y, Math.sin(phi)*r); 315 } 316 return points; 317 } 318 319 /** 320 * Returns list of indices of atoms within probe distance to atom k. 321 * @param k index of atom for which we want neighbor indices 322 */ 323 private ArrayList<Integer> findNeighborIndices(int k) { 324 // looking at a typical protein case, number of neighbours are from ~10 to ~50, with an average of ~30 325 // Thus 40 seems to be a good compromise for the starting capacity 326 ArrayList<Integer> neighbor_indices = new ArrayList<Integer>(40); 327 328 double radius = radii[k] + probe + probe; 329 330 for (int i=0;i<atomCoords.length;i++) { 331 if (i==k) continue; 332 333 double dist = 0; 334 335 dist = atomCoords[i].distance(atomCoords[k]); 336 337 if (dist < radius + radii[i]) { 338 neighbor_indices.add(i); 339 } 340 341 } 342 343 return neighbor_indices; 344 } 345 346 private double calcSingleAsa(int i) { 347 Point3d atom_i = atomCoords[i]; 348 ArrayList<Integer> neighbor_indices = findNeighborIndices(i); 349 int n_neighbor = neighbor_indices.size(); 350 int j_closest_neighbor = 0; 351 double radius = probe + radii[i]; 352 353 int n_accessible_point = 0; 354 355 for (Point3d point: spherePoints){ 356 boolean is_accessible = true; 357 Point3d test_point = new Point3d(point.x*radius + atom_i.x, 358 point.y*radius + atom_i.y, 359 point.z*radius + atom_i.z); 360 361 int[] cycled_indices = new int[n_neighbor]; 362 int arind = 0; 363 for (int ind=j_closest_neighbor;ind<n_neighbor;ind++) { 364 cycled_indices[arind] = ind; 365 arind++; 366 } 367 for (int ind=0;ind<j_closest_neighbor;ind++){ 368 cycled_indices[arind] = ind; 369 arind++; 370 } 371 372 for (int j: cycled_indices) { 373 Point3d atom_j = atomCoords[neighbor_indices.get(j)]; 374 double r = radii[neighbor_indices.get(j)] + probe; 375 double diff_sq = test_point.distanceSquared(atom_j); 376 if (diff_sq < r*r) { 377 j_closest_neighbor = j; 378 is_accessible = false; 379 break; 380 } 381 } 382 if (is_accessible) { 383 n_accessible_point++; 384 } 385 } 386 return cons*n_accessible_point*radius*radius; 387 } 388 389 /** 390 * Gets the radius for given amino acid and atom 391 * @param aa 392 * @param atom 393 * @return 394 */ 395 private static double getRadiusForAmino(AminoAcid amino, Atom atom) { 396 397 if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius(); 398 // some unusual entries (e.g. 1tes) contain Deuterium atoms in standard aminoacids 399 if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius(); 400 401 String atomCode = atom.getName(); 402 char aa = amino.getAminoType(); 403 404 // here we use the values that Chothia gives in his paper (as NACCESS does) 405 if (atom.getElement()==Element.O) { 406 return OXIGEN_VDW; 407 } 408 else if (atom.getElement()==Element.S) { 409 return SULFUR_VDW; 410 } 411 else if (atom.getElement()==Element.N) { 412 if (atomCode.equals("NZ")) return TETRAHEDRAL_NITROGEN_VDW; // tetrahedral Nitrogen 413 return TRIGONAL_NITROGEN_VDW; // trigonal Nitrogen 414 } 415 else if (atom.getElement()==Element.C) { // it must be a carbon 416 if (atomCode.equals("C") || 417 atomCode.equals("CE1") || atomCode.equals("CE2") || atomCode.equals("CE3") || 418 atomCode.equals("CH2") || 419 atomCode.equals("CZ") || atomCode.equals("CZ2") || atomCode.equals("CZ3")) { 420 return TRIGONAL_CARBON_VDW; // trigonal Carbon 421 } 422 else if (atomCode.equals("CA") || atomCode.equals("CB") || 423 atomCode.equals("CE") || 424 atomCode.equals("CG1") || atomCode.equals("CG2")) { 425 return TETRAHEDRAL_CARBON_VDW; // tetrahedral Carbon 426 } 427 // the rest of the cases (CD, CD1, CD2, CG) depend on amino acid 428 else { 429 switch (aa) { 430 case 'F': 431 case 'W': 432 case 'Y': 433 case 'H': 434 case 'D': 435 case 'N': 436 return TRIGONAL_CARBON_VDW; 437 438 case 'P': 439 case 'K': 440 case 'R': 441 case 'M': 442 case 'I': 443 case 'L': 444 return TETRAHEDRAL_CARBON_VDW; 445 446 case 'Q': 447 case 'E': 448 if (atomCode.equals("CD")) return TRIGONAL_CARBON_VDW; 449 else if (atomCode.equals("CG")) return TETRAHEDRAL_CARBON_VDW; 450 451 default: 452 logger.info("Unexpected carbon atom "+atomCode+" for aminoacid "+aa+", assigning its standard vdw radius"); 453 return Element.C.getVDWRadius(); 454 } 455 } 456 457 // not any of the expected atoms 458 } else { 459 // non standard aas, (e.g. MSE, LLP) will always have this problem, 460 logger.info("Unexpected atom "+atomCode+" for aminoacid "+aa+ " ("+amino.getPDBName()+"), assigning its standard vdw radius"); 461 462 463 return atom.getElement().getVDWRadius(); 464 } 465 } 466 467 468 /** 469 * Gets the radius for given nucleotide atom 470 * @param atom 471 * @return 472 */ 473 private static double getRadiusForNucl(NucleotideImpl nuc, Atom atom) { 474 475 if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius(); 476 if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius(); 477 478 if (atom.getElement()==Element.C) return NUC_CARBON_VDW; 479 480 if (atom.getElement()==Element.N) return NUC_NITROGEN_VDW; 481 482 if (atom.getElement()==Element.P) return PHOSPHOROUS_VDW; 483 484 if (atom.getElement()==Element.O) return OXIGEN_VDW; 485 486 logger.info("Unexpected atom "+atom.getName()+" for nucleotide "+nuc.getPDBName()+", assigning its standard vdw radius"); 487 return atom.getElement().getVDWRadius(); 488 } 489 490 491 /** 492 * Gets the van der Waals radius of the given atom following the values defined by 493 * Chothia (1976) J.Mol.Biol.105,1-14 494 * NOTE: the vdw values defined by the paper assume no Hydrogens and thus "inflates" 495 * slightly the heavy atoms to account for Hydrogens. Thus this method cannot be used 496 * in a structure that contains Hydrogens! 497 * 498 * If atom is neither part of a nucleotide nor of a standard aminoacid, 499 * the default vdw radius for the element is returned. If atom is of 500 * unknown type (element) the vdw radius of {@link #Element().N} is returned 501 * 502 * @param atom 503 * @return 504 */ 505 public static double getRadius(Atom atom) { 506 507 if (atom.getElement()==null) { 508 logger.warn("Unrecognised atom "+atom.getName()+" with serial "+atom.getPDBserial()+ 509 ", assigning the default vdw radius (Nitrogen vdw radius)."); 510 return Element.N.getVDWRadius(); 511 } 512 513 Group res = atom.getGroup(); 514 515 if (res==null) { 516 logger.warn("Unknown parent residue for atom "+atom.getName()+" with serial "+ 517 atom.getPDBserial()+", assigning its default vdw radius"); 518 return atom.getElement().getVDWRadius(); 519 } 520 521 GroupType type = res.getType(); 522 523 if (type == GroupType.AMINOACID) return getRadiusForAmino(((AminoAcid)res), atom); 524 525 if (type == GroupType.NUCLEOTIDE) return getRadiusForNucl((NucleotideImpl)res,atom); 526 527 528 return atom.getElement().getVDWRadius(); 529 } 530 531}