001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.asa; 022 023import org.biojava.nbio.structure.*; 024import org.slf4j.Logger; 025import org.slf4j.LoggerFactory; 026 027import javax.vecmath.Point3d; 028import java.util.ArrayList; 029import java.util.TreeMap; 030import java.util.concurrent.ExecutorService; 031import java.util.concurrent.Executors; 032 033 034 035 036/** 037 * Class to calculate Accessible Surface Areas based on 038 * the rolling ball algorithm by Shrake and Rupley. 039 * 040 * The code is adapted from a python implementation at http://boscoh.com/protein/asapy 041 * (now source is available at https://github.com/boscoh/asa). 042 * Thanks to Bosco K. Ho for a great piece of code and for his fantastic blog. 043 * 044 * See 045 * Shrake, A., and J. A. Rupley. "Environment and Exposure to Solvent of Protein Atoms. 046 * Lysozyme and Insulin." JMB (1973) 79:351-371. 047 * Lee, B., and Richards, F.M. "The interpretation of Protein Structures: Estimation of 048 * Static Accessibility" JMB (1971) 55:379-400 049 * @author duarte_j 050 * 051 */ 052public class AsaCalculator { 053 054 private static final Logger logger = LoggerFactory.getLogger(AsaCalculator.class); 055 056 // Bosco uses as default 960, Shrake and Rupley seem to use in their paper 92 (not sure if this is actually the same parameter) 057 public static final int DEFAULT_N_SPHERE_POINTS = 960; 058 public static final double DEFAULT_PROBE_SIZE = 1.4; 059 public static final int DEFAULT_NTHREADS = 1; 060 061 062 063 // Chothia's amino acid atoms vdw radii 064 public static final double TRIGONAL_CARBON_VDW = 1.76; 065 public static final double TETRAHEDRAL_CARBON_VDW = 1.87; 066 public static final double TRIGONAL_NITROGEN_VDW = 1.65; 067 public static final double TETRAHEDRAL_NITROGEN_VDW = 1.50; 068 public static final double SULFUR_VDW = 1.85; 069 public static final double OXIGEN_VDW = 1.40; 070 071 // Chothia's nucleotide atoms vdw radii 072 public static final double NUC_CARBON_VDW = 1.80; 073 public static final double NUC_NITROGEN_VDW = 1.60; 074 public static final double PHOSPHOROUS_VDW = 1.90; 075 076 077 078 079 080 private class AsaCalcWorker implements Runnable { 081 082 private int i; 083 private double[] asas; 084 085 public AsaCalcWorker(int i, double[] asas) { 086 this.i = i; 087 this.asas = asas; 088 } 089 090 @Override 091 public void run() { 092 asas[i] = calcSingleAsa(i); 093 } 094 } 095 096 097 private Atom[] atoms; 098 private double[] radii; 099 private double probe; 100 private int nThreads; 101 private Point3d[] spherePoints; 102 private double cons; 103 104 /** 105 * Constructs a new AsaCalculator. Subsequently call {@link #calculateAsas()} 106 * or {@link #getGroupAsas()} to calculate the ASAs 107 * Only non-Hydrogen atoms are considered in the calculation. 108 * @param structure 109 * @param probe 110 * @param nSpherePoints 111 * @param nThreads 112 * @param hetAtoms if true HET residues are considered, if false they aren't, equivalent to 113 * NACCESS' -h option 114 * @see StructureTools.getAllNonHAtomArray 115 */ 116 public AsaCalculator(Structure structure, double probe, int nSpherePoints, int nThreads, boolean hetAtoms) { 117 this.atoms = StructureTools.getAllNonHAtomArray(structure, hetAtoms); 118 this.probe = probe; 119 this.nThreads = nThreads; 120 121 // initialising the radii by looking them up through AtomRadii 122 radii = new double[atoms.length]; 123 for (int i=0;i<atoms.length;i++) { 124 radii[i] = getRadius(atoms[i]); 125 } 126 127 // initialising the sphere points to sample 128 spherePoints = generateSpherePoints(nSpherePoints); 129 130 cons = 4.0 * Math.PI / nSpherePoints; 131 } 132 133 /** 134 * Constructs a new AsaCalculator. Subsequently call {@link #calculateAsas()} 135 * or {@link #getGroupAsas()} to calculate the ASAs. 136 * @param atoms an array of atoms not containing Hydrogen atoms 137 * @param probe the probe size 138 * @param nSpherePoints the number of points to be used in generating the spherical 139 * dot-density, the more points the more accurate (and slower) calculation 140 * @param nThreads the number of parallel threads to use for the calculation 141 * @throws IllegalArgumentException if any atom in the array is a Hydrogen atom 142 */ 143 public AsaCalculator(Atom[] atoms, double probe, int nSpherePoints, int nThreads) { 144 this.atoms = atoms; 145 this.probe = probe; 146 this.nThreads = nThreads; 147 148 for (Atom atom:atoms) { 149 if (atom.getElement()==Element.H) 150 throw new IllegalArgumentException("Can't calculate ASA for an array that contains Hydrogen atoms "); 151 } 152 153 // initialising the radii by looking them up through AtomRadii 154 radii = new double[atoms.length]; 155 for (int i=0;i<atoms.length;i++) { 156 radii[i] = getRadius(atoms[i]); 157 } 158 159 // initialising the sphere points to sample 160 spherePoints = generateSpherePoints(nSpherePoints); 161 162 cons = 4.0 * Math.PI / nSpherePoints; 163 } 164 165 /** 166 * Calculates ASA for all atoms and return them as a GroupAsa 167 * array (one element per residue in structure) containing ASAs per residue 168 * and per atom. 169 * The sorting of Groups in returned array is as specified by {@link org.biojava.nbio.structure.ResidueNumber} 170 * @return 171 */ 172 public GroupAsa[] getGroupAsas() { 173 174 TreeMap<ResidueNumber, GroupAsa> asas = new TreeMap<ResidueNumber, GroupAsa>(); 175 176 double[] asasPerAtom = calculateAsas(); 177 178 for (int i=0;i<atoms.length;i++) { 179 Group g = atoms[i].getGroup(); 180 if (!asas.containsKey(g.getResidueNumber())) { 181 GroupAsa groupAsa = new GroupAsa(g); 182 groupAsa.addAtomAsaU(asasPerAtom[i]); 183 asas.put(g.getResidueNumber(), groupAsa); 184 } else { 185 GroupAsa groupAsa = asas.get(g.getResidueNumber()); 186 groupAsa.addAtomAsaU(asasPerAtom[i]); 187 } 188 } 189 190 return asas.values().toArray(new GroupAsa[asas.size()]); 191 } 192 193 /** 194 * Calculates the Accessible Surface Areas for the atoms given in constructor and with parameters given. 195 * Beware that the parallel implementation is quite memory hungry. It scales well as long as there is 196 * enough memory available. 197 * @return an array with asa values corresponding to each atom of the input array 198 */ 199 public double[] calculateAsas() { 200 201 double[] asas = new double[atoms.length]; 202 203 if (nThreads<=1) { // (i.e. it will also be 1 thread if 0 or negative number specified) 204 for (int i=0;i<atoms.length;i++) { 205 asas[i] = calcSingleAsa(i); 206 } 207 208 } else { 209 // NOTE the multithreaded calculation does not scale up well in some systems, 210 // why? I guess some memory/garbage collect problem? I tried increasing Xmx in pc8201 but didn't help 211 212 // Following scaling tests are for 3hbx, calculating ASA of full asym unit (6 chains): 213 214 // SCALING test done in merlinl01 (12 cores, Xeon X5670 @ 2.93GHz, 24GB RAM) 215 //1 threads, time: 8.8s -- x1.0 216 //2 threads, time: 4.4s -- x2.0 217 //3 threads, time: 2.9s -- x3.0 218 //4 threads, time: 2.2s -- x3.9 219 //5 threads, time: 1.8s -- x4.9 220 //6 threads, time: 1.6s -- x5.5 221 //7 threads, time: 1.4s -- x6.5 222 //8 threads, time: 1.3s -- x6.9 223 224 // SCALING test done in pc8201 (4 cores, Core2 Quad Q9550 @ 2.83GHz, 8GB RAM) 225 //1 threads, time: 17.2s -- x1.0 226 //2 threads, time: 9.7s -- x1.8 227 //3 threads, time: 7.7s -- x2.2 228 //4 threads, time: 7.9s -- x2.2 229 230 // SCALING test done in eppic01 (16 cores, Xeon E5-2650 0 @ 2.00GHz, 128GB RAM) 231 //1 threads, time: 10.7s -- x1.0 232 //2 threads, time: 5.6s -- x1.9 233 //3 threads, time: 3.6s -- x3.0 234 //4 threads, time: 2.8s -- x3.9 235 //5 threads, time: 2.3s -- x4.8 236 //6 threads, time: 1.8s -- x6.0 237 //7 threads, time: 1.6s -- x6.8 238 //8 threads, time: 1.3s -- x8.0 239 //9 threads, time: 1.3s -- x8.5 240 //10 threads, time: 1.1s -- x10.0 241 //11 threads, time: 1.0s -- x10.9 242 //12 threads, time: 0.9s -- x11.4 243 244 245 246 ExecutorService threadPool = Executors.newFixedThreadPool(nThreads); 247 248 249 for (int i=0;i<atoms.length;i++) { 250 threadPool.submit(new AsaCalcWorker(i,asas)); 251 } 252 253 threadPool.shutdown(); 254 255 while (!threadPool.isTerminated()); 256 257 } 258 259 return asas; 260 } 261 262 /** 263 * Returns list of 3d coordinates of points on a sphere using the 264 * Golden Section Spiral algorithm. 265 * @param nSpherePoints the number of points to be used in generating the spherical dot-density 266 * @return 267 */ 268 private Point3d[] generateSpherePoints(int nSpherePoints) { 269 Point3d[] points = new Point3d[nSpherePoints]; 270 double inc = Math.PI * (3.0 - Math.sqrt(5.0)); 271 double offset = 2.0 / nSpherePoints; 272 for (int k=0;k<nSpherePoints;k++) { 273 double y = k * offset - 1.0 + (offset / 2.0); 274 double r = Math.sqrt(1.0 - y*y); 275 double phi = k * inc; 276 points[k] = new Point3d(Math.cos(phi)*r, y, Math.sin(phi)*r); 277 } 278 return points; 279 } 280 281 /** 282 * Returns list of indices of atoms within probe distance to atom k. 283 * @param k index of atom for which we want neighbor indices 284 */ 285 private ArrayList<Integer> findNeighborIndices(int k) { 286 // looking at a typical protein case, number of neighbours are from ~10 to ~50, with an average of ~30 287 // Thus 40 seems to be a good compromise for the starting capacity 288 ArrayList<Integer> neighbor_indices = new ArrayList<Integer>(40); 289 290 double radius = radii[k] + probe + probe; 291 292 for (int i=0;i<atoms.length;i++) { 293 if (i==k) continue; 294 295 double dist = 0; 296 297 dist = Calc.getDistance(atoms[i], atoms[k]); 298 299 if (dist < radius + radii[i]) { 300 neighbor_indices.add(i); 301 } 302 303 } 304 305 return neighbor_indices; 306 } 307 308 private double calcSingleAsa(int i) { 309 Atom atom_i = atoms[i]; 310 ArrayList<Integer> neighbor_indices = findNeighborIndices(i); 311 int n_neighbor = neighbor_indices.size(); 312 int j_closest_neighbor = 0; 313 double radius = probe + radii[i]; 314 315 int n_accessible_point = 0; 316 317 for (Point3d point: spherePoints){ 318 boolean is_accessible = true; 319 Point3d test_point = new Point3d(point.x*radius + atom_i.getX(), 320 point.y*radius + atom_i.getY(), 321 point.z*radius + atom_i.getZ()); 322 323 int[] cycled_indices = new int[n_neighbor]; 324 int arind = 0; 325 for (int ind=j_closest_neighbor;ind<n_neighbor;ind++) { 326 cycled_indices[arind] = ind; 327 arind++; 328 } 329 for (int ind=0;ind<j_closest_neighbor;ind++){ 330 cycled_indices[arind] = ind; 331 arind++; 332 } 333 334 for (int j: cycled_indices) { 335 Atom atom_j = atoms[neighbor_indices.get(j)]; 336 double r = radii[neighbor_indices.get(j)] + probe; 337 double diff_sq = test_point.distanceSquared(new Point3d(atom_j.getCoords())); 338 if (diff_sq < r*r) { 339 j_closest_neighbor = j; 340 is_accessible = false; 341 break; 342 } 343 } 344 if (is_accessible) { 345 n_accessible_point++; 346 } 347 } 348 return cons*n_accessible_point*radius*radius; 349 } 350 351 /** 352 * Gets the radius for given amino acid and atom 353 * @param aa 354 * @param atom 355 * @return 356 */ 357 private static double getRadiusForAmino(AminoAcid amino, Atom atom) { 358 359 if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius(); 360 // some unusual entries (e.g. 1tes) contain Deuterium atoms in standard aminoacids 361 if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius(); 362 363 String atomCode = atom.getName(); 364 char aa = amino.getAminoType(); 365 366 // here we use the values that Chothia gives in his paper (as NACCESS does) 367 if (atom.getElement()==Element.O) { 368 return OXIGEN_VDW; 369 } 370 else if (atom.getElement()==Element.S) { 371 return SULFUR_VDW; 372 } 373 else if (atom.getElement()==Element.N) { 374 if (atomCode.equals("NZ")) return TETRAHEDRAL_NITROGEN_VDW; // tetrahedral Nitrogen 375 return TRIGONAL_NITROGEN_VDW; // trigonal Nitrogen 376 } 377 else if (atom.getElement()==Element.C) { // it must be a carbon 378 if (atomCode.equals("C") || 379 atomCode.equals("CE1") || atomCode.equals("CE2") || atomCode.equals("CE3") || 380 atomCode.equals("CH2") || 381 atomCode.equals("CZ") || atomCode.equals("CZ2") || atomCode.equals("CZ3")) { 382 return TRIGONAL_CARBON_VDW; // trigonal Carbon 383 } 384 else if (atomCode.equals("CA") || atomCode.equals("CB") || 385 atomCode.equals("CE") || 386 atomCode.equals("CG1") || atomCode.equals("CG2")) { 387 return TETRAHEDRAL_CARBON_VDW; // tetrahedral Carbon 388 } 389 // the rest of the cases (CD, CD1, CD2, CG) depend on amino acid 390 else { 391 switch (aa) { 392 case 'F': 393 case 'W': 394 case 'Y': 395 case 'H': 396 case 'D': 397 case 'N': 398 return TRIGONAL_CARBON_VDW; 399 400 case 'P': 401 case 'K': 402 case 'R': 403 case 'M': 404 case 'I': 405 case 'L': 406 return TETRAHEDRAL_CARBON_VDW; 407 408 case 'Q': 409 case 'E': 410 if (atomCode.equals("CD")) return TRIGONAL_CARBON_VDW; 411 else if (atomCode.equals("CG")) return TETRAHEDRAL_CARBON_VDW; 412 413 default: 414 logger.info("Unexpected carbon atom "+atomCode+" for aminoacid "+aa+", assigning its standard vdw radius"); 415 return Element.C.getVDWRadius(); 416 } 417 } 418 419 // not any of the expected atoms 420 } else { 421 // non standard aas, (e.g. MSE, LLP) will always have this problem, 422 logger.info("Unexpected atom "+atomCode+" for aminoacid "+aa+ " ("+amino.getPDBName()+"), assigning its standard vdw radius"); 423 424 425 return atom.getElement().getVDWRadius(); 426 } 427 } 428 429 430 /** 431 * Gets the radius for given nucleotide atom 432 * @param atom 433 * @return 434 */ 435 private static double getRadiusForNucl(NucleotideImpl nuc, Atom atom) { 436 437 if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius(); 438 if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius(); 439 440 if (atom.getElement()==Element.C) return NUC_CARBON_VDW; 441 442 if (atom.getElement()==Element.N) return NUC_NITROGEN_VDW; 443 444 if (atom.getElement()==Element.P) return PHOSPHOROUS_VDW; 445 446 if (atom.getElement()==Element.O) return OXIGEN_VDW; 447 448 logger.info("Unexpected atom "+atom.getName()+" for nucleotide "+nuc.getPDBName()+", assigning its standard vdw radius"); 449 return atom.getElement().getVDWRadius(); 450 } 451 452 453 /** 454 * Gets the van der Waals radius of the given atom following the values defined by 455 * Chothia (1976) J.Mol.Biol.105,1-14 456 * NOTE: the vdw values defined by the paper assume no Hydrogens and thus "inflates" 457 * slightly the heavy atoms to account for Hydrogens. Thus this method cannot be used 458 * in a structure that contains Hydrogens! 459 * 460 * If atom is neither part of a nucleotide nor of a standard aminoacid, 461 * the default vdw radius for the element is returned. If atom is of 462 * unknown type (element) the vdw radius of {@link #Element().N} is returned 463 * 464 * @param atom 465 * @return 466 */ 467 public static double getRadius(Atom atom) { 468 469 if (atom.getElement()==null) { 470 logger.warn("Unrecognised atom "+atom.getName()+" with serial "+atom.getPDBserial()+ 471 ", assigning the default vdw radius (Nitrogen vdw radius)."); 472 return Element.N.getVDWRadius(); 473 } 474 475 Group res = atom.getGroup(); 476 477 if (res==null) { 478 logger.warn("Unknown parent residue for atom "+atom.getName()+" with serial "+ 479 atom.getPDBserial()+", assigning its default vdw radius"); 480 return atom.getElement().getVDWRadius(); 481 } 482 483 GroupType type = res.getType(); 484 485 if (type == GroupType.AMINOACID) return getRadiusForAmino(((AminoAcid)res), atom); 486 487 if (type == GroupType.NUCLEOTIDE) return getRadiusForNucl((NucleotideImpl)res,atom); 488 489 490 return atom.getElement().getVDWRadius(); 491 } 492 493}