001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.xtal; 022 023 024import org.biojava.nbio.structure.*; 025import org.biojava.nbio.structure.contact.AtomContactSet; 026import org.biojava.nbio.structure.contact.StructureInterface; 027import org.biojava.nbio.structure.contact.StructureInterfaceList; 028import org.slf4j.Logger; 029import org.slf4j.LoggerFactory; 030 031import javax.vecmath.Matrix4d; 032import javax.vecmath.Point3i; 033import javax.vecmath.Vector3d; 034import java.util.ArrayList; 035import java.util.Iterator; 036 037 038 039/** 040 * A class containing methods to find interfaces in a given crystallographic Structure by 041 * reconstructing the crystal lattice through application of symmetry operators 042 * 043 * @author duarte_j 044 * 045 */ 046public class CrystalBuilder { 047 048 // Default number of cell neighbors to try in interface search (in 3 directions of space). 049 // In the search, only bounding box overlaps are tried, thus there's not so much overhead in adding 050 // more cells. We actually tested it and using numCells from 1 to 10 didn't change runtimes at all. 051 // Examples with interfaces in distant neighbor cells: 052 // 2nd neighbors: 3hz3, 1wqj, 2de3, 1jcd 053 // 3rd neighbors: 3bd3, 1men, 2gkp, 1wui 054 // 5th neighbors: 2ahf, 2h2z 055 // 6th neighbors: 1was (in fact interfaces appear only at 5th neighbors for it) 056 // Maybe this could be avoided by previously translating the given molecule to the first cell, 057 // BUT! some bona fide cases exist, e.g. 2d3e: it is properly placed at the origin but the molecule 058 // is enormously long in comparison with the dimensions of the unit cell, some interfaces come at the 7th neighbor. 059 // After a scan of the whole PDB (Oct 2013) using numCells=50, the highest one was 4jgc with 060 // interfaces up to the 11th neighbor. Other high ones (9th neighbors) are 4jbm and 4k3t. 061 // We set the default value to 12 based on that (having not seen any difference in runtime) 062 public static final int DEF_NUM_CELLS = 12; 063 064 /** 065 * Default maximum distance between two chains to be considered an interface. 066 * @see #getUniqueInterfaces(double) 067 */ 068 public static final double DEFAULT_INTERFACE_DISTANCE_CUTOFF = 5.5; 069 070 public static final Matrix4d IDENTITY = new Matrix4d(1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1); 071 072 073 /** 074 * Whether to consider HETATOMs in contact calculations 075 */ 076 private static final boolean INCLUDE_HETATOMS = true; 077 078 private Structure structure; 079 private PDBCrystallographicInfo crystallographicInfo; 080 private int numChainsAu; 081 private int numOperatorsSg; 082 083 private static final Logger logger = LoggerFactory.getLogger(CrystalBuilder.class); 084 085 private int numCells; 086 087 private ArrayList<CrystalTransform> visited; 088 089 private boolean isCrystallographic; 090 091 092 093 public CrystalBuilder(Structure structure) { 094 this.structure = structure; 095 096 this.crystallographicInfo = structure.getCrystallographicInfo(); 097 098 this.numChainsAu = structure.getChains().size(); 099 this.numOperatorsSg = 1; 100 101 102 if (structure.isCrystallographic()) { 103 104 this.isCrystallographic = true; 105 106 // we need to check space group not null for the cases where the entry is crystallographic but 107 // the space group is not a standard one recognized by biojava, e.g. 1mnk (SG: 'I 21') 108 if (this.crystallographicInfo.isNonStandardSg()) { 109 logger.warn("Space group is non-standard, will only calculate asymmetric unit interfaces."); 110 this.isCrystallographic = false; 111 112 } else if (this.crystallographicInfo.getSpaceGroup()==null) { 113 // just in case we still check for space group null (a user pdb file could potentially be crystallographic and have no space group) 114 logger.warn("Space group is null, will only calculate asymmetric unit interfaces."); 115 this.isCrystallographic = false; 116 } else { 117 this.numOperatorsSg = this.crystallographicInfo.getSpaceGroup().getMultiplicity(); 118 } 119 120 // we need to check crystal cell not null for the rare cases where the entry is crystallographic but 121 // the crystal cell is not given, e.g. 2i68, 2xkm, 4bpq 122 if (this.crystallographicInfo.getCrystalCell()==null) { 123 logger.warn("Could not find a crystal cell definition, will only calculate asymmetric unit interfaces."); 124 this.isCrystallographic = false; 125 } 126 127 // check for cases like 4hhb that are in a non-standard coordinate frame convention, see https://github.com/eppic-team/owl/issues/4 128 if (this.crystallographicInfo.isNonStandardCoordFrameConvention()) { 129 logger.warn("Non-standard coordinate frame convention, will only calculate asymmetric unit interfaces."); 130 this.isCrystallographic = false; 131 this.numOperatorsSg = 1; // we force here to 1 or otherwise it gets filled from sg above 132 } 133 134 } else { 135 this.isCrystallographic = false; 136 } 137 138 this.numCells = DEF_NUM_CELLS; 139 140 } 141 142 /** 143 * Set the number of neighboring crystal cells that will be used in the search for contacts 144 * @param numCells 145 */ 146 public void setNumCells(int numCells) { 147 this.numCells = numCells; 148 } 149 150 private void initialiseVisited() { 151 visited = new ArrayList<CrystalTransform>(); 152 } 153 154 155 156 /** 157 * Returns the list of unique interfaces that the given Structure has upon 158 * generation of all crystal symmetry mates. An interface is defined as any pair of chains 159 * that contact, i.e. for which there is at least a pair of atoms (one from each chain) within 160 * the default cutoff distance. 161 * @return 162 * @see #DEFAULT_INTERFACE_DISTANCE_CUTOFF 163 */ 164 public StructureInterfaceList getUniqueInterfaces() { 165 return getUniqueInterfaces(DEFAULT_INTERFACE_DISTANCE_CUTOFF); 166 } 167 168 /** 169 * Returns the list of unique interfaces that the given Structure has upon 170 * generation of all crystal symmetry mates. An interface is defined as any pair of chains 171 * that contact, i.e. for which there is at least a pair of atoms (one from each chain) within 172 * the given cutoff distance. 173 * @param cutoff the distance cutoff for 2 chains to be considered in contact 174 * @return 175 */ 176 public StructureInterfaceList getUniqueInterfaces(double cutoff) { 177 178 179 StructureInterfaceList set = new StructureInterfaceList(); 180 181 // certain structures in the PDB are not macromolecules (contain no polymeric chains at all), e.g. 1ao2 182 // with the current mmCIF parsing, those will be empty since purely non-polymeric chains are removed 183 // see commit e9562781f23da0ebf3547146a307d7edd5741090 184 if (structure.getChains().size()==0) { 185 logger.warn("No chains present in the structure! No interfaces will be calculated"); 186 return set; 187 } 188 189 190 191 // initialising the visited ArrayList for keeping track of symmetry redundancy 192 initialiseVisited(); 193 194 195 196 // the isCrystallographic() condition covers 3 cases: 197 // a) entries with expMethod X-RAY/other diffraction and defined crystalCell (most usual case) 198 // b) entries with expMethod null but defined crystalCell (e.g. PDB file with CRYST1 record but no expMethod annotation) 199 // c) entries with expMethod not X-RAY (e.g. NMR) and defined crystalCell (NMR entries do have a dummy CRYST1 record "1 1 1 90 90 90 P1") 200 // d) isCrystallographic will be false if the structure is crystallographic but the space group was not recognized 201 202 203 calcInterfacesCrystal(set, cutoff); 204 205 206 return set; 207 } 208 209 /** 210 * Calculate interfaces between original asymmetric unit and neighboring 211 * whole unit cells, including the original full unit cell i.e. i=0,j=0,k=0 212 * @param set 213 * @param cutoff 214 */ 215 private void calcInterfacesCrystal(StructureInterfaceList set, double cutoff) { 216 217 218 // initialising debugging vars 219 long start = -1; 220 long end = -1; 221 int trialCount = 0; 222 int skippedRedundant = 0; 223 int skippedAUsNoOverlap = 0; 224 int skippedChainsNoOverlap = 0; 225 int skippedSelfEquivalent = 0; 226 227 228 Matrix4d[] ops = null; 229 if (isCrystallographic) { 230 ops = structure.getCrystallographicInfo().getTransformationsOrthonormal(); 231 } else { 232 ops = new Matrix4d[1]; 233 ops[0] = new Matrix4d(IDENTITY); 234 } 235 236 // The bounding boxes of all AUs of the unit cell 237 UnitCellBoundingBox bbGrid = new UnitCellBoundingBox(numOperatorsSg, numChainsAu);; 238 // we calculate all the bounds of each of the asym units, those will then be reused and translated 239 bbGrid.setBbs(structure, ops, INCLUDE_HETATOMS); 240 241 242 // if not crystallographic there's no search to do in other cells, only chains within "AU" will be checked 243 if (!isCrystallographic) numCells = 0; 244 245 boolean verbose = logger.isDebugEnabled(); 246 247 if (verbose) { 248 trialCount = 0; 249 start= System.currentTimeMillis(); 250 int neighbors = (2*numCells+1)*(2*numCells+1)*(2*numCells+1)-1; 251 int auTrials = (numChainsAu*(numChainsAu-1))/2; 252 int trials = numChainsAu*numOperatorsSg*numChainsAu*neighbors; 253 logger.debug("Chain clash trials within original AU: "+auTrials); 254 logger.debug( 255 "Chain clash trials between the original AU and the neighbouring "+neighbors+ 256 " whole unit cells ("+numCells+" neighbours)" + 257 "(2x"+numChainsAu+"chains x "+numOperatorsSg+"AUs x "+neighbors+"cells) : "+trials); 258 logger.debug("Total trials: "+(auTrials+trials)); 259 } 260 261 262 for (int a=-numCells;a<=numCells;a++) { 263 for (int b=-numCells;b<=numCells;b++) { 264 for (int c=-numCells;c<=numCells;c++) { 265 266 Point3i trans = new Point3i(a,b,c); 267 Vector3d transOrth = new Vector3d(a,b,c); 268 if (a!=0 || b!=0 || c!=0) 269 // we avoid doing the transformation for 0,0,0 (in case it's not crystallographic) 270 this.crystallographicInfo.getCrystalCell().transfToOrthonormal(transOrth); 271 UnitCellBoundingBox bbGridTrans = bbGrid.getTranslatedBbs(transOrth); 272 273 for (int n=0;n<numOperatorsSg;n++) { 274 275 // short-cut strategies 276 // 1) we skip first of all if the bounding boxes of the AUs don't overlap 277 if (!bbGrid.getAuBoundingBox(0).overlaps(bbGridTrans.getAuBoundingBox(n), cutoff)) { 278 skippedAUsNoOverlap++; 279 continue; 280 } 281 282 // 2) we check if we didn't already see its equivalent symmetry operator partner 283 CrystalTransform tt = new CrystalTransform(this.crystallographicInfo.getSpaceGroup(), n); 284 tt.translate(trans); 285 if (isRedundant(tt)) { 286 skippedRedundant++; 287 continue; 288 } 289 addVisited(tt); 290 291 292 boolean selfEquivalent = false; 293 294 // 3) an operator can be "self redundant" if it is the inverse of itself (involutory, e.g. all pure 2-folds with no translation) 295 if (tt.isEquivalent(tt)) { 296 logger.debug("Transform "+tt+" is equivalent to itself, will skip half of i-chains to j-chains comparisons"); 297 // in this case we can't skip the operator, but we can skip half of the matrix comparisons e.g. j>i 298 // we set a flag and do that within the loop below 299 selfEquivalent = true; 300 } 301 302 StringBuilder builder = null; 303 if (verbose) builder = new StringBuilder(tt+" "); 304 305 // Now that we know that boxes overlap and operator is not redundant, we have to go to the details 306 int contactsFound = 0; 307 308 for (int j=0;j<numChainsAu;j++) { 309 for (int i=0;i<numChainsAu;i++) { // we only have to compare the original asymmetric unit to every full cell around 310 311 if(selfEquivalent && (j>i)) { 312 // in case of self equivalency of the operator we can safely skip half of the matrix 313 skippedSelfEquivalent++; 314 continue; 315 } 316 // special case of original AU, we don't compare a chain to itself 317 if (n==0 && a==0 && b==0 && c==0 && i==j) continue; 318 319 // before calculating the AtomContactSet we check for overlap, then we save putting atoms into the grid 320 if (!bbGrid.getChainBoundingBox(0,i).overlaps(bbGridTrans.getChainBoundingBox(n,j),cutoff)) { 321 skippedChainsNoOverlap++; 322 if (verbose) { 323 builder.append("."); 324 } 325 continue; 326 } 327 trialCount++; 328 329 // finally we've gone through all short-cuts and the 2 chains seem to be close enough: 330 // we do the calculation of contacts 331 Chain chainj = null; 332 Chain chaini = structure.getChain(i); 333 334 if (n==0 && a==0 && b==0 && c==0) { 335 chainj = structure.getChain(j); 336 } else { 337 chainj = (Chain)structure.getChain(j).clone(); 338 Matrix4d m = new Matrix4d(ops[n]); 339 translate(m, transOrth); 340 Calc.transform(chainj,m); 341 } 342 343 StructureInterface interf = calcContacts(chaini, chainj, cutoff, tt, builder); 344 345 if (interf!=null) { 346 contactsFound++; 347 set.add(interf); 348 } 349 } 350 } 351 352 if( verbose ) { 353 if (a==0 && b==0 && c==0 && n==0) 354 builder.append(" "+contactsFound+"("+(numChainsAu*(numChainsAu-1))/2+")"); 355 else if (selfEquivalent) 356 builder.append(" "+contactsFound+"("+(numChainsAu*(numChainsAu+1))/2+")"); 357 else 358 builder.append(" "+contactsFound+"("+numChainsAu*numChainsAu+")"); 359 360 logger.debug(builder.toString()); 361 } 362 } 363 } 364 } 365 } 366 367 end = System.currentTimeMillis(); 368 logger.debug("\n"+trialCount+" chain-chain clash trials done. Time "+(end-start)/1000+"s"); 369 logger.debug(" skipped (not overlapping AUs) : "+skippedAUsNoOverlap); 370 logger.debug(" skipped (not overlapping chains) : "+skippedChainsNoOverlap); 371 logger.debug(" skipped (sym redundant op pairs) : "+skippedRedundant); 372 logger.debug(" skipped (sym redundant self op) : "+skippedSelfEquivalent); 373 374 logger.debug("Found "+set.size()+" interfaces."); 375 } 376 377 private StructureInterface calcContacts(Chain chaini, Chain chainj, double cutoff, CrystalTransform tt, StringBuilder builder) { 378 // note that we don't consider hydrogens when calculating contacts 379 AtomContactSet graph = StructureTools.getAtomsInContact(chaini, chainj, cutoff, INCLUDE_HETATOMS); 380 381 if (graph.size()>0) { 382 if (builder != null) builder.append("x"); 383 384 CrystalTransform transf = new CrystalTransform(this.crystallographicInfo.getSpaceGroup()); 385 StructureInterface interf = new StructureInterface( 386 StructureTools.getAllAtomArray(chaini), StructureTools.getAllAtomArray(chainj), 387 chaini.getChainID(), chainj.getChainID(), 388 graph, 389 transf, tt); 390 391 return interf; 392 393 } else { 394 if (builder != null) builder.append("o"); 395 return null; 396 } 397 } 398 399 private void addVisited(CrystalTransform tt) { 400 visited.add(tt); 401 } 402 403 /** 404 * Checks whether given transformId/translation is symmetry redundant 405 * Two transformations are symmetry redundant if their matrices (4d) multiplication gives the identity, i.e. 406 * if one is the inverse of the other. 407 * @param tt 408 * @return 409 */ 410 private boolean isRedundant(CrystalTransform tt) { 411 412 Iterator<CrystalTransform> it = visited.iterator(); 413 while (it.hasNext()) { 414 CrystalTransform v = it.next(); 415 416 if (tt.isEquivalent(v)) { 417 418 logger.debug("Skipping redundant transformation: "+tt+", equivalent to "+v); 419 420 // there's only 1 possible equivalent partner for each visited matrix 421 // (since the equivalent is its inverse matrix and the inverse matrix is unique) 422 // thus once the partner has been seen, we don't need to check it ever again 423 it.remove(); 424 425 return true; 426 } 427 } 428 429 return false; 430 } 431 432 public void translate(Matrix4d m, Vector3d translation) { 433 m.m03 = m.m03+translation.x; 434 m.m13 = m.m13+translation.y; 435 m.m23 = m.m23+translation.z; 436 } 437 438// /** 439// * If NCS operators are given in MTRIX records, a bigger AU has to be constructed based on those. 440// * Later they have to be removed with {@link #removeExtraChains()} 441// */ 442// private void constructFullStructure() { 443// 444// if (this.crystallographicInfo.getNcsOperators()==null || 445// this.crystallographicInfo.getNcsOperators().length==0) { 446// // normal case: nothing to do 447// return; 448// } 449// 450// // first we store the original chains in a new list to be able to restore the structure to its original state afterwards 451// origChains = new ArrayList<Chain>(); 452// for (Chain chain:structure.getChains()) { 453// origChains.add(chain); 454// } 455// 456// // if we are here, it means that the NCS operators exist and we have to complete the given AU by applying them 457// Matrix4d[] ncsOps = this.crystallographicInfo.getNcsOperators(); 458// 459// if (verbose) 460// System.out.println(ncsOps.length+" NCS operators found, generating new AU..."); 461// 462// 463// for (int i=0;i<ncsOps.length;i++) { 464// Structure transformedStruct = (Structure)structure.clone(); 465// Calc.transform(transformedStruct, ncsOps[i]); 466// 467// for (Chain chain: transformedStruct.getChains()) { 468// // we assign a new AU id (chain ID) consisting in original chain ID + an operator index from 1 to n 469// chain.setChainID(chain.getChainID()+(i+1)); 470// structure.addChain(chain); 471// } 472// } 473// 474// // now we have more chains in AU, we have to update the value 475// this.numChainsAu = structure.getChains().size(); 476// } 477// 478// /** 479// * Removes the extra chains that were added to the original structure in {@link #constructFullStructure()} 480// */ 481// private void removeExtraChains() { 482// structure.setChains(origChains); 483// } 484}