001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 2013-05-23 021 * 022 */ 023package org.biojava.nbio.structure.symmetry.core; 024 025import org.biojava.nbio.structure.Structure; 026import org.biojava.nbio.structure.symmetry.utils.CombinationGenerator; 027import org.jgrapht.UndirectedGraph; 028import org.jgrapht.alg.ConnectivityInspector; 029import org.jgrapht.graph.DefaultEdge; 030import org.jgrapht.graph.UndirectedSubgraph; 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033 034import javax.vecmath.Point3d; 035 036import java.math.BigInteger; 037import java.util.*; 038 039/** 040 * Detects global and local quaternary protein structure symmetry in a structure. 041 * 042 * The QuatSymmetryParameter settings affect the calculated results. 043 * 044 * @author Peter Rose 045 * 046 */ 047public class QuatSymmetryDetector { 048 049 private static final Logger logger = LoggerFactory 050 .getLogger(QuatSymmetryDetector.class); 051 052 private Structure structure = null; 053 private QuatSymmetryParameters parameters = null; 054 055 private List<QuatSymmetryResults> globalSymmetry = new ArrayList<QuatSymmetryResults>(); 056 private List<List<QuatSymmetryResults>> localSymmetries = new ArrayList<List<QuatSymmetryResults>>(); 057 private int proteinChainCount = 0; 058 private boolean complete = false; 059 060 public QuatSymmetryDetector(Structure structure, QuatSymmetryParameters parameters) { 061 this.structure = structure; 062 this.parameters = parameters; 063 } 064 065 /** 066 * Returns true if structure contains protein subunits. The other methods 067 * in this class will only return data if protein subunits are present. 068 * Always use this method first, before retrieving global or local symmetry 069 * results. 070 * 071 * @return true if protein subunits are present 072 */ 073 public boolean hasProteinSubunits() { 074 run(); 075 return proteinChainCount > 0; 076 } 077 078 /** 079 * Returns list of global quaternary structure symmetry results 080 * 081 * @return list of global quaternary structure symmetry results 082 */ 083 public List<QuatSymmetryResults> getGlobalSymmetry() { 084 run(); 085 return globalSymmetry; 086 } 087 088 /** 089 * Returns a list of lists of local quaternary structure symmetry results 090 * 091 * @return list of lists of local quaternary structure symmetry results 092 */ 093 public List<List<QuatSymmetryResults>> getLocalSymmetries() { 094 run(); 095 return localSymmetries; 096 } 097 098 private void run() { 099 if (complete) { 100 return; 101 } 102 complete = true; 103 //Cluster chains by sequence 104 ClusterProteinChains clusterer = new ClusterProteinChains(structure, parameters); 105 proteinChainCount = clusterer.getProteinChainCount(); 106 107 if (! hasProteinSubunits()) { 108 return; 109 } 110 111 int nucleicAcidChainCount = clusterer.getNucleicAcidChainCount(); 112 113 // sort seq. identity thresholds from smallest to largest. This reduces the total number of calculations necessary. 114 double[] thresholds = parameters.getSequenceIdentityThresholds().clone(); 115 Arrays.sort(thresholds); 116 117 for (int index = 0; index < thresholds.length; index++) { 118 // Map structure info to the sequence cluster 119 ChainClusterer chainClusterer = new ChainClusterer(clusterer.getSequenceAlignmentClusters(thresholds[index])); 120 121 // determine global symmetry 122 Subunits globalSubunits = createGlobalSubunits(chainClusterer, nucleicAcidChainCount); 123 QuatSymmetryResults gSymmetry = calcQuatSymmetry(globalSubunits); 124 gSymmetry.setSequenceIdentityThreshold(thresholds[index]); 125 globalSymmetry.add(gSymmetry); 126// SymmetryDeviation sd = new SymmetryDeviation(globalSubunits, gSymmetry.getRotationGroup()); 127 128 129 // determine local symmetry if global structure is 130 // (1) asymmetric (C1) 131 // (2) heteromeric (belongs to more than 1 sequence cluster) 132 // (3) more than 2 chains (heteromers with just 2 chains cannot have local symmetry) 133 134 // TODO example 2PT7: global C2, but local C6 symm., should that be included here ...? 135 // i.e., include all heteromers here, for example if higher symmetry is possible by stoichiometry? A6B2 -> local A6 can have higher symmetry 136 if (parameters.isLocalSymmetry() && globalSubunits.getSubunitCount() <= parameters.getMaximumLocalSubunits()) { 137 if (gSymmetry.getSymmetry().equals("C1") && proteinChainCount > 2) { 138 List<QuatSymmetryResults> lSymmetry = new ArrayList<QuatSymmetryResults>(); 139 140 long start = System.nanoTime(); 141 142 for (Subunits subunits: createLocalSubunits(chainClusterer)) { 143 QuatSymmetryResults result = calcQuatSymmetry(subunits); 144 addToLocalSymmetry(result, lSymmetry); 145 146 double time = (System.nanoTime()- start)/1000000000; 147 if (time > parameters.getLocalTimeLimit()) { 148 logger.warn("Exceeded time limit for local symmetry calculations: " + time + 149 " seconds. Quat symmetry results may be incomplete"); 150 break; 151 } 152 } 153 localSymmetries.add(lSymmetry); 154 } 155 } 156 157 if (! gSymmetry.getSubunits().isPseudoStoichiometric()) { 158 break; 159 } 160 } 161 162 163 trimGlobalSymmetryResults(); 164 trimLocalSymmetryResults(); 165 setPseudoSymmetry(); 166 setPreferredResults(); 167 } 168 169 170 /** 171 * trims asymmetric global symmetry results that are C1 and have pseudoStoichiometry 172 */ 173 private void trimGlobalSymmetryResults() { 174 for (Iterator<QuatSymmetryResults> iter = globalSymmetry.iterator(); iter.hasNext();) { 175 QuatSymmetryResults result = iter.next(); 176 if (result.getSymmetry().equals("C1") && result.getSubunits().isPseudoStoichiometric()) { 177 iter.remove(); 178 } 179 } 180 } 181 182 /** 183 * trims local symmetry results if any global symmetry is found. This only happens in special cases. 184 */ 185 private void trimLocalSymmetryResults() { 186 boolean hasGlobalSymmetry = false; 187 for (QuatSymmetryResults result: globalSymmetry) { 188 if (! result.getSymmetry().equals("C1")) { 189 hasGlobalSymmetry = true; 190 break; 191 } 192 } 193 194 if (hasGlobalSymmetry) { 195 localSymmetries.clear(); 196 } 197 } 198 199 /** 200 * Sets pseudosymmetry flag for results that have pseudosymmetry 201 * @param thresholds sequence identity thresholds 202 */ 203 private void setPseudoSymmetry() { 204 setPseudoSymmetry(globalSymmetry); 205 for (List<QuatSymmetryResults> localSymmetry: localSymmetries) { 206 setPseudoSymmetry(localSymmetry); 207 } 208 } 209 210 /** 211 * Sets pseudosymmetry based on the analysis of all symmetry results 212 */ 213 private void setPseudoSymmetry(List<QuatSymmetryResults> results) { 214 // find maximum symmetry order for cases of non-pseudostoichiometry 215 int maxOrder = 0; 216 for (QuatSymmetryResults result: results) { 217 if (result.getRotationGroup() != null && !result.getSubunits().isPseudoStoichiometric()) { 218 if (result.getRotationGroup().getOrder() > maxOrder) { 219 maxOrder = result.getRotationGroup().getOrder(); 220 } 221 } 222 } 223 224 // if the order of symmetry for a pseudstoichiometric case is higher 225 // than the order for a non-pseudostoichiometry, set the pseudoSymmetry flag to true (i.e. 4HHB) 226 for (QuatSymmetryResults result: results) { 227 if (result.getRotationGroup() != null) { 228 if (result.getRotationGroup().getOrder() > maxOrder) { 229 result.getSubunits().setPseudoSymmetric(true); 230 } 231 } 232 } 233 } 234 235 /** 236 * Sets preferred results flag for symmetry result that should be shown by default in visualization programs 237 * @param thresholds sequence identity thresholds 238 */ 239 private void setPreferredResults() { 240 int[] score = new int[globalSymmetry.size()]; 241 242 // check global symmetry 243 for (int i = 0; i < globalSymmetry.size(); i++) { 244 QuatSymmetryResults result = globalSymmetry.get(i); 245 if (! result.getSymmetry().equals("C1")) { 246 score[i] += 2; 247 } 248 if (! result.getSubunits().isPseudoStoichiometric()) { 249 score[i]++; 250 } 251 } 252 253 int bestGlobal = 0; 254 int bestScore = 0; 255 for (int i = 0; i < score.length; i++) { 256 if (score[i] > bestScore) { 257 bestScore = score[i]; 258 bestGlobal = i; 259 } 260 } 261 if (bestScore >= 2) { 262 QuatSymmetryResults g = globalSymmetry.get(bestGlobal); 263 g.setPreferredResult(true); 264 return; 265 } 266 267 // check local symmetry 268 score = new int[localSymmetries.size()]; 269 270 for (int i = 0; i < localSymmetries.size(); i++) { 271 List<QuatSymmetryResults> results = localSymmetries.get(i); 272 for (QuatSymmetryResults result: results) { 273 if (! result.getSymmetry().equals("C1")) { 274 score[i] += 2; 275 } 276 if (! result.getSubunits().isPseudoStoichiometric()) { 277 score[i]++; 278 } 279 } 280 } 281 282 int bestLocal = 0; 283 bestScore = 0; 284 for (int i = 0; i < score.length; i++) { 285 if (score[i] > bestScore) { 286 bestScore = score[i]; 287 bestLocal = i; 288 } 289 } 290 if (bestScore > 0) { 291 List<QuatSymmetryResults> results = localSymmetries.get(bestLocal); 292 for (QuatSymmetryResults result: results) { 293 result.setPreferredResult(true); 294 } 295 } else { 296 QuatSymmetryResults g = globalSymmetry.get(bestGlobal); 297 g.setPreferredResult(true); 298 } 299 } 300 301 private void addToLocalSymmetry(QuatSymmetryResults testResults, List<QuatSymmetryResults> localSymmetry) { 302 if (testResults.getSymmetry().equals("C1")) { 303 return; 304 } 305 306 for (QuatSymmetryResults results: localSymmetry) { 307 if (results.getSubunits().overlaps(testResults.getSubunits())) { 308 return; 309 } 310 } 311 testResults.setLocal(true); 312 localSymmetry.add(testResults); 313 } 314 315 private Subunits createGlobalSubunits(ChainClusterer chainClusterer, int nucleicAcidChainCount) { 316 Subunits subunits = new Subunits(chainClusterer.getCalphaCoordinates(), 317 chainClusterer.getSequenceClusterIds(), 318 chainClusterer.getPseudoStoichiometry(), 319 chainClusterer.getMinSequenceIdentity(), 320 chainClusterer.getMaxSequenceIdentity(), 321 chainClusterer.getFolds(), 322 chainClusterer.getChainIds(), 323 chainClusterer.getModelNumbers()); 324 subunits.setNucleicAcidChainCount(nucleicAcidChainCount); 325 return subunits; 326 } 327 328 private List<Subunits> createLocalSubunits(ChainClusterer chainClusterer) { 329 List<Subunits> subunits = new ArrayList<Subunits>(); 330 List<List<Integer>> subClusters = decomposeClusters(chainClusterer.getCalphaCoordinates(), chainClusterer.getSequenceClusterIds()); 331 for (List<Integer> subCluster: subClusters) { 332 subunits.add(createLocalSubunit(subCluster, chainClusterer)); 333 } 334 return subunits; 335 } 336 337 private Subunits createLocalSubunit(List<Integer> subCluster, ChainClusterer chainClusterer) { 338 List<Point3d[]> subCalphaCoordinates = new ArrayList<Point3d[]>(subCluster.size()); 339 List<Integer> subSequenceIds = new ArrayList<Integer>(subCluster.size()); 340 List<Boolean> subPseudoStoichiometry = new ArrayList<Boolean>(subCluster.size()); 341 List<Double> subMinSequenceIdentity = new ArrayList<Double>(subCluster.size()); 342 List<Double> subMaxSequenceIdentity = new ArrayList<Double>(subCluster.size()); 343 List<String> subChainIds = new ArrayList<String>(subCluster.size()); 344 List<Integer> subModelNumbers = new ArrayList<Integer>(subCluster.size()); 345 346 for (int index: subCluster) { 347 subCalphaCoordinates.add(chainClusterer.getCalphaCoordinates().get(index)); 348 subSequenceIds.add(chainClusterer.getSequenceClusterIds().get(index)); 349 subPseudoStoichiometry.add(chainClusterer.getPseudoStoichiometry().get(index)); 350 subMinSequenceIdentity.add(chainClusterer.getMinSequenceIdentity().get(index)); 351 subMaxSequenceIdentity.add(chainClusterer.getMaxSequenceIdentity().get(index)); 352 subChainIds.add(chainClusterer.getChainIds().get(index)); 353 subModelNumbers.add(chainClusterer.getModelNumbers().get(index)); 354 } 355 356 standardizeSequenceIds(subSequenceIds); 357 358 Integer[] array = subSequenceIds.toArray(new Integer[subSequenceIds.size()]); 359 List<Integer> subFolds = getFolds(array, subSequenceIds.size()); 360 Subunits subunits = new Subunits(subCalphaCoordinates, 361 subSequenceIds, 362 subPseudoStoichiometry, 363 subMinSequenceIdentity, 364 subMaxSequenceIdentity, 365 subFolds, 366 subChainIds, 367 subModelNumbers); 368 return subunits; 369 } 370 371 /** 372 * Resets list of arbitrary sequence ids into integer order: 0, 1, ... 373 * @param subSequenceIds 374 */ 375 private static void standardizeSequenceIds(List<Integer> subSequenceIds) { 376 int count = 0; 377 int current = subSequenceIds.get(0); 378 for (int i = 0; i < subSequenceIds.size(); i++) { 379 if (subSequenceIds.get(i) > current) { 380 current = subSequenceIds.get(i); 381 count++; 382 } 383 subSequenceIds.set(i, count); 384 } 385 } 386 387 private List<List<Integer>> decomposeClusters(List<Point3d[]> caCoords, List<Integer> clusterIds) { 388 List<List<Integer>> subClusters = new ArrayList<List<Integer>>(); 389 390 int last = getLastMultiSubunit(clusterIds); 391 List<Point3d[]> subList = caCoords; 392 if (last < caCoords.size()) { 393 subList = caCoords.subList(0, last); 394 } else { 395 last = caCoords.size(); 396 } 397 398 SubunitGraph subunitGraph = new SubunitGraph(subList); 399 UndirectedGraph<Integer, DefaultEdge> graph = subunitGraph.getProteinGraph(); 400 logger.debug("Graph: {}", graph.toString()); 401 402 for (int i = last; i > 1; i--) { 403 CombinationGenerator generator = new CombinationGenerator(last, i); 404 int[] indices = null; 405 Integer[] subCluster = new Integer[i]; 406 407 // avoid combinatorial explosion, i.e. for 1FNT 408 BigInteger maxCombinations = BigInteger.valueOf(parameters.getMaximumLocalCombinations()); 409 logger.debug("Number of combinations: {}", generator.getTotal()); 410 if (generator.getTotal().compareTo(maxCombinations) > 0) { 411 logger.warn("Number of combinations exceeds limit for biounit with {} subunits in groups of {} subunits. Will not check local symmetry for them", last, i); 412 continue; 413 } 414 415 while (generator.hasNext()) { 416 indices = generator.getNext(); 417 418 419 // only consider sub clusters that can have rotational symmetry based on the number of subunits 420 // TODO this however may exclude a few cases of helical symmetry. Checking all combinations for 421 // helical symmetry would be prohibitively slow. 422 for (int j = 0; j < indices.length; j++) { 423 subCluster[j] = clusterIds.get(indices[j]); 424 } 425 List<Integer> folds = getFolds(subCluster, last); 426 427 if (folds.size() < 2) { 428 continue; 429 } 430 431 List<Integer> subSet = new ArrayList<Integer>(indices.length); 432 for (int index: indices) { 433 subSet.add(index); 434 } 435 436 // check if this subset of subunits interact with each other 437 UndirectedGraph<Integer, DefaultEdge> subGraph = new UndirectedSubgraph<Integer, DefaultEdge>(graph, new HashSet<Integer>(subSet), null); 438 if (isConnectedGraph(subGraph)) { 439 subClusters.add(subSet); 440 if (subClusters.size() > parameters.getMaximumLocalResults()) { 441 return subClusters; 442 } 443 } 444 } 445 } 446 447 return subClusters; 448 } 449 450 private static int getLastMultiSubunit(List<Integer> clusterIds) { 451 for (int i = 0, n = clusterIds.size(); i < n; i++) { 452 if (i < n-2) { 453 if (clusterIds.get(i)!=clusterIds.get(i+1) && 454 clusterIds.get(i+1) != clusterIds.get(i+2)) { 455 return i+1; 456 } 457 } 458 if (i == n-2) { 459 if (clusterIds.get(i)!=clusterIds.get(i+1)) { 460 return i+1; 461 } 462 } 463 } 464 return clusterIds.size(); 465 } 466 467 private static boolean isConnectedGraph(UndirectedGraph<Integer, DefaultEdge> graph) { 468 ConnectivityInspector<Integer, DefaultEdge> inspector = new ConnectivityInspector<Integer, DefaultEdge>(graph); 469 return inspector.isGraphConnected(); 470 } 471 472 private static List<Integer> getFolds(Integer[] subCluster, int size) { 473 List<Integer> denominators = new ArrayList<Integer>(); 474 int[] counts = new int[size]; 475 for (int element: subCluster) { 476 counts[element]++; 477 } 478 479 for (int d = 1; d <= subCluster.length; d++) { 480 int count = 0; 481 for (int i = 0; i < size; i++) { 482 if (counts[i] > 0 && (counts[i] % d == 0)) { 483 count += counts[i]; 484 } 485 } 486 if (count == subCluster.length) { 487 denominators.add(d); 488 } 489 } 490 491 Collections.sort(denominators); 492 return denominators; 493 } 494 495 private QuatSymmetryResults calcQuatSymmetry(Subunits subunits){ 496 return calcQuatSymmetry(subunits, parameters); 497 } 498 499 public static QuatSymmetryResults calcQuatSymmetry(Subunits subunits, 500 QuatSymmetryParameters parameters) { 501 if (subunits.getSubunitCount() == 0) { 502 return null; 503 } 504 505 RotationGroup rotationGroup = null; 506 String method = null; 507 if (subunits.getFolds().size() == 1) { 508 // no symmetry possible, create empty ("C1") rotation group 509 method = "norotation"; 510 rotationGroup = new RotationGroup(); 511 rotationGroup.setC1(subunits.getSubunitCount()); 512 } else if (subunits.getSubunitCount() == 2 && subunits.getFolds().contains(2)) { 513 method = "C2rotation"; 514 QuatSymmetrySolver solver = new C2RotationSolver(subunits, parameters); 515 rotationGroup = solver.getSymmetryOperations(); 516 } else { 517 method = "rotation"; 518 QuatSymmetrySolver solver = new RotationSolver(subunits, parameters); 519 rotationGroup = solver.getSymmetryOperations(); 520 } 521 522 QuatSymmetryResults results = new QuatSymmetryResults(subunits, rotationGroup, method); 523 524 // asymmetric structures cannot be pseudosymmetric 525 String symmetry = results.getSymmetry(); 526 if (symmetry.equals("C1")) { 527 subunits.setPseudoSymmetric(false); 528 } 529 530 // Check structures with Cn symmetry (n = 1, ...) for helical symmetry 531 if (symmetry.startsWith("C")) { 532 HelixSolver hc = new HelixSolver(subunits, rotationGroup.getOrder(), parameters); 533 HelixLayers helixLayers = hc.getSymmetryOperations(); 534 535 if (helixLayers.size() > 0) { 536 // if the complex has no symmetry (c1) or has Cn (n>=2) symmetry and the 537 // helical symmetry has a lower RMSD than the cyclic symmetry, set helical symmetry 538 // If the RMSD for helical and cyclic symmetry is similar, a slight preference is 539 // given to the helical symmetry by the helixRmsdThreshold parameter. 540 double cRmsd = rotationGroup.getScores().getRmsd(); 541 double hRmsd = helixLayers.getScores().getRmsd(); 542// System.out.println("cRMSD: " + cRmsd + " hRMSD: " + hRmsd); 543 double deltaRmsd = hRmsd - cRmsd; 544 if (symmetry.equals("C1") || 545 (!symmetry.equals("C1") && deltaRmsd <= parameters.getHelixRmsdThreshold())) { 546 method = "rottranslation"; 547 results = new QuatSymmetryResults(subunits, helixLayers, method); 548 } 549 } 550 } 551 552 return results; 553 } 554}