001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.cluster; 022 023import org.biojava.nbio.alignment.Alignments; 024import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType; 025import org.biojava.nbio.alignment.SimpleGapPenalty; 026import org.biojava.nbio.alignment.template.GapPenalty; 027import org.biojava.nbio.alignment.template.PairwiseSequenceAligner; 028import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper; 029import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 030import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 031import org.biojava.nbio.core.sequence.ProteinSequence; 032import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 033import org.biojava.nbio.structure.Atom; 034import org.biojava.nbio.structure.Chain; 035import org.biojava.nbio.structure.EntityInfo; 036import org.biojava.nbio.structure.Group; 037import org.biojava.nbio.structure.Structure; 038import org.biojava.nbio.structure.StructureException; 039import org.biojava.nbio.structure.align.StructureAlignment; 040import org.biojava.nbio.structure.align.StructureAlignmentFactory; 041import org.biojava.nbio.structure.align.ce.ConfigStrucAligParams; 042import org.biojava.nbio.structure.align.model.AFPChain; 043import org.biojava.nbio.structure.align.multiple.Block; 044import org.biojava.nbio.structure.align.multiple.BlockImpl; 045import org.biojava.nbio.structure.align.multiple.BlockSet; 046import org.biojava.nbio.structure.align.multiple.BlockSetImpl; 047import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 048import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsembleImpl; 049import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl; 050import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer; 051import org.biojava.nbio.structure.align.multiple.util.ReferenceSuperimposer; 052import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder; 053import org.biojava.nbio.structure.symmetry.core.QuatSymmetrySubunits; 054import org.biojava.nbio.structure.symmetry.internal.CESymmParameters; 055import org.biojava.nbio.structure.symmetry.internal.CeSymm; 056import org.biojava.nbio.structure.symmetry.internal.CeSymmResult; 057import org.slf4j.Logger; 058import org.slf4j.LoggerFactory; 059 060import java.lang.reflect.InvocationTargetException; 061import java.lang.reflect.Method; 062import java.util.*; 063import java.util.stream.Collectors; 064 065/** 066 * A SubunitCluster contains a set of equivalent {@link QuatSymmetrySubunits}, 067 * the set of equivalent residues (EQR) between {@link Subunit} and a 068 * {@link Subunit} representative. It also stores the method used for 069 * clustering. 070 * <p> 071 * This class allows the comparison and merging of SubunitClusters. 072 * 073 * @author Aleix Lafita 074 * @since 5.0.0 075 * 076 */ 077public class SubunitCluster { 078 079 private static final Logger logger = LoggerFactory.getLogger(SubunitCluster.class); 080 081 private List<Subunit> subunits = new ArrayList<>(); 082 private List<List<Integer>> subunitEQR = new ArrayList<>(); 083 private int representative = -1; 084 085 private SubunitClustererMethod method = SubunitClustererMethod.SEQUENCE; 086 private boolean pseudoStoichiometric = false; 087 088 /** 089 * A letter that is assigned to this cluster in stoichiometry. 090 */ 091 private String alpha = ""; 092 093 /** 094 * A letter that is assigned to this cluster in stoichiometry. 095 * 096 * @return alpha 097 * String 098 */ 099 100 public String getAlpha() { 101 return alpha; 102 } 103 104 /** 105 * A letter that is assigned to this cluster in stoichiometry. 106 * 107 * @param alpha 108 * String 109 */ 110 public void setAlpha(String alpha) { 111 this.alpha = alpha; 112 } 113 114 /** 115 * A constructor from a single Subunit. To obtain a 116 * SubunitCluster with multiple Subunits, initialize different 117 * SubunitClusters and merge them. 118 * 119 * @param subunit 120 * initial Subunit 121 */ 122 public SubunitCluster(Subunit subunit) { 123 124 subunits.add(subunit); 125 126 List<Integer> identity = new ArrayList<>(); 127 for (int i = 0; i < subunit.size(); i++) 128 identity.add(i); 129 subunitEQR.add(identity); 130 131 representative = 0; 132 } 133 134 /** 135 * A copy constructor with the possibility of removing subunits. 136 * No re-clustering is done. 137 * 138 * @param other 139 * reference SubunitCluster 140 * @param subunitsToRetain 141 * which subunits to copy to this cluster 142 */ 143 public SubunitCluster(SubunitCluster other, List<Integer> subunitsToRetain) { 144 method = other.method; 145 pseudoStoichiometric = other.pseudoStoichiometric; 146 for (int i = 0; i < other.subunits.size(); i++) { 147 if(subunitsToRetain.contains(i)) { 148 subunits.add(other.subunits.get(i)); 149 subunitEQR.add(other.subunitEQR.get(i)); 150 } 151 } 152 representative = 0; 153 for (int i=1; i<subunits.size(); i++) { 154 if (subunits.get(i).size() > subunits.get(representative).size()) { 155 representative = i; 156 } 157 } 158 setAlpha(other.getAlpha()); 159 } 160 161 /** 162 * Subunits contained in the SubunitCluster. 163 * 164 * @return an unmodifiable view of the original List 165 */ 166 public List<Subunit> getSubunits() { 167 return Collections.unmodifiableList(subunits); 168 } 169 170 /** 171 * Tells whether the other SubunitCluster contains exactly the same Subunit. 172 * This is checked by String equality of their residue one-letter sequences. 173 * 174 * @param other 175 * SubunitCluster 176 * @return true if the SubunitClusters are identical, false otherwise 177 */ 178 public boolean isIdenticalTo(SubunitCluster other) { 179 String thisSequence = this.subunits.get(this.representative) 180 .getProteinSequenceString(); 181 String otherSequence = other.subunits.get(other.representative) 182 .getProteinSequenceString(); 183 return thisSequence.equals(otherSequence); 184 } 185 186 /** 187 * Tells whether the other SubunitCluster contains exactly the same Subunit. 188 * This is checked by equality of their entity identifiers if they are present. 189 * 190 * @param other 191 * SubunitCluster 192 * @return true if the SubunitClusters are identical, false otherwise 193 */ 194 public boolean isIdenticalByEntityIdTo(SubunitCluster other) { 195 Subunit thisSub = this.subunits.get(this.representative); 196 Subunit otherSub = other.subunits.get(other.representative); 197 String thisName = thisSub.getName(); 198 String otherName = otherSub.getName(); 199 200 Structure thisStruct = thisSub.getStructure(); 201 Structure otherStruct = otherSub.getStructure(); 202 if (thisStruct == null || otherStruct == null) { 203 logger.info("SubunitClusters {}-{} have no referenced structures. Ignoring identity check by entity id", 204 thisName, 205 otherName); 206 return false; 207 } 208 if (thisStruct != otherStruct) { 209 // different object references: will not cluster even if entity id is same 210 return false; 211 } 212 Chain thisChain = thisStruct.getChain(thisName); 213 Chain otherChain = otherStruct.getChain(otherName); 214 if (thisChain == null || otherChain == null) { 215 logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id", 216 thisName, 217 otherName); 218 return false; 219 } 220 if (thisChain.getEntityInfo() == null || otherChain.getEntityInfo() == null) { 221 logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id", 222 thisName, 223 otherName); 224 return false; 225 } 226 int thisEntityId = thisChain.getEntityInfo().getMolId(); 227 int otherEntityId = otherChain.getEntityInfo().getMolId(); 228 return thisEntityId == otherEntityId; 229 } 230 231 /** 232 * Merges the other SubunitCluster into this one if it contains exactly the 233 * same Subunit. This is checked by {@link #isIdenticalTo(SubunitCluster)}. 234 * 235 * @param other 236 * SubunitCluster 237 * @return true if the SubunitClusters were merged, false otherwise 238 */ 239 public boolean mergeIdentical(SubunitCluster other) { 240 241 if (!isIdenticalTo(other)) 242 return false; 243 244 logger.info("SubunitClusters {}-{} are identical in sequence", 245 this.subunits.get(this.representative).getName(), 246 other.subunits.get(other.representative).getName()); 247 248 this.subunits.addAll(other.subunits); 249 this.subunitEQR.addAll(other.subunitEQR); 250 251 return true; 252 } 253 254 /** 255 * Merges the other SubunitCluster into this one if it contains exactly the 256 * same Subunit. This is checked by comparing the entity identifiers of the subunits 257 * if one can be found. 258 * Thus this only makes sense when the subunits are complete chains of a 259 * deposited PDB entry. 260 * 261 * @param other 262 * SubunitCluster 263 * @return true if the SubunitClusters were merged, false otherwise 264 */ 265 public boolean mergeIdenticalByEntityId(SubunitCluster other) { 266 267 if (!isIdenticalByEntityIdTo(other)) 268 return false; 269 270 Subunit thisSub = this.subunits.get(this.representative); 271 Subunit otherSub = other.subunits.get(other.representative); 272 String thisName = thisSub.getName(); 273 String otherName = otherSub.getName(); 274 275 logger.info("SubunitClusters {}-{} belong to same entity. Assuming they are identical", 276 thisName, 277 otherName); 278 279 List<Integer> thisAligned = new ArrayList<>(); 280 List<Integer> otherAligned = new ArrayList<>(); 281 282 // we've merged by entity id, we can assume structure, chain and entity are available (checked in isIdenticalByEntityIdTo()) 283 Structure thisStruct = thisSub.getStructure(); 284 Structure otherStruct = otherSub.getStructure(); 285 Chain thisChain = thisStruct.getChain(thisName); 286 Chain otherChain = otherStruct.getChain(otherName); 287 EntityInfo entityInfo = thisChain.getEntityInfo(); 288 289 // Extract the aligned residues of both Subunits 290 for (int thisIndex=0; thisIndex < thisSub.size(); thisIndex++) { 291 292 Group g = thisSub.getRepresentativeAtoms()[thisIndex].getGroup(); 293 294 int seqresIndex = entityInfo.getAlignedResIndex(g, thisChain); 295 296 if (seqresIndex == -1) { 297 // this might mean that FileParsingParameters.setAlignSeqRes() wasn't set to true during parsing 298 continue; 299 } 300 301 // note the seqresindex is 1-based 302 Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1); 303 304 int otherIndex = otherChain.getAtomGroups().indexOf(otherG); 305 if (otherIndex == -1) { 306 // skip residues that are unobserved in other sequence ("gaps" in the entity SEQRES alignment) 307 continue; 308 } 309 310 // Only consider residues that are part of the SubunitCluster 311 if (this.subunitEQR.get(this.representative).contains(thisIndex) 312 && other.subunitEQR.get(other.representative).contains(otherIndex)) { 313 thisAligned.add(thisIndex); 314 otherAligned.add(otherIndex); 315 } 316 } 317 318 if (thisAligned.size() == 0 && otherAligned.size() == 0) { 319 logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity SEQRES alignment. Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName); 320 } 321 322 updateEquivResidues(other, thisAligned, otherAligned); 323 324 return true; 325 } 326 327 /** 328 * Merges the other SubunitCluster into this one if their representatives 329 * sequences are similar (according to the criteria in params). 330 * <p> 331 * The sequence alignment is performed using linear {@link SimpleGapPenalty} and 332 * BLOSUM62 as scoring matrix. 333 * 334 * @param other 335 * SubunitCluster 336 * @param params 337 * SubunitClustererParameters, with information whether to use local 338 * or global alignment, sequence identity and coverage thresholds. 339 * Threshold values lower than 0.7 are not recommended. 340 * Use {@link #mergeStructure} for lower values. 341 * @return true if the SubunitClusters were merged, false otherwise 342 * @throws CompoundNotFoundException 343 */ 344 345 public boolean mergeSequence(SubunitCluster other, SubunitClustererParameters params) throws CompoundNotFoundException { 346 PairwiseSequenceAlignerType alignerType = PairwiseSequenceAlignerType.LOCAL; 347 if (params.isUseGlobalMetrics()) { 348 alignerType = PairwiseSequenceAlignerType.GLOBAL; 349 } 350 return mergeSequence(other, params,alignerType 351 , new SimpleGapPenalty(), 352 SubstitutionMatrixHelper.getBlosum62()); 353 } 354 355 /** 356 * Merges the other SubunitCluster into this one if their representatives 357 * sequences are similar (according to the criteria in params). 358 * <p> 359 * The sequence alignment is performed using linear {@link SimpleGapPenalty} and 360 * BLOSUM62 as scoring matrix. 361 * 362 * @param other 363 * SubunitCluster 364 * @param params 365 * {@link SubunitClustererParameters}, with information whether to use local 366 * or global alignment, sequence identity and coverage thresholds. 367 * Threshold values lower than 0.7 are not recommended. 368 * Use {@link #mergeStructure} for lower values. 369 * @param alignerType 370 * parameter for the sequence alignment algorithm 371 * @param gapPenalty 372 * parameter for the sequence alignment algorithm 373 * @param subsMatrix 374 * parameter for the sequence alignment algorithm 375 * @return true if the SubunitClusters were merged, false otherwise 376 * @throws CompoundNotFoundException 377 */ 378 379 public boolean mergeSequence(SubunitCluster other, SubunitClustererParameters params, 380 PairwiseSequenceAlignerType alignerType, 381 GapPenalty gapPenalty, 382 SubstitutionMatrix<AminoAcidCompound> subsMatrix) 383 throws CompoundNotFoundException { 384 385 // Extract the protein sequences as BioJava alignment objects 386 ProteinSequence thisSequence = this.subunits.get(this.representative) 387 .getProteinSequence(); 388 ProteinSequence otherSequence = other.subunits 389 .get(other.representative).getProteinSequence(); 390 391 // Perform the alignment with provided parameters 392 PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> aligner = Alignments 393 .getPairwiseAligner(thisSequence, otherSequence, alignerType, 394 gapPenalty, subsMatrix); 395 396 double sequenceIdentity; 397 if(params.isUseGlobalMetrics()) { 398 sequenceIdentity = aligner.getPair().getPercentageOfIdentity(true); 399 } else { 400 sequenceIdentity = aligner.getPair().getPercentageOfIdentity(false); 401 } 402 403 if (sequenceIdentity < params.getSequenceIdentityThreshold()) 404 return false; 405 406 double sequenceCoverage = 0; 407 if(params.isUseSequenceCoverage()) { 408 // Calculate real coverage (subtract gaps in both sequences) 409 double gaps1 = aligner.getPair().getAlignedSequence(1) 410 .getNumGapPositions(); 411 double gaps2 = aligner.getPair().getAlignedSequence(2) 412 .getNumGapPositions(); 413 double lengthAlignment = aligner.getPair().getLength(); 414 double lengthThis = aligner.getQuery().getLength(); 415 double lengthOther = aligner.getTarget().getLength(); 416 sequenceCoverage = (lengthAlignment - gaps1 - gaps2) 417 / Math.max(lengthThis, lengthOther); 418 419 if (sequenceCoverage < params.getSequenceCoverageThreshold()) 420 return false; 421 } 422 423 logger.info(String.format("SubunitClusters %s-%s are similar in sequence " 424 + "with %.2f sequence identity and %.2f coverage", 425 this.subunits.get(this.representative).getName(), 426 other.subunits.get(other.representative).getName(), 427 sequenceIdentity, sequenceCoverage)); 428 429 // If coverage and sequence identity sufficient, merge other and this 430 List<Integer> thisAligned = new ArrayList<>(); 431 List<Integer> otherAligned = new ArrayList<>(); 432 433 // Extract the aligned residues of both Subunit 434 for (int p = 1; p < aligner.getPair().getLength() + 1; p++) { 435 436 // Skip gaps in any of the two sequences 437 if (aligner.getPair().getAlignedSequence(1).isGap(p)) 438 continue; 439 if (aligner.getPair().getAlignedSequence(2).isGap(p)) 440 continue; 441 442 int thisIndex = aligner.getPair().getIndexInQueryAt(p) - 1; 443 int otherIndex = aligner.getPair().getIndexInTargetAt(p) - 1; 444 445 // Only consider residues that are part of the SubunitCluster 446 if (this.subunitEQR.get(this.representative).contains(thisIndex) 447 && other.subunitEQR.get(other.representative).contains(otherIndex)) { 448 thisAligned.add(thisIndex); 449 otherAligned.add(otherIndex); 450 } 451 } 452 453 updateEquivResidues(other, thisAligned, otherAligned); 454 455 this.method = SubunitClustererMethod.SEQUENCE; 456 pseudoStoichiometric = !params.isHighConfidenceScores(sequenceIdentity,sequenceCoverage); 457 458 return true; 459 } 460 461 /** 462 * Merges the other SubunitCluster into this one if their representative 463 * Atoms are structurally similar (according to the criteria in params). 464 * <p> 465 * 466 * @param other 467 * SubunitCluster 468 * @param params 469 * {@link SubunitClustererParameters}, with information on what alignment 470 * algorithm to use, RMSD/TMScore and structure coverage thresholds. 471 * @return true if the SubunitClusters were merged, false otherwise 472 * @throws StructureException 473 */ 474 475 public boolean mergeStructure(SubunitCluster other, SubunitClustererParameters params) throws StructureException { 476 477 StructureAlignment aligner = StructureAlignmentFactory.getAlgorithm(params.getSuperpositionAlgorithm()); 478 ConfigStrucAligParams aligner_params = aligner.getParameters(); 479 480 Method setOptimizeAlignment = null; 481 try { 482 setOptimizeAlignment = aligner_params.getClass().getMethod("setOptimizeAlignment", boolean.class); 483 } catch (NoSuchMethodException e) { 484 //alignment algorithm does not have an optimization switch, moving on 485 } 486 if (setOptimizeAlignment != null) { 487 try { 488 setOptimizeAlignment.invoke(aligner_params, params.isOptimizeAlignment()); 489 } catch (IllegalAccessException|InvocationTargetException e) { 490 logger.warn("Could not set alignment optimisation switch"); 491 } 492 } 493 494 AFPChain afp = aligner.align(this.subunits.get(this.representative) 495 .getRepresentativeAtoms(), 496 other.subunits.get(other.representative) 497 .getRepresentativeAtoms()); 498 499 // Convert AFPChain to MultipleAlignment for convenience 500 MultipleAlignment msa = new MultipleAlignmentEnsembleImpl( 501 afp, 502 this.subunits.get(this.representative).getRepresentativeAtoms(), 503 other.subunits.get(other.representative) 504 .getRepresentativeAtoms(), false) 505 .getMultipleAlignment(0); 506 507 double structureCoverage = Math.min(msa.getCoverages().get(0), msa 508 .getCoverages().get(1)); 509 510 if(params.isUseStructureCoverage() && structureCoverage < params.getStructureCoverageThreshold()) { 511 return false; 512 } 513 514 double rmsd = afp.getTotalRmsdOpt(); 515 if (params.isUseRMSD() && rmsd > params.getRMSDThreshold()) { 516 return false; 517 } 518 519 double tmScore = afp.getTMScore(); 520 if (params.isUseTMScore() && tmScore < params.getTMThreshold()) { 521 return false; 522 } 523 524 logger.info(String.format("SubunitClusters are structurally similar with " 525 + "%.2f RMSD %.2f coverage", rmsd, structureCoverage)); 526 527 // Merge clusters 528 List<List<Integer>> alignedRes = msa.getBlock(0).getAlignRes(); 529 List<Integer> thisAligned = new ArrayList<>(); 530 List<Integer> otherAligned = new ArrayList<>(); 531 532 // Extract the aligned residues of both Subunit 533 for (int p = 0; p < msa.length(); p++) { 534 535 // Skip gaps in any of the two sequences 536 if (alignedRes.get(0).get(p) == null) 537 continue; 538 if (alignedRes.get(1).get(p) == null) 539 continue; 540 541 int thisIndex = alignedRes.get(0).get(p); 542 int otherIndex = alignedRes.get(1).get(p); 543 544 // Only consider residues that are part of the SubunitCluster 545 if (this.subunitEQR.get(this.representative).contains(thisIndex) 546 && other.subunitEQR.get(other.representative).contains( 547 otherIndex)) { 548 thisAligned.add(thisIndex); 549 otherAligned.add(otherIndex); 550 } 551 } 552 553 updateEquivResidues(other, thisAligned, otherAligned); 554 555 this.method = SubunitClustererMethod.STRUCTURE; 556 pseudoStoichiometric = true; 557 558 return true; 559 } 560 561 private void updateEquivResidues(SubunitCluster other, List<Integer> thisAligned, List<Integer> otherAligned) { 562 // Do a List intersection to find out which EQR columns to remove 563 List<Integer> thisRemove = new ArrayList<>(); 564 List<Integer> otherRemove = new ArrayList<>(); 565 566 for (int t = 0; t < this.subunitEQR.get(this.representative).size(); t++) { 567 // If the index is aligned do nothing, otherwise mark as removing 568 if (!thisAligned.contains(this.subunitEQR.get(this.representative).get(t))) 569 thisRemove.add(t); 570 } 571 572 for (int t = 0; t < other.subunitEQR.get(other.representative).size(); t++) { 573 // If the index is aligned do nothing, otherwise mark as removing 574 if (!otherAligned.contains(other.subunitEQR.get(other.representative).get(t))) 575 otherRemove.add(t); 576 } 577 // Now remove unaligned columns, from end to start 578 Collections.sort(thisRemove); 579 Collections.reverse(thisRemove); 580 Collections.sort(otherRemove); 581 Collections.reverse(otherRemove); 582 583 for (int t = 0; t < thisRemove.size(); t++) { 584 for (List<Integer> eqr : this.subunitEQR) { 585 int column = thisRemove.get(t); 586 eqr.remove(column); 587 } 588 } 589 590 for (int t = 0; t < otherRemove.size(); t++) { 591 for (List<Integer> eqr : other.subunitEQR) { 592 int column = otherRemove.get(t); 593 eqr.remove(column); 594 } 595 } 596 597 // The representative is the longest sequence 598 if (this.subunits.get(this.representative).size() < other.subunits.get(other.representative).size()) 599 this.representative = other.representative + subunits.size(); 600 601 this.subunits.addAll(other.subunits); 602 this.subunitEQR.addAll(other.subunitEQR); 603 604 } 605 606 /** 607 * Analyze the internal symmetry of the SubunitCluster and divide its 608 * {@link Subunit} into the internal repeats (domains) if they are 609 * internally symmetric. 610 * 611 * @param clusterParams {@link SubunitClustererParameters} with fields used as follows: 612 * structureCoverageThreshold 613 * the minimum coverage of all repeats in the Subunit 614 * rmsdThreshold 615 * the maximum allowed RMSD between the repeats 616 * minimumSequenceLength 617 * the minimum length of the repeating units 618 * @return true if the cluster was internally symmetric, false otherwise 619 * @throws StructureException 620 */ 621 public boolean divideInternally(SubunitClustererParameters clusterParams) 622 throws StructureException { 623 624 CESymmParameters cesym_params = new CESymmParameters(); 625 cesym_params.setMinCoreLength(clusterParams.getMinimumSequenceLength()); 626 cesym_params.setGaps(false); // We want no gaps between the repeats 627 628 // Analyze the internal symmetry of the representative subunit 629 CeSymmResult result = CeSymm.analyze(subunits.get(representative) 630 .getRepresentativeAtoms(), cesym_params); 631 632 if (!result.isSignificant()) 633 return false; 634 635 double rmsd = result.getMultipleAlignment().getScore( 636 MultipleAlignmentScorer.RMSD); 637 if (rmsd > clusterParams.getRMSDThreshold()) 638 return false; 639 640 double coverage = result.getMultipleAlignment().getCoverages().get(0) 641 * result.getNumRepeats(); 642 if (coverage < clusterParams.getStructureCoverageThreshold()) 643 return false; 644 645 logger.info("SubunitCluster is internally symmetric with {} repeats, " 646 + "{} RMSD and {} coverage", result.getNumRepeats(), rmsd, 647 coverage); 648 649 // Divide if symmety was significant with RMSD and coverage sufficient 650 List<List<Integer>> alignedRes = result.getMultipleAlignment() 651 .getBlock(0).getAlignRes(); 652 653 List<List<Integer>> columns = new ArrayList<>(); 654 for (int s = 0; s < alignedRes.size(); s++) 655 columns.add(new ArrayList<>(alignedRes.get(s).size())); 656 657 // Extract the aligned columns of each repeat in the Subunit 658 for (int col = 0; col < alignedRes.get(0).size(); col++) { 659 660 // Check that all aligned residues are part of the Cluster 661 boolean missing = false; 662 for (int s = 0; s < alignedRes.size(); s++) { 663 if (!subunitEQR.get(representative).contains( 664 alignedRes.get(s).get(col))) { 665 missing = true; 666 break; 667 } 668 } 669 670 // Skip the column if any residue was not part of the cluster 671 if (missing) 672 continue; 673 674 for (int s = 0; s < alignedRes.size(); s++) { 675 columns.get(s).add( 676 subunitEQR.get(representative).indexOf( 677 alignedRes.get(s).get(col))); 678 } 679 } 680 681 // Divide the Subunits in their repeats 682 List<Subunit> newSubunits = new ArrayList<Subunit>(subunits.size() 683 * columns.size()); 684 List<List<Integer>> newSubunitEQR = new ArrayList<List<Integer>>( 685 subunits.size() * columns.size()); 686 687 for (int s = 0; s < subunits.size(); s++) { 688 for (int r = 0; r < columns.size(); r++) { 689 690 // Calculate start and end residues of the new Subunit 691 int start = subunitEQR.get(s).get(columns.get(r).get(0)); 692 int end = subunitEQR.get(s).get( 693 columns.get(r).get(columns.get(r).size() - 1)); 694 695 Atom[] reprAtoms = Arrays.copyOfRange(subunits.get(s) 696 .getRepresentativeAtoms(), start, end + 1); 697 698 newSubunits.add(new Subunit(reprAtoms, subunits.get(s) 699 .getName(), subunits.get(s).getIdentifier(), subunits 700 .get(s).getStructure())); 701 702 // Recalculate equivalent residues 703 List<Integer> eqr = new ArrayList<Integer>(); 704 for (int p = 0; p < columns.get(r).size(); p++) { 705 eqr.add(subunitEQR.get(s).get(columns.get(r).get(p)) 706 - start); 707 } 708 newSubunitEQR.add(eqr); 709 } 710 } 711 712 subunits = newSubunits; 713 subunitEQR = newSubunitEQR; 714 715 // Update representative 716 for (int s = 0; s < subunits.size(); s++) { 717 if (subunits.get(s).size() > subunits.get(representative).size()) 718 representative = s; 719 } 720 721 method = SubunitClustererMethod.STRUCTURE; 722 pseudoStoichiometric = true; 723 return true; 724 } 725 726 /** 727 * @return the number of Subunits in the cluster 728 */ 729 public int size() { 730 return subunits.size(); 731 } 732 733 /** 734 * @return the number of aligned residues between Subunits of the cluster 735 */ 736 public int length() { 737 return subunitEQR.get(representative).size(); 738 } 739 740 /** 741 * @return the {@link SubunitClustererMethod} used for clustering the 742 * Subunits 743 */ 744 public SubunitClustererMethod getClustererMethod() { 745 return method; 746 } 747 748 /** 749 * @return A List of size {@link #size()} of Atom arrays of length 750 * {@link #length()} with the aligned Atoms for each Subunit in the 751 * cluster 752 */ 753 public List<Atom[]> getAlignedAtomsSubunits() { 754 755 List<Atom[]> alignedAtoms = new ArrayList<>(); 756 757 // Loop through all subunits and add the aligned positions 758 for (int s = 0; s < subunits.size(); s++) 759 alignedAtoms.add(getAlignedAtomsSubunit(s)); 760 761 return alignedAtoms; 762 } 763 764 /** 765 * @param index 766 * Subunit index in the Cluster 767 * @return An Atom array of length {@link #length()} with the aligned Atoms 768 * from the selected Subunit in the Cluster 769 */ 770 public Atom[] getAlignedAtomsSubunit(int index) { 771 772 Atom[] aligned = new Atom[subunitEQR.get(index).size()]; 773 774 // Add only the aligned positions of the Subunit in the Cluster 775 for (int p = 0; p < subunitEQR.get(index).size(); p++) { 776 aligned[p] = subunits.get(index).getRepresentativeAtoms()[subunitEQR 777 .get(index).get(p)]; 778 } 779 780 return aligned; 781 } 782 783 /** 784 * The multiple alignment is calculated from the equivalent residues in the 785 * SubunitCluster. The alignment is recalculated every time the method is 786 * called (no caching). 787 * 788 * @return MultipleAlignment representation of the aligned residues in this 789 * Subunit Cluster 790 * @throws StructureException 791 */ 792 public MultipleAlignment getMultipleAlignment() throws StructureException { 793 794 // Create a multiple alignment with the atom arrays of the Subunits 795 MultipleAlignment msa = new MultipleAlignmentImpl(); 796 msa.setEnsemble(new MultipleAlignmentEnsembleImpl()); 797 msa.getEnsemble().setAtomArrays( 798 subunits.stream().map(s -> s.getRepresentativeAtoms()) 799 .collect(Collectors.toList())); 800 801 // Fill in the alignment information 802 BlockSet bs = new BlockSetImpl(msa); 803 Block b = new BlockImpl(bs); 804 b.setAlignRes(subunitEQR); 805 806 // Fill in the transformation matrices 807 new ReferenceSuperimposer(representative).superimpose(msa); 808 809 // Calculate some scores 810 MultipleAlignmentScorer.calculateScores(msa); 811 812 return msa; 813 814 } 815 816 @Override 817 public String toString() { 818 return "SubunitCluster [Size=" + size() + ", Length=" + length() 819 + ", Representative=" + representative + ", Method=" + method 820 + "]"; 821 } 822 823 /** 824 * @return true if this cluster is considered pseudo-stoichiometric (i.e., 825 * was either clustered by structure, or by sequence with low scores), 826 * false otherwise. 827 */ 828 public boolean isPseudoStoichiometric() { 829 return pseudoStoichiometric; 830 } 831 832}