001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.cluster; 022 023import org.biojava.nbio.alignment.Alignments; 024import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType; 025import org.biojava.nbio.alignment.SimpleGapPenalty; 026import org.biojava.nbio.alignment.template.GapPenalty; 027import org.biojava.nbio.alignment.template.PairwiseSequenceAligner; 028import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper; 029import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 030import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 031import org.biojava.nbio.core.sequence.ProteinSequence; 032import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 033import org.biojava.nbio.structure.Atom; 034import org.biojava.nbio.structure.Chain; 035import org.biojava.nbio.structure.EntityInfo; 036import org.biojava.nbio.structure.Group; 037import org.biojava.nbio.structure.Structure; 038import org.biojava.nbio.structure.StructureException; 039import org.biojava.nbio.structure.align.StructureAlignment; 040import org.biojava.nbio.structure.align.StructureAlignmentFactory; 041import org.biojava.nbio.structure.align.ce.ConfigStrucAligParams; 042import org.biojava.nbio.structure.align.model.AFPChain; 043import org.biojava.nbio.structure.align.multiple.Block; 044import org.biojava.nbio.structure.align.multiple.BlockImpl; 045import org.biojava.nbio.structure.align.multiple.BlockSet; 046import org.biojava.nbio.structure.align.multiple.BlockSetImpl; 047import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 048import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsembleImpl; 049import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl; 050import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer; 051import org.biojava.nbio.structure.align.multiple.util.ReferenceSuperimposer; 052import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder; 053import org.biojava.nbio.structure.symmetry.core.QuatSymmetrySubunits; 054import org.biojava.nbio.structure.symmetry.internal.CESymmParameters; 055import org.biojava.nbio.structure.symmetry.internal.CeSymm; 056import org.biojava.nbio.structure.symmetry.internal.CeSymmResult; 057import org.slf4j.Logger; 058import org.slf4j.LoggerFactory; 059 060import java.lang.reflect.InvocationTargetException; 061import java.lang.reflect.Method; 062import java.util.*; 063import java.util.stream.Collectors; 064 065/** 066 * A SubunitCluster contains a set of equivalent {@link QuatSymmetrySubunits}, 067 * the set of equivalent residues (EQR) between {@link Subunit} and a 068 * {@link Subunit} representative. It also stores the method used for 069 * clustering. 070 * <p> 071 * This class allows the comparison and merging of SubunitClusters. 072 * 073 * @author Aleix Lafita 074 * @since 5.0.0 075 * 076 */ 077public class SubunitCluster { 078 079 private static final Logger logger = LoggerFactory.getLogger(SubunitCluster.class); 080 081 private List<Subunit> subunits = new ArrayList<>(); 082 private List<List<Integer>> subunitEQR = new ArrayList<>(); 083 private int representative = -1; 084 085 private SubunitClustererMethod method = SubunitClustererMethod.SEQUENCE; 086 private boolean pseudoStoichiometric = false; 087 088 /** 089 * A letter that is assigned to this cluster in stoichiometry. 090 */ 091 private String alpha = ""; 092 093 /** 094 * A letter that is assigned to this cluster in stoichiometry. 095 * 096 * @return alpha 097 * String 098 */ 099 100 public String getAlpha() { 101 return alpha; 102 } 103 104 /** 105 * A letter that is assigned to this cluster in stoichiometry. 106 * 107 * @param alpha 108 * String 109 */ 110 public void setAlpha(String alpha) { 111 this.alpha = alpha; 112 } 113 114 /** 115 * A constructor from a single Subunit. To obtain a 116 * SubunitCluster with multiple Subunits, initialize different 117 * SubunitClusters and merge them. 118 * 119 * @param subunit 120 * initial Subunit 121 */ 122 public SubunitCluster(Subunit subunit) { 123 124 subunits.add(subunit); 125 126 List<Integer> identity = new ArrayList<>(); 127 for (int i = 0; i < subunit.size(); i++) 128 identity.add(i); 129 subunitEQR.add(identity); 130 131 representative = 0; 132 } 133 134 /** 135 * A copy constructor with the possibility of removing subunits. 136 * No re-clustering is done. 137 * 138 * @param other 139 * reference SubunitCluster 140 * @param subunitsToRetain 141 * which subunits to copy to this cluster 142 */ 143 public SubunitCluster(SubunitCluster other, List<Integer> subunitsToRetain) { 144 method = other.method; 145 pseudoStoichiometric = other.pseudoStoichiometric; 146 for (int i = 0; i < other.subunits.size(); i++) { 147 if(subunitsToRetain.contains(i)) { 148 subunits.add(other.subunits.get(i)); 149 subunitEQR.add(other.subunitEQR.get(i)); 150 } 151 } 152 representative = 0; 153 for (int i=1; i<subunits.size(); i++) { 154 if (subunits.get(i).size() > subunits.get(representative).size()) { 155 representative = i; 156 } 157 } 158 setAlpha(other.getAlpha()); 159 } 160 161 /** 162 * Create the cluster manually by specifying subunits and the equivalent residues 163 * @param subunits List of aligned subunits 164 * @param subunitEQR Double list giving the aligned residue indices in each subunit 165 */ 166 public SubunitCluster(List<Subunit> subunits, List<List<Integer>> subunitEQR) { 167 if(subunits.size() != subunitEQR.size()) { 168 throw new IllegalArgumentException("Mismatched subunit length"); 169 } 170 this.subunits = subunits; 171 this.subunitEQR = subunitEQR; 172 this.representative = 0; 173 this.method = SubunitClustererMethod.MANUAL; 174 this.pseudoStoichiometric = false; 175 } 176 177 /** 178 * Subunits contained in the SubunitCluster. 179 * 180 * @return an unmodifiable view of the original List 181 */ 182 public List<Subunit> getSubunits() { 183 return Collections.unmodifiableList(subunits); 184 } 185 186 /** 187 * Tells whether the other SubunitCluster contains exactly the same Subunit. 188 * This is checked by String equality of their residue one-letter sequences. 189 * 190 * @param other 191 * SubunitCluster 192 * @return true if the SubunitClusters are identical, false otherwise 193 */ 194 public boolean isIdenticalTo(SubunitCluster other) { 195 String thisSequence = this.subunits.get(this.representative) 196 .getProteinSequenceString(); 197 String otherSequence = other.subunits.get(other.representative) 198 .getProteinSequenceString(); 199 return thisSequence.equals(otherSequence); 200 } 201 202 /** 203 * Tells whether the other SubunitCluster contains exactly the same Subunit. 204 * This is checked by equality of their entity identifiers if they are present. 205 * 206 * @param other 207 * SubunitCluster 208 * @return true if the SubunitClusters are identical, false otherwise 209 */ 210 public boolean isIdenticalByEntityIdTo(SubunitCluster other) { 211 Subunit thisSub = this.subunits.get(this.representative); 212 Subunit otherSub = other.subunits.get(other.representative); 213 String thisName = thisSub.getName(); 214 String otherName = otherSub.getName(); 215 216 Structure thisStruct = thisSub.getStructure(); 217 Structure otherStruct = otherSub.getStructure(); 218 if (thisStruct == null || otherStruct == null) { 219 logger.info("SubunitClusters {}-{} have no referenced structures. Ignoring identity check by entity id", 220 thisName, 221 otherName); 222 return false; 223 } 224 if (thisStruct != otherStruct) { 225 // different object references: will not cluster even if entity id is same 226 return false; 227 } 228 Chain thisChain = thisStruct.getChain(thisName); 229 Chain otherChain = otherStruct.getChain(otherName); 230 if (thisChain == null || otherChain == null) { 231 logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id", 232 thisName, 233 otherName); 234 return false; 235 } 236 if (thisChain.getEntityInfo() == null || otherChain.getEntityInfo() == null) { 237 logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id", 238 thisName, 239 otherName); 240 return false; 241 } 242 int thisEntityId = thisChain.getEntityInfo().getMolId(); 243 int otherEntityId = otherChain.getEntityInfo().getMolId(); 244 return thisEntityId == otherEntityId; 245 } 246 247 /** 248 * Merges the other SubunitCluster into this one if it contains exactly the 249 * same Subunit. This is checked by {@link #isIdenticalTo(SubunitCluster)}. 250 * 251 * @param other 252 * SubunitCluster 253 * @return true if the SubunitClusters were merged, false otherwise 254 */ 255 public boolean mergeIdentical(SubunitCluster other) { 256 257 if (!isIdenticalTo(other)) 258 return false; 259 260 logger.info("SubunitClusters {}-{} are identical in sequence", 261 this.subunits.get(this.representative).getName(), 262 other.subunits.get(other.representative).getName()); 263 264 this.subunits.addAll(other.subunits); 265 this.subunitEQR.addAll(other.subunitEQR); 266 267 return true; 268 } 269 270 /** 271 * Merges the other SubunitCluster into this one if it contains exactly the 272 * same Subunit. This is checked by comparing the entity identifiers of the subunits 273 * if one can be found. 274 * Thus this only makes sense when the subunits are complete chains of a 275 * deposited PDB entry. 276 * 277 * @param other 278 * SubunitCluster 279 * @return true if the SubunitClusters were merged, false otherwise 280 */ 281 public boolean mergeIdenticalByEntityId(SubunitCluster other) { 282 283 if (!isIdenticalByEntityIdTo(other)) 284 return false; 285 286 Subunit thisSub = this.subunits.get(this.representative); 287 Subunit otherSub = other.subunits.get(other.representative); 288 String thisName = thisSub.getName(); 289 String otherName = otherSub.getName(); 290 291 logger.info("SubunitClusters {}-{} belong to same entity. Assuming they are identical", 292 thisName, 293 otherName); 294 295 List<Integer> thisAligned = new ArrayList<>(); 296 List<Integer> otherAligned = new ArrayList<>(); 297 298 // we've merged by entity id, we can assume structure, chain and entity are available (checked in isIdenticalByEntityIdTo()) 299 Structure thisStruct = thisSub.getStructure(); 300 Structure otherStruct = otherSub.getStructure(); 301 Chain thisChain = thisStruct.getChain(thisName); 302 Chain otherChain = otherStruct.getChain(otherName); 303 EntityInfo entityInfo = thisChain.getEntityInfo(); 304 305 // Extract the aligned residues of both Subunits 306 for (int thisIndex=0; thisIndex < thisSub.size(); thisIndex++) { 307 308 Group g = thisSub.getRepresentativeAtoms()[thisIndex].getGroup(); 309 310 int seqresIndex = entityInfo.getAlignedResIndex(g, thisChain); 311 312 if (seqresIndex == -1) { 313 // this might mean that FileParsingParameters.setAlignSeqRes() wasn't set to true during parsing 314 continue; 315 } 316 317 // note the seqresindex is 1-based 318 Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1); 319 320 int otherIndex = otherChain.getAtomGroups().indexOf(otherG); 321 if (otherIndex == -1) { 322 // skip residues that are unobserved in other sequence ("gaps" in the entity SEQRES alignment) 323 continue; 324 } 325 326 // Only consider residues that are part of the SubunitCluster 327 if (this.subunitEQR.get(this.representative).contains(thisIndex) 328 && other.subunitEQR.get(other.representative).contains(otherIndex)) { 329 thisAligned.add(thisIndex); 330 otherAligned.add(otherIndex); 331 } 332 } 333 334 if (thisAligned.size() == 0 && otherAligned.size() == 0) { 335 logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity SEQRES alignment. Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName); 336 } 337 338 updateEquivResidues(other, thisAligned, otherAligned); 339 340 return true; 341 } 342 343 /** 344 * Merges the other SubunitCluster into this one if their representatives 345 * sequences are similar (according to the criteria in params). 346 * <p> 347 * The sequence alignment is performed using linear {@link SimpleGapPenalty} and 348 * BLOSUM62 as scoring matrix. 349 * 350 * @param other 351 * SubunitCluster 352 * @param params 353 * SubunitClustererParameters, with information whether to use local 354 * or global alignment, sequence identity and coverage thresholds. 355 * Threshold values lower than 0.7 are not recommended. 356 * Use {@link #mergeStructure} for lower values. 357 * @return true if the SubunitClusters were merged, false otherwise 358 * @throws CompoundNotFoundException 359 */ 360 361 public boolean mergeSequence(SubunitCluster other, SubunitClustererParameters params) throws CompoundNotFoundException { 362 PairwiseSequenceAlignerType alignerType = PairwiseSequenceAlignerType.LOCAL; 363 if (params.isUseGlobalMetrics()) { 364 alignerType = PairwiseSequenceAlignerType.GLOBAL; 365 } 366 return mergeSequence(other, params,alignerType 367 , new SimpleGapPenalty(), 368 SubstitutionMatrixHelper.getBlosum62()); 369 } 370 371 /** 372 * Merges the other SubunitCluster into this one if their representatives 373 * sequences are similar (according to the criteria in params). 374 * <p> 375 * The sequence alignment is performed using linear {@link SimpleGapPenalty} and 376 * BLOSUM62 as scoring matrix. 377 * 378 * @param other 379 * SubunitCluster 380 * @param params 381 * {@link SubunitClustererParameters}, with information whether to use local 382 * or global alignment, sequence identity and coverage thresholds. 383 * Threshold values lower than 0.7 are not recommended. 384 * Use {@link #mergeStructure} for lower values. 385 * @param alignerType 386 * parameter for the sequence alignment algorithm 387 * @param gapPenalty 388 * parameter for the sequence alignment algorithm 389 * @param subsMatrix 390 * parameter for the sequence alignment algorithm 391 * @return true if the SubunitClusters were merged, false otherwise 392 * @throws CompoundNotFoundException 393 */ 394 395 public boolean mergeSequence(SubunitCluster other, SubunitClustererParameters params, 396 PairwiseSequenceAlignerType alignerType, 397 GapPenalty gapPenalty, 398 SubstitutionMatrix<AminoAcidCompound> subsMatrix) 399 throws CompoundNotFoundException { 400 401 // Extract the protein sequences as BioJava alignment objects 402 ProteinSequence thisSequence = this.subunits.get(this.representative) 403 .getProteinSequence(); 404 ProteinSequence otherSequence = other.subunits 405 .get(other.representative).getProteinSequence(); 406 407 // Perform the alignment with provided parameters 408 PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> aligner = Alignments 409 .getPairwiseAligner(thisSequence, otherSequence, alignerType, 410 gapPenalty, subsMatrix); 411 412 double sequenceIdentity; 413 if(params.isUseGlobalMetrics()) { 414 sequenceIdentity = aligner.getPair().getPercentageOfIdentity(true); 415 } else { 416 sequenceIdentity = aligner.getPair().getPercentageOfIdentity(false); 417 } 418 419 if (sequenceIdentity < params.getSequenceIdentityThreshold()) 420 return false; 421 422 double sequenceCoverage = 0; 423 if(params.isUseSequenceCoverage()) { 424 // Calculate real coverage (subtract gaps in both sequences) 425 double gaps1 = aligner.getPair().getAlignedSequence(1) 426 .getNumGapPositions(); 427 double gaps2 = aligner.getPair().getAlignedSequence(2) 428 .getNumGapPositions(); 429 double lengthAlignment = aligner.getPair().getLength(); 430 double lengthThis = aligner.getQuery().getLength(); 431 double lengthOther = aligner.getTarget().getLength(); 432 sequenceCoverage = (lengthAlignment - gaps1 - gaps2) 433 / Math.max(lengthThis, lengthOther); 434 435 if (sequenceCoverage < params.getSequenceCoverageThreshold()) 436 return false; 437 } 438 439 logger.info(String.format("SubunitClusters %s-%s are similar in sequence " 440 + "with %.2f sequence identity and %.2f coverage", 441 this.subunits.get(this.representative).getName(), 442 other.subunits.get(other.representative).getName(), 443 sequenceIdentity, sequenceCoverage)); 444 445 // If coverage and sequence identity sufficient, merge other and this 446 List<Integer> thisAligned = new ArrayList<>(); 447 List<Integer> otherAligned = new ArrayList<>(); 448 449 // Extract the aligned residues of both Subunit 450 for (int p = 1; p < aligner.getPair().getLength() + 1; p++) { 451 452 // Skip gaps in any of the two sequences 453 if (aligner.getPair().getAlignedSequence(1).isGap(p)) 454 continue; 455 if (aligner.getPair().getAlignedSequence(2).isGap(p)) 456 continue; 457 458 int thisIndex = aligner.getPair().getIndexInQueryAt(p) - 1; 459 int otherIndex = aligner.getPair().getIndexInTargetAt(p) - 1; 460 461 // Only consider residues that are part of the SubunitCluster 462 if (this.subunitEQR.get(this.representative).contains(thisIndex) 463 && other.subunitEQR.get(other.representative).contains(otherIndex)) { 464 thisAligned.add(thisIndex); 465 otherAligned.add(otherIndex); 466 } 467 } 468 469 updateEquivResidues(other, thisAligned, otherAligned); 470 471 this.method = SubunitClustererMethod.SEQUENCE; 472 pseudoStoichiometric = !params.isHighConfidenceScores(sequenceIdentity,sequenceCoverage); 473 474 return true; 475 } 476 477 /** 478 * Merges the other SubunitCluster into this one if their representative 479 * Atoms are structurally similar (according to the criteria in params). 480 * <p> 481 * 482 * @param other 483 * SubunitCluster 484 * @param params 485 * {@link SubunitClustererParameters}, with information on what alignment 486 * algorithm to use, RMSD/TMScore and structure coverage thresholds. 487 * @return true if the SubunitClusters were merged, false otherwise 488 * @throws StructureException 489 */ 490 491 public boolean mergeStructure(SubunitCluster other, SubunitClustererParameters params) throws StructureException { 492 493 StructureAlignment aligner = StructureAlignmentFactory.getAlgorithm(params.getSuperpositionAlgorithm()); 494 ConfigStrucAligParams aligner_params = aligner.getParameters(); 495 496 Method setOptimizeAlignment = null; 497 try { 498 setOptimizeAlignment = aligner_params.getClass().getMethod("setOptimizeAlignment", boolean.class); 499 } catch (NoSuchMethodException e) { 500 //alignment algorithm does not have an optimization switch, moving on 501 } 502 if (setOptimizeAlignment != null) { 503 try { 504 setOptimizeAlignment.invoke(aligner_params, params.isOptimizeAlignment()); 505 } catch (IllegalAccessException|InvocationTargetException e) { 506 logger.warn("Could not set alignment optimisation switch"); 507 } 508 } 509 510 AFPChain afp = aligner.align(this.subunits.get(this.representative) 511 .getRepresentativeAtoms(), 512 other.subunits.get(other.representative) 513 .getRepresentativeAtoms()); 514 515 if (afp.getOptLength() < 1) { 516 // alignment failed (eg if chains were too short) 517 throw new StructureException( 518 String.format("Subunits failed to align using %s", params.getSuperpositionAlgorithm())); 519 } 520 521 // Convert AFPChain to MultipleAlignment for convenience 522 MultipleAlignment msa = new MultipleAlignmentEnsembleImpl( 523 afp, 524 this.subunits.get(this.representative).getRepresentativeAtoms(), 525 other.subunits.get(other.representative) 526 .getRepresentativeAtoms(), false) 527 .getMultipleAlignment(0); 528 529 double structureCoverage = Math.min(msa.getCoverages().get(0), msa 530 .getCoverages().get(1)); 531 532 if(params.isUseStructureCoverage() && structureCoverage < params.getStructureCoverageThreshold()) { 533 return false; 534 } 535 536 double rmsd = afp.getTotalRmsdOpt(); 537 if (params.isUseRMSD() && rmsd > params.getRMSDThreshold()) { 538 return false; 539 } 540 541 double tmScore = afp.getTMScore(); 542 if (params.isUseTMScore() && tmScore < params.getTMThreshold()) { 543 return false; 544 } 545 546 logger.info(String.format("SubunitClusters are structurally similar with " 547 + "%.2f RMSD %.2f coverage", rmsd, structureCoverage)); 548 549 // Merge clusters 550 List<List<Integer>> alignedRes = msa.getBlock(0).getAlignRes(); 551 List<Integer> thisAligned = new ArrayList<>(); 552 List<Integer> otherAligned = new ArrayList<>(); 553 554 // Extract the aligned residues of both Subunit 555 for (int p = 0; p < msa.length(); p++) { 556 557 // Skip gaps in any of the two sequences 558 if (alignedRes.get(0).get(p) == null) 559 continue; 560 if (alignedRes.get(1).get(p) == null) 561 continue; 562 563 int thisIndex = alignedRes.get(0).get(p); 564 int otherIndex = alignedRes.get(1).get(p); 565 566 // Only consider residues that are part of the SubunitCluster 567 if (this.subunitEQR.get(this.representative).contains(thisIndex) 568 && other.subunitEQR.get(other.representative).contains( 569 otherIndex)) { 570 thisAligned.add(thisIndex); 571 otherAligned.add(otherIndex); 572 } 573 } 574 575 updateEquivResidues(other, thisAligned, otherAligned); 576 577 this.method = SubunitClustererMethod.STRUCTURE; 578 pseudoStoichiometric = true; 579 580 return true; 581 } 582 583 private void updateEquivResidues(SubunitCluster other, List<Integer> thisAligned, List<Integer> otherAligned) { 584 // Do a List intersection to find out which EQR columns to remove 585 List<Integer> thisRemove = new ArrayList<>(); 586 List<Integer> otherRemove = new ArrayList<>(); 587 588 for (int t = 0; t < this.subunitEQR.get(this.representative).size(); t++) { 589 // If the index is aligned do nothing, otherwise mark as removing 590 if (!thisAligned.contains(this.subunitEQR.get(this.representative).get(t))) 591 thisRemove.add(t); 592 } 593 594 for (int t = 0; t < other.subunitEQR.get(other.representative).size(); t++) { 595 // If the index is aligned do nothing, otherwise mark as removing 596 if (!otherAligned.contains(other.subunitEQR.get(other.representative).get(t))) 597 otherRemove.add(t); 598 } 599 // Now remove unaligned columns, from end to start 600 Collections.sort(thisRemove); 601 Collections.reverse(thisRemove); 602 Collections.sort(otherRemove); 603 Collections.reverse(otherRemove); 604 605 for (int t = 0; t < thisRemove.size(); t++) { 606 for (List<Integer> eqr : this.subunitEQR) { 607 int column = thisRemove.get(t); 608 eqr.remove(column); 609 } 610 } 611 612 for (int t = 0; t < otherRemove.size(); t++) { 613 for (List<Integer> eqr : other.subunitEQR) { 614 int column = otherRemove.get(t); 615 eqr.remove(column); 616 } 617 } 618 619 // The representative is the longest sequence 620 if (this.subunits.get(this.representative).size() < other.subunits.get(other.representative).size()) 621 this.representative = other.representative + subunits.size(); 622 623 this.subunits.addAll(other.subunits); 624 this.subunitEQR.addAll(other.subunitEQR); 625 626 } 627 628 /** 629 * Analyze the internal symmetry of the SubunitCluster and divide its 630 * {@link Subunit} into the internal repeats (domains) if they are 631 * internally symmetric. 632 * 633 * @param clusterParams {@link SubunitClustererParameters} with fields used as follows: 634 * structureCoverageThreshold 635 * the minimum coverage of all repeats in the Subunit 636 * rmsdThreshold 637 * the maximum allowed RMSD between the repeats 638 * minimumSequenceLength 639 * the minimum length of the repeating units 640 * @return true if the cluster was internally symmetric, false otherwise 641 * @throws StructureException 642 */ 643 public boolean divideInternally(SubunitClustererParameters clusterParams) 644 throws StructureException { 645 646 CESymmParameters cesym_params = new CESymmParameters(); 647 cesym_params.setMinCoreLength(clusterParams.getMinimumSequenceLength()); 648 cesym_params.setGaps(false); // We want no gaps between the repeats 649 650 // Analyze the internal symmetry of the representative subunit 651 CeSymmResult result = CeSymm.analyze(subunits.get(representative) 652 .getRepresentativeAtoms(), cesym_params); 653 654 if (!result.isSignificant()) 655 return false; 656 657 double rmsd = result.getMultipleAlignment().getScore( 658 MultipleAlignmentScorer.RMSD); 659 if (rmsd > clusterParams.getRMSDThreshold()) 660 return false; 661 662 double coverage = result.getMultipleAlignment().getCoverages().get(0) 663 * result.getNumRepeats(); 664 if (coverage < clusterParams.getStructureCoverageThreshold()) 665 return false; 666 667 logger.info("SubunitCluster is internally symmetric with {} repeats, " 668 + "{} RMSD and {} coverage", result.getNumRepeats(), rmsd, 669 coverage); 670 671 // Divide if symmety was significant with RMSD and coverage sufficient 672 List<List<Integer>> alignedRes = result.getMultipleAlignment() 673 .getBlock(0).getAlignRes(); 674 675 List<List<Integer>> columns = new ArrayList<>(); 676 for (int s = 0; s < alignedRes.size(); s++) 677 columns.add(new ArrayList<>(alignedRes.get(s).size())); 678 679 // Extract the aligned columns of each repeat in the Subunit 680 for (int col = 0; col < alignedRes.get(0).size(); col++) { 681 682 // Check that all aligned residues are part of the Cluster 683 boolean missing = false; 684 for (int s = 0; s < alignedRes.size(); s++) { 685 if (!subunitEQR.get(representative).contains( 686 alignedRes.get(s).get(col))) { 687 missing = true; 688 break; 689 } 690 } 691 692 // Skip the column if any residue was not part of the cluster 693 if (missing) 694 continue; 695 696 for (int s = 0; s < alignedRes.size(); s++) { 697 columns.get(s).add( 698 subunitEQR.get(representative).indexOf( 699 alignedRes.get(s).get(col))); 700 } 701 } 702 703 // Divide the Subunits in their repeats 704 List<Subunit> newSubunits = new ArrayList<>(subunits.size() 705 * columns.size()); 706 List<List<Integer>> newSubunitEQR = new ArrayList<>( 707 subunits.size() * columns.size()); 708 709 for (int s = 0; s < subunits.size(); s++) { 710 for (int r = 0; r < columns.size(); r++) { 711 712 // Calculate start and end residues of the new Subunit 713 int start = subunitEQR.get(s).get(columns.get(r).get(0)); 714 int end = subunitEQR.get(s).get( 715 columns.get(r).get(columns.get(r).size() - 1)); 716 717 Atom[] reprAtoms = Arrays.copyOfRange(subunits.get(s) 718 .getRepresentativeAtoms(), start, end + 1); 719 720 newSubunits.add(new Subunit(reprAtoms, subunits.get(s) 721 .getName(), subunits.get(s).getIdentifier(), subunits 722 .get(s).getStructure())); 723 724 // Recalculate equivalent residues 725 List<Integer> eqr = new ArrayList<>(); 726 for (int p = 0; p < columns.get(r).size(); p++) { 727 eqr.add(subunitEQR.get(s).get(columns.get(r).get(p)) 728 - start); 729 } 730 newSubunitEQR.add(eqr); 731 } 732 } 733 734 subunits = newSubunits; 735 subunitEQR = newSubunitEQR; 736 737 // Update representative 738 for (int s = 0; s < subunits.size(); s++) { 739 if (subunits.get(s).size() > subunits.get(representative).size()) 740 representative = s; 741 } 742 743 method = SubunitClustererMethod.STRUCTURE; 744 pseudoStoichiometric = true; 745 return true; 746 } 747 748 /** 749 * @return the number of Subunits in the cluster 750 */ 751 public int size() { 752 return subunits.size(); 753 } 754 755 /** 756 * @return the number of aligned residues between Subunits of the cluster 757 */ 758 public int length() { 759 return subunitEQR.get(representative).size(); 760 } 761 762 /** 763 * @return the {@link SubunitClustererMethod} used for clustering the 764 * Subunits 765 */ 766 public SubunitClustererMethod getClustererMethod() { 767 return method; 768 } 769 770 /** 771 * @return A List of size {@link #size()} of Atom arrays of length 772 * {@link #length()} with the aligned Atoms for each Subunit in the 773 * cluster 774 */ 775 public List<Atom[]> getAlignedAtomsSubunits() { 776 777 List<Atom[]> alignedAtoms = new ArrayList<>(); 778 779 // Loop through all subunits and add the aligned positions 780 for (int s = 0; s < subunits.size(); s++) 781 alignedAtoms.add(getAlignedAtomsSubunit(s)); 782 783 return alignedAtoms; 784 } 785 786 /** 787 * @param index 788 * Subunit index in the Cluster 789 * @return An Atom array of length {@link #length()} with the aligned Atoms 790 * from the selected Subunit in the Cluster 791 */ 792 public Atom[] getAlignedAtomsSubunit(int index) { 793 794 Atom[] aligned = new Atom[subunitEQR.get(index).size()]; 795 796 // Add only the aligned positions of the Subunit in the Cluster 797 for (int p = 0; p < subunitEQR.get(index).size(); p++) { 798 aligned[p] = subunits.get(index).getRepresentativeAtoms()[subunitEQR 799 .get(index).get(p)]; 800 } 801 802 return aligned; 803 } 804 805 /** 806 * The multiple alignment is calculated from the equivalent residues in the 807 * SubunitCluster. The alignment is recalculated every time the method is 808 * called (no caching). 809 * 810 * @return MultipleAlignment representation of the aligned residues in this 811 * Subunit Cluster 812 * @throws StructureException 813 */ 814 public MultipleAlignment getMultipleAlignment() throws StructureException { 815 816 // Create a multiple alignment with the atom arrays of the Subunits 817 MultipleAlignment msa = new MultipleAlignmentImpl(); 818 msa.setEnsemble(new MultipleAlignmentEnsembleImpl()); 819 msa.getEnsemble().setAtomArrays( 820 subunits.stream().map(s -> s.getRepresentativeAtoms()) 821 .collect(Collectors.toList())); 822 823 // Fill in the alignment information 824 BlockSet bs = new BlockSetImpl(msa); 825 Block b = new BlockImpl(bs); 826 b.setAlignRes(subunitEQR); 827 828 // Fill in the transformation matrices 829 new ReferenceSuperimposer(representative).superimpose(msa); 830 831 // Calculate some scores 832 MultipleAlignmentScorer.calculateScores(msa); 833 834 return msa; 835 836 } 837 838 @Override 839 public String toString() { 840 return "SubunitCluster [Size=" + size() + ", Length=" + length() 841 + ", Representative=" + representative + ", Method=" + method 842 + "]"; 843 } 844 845 /** 846 * @return true if this cluster is considered pseudo-stoichiometric (i.e., 847 * was either clustered by structure, or by sequence with low scores), 848 * false otherwise. 849 */ 850 public boolean isPseudoStoichiometric() { 851 return pseudoStoichiometric; 852 } 853 854}