001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Jun 12, 2010 021 * Author: Jianjiong Gao 022 * 023 */ 024 025package org.biojava.nbio.protmod.structure; 026 027import org.biojava.nbio.protmod.*; 028import org.biojava.nbio.structure.*; 029import org.slf4j.Logger; 030import org.slf4j.LoggerFactory; 031 032import java.util.*; 033 034/** 035 * Identify attachment modification in a 3-D structure. 036 * 037 * @author Jianjiong Gao 038 * @since 3.0 039 */ 040public class ProteinModificationIdentifier { 041 042 private static final Logger logger = LoggerFactory.getLogger(ProteinModificationIdentifier.class); 043 044 private double bondLengthTolerance ; 045 private boolean recordUnidentifiableModifiedCompounds ; 046 private boolean recordAdditionalAttachments ; 047 048 private Set<ModifiedCompound> identifiedModifiedCompounds = null; 049 private Set<StructureAtomLinkage> unidentifiableAtomLinkages = null; 050 private Set<StructureGroup> unidentifiableModifiedResidues = null; 051 052 /** 053 * Temporary save the amino acids for each call of identify(). 054 */ 055 private List<Group> residues; 056 057 058 public ProteinModificationIdentifier(){ 059 060 bondLengthTolerance = 0.4; 061 recordUnidentifiableModifiedCompounds = false; 062 recordAdditionalAttachments = true; 063 064 reset(); 065 } 066 067 068 public void destroy(){ 069 if ( identifiedModifiedCompounds != null) 070 identifiedModifiedCompounds.clear(); 071 if ( unidentifiableAtomLinkages != null) 072 unidentifiableAtomLinkages.clear(); 073 if ( unidentifiableModifiedResidues != null) 074 unidentifiableModifiedResidues.clear(); 075 076 unidentifiableAtomLinkages = null; 077 unidentifiableAtomLinkages = null; 078 unidentifiableModifiedResidues = null; 079 080 081 } 082 083 /** 084 * 085 * @param bondLengthTolerance tolerance of error (in Angstroms) of the 086 * covalent bond length, when calculating the atom distance threshold. 087 */ 088 public void setbondLengthTolerance(final double bondLengthTolerance) { 089 if (bondLengthTolerance<0) { 090 throw new IllegalArgumentException("bondLengthTolerance " + 091 "must be positive."); 092 } 093 this.bondLengthTolerance = bondLengthTolerance; 094 } 095 096 /** 097 * 098 * @param recordUnidentifiableModifiedCompounds true if choosing to record unidentifiable 099 * atoms; false, otherwise. 100 * @see #getRecordUnidentifiableCompounds 101 * @see #getUnidentifiableModifiedResidues 102 * @see #getUnidentifiableAtomLinkages 103 */ 104 public void setRecordUnidentifiableCompounds(boolean recordUnidentifiableModifiedCompounds) { 105 this.recordUnidentifiableModifiedCompounds = recordUnidentifiableModifiedCompounds; 106 } 107 108 /** 109 * 110 * @return true if choosing to record unidentifiable 111 * atoms; false, otherwise. 112 * @see #setRecordUnidentifiableCompounds 113 * @see #getUnidentifiableModifiedResidues 114 * @see #getUnidentifiableAtomLinkages 115 */ 116 public boolean getRecordUnidentifiableCompounds() { 117 return recordUnidentifiableModifiedCompounds; 118 } 119 120 /** 121 * 122 * @param recordAdditionalAttachments true if choosing to record additional attachments 123 * that are not directly attached to a modified residue. 124 * @see #getRecordAdditionalAttachments 125 */ 126 public void setRecordAdditionalAttachments(boolean recordAdditionalAttachments) { 127 this.recordAdditionalAttachments = recordAdditionalAttachments; 128 } 129 130 /** 131 * 132 * @return true if choosing to record additional attachments 133 * that are not directly attached to a modified residue. 134 * @see #setRecordAdditionalAttachments 135 */ 136 public boolean getRecordAdditionalAttachments() { 137 return recordAdditionalAttachments; 138 } 139 140 /** 141 * 142 * @return a set of identified {@link ModifiedCompound}s from 143 * the last parse result. 144 * @see ModifiedCompound 145 */ 146 public Set<ModifiedCompound> getIdentifiedModifiedCompound() { 147 if (identifiedModifiedCompounds==null) { 148 throw new IllegalStateException("No result available. Please call parse() first."); 149 } 150 151 return identifiedModifiedCompounds; 152 } 153 154 /** 155 * 156 * @return a set of atom linkages, which represent the 157 * atom bonds that were not covered by the identified 158 * {@link ModifiedCompound}s from the last parse result. 159 * Each element of the list is a array containing two atoms. 160 * @see StructureAtomLinkage 161 * @see #setRecordUnidentifiableCompounds 162 */ 163 public Set<StructureAtomLinkage> getUnidentifiableAtomLinkages() { 164 if (!recordUnidentifiableModifiedCompounds) { 165 throw new UnsupportedOperationException("Recording unidentified atom linkages" + 166 "is not supported. Please setRecordUnidentifiableCompounds(true) first."); 167 } 168 169 if (identifiedModifiedCompounds==null) { 170 throw new IllegalStateException("No result available. Please call parse() first."); 171 } 172 173 return unidentifiableAtomLinkages; 174 } 175 176 /** 177 * 178 * @return a set of modified residues that were not covered by 179 * the identified ModifiedCompounds from the last parse 180 * result. 181 * @see StructureGroup 182 * @see #setRecordUnidentifiableCompounds 183 * @see #getIdentifiedModifiedCompound 184 */ 185 public Set<StructureGroup> getUnidentifiableModifiedResidues() { 186 if (!recordUnidentifiableModifiedCompounds) { 187 throw new UnsupportedOperationException("Recording unidentified atom linkages" + 188 "is not supported. Please setRecordUnidentifiableCompounds(true) first."); 189 } 190 191 if (identifiedModifiedCompounds==null) { 192 throw new IllegalStateException("No result available. Please call parse() first."); 193 } 194 195 return unidentifiableModifiedResidues; 196 } 197 198 /** 199 * Identify all registered modifications in a structure. 200 * @param structure 201 */ 202 public void identify(final Structure structure) { 203 identify(structure, ProteinModificationRegistry.allModifications()); 204 } 205 206 /** 207 * Identify a set of modifications in a structure. 208 * @param structure query {@link Structure}. 209 * @param potentialModifications query {@link ProteinModification}s. 210 */ 211 public void identify(final Structure structure, 212 final Set<ProteinModification> potentialModifications) { 213 if (structure==null) { 214 throw new IllegalArgumentException("Null structure."); 215 } 216 217 identify(structure.getChains(), potentialModifications); 218 } 219 220 /** 221 * Identify all registered modifications in a chain. 222 * @param chain query {@link Chain}. 223 */ 224 public void identify(final Chain chain) { 225 identify(Collections.singletonList(chain)); 226 } 227 228 /** 229 * Identify all registered modifications in chains. 230 * @param chains query {@link Chain}s. 231 */ 232 public void identify(final List<Chain> chains) { 233 identify(chains, ProteinModificationRegistry.allModifications()); 234 } 235 236 /** 237 * Identify a set of modifications in a a chains. 238 * @param chain query {@link Chain}. 239 * @param potentialModifications query {@link ProteinModification}s. 240 */ 241 public void identify(final Chain chain, 242 final Set<ProteinModification> potentialModifications) { 243 identify(Collections.singletonList(chain), potentialModifications); 244 } 245 246 /** 247 * Identify a set of modifications in a a list of chains. 248 * @param chains query {@link Chain}s. 249 * @param potentialModifications query {@link ProteinModification}s. 250 */ 251 public void identify(final List<Chain> chains, 252 final Set<ProteinModification> potentialModifications) { 253 254 if (chains==null) { 255 throw new IllegalArgumentException("Null structure."); 256 } 257 258 if (potentialModifications==null) { 259 throw new IllegalArgumentException("Null potentialModifications."); 260 } 261 262 263 reset(); 264 265 if (potentialModifications.isEmpty()) { 266 return; 267 } 268 269 270 residues = new ArrayList<>(); 271 List<Group> ligands = new ArrayList<>(); 272 Map<Component, Set<Group>> mapCompGroups = new HashMap<>(); 273 274 for (Chain chain : chains) { 275 276 List<Group> ress = StructureUtil.getAminoAcids(chain); 277 278 //List<Group> ligs = chain.getAtomLigands(); 279 List<Group> ligs = StructureTools.filterLigands(chain.getAtomGroups()); 280 residues.addAll(ress); 281 residues.removeAll(ligs); 282 ligands.addAll(ligs); 283 addModificationGroups(potentialModifications, ress, ligs, mapCompGroups); 284 } 285 286 if (residues.isEmpty()) { 287 String pdbId = "?"; 288 if ( chains.size() > 0) { 289 Structure struc = chains.get(0).getStructure(); 290 if ( struc != null) 291 pdbId = struc.getPDBCode(); 292 } 293 logger.warn("No amino acids found for {}. Either you did not parse the PDB file with alignSEQRES records, or this record does not contain any amino acids.", pdbId); 294 } 295 List<ModifiedCompound> modComps = new ArrayList<>(); 296 297 for (ProteinModification mod : potentialModifications) { 298 ModificationCondition condition = mod.getCondition(); 299 List<Component> components = condition.getComponents(); 300 if (!mapCompGroups.keySet().containsAll(components)) { 301 // not all components exist for this mod. 302 continue; 303 } 304 305 int sizeComps = components.size(); 306 if (sizeComps==1) { 307 308 processCrosslink1(mapCompGroups, modComps, mod, components); 309 310 } else { 311 312 processMultiCrosslink(mapCompGroups, modComps, mod, condition); 313 } 314 } 315 316 if (recordAdditionalAttachments) { 317 // identify additional groups that are not directly attached to amino acids. 318 for (ModifiedCompound mc : modComps) { 319 identifyAdditionalAttachments(mc, ligands, chains); 320 } 321 } 322 323 mergeModComps(modComps); 324 325 identifiedModifiedCompounds.addAll(modComps); 326 327 328 // record unidentifiable linkage 329 if (recordUnidentifiableModifiedCompounds) { 330 recordUnidentifiableAtomLinkages(modComps, ligands); 331 recordUnidentifiableModifiedResidues(modComps); 332 } 333 } 334 335 private void reset() { 336 identifiedModifiedCompounds = new LinkedHashSet<>(); 337 if (recordUnidentifiableModifiedCompounds) { 338 unidentifiableAtomLinkages = new LinkedHashSet<>(); 339 unidentifiableModifiedResidues = new LinkedHashSet<>(); 340 } 341 342 } 343 344 private void processMultiCrosslink( 345 Map<Component, Set<Group>> mapCompGroups, 346 List<ModifiedCompound> modComps, ProteinModification mod, 347 ModificationCondition condition) { 348 // for multiple components 349 350 // find linkages first 351 List<List<Atom[]>> matchedAtomsOfLinkages = 352 getMatchedAtomsOfLinkages(condition, mapCompGroups); 353 354 if (matchedAtomsOfLinkages.size() != condition.getLinkages().size()) { 355 return; 356 } 357 358 assembleLinkages(matchedAtomsOfLinkages, mod, modComps); 359 360 } 361 362 private void processCrosslink1(Map<Component, Set<Group>> mapCompGroups, 363 List<ModifiedCompound> modComps, ProteinModification mod, 364 List<Component> components) { 365 // modified residue 366 // TODO: is this the correct logic for CROSS_LINK_1? 367 Set<Group> modifiedResidues = mapCompGroups.get(components.get(0)); 368 if (modifiedResidues != null) { 369 for (Group residue : modifiedResidues) { 370 StructureGroup strucGroup = StructureUtil.getStructureGroup(residue, true); 371 ModifiedCompound modRes = new ModifiedCompoundImpl(mod, strucGroup); 372 modComps.add(modRes); 373 } 374 } 375 } 376 377 /** 378 * identify additional groups that are not directly attached to amino acids. 379 * @param mc {@link ModifiedCompound} 380 * @param ligands {@link Group} 381 * @param chains List of {@link Chain}s 382 * @return a list of added groups 383 */ 384 private void identifyAdditionalAttachments(ModifiedCompound mc, 385 List<Group> ligands, List<Chain> chains) { 386 if (ligands.isEmpty()) { 387 return; 388 } 389 390 // TODO: should the additional groups only be allowed to the identified 391 // ligands or both amino acids and ligands? Currently only on ligands 392 // ligands to amino acid bonds for same modification of unknown category 393 // will be combined in mergeModComps() 394 // TODO: how about chain-chain links? 395 List<Group> identifiedGroups = new ArrayList<>(); 396 for (StructureGroup num : mc.getGroups(false)) { 397 Group group; 398 try { 399 //String numIns = "" + num.getResidueNumber(); 400 //if (num.getInsCode() != null) { 401 // numIns += num.getInsCode(); 402 //} 403 ResidueNumber resNum = new ResidueNumber(); 404 resNum.setChainName(num.getChainId()); 405 resNum.setSeqNum(num.getResidueNumber()); 406 resNum.setInsCode(num.getInsCode()); 407 //group = chain.getGroupByPDB(numIns); 408 409 group = getGroup(num,chains); 410 //group = mapChainIdChain.get(num.getChainId()).getGroupByPDB(resNum); 411 } catch (StructureException e) { 412 logger.error("Exception: ", e); 413 // should not happen 414 continue; 415 } 416 identifiedGroups.add(group); 417 } 418 419 int start = 0; 420 421 int n = identifiedGroups.size(); 422 while (n > start) { 423 for (Group group1 : ligands) { 424 for (int i=start; i<n; i++) { 425 Group group2 = identifiedGroups.get(i); 426 if (!identifiedGroups.contains(group1)) { 427 List<Atom[]> linkedAtoms = StructureUtil.findAtomLinkages( 428 group1, group2, false, bondLengthTolerance); 429 if (!linkedAtoms.isEmpty()) { 430 for (Atom[] atoms : linkedAtoms) { 431 mc.addAtomLinkage(StructureUtil.getStructureAtomLinkage(atoms[0], 432 false, atoms[1], false)); 433 } 434 identifiedGroups.add(group1); 435 break; 436 } 437 } 438 } 439 } 440 441 start = n; 442 n = identifiedGroups.size(); 443 } 444 } 445 446 private Group getGroup(StructureGroup num, List<Chain> chains) throws StructureException { 447 for (Chain c : chains){ 448 if ( c.getId().equals(num.getChainId())){ 449 450 ResidueNumber resNum = new ResidueNumber(); 451 452 resNum.setSeqNum(num.getResidueNumber()); 453 resNum.setInsCode(num.getInsCode()); 454 455 456 return c.getGroupByPDB(resNum); 457 } 458 } 459 460 throw new StructureException("Could not find residue " + num); 461 } 462 463 /** 464 * Merge identified modified compounds if linked. 465 */ 466 private void mergeModComps(List<ModifiedCompound> modComps) { 467 TreeSet<Integer> remove = new TreeSet<>(); 468 int n = modComps.size(); 469 for (int icurr=1; icurr<n; icurr++) { 470 ModifiedCompound curr = modComps.get(icurr); 471 472 String id = curr.getModification().getId(); 473 if (ProteinModificationRegistry.getById(id).getCategory() 474 !=ModificationCategory.UNDEFINED) 475 continue; 476 477 // find linked compounds that before curr 478 //List<Integer> merging = new ArrayList<Integer>(); 479 int ipre = 0; 480 for (; ipre<icurr; ipre++) { 481 if (remove.contains(ipre)) continue; 482 ModifiedCompound pre = modComps.get(ipre); 483 if (!Collections.disjoint(pre.getGroups(false), 484 curr.getGroups(false))) { 485 break; 486 } 487 } 488 489 if (ipre<icurr) { 490 ModifiedCompound mcKeep = modComps.get(ipre); 491 492 // merge modifications of the same type 493 if (mcKeep.getModification().getId().equals(id)) { 494 // merging the current one to the previous one 495 mcKeep.addAtomLinkages(curr.getAtomLinkages()); 496 remove.add(icurr); 497 } 498 } 499 } 500 501 Iterator<Integer> it = remove.descendingIterator(); 502 while (it.hasNext()) { 503 modComps.remove(it.next().intValue()); 504 } 505 } 506 507 /** 508 * Record unidentifiable atom linkages in a chain. Only linkages between two 509 * residues or one residue and one ligand will be recorded. 510 */ 511 private void recordUnidentifiableAtomLinkages(List<ModifiedCompound> modComps, 512 List<Group> ligands) { 513 514 // first put identified linkages in a map for fast query 515 Set<StructureAtomLinkage> identifiedLinkages = new HashSet<>(); 516 for (ModifiedCompound mc : modComps) { 517 identifiedLinkages.addAll(mc.getAtomLinkages()); 518 } 519 520 // record 521 // cross link 522 int nRes = residues.size(); 523 for (int i=0; i<nRes-1; i++) { 524 Group group1 = residues.get(i); 525 for (int j=i+1; j<nRes; j++) { 526 Group group2 = residues.get(j); 527 List<Atom[]> linkages = StructureUtil.findAtomLinkages( 528 group1, group2, true, bondLengthTolerance); 529 for (Atom[] atoms : linkages) { 530 StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0], 531 true, atoms[1], true); 532 unidentifiableAtomLinkages.add(link); 533 } 534 } 535 } 536 537 // attachment 538 int nLig = ligands.size(); 539 for (int i=0; i<nRes; i++) { 540 Group group1 = residues.get(i); 541 for (int j=0; j<nLig; j++) { 542 Group group2 = ligands.get(j); 543 if (group1.equals(group2)) { // overlap between residues and ligands 544 continue; 545 } 546 List<Atom[]> linkages = StructureUtil.findAtomLinkages( 547 group1, group2, false, bondLengthTolerance); 548 for (Atom[] atoms : linkages) { 549 StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0], 550 true, atoms[1], false); 551 unidentifiableAtomLinkages.add(link); 552 } 553 } 554 } 555 } 556 557 private void recordUnidentifiableModifiedResidues(List<ModifiedCompound> modComps) { 558 Set<StructureGroup> identifiedComps = new HashSet<>(); 559 for (ModifiedCompound mc : modComps) { 560 identifiedComps.addAll(mc.getGroups(true)); 561 } 562 563 // TODO: use the ModifiedAminoAcid after Andreas add that. 564 for (Group group : residues) { 565 if (group.getType().equals(GroupType.HETATM)) { 566 StructureGroup strucGroup = StructureUtil.getStructureGroup( 567 group, true); 568 strucGroup.setChainId(group.getChainId()); 569 570 if (!identifiedComps.contains(strucGroup)) { 571 unidentifiableModifiedResidues.add(strucGroup); 572 } 573 } 574 } 575 } 576 577 /** 578 * 579 * @param modifications a set of {@link ProteinModification}s. 580 * @param residues 581 * @param ligands 582 * @param saveTo save result to 583 * @return map from component to list of corresponding residues 584 * in the chain. 585 */ 586 private void addModificationGroups( 587 final Set<ProteinModification> modifications, 588 final List<Group> residues, 589 final List<Group> ligands, 590 final Map<Component, Set<Group>> saveTo) { 591 if (residues==null || ligands==null || modifications==null) { 592 throw new IllegalArgumentException("Null argument(s)."); 593 } 594 595 Map<Component,Set<Component>> mapSingleMultiComps = new HashMap<>(); 596 for (ProteinModification mod : modifications) { 597 ModificationCondition condition = mod.getCondition(); 598 for (Component comp : condition.getComponents()) { 599 for (String pdbccId : comp.getPdbccIds()) { 600 Component single = Component.of(Collections.singleton(pdbccId), 601 comp.isNTerminal(), comp.isCTerminal()); 602 Set<Component> mult = mapSingleMultiComps.get(single); 603 if (mult == null) { 604 mult = new HashSet<>(); 605 mapSingleMultiComps.put(single, mult); 606 } 607 mult.add(comp); 608 } 609 } 610 } 611 612 { 613 // ligands 614 Set<Component> ligandsWildCard = mapSingleMultiComps.get( 615 Component.of("*")); 616 for (Group group : ligands) { 617 String pdbccId = group.getPDBName().trim(); 618 Set<Component> comps = mapSingleMultiComps.get( 619 Component.of(pdbccId)); 620 621 for (Component comp : unionComponentSet(ligandsWildCard, comps)) { 622 Set<Group> gs = saveTo.get(comp); 623 if (gs==null) { 624 gs = new LinkedHashSet<>(); 625 saveTo.put(comp, gs); 626 } 627 gs.add(group); 628 } 629 } 630 } 631 632 { 633 // residues 634 if (residues.isEmpty()) { 635 return; 636 } 637 638 Set<Component> residuesWildCard = mapSingleMultiComps.get( 639 Component.of("*")); 640 641 // for all residues 642 for (Group group : residues) { 643 String pdbccId = group.getPDBName().trim(); 644 Set<Component> comps = mapSingleMultiComps.get( 645 Component.of(pdbccId)); 646 647 for (Component comp : unionComponentSet(residuesWildCard, comps)) { 648 Set<Group> gs = saveTo.get(comp); 649 if (gs==null) { 650 gs = new LinkedHashSet<>(); 651 saveTo.put(comp, gs); 652 } 653 gs.add(group); 654 } 655 } 656 657 // for N-terminal 658 int nRes = residues.size(); 659 int iRes = 0; 660 Group res; 661 do { 662 // for all ligands on N terminal and the first residue 663 res = residues.get(iRes++); 664 665 Set<Component> nTermWildCard = mapSingleMultiComps.get( 666 Component.of("*", true, false)); 667 668 Set<Component> comps = mapSingleMultiComps.get( 669 Component.of(res.getPDBName(), true, false)); 670 671 for (Component comp : unionComponentSet(nTermWildCard, comps)) { 672 Set<Group> gs = saveTo.get(comp); 673 if (gs==null) { 674 gs = new LinkedHashSet<>(); 675 saveTo.put(comp, gs); 676 } 677 gs.add(res); 678 } 679 } while (iRes<nRes && ligands.contains(res)); 680 681 // for C-terminal 682 iRes = residues.size()-1; 683 do { 684 // for all ligands on C terminal and the last residue 685 res = residues.get(iRes--); 686 687 Set<Component> cTermWildCard = mapSingleMultiComps.get( 688 Component.of("*", false, true)); 689 690 Set<Component> comps = mapSingleMultiComps.get( 691 Component.of(res.getPDBName(), false, true)); 692 693 for (Component comp : unionComponentSet(cTermWildCard, comps)) { 694 Set<Group> gs = saveTo.get(comp); 695 if (gs==null) { 696 gs = new LinkedHashSet<>(); 697 saveTo.put(comp, gs); 698 } 699 gs.add(res); 700 } 701 } while (iRes>=0 && ligands.contains(res)); 702 } 703 } 704 705 private Set<Component> unionComponentSet(Set<Component> set1, Set<Component> set2) { 706 if (set1 == null && set2 == null) 707 return Collections.emptySet(); 708 709 if (set1 == null) 710 return set2; 711 712 if (set2 == null) 713 return set1; 714 715 Set<Component> set = new HashSet<>(set1.size()+set2.size()); 716 set.addAll(set1); 717 set.addAll(set2); 718 719 return set; 720 } 721 722 /** 723 * Get matched atoms for all linkages. 724 */ 725 private List<List<Atom[]>> getMatchedAtomsOfLinkages( 726 ModificationCondition condition, Map<Component, Set<Group>> mapCompGroups) { 727 List<ModificationLinkage> linkages = condition.getLinkages(); 728 int nLink = linkages.size(); 729 730 List<List<Atom[]>> matchedAtomsOfLinkages = 731 new ArrayList<>(nLink); 732 733 for (int iLink=0; iLink<nLink; iLink++) { 734 ModificationLinkage linkage = linkages.get(iLink); 735 Component comp1 = linkage.getComponent1(); 736 Component comp2 = linkage.getComponent2(); 737 738// boolean isAA1 = comp1.; 739// boolean isAA2 = comp2.getType()==true; 740 741 Set<Group> groups1 = mapCompGroups.get(comp1); 742 Set<Group> groups2 = mapCompGroups.get(comp2); 743 744 List<Atom[]> list = new ArrayList<>(); 745 746 List<String> potentialNamesOfAtomOnGroup1 = linkage.getPDBNameOfPotentialAtomsOnComponent1(); 747 for (String name : potentialNamesOfAtomOnGroup1) { 748 if ("*".equals(name)) { 749 // wildcard 750 potentialNamesOfAtomOnGroup1 = null; // search all atoms 751 break; 752 } 753 } 754 755 List<String> potentialNamesOfAtomOnGroup2 = linkage.getPDBNameOfPotentialAtomsOnComponent2(); 756 for (String name : potentialNamesOfAtomOnGroup2) { 757 if ("*".equals(name)) { 758 // wildcard 759 potentialNamesOfAtomOnGroup2 = null; // search all atoms 760 break; 761 } 762 } 763 764 for (Group g1 : groups1) { 765 for (Group g2 : groups2) { 766 if (g1.equals(g2)) { 767 continue; 768 } 769 770 // only for wildcard match of two residues 771 boolean ignoreNCLinkage = 772 potentialNamesOfAtomOnGroup1 == null && 773 potentialNamesOfAtomOnGroup2 == null && 774 residues.contains(g1) && 775 residues.contains(g2); 776 777 Atom[] atoms = StructureUtil.findNearestAtomLinkage( 778 g1, g2, 779 potentialNamesOfAtomOnGroup1, 780 potentialNamesOfAtomOnGroup2, 781 ignoreNCLinkage, 782 bondLengthTolerance); 783 if (atoms!=null) { 784 list.add(atoms); 785 } 786 } 787 } 788 789 if (list.isEmpty()) { 790 // broken linkage 791 break; 792 } 793 794 matchedAtomsOfLinkages.add(list); 795 } 796 797 return matchedAtomsOfLinkages; 798 } 799 800 /** Assembly the matched linkages 801 * 802 * @param matchedAtomsOfLinkages 803 * @param mod 804 * @param ret ModifiedCompound will be stored here 805 */ 806 private void assembleLinkages(List<List<Atom[]>> matchedAtomsOfLinkages, 807 ProteinModification mod, List<ModifiedCompound> ret) { 808 ModificationCondition condition = mod.getCondition(); 809 List<ModificationLinkage> modLinks = condition.getLinkages(); 810 811 int nLink = matchedAtomsOfLinkages.size(); 812 int[] indices = new int[nLink]; 813 Set<ModifiedCompound> identifiedCompounds = new HashSet<>(); 814 while (indices[0]<matchedAtomsOfLinkages.get(0).size()) { 815 List<Atom[]> atomLinkages = new ArrayList<>(nLink); 816 for (int iLink=0; iLink<nLink; iLink++) { 817 Atom[] atoms = matchedAtomsOfLinkages.get(iLink).get(indices[iLink]); 818 atomLinkages.add(atoms); 819 } 820 if (matchLinkages(modLinks, atomLinkages)) { 821 // matched 822 823 int n = atomLinkages.size(); 824 List<StructureAtomLinkage> linkages = new ArrayList<>(n); 825 for (int i=0; i<n; i++) { 826 Atom[] linkage = atomLinkages.get(i); 827 StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage( 828 linkage[0], residues.contains(linkage[0].getGroup()), 829 linkage[1], residues.contains(linkage[1].getGroup())); 830 linkages.add(link); 831 } 832 833 ModifiedCompound mc = new ModifiedCompoundImpl(mod, linkages); 834 if (!identifiedCompounds.contains(mc)) { 835 ret.add(mc); 836 identifiedCompounds.add(mc); 837 } 838 } 839 840 // indices++ (e.g. [0,0,1]=>[0,0,2]=>[1,2,0]) 841 int i = nLink-1; 842 while (i>=0) { 843 if (i==0 || indices[i]<matchedAtomsOfLinkages.get(i).size()-1) { 844 indices[i]++; 845 break; 846 } else { 847 indices[i] = 0; 848 i--; 849 } 850 } 851 } 852 } 853 854 /** 855 * 856 * @param linkages 857 * @param atomLinkages 858 * @return true if atomLinkages satisfy the condition; false, otherwise. 859 */ 860 private boolean matchLinkages(List<ModificationLinkage> linkages, 861 List<Atom[]> atomLinkages) { 862 int nLink = linkages.size(); 863 if (nLink != atomLinkages.size()) { 864 return false; 865 } 866 for (int i=0; i<nLink-1; i++) { 867 ModificationLinkage link1 = linkages.get(i); 868 Atom[] atoms1 = atomLinkages.get(i); 869 for (int j=i+1; j<nLink; j++) { 870 ModificationLinkage link2 = linkages.get(j); 871 Atom[] atoms2 = atomLinkages.get(j); 872 873 // check components 874 if (((link1.getIndexOfComponent1()==link2.getIndexOfComponent1()) 875 != (atoms1[0].getGroup().equals(atoms2[0].getGroup()))) 876 || ((link1.getIndexOfComponent1()==link2.getIndexOfComponent2()) 877 != (atoms1[0].getGroup().equals(atoms2[1].getGroup()))) 878 || ((link1.getIndexOfComponent2()==link2.getIndexOfComponent1()) 879 != (atoms1[1].getGroup().equals(atoms2[0].getGroup()))) 880 || ((link1.getIndexOfComponent2()==link2.getIndexOfComponent2()) 881 != (atoms1[1].getGroup().equals(atoms2[1].getGroup())))) { 882 return false; 883 } 884 885 // check atoms 886 String label11 = link1.getLabelOfAtomOnComponent1(); 887 String label12 = link1.getLabelOfAtomOnComponent2(); 888 String label21 = link2.getLabelOfAtomOnComponent1(); 889 String label22 = link2.getLabelOfAtomOnComponent2(); 890 if ((label11!=null && label21!=null && label11.equals(label21)) 891 != (atoms1[0].equals(atoms2[0])) 892 || (label11!=null && label22!=null && label11.equals(label22)) 893 != (atoms1[0].equals(atoms2[1])) 894 || (label12!=null && label21!=null && label12.equals(label21)) 895 != (atoms1[1].equals(atoms2[0])) 896 || (label12!=null && label22!=null && label12.equals(label22)) 897 != (atoms1[1].equals(atoms2[1]))) { 898 return false; 899 } 900 } 901 } 902 903 return true; 904 } 905}