001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Jun 12, 2010 021 * Author: Jianjiong Gao 022 * 023 */ 024 025package org.biojava.nbio.protmod.structure; 026 027import org.biojava.nbio.protmod.*; 028import org.biojava.nbio.structure.*; 029import org.slf4j.Logger; 030import org.slf4j.LoggerFactory; 031 032import java.util.*; 033 034/** 035 * Identify attachment modification in a 3-D structure. 036 * 037 * @author Jianjiong Gao 038 * @since 3.0 039 */ 040public class ProteinModificationIdentifier { 041 042 private static final Logger logger = LoggerFactory.getLogger(ProteinModificationIdentifier.class); 043 044 private double bondLengthTolerance ; 045 private boolean recordUnidentifiableModifiedCompounds ; 046 private boolean recordAdditionalAttachments ; 047 048 private Set<ModifiedCompound> identifiedModifiedCompounds = null; 049 private Set<StructureAtomLinkage> unidentifiableAtomLinkages = null; 050 private Set<StructureGroup> unidentifiableModifiedResidues = null; 051 052 /** 053 * Temporary save the amino acids for each call of identify(). 054 */ 055 private List<Group> residues; 056 057 058 public ProteinModificationIdentifier(){ 059 060 bondLengthTolerance = 0.4; 061 recordUnidentifiableModifiedCompounds = false; 062 recordAdditionalAttachments = true; 063 064 reset(); 065 } 066 067 068 public void destroy(){ 069 if ( identifiedModifiedCompounds != null) 070 identifiedModifiedCompounds.clear(); 071 if ( unidentifiableAtomLinkages != null) 072 unidentifiableAtomLinkages.clear(); 073 if ( unidentifiableModifiedResidues != null) 074 unidentifiableModifiedResidues.clear(); 075 076 unidentifiableAtomLinkages = null; 077 unidentifiableAtomLinkages = null; 078 unidentifiableModifiedResidues = null; 079 080 081 } 082 083 /** 084 * 085 * @param bondLengthTolerance tolerance of error (in Angstroms) of the 086 * covalent bond length, when calculating the atom distance threshold. 087 */ 088 public void setbondLengthTolerance(final double bondLengthTolerance) { 089 if (bondLengthTolerance<0) { 090 throw new IllegalArgumentException("bondLengthTolerance " + 091 "must be positive."); 092 } 093 this.bondLengthTolerance = bondLengthTolerance; 094 } 095 096 /** 097 * 098 * @param recordUnidentifiableModifiedCompounds true if choosing to record unidentifiable 099 * atoms; false, otherwise. 100 * @see #getRecordUnidentifiableCompounds 101 * @see #getUnidentifiableModifiedResidues 102 * @see #getUnidentifiableAtomLinkages 103 */ 104 public void setRecordUnidentifiableCompounds(boolean recordUnidentifiableModifiedCompounds) { 105 this.recordUnidentifiableModifiedCompounds = recordUnidentifiableModifiedCompounds; 106 } 107 108 /** 109 * 110 * @return true if choosing to record unidentifiable 111 * atoms; false, otherwise. 112 * @see #setRecordUnidentifiableCompounds 113 * @see #getUnidentifiableModifiedResidues 114 * @see #getUnidentifiableAtomLinkages 115 */ 116 public boolean getRecordUnidentifiableCompounds() { 117 return recordUnidentifiableModifiedCompounds; 118 } 119 120 /** 121 * 122 * @param recordAdditionalAttachments true if choosing to record additional attachments 123 * that are not directly attached to a modified residue. 124 * @see #getRecordAdditionalAttachments 125 */ 126 public void setRecordAdditionalAttachments(boolean recordAdditionalAttachments) { 127 this.recordAdditionalAttachments = recordAdditionalAttachments; 128 } 129 130 /** 131 * 132 * @return true if choosing to record additional attachments 133 * that are not directly attached to a modified residue. 134 * @see #setRecordAdditionalAttachments 135 */ 136 public boolean getRecordAdditionalAttachments() { 137 return recordAdditionalAttachments; 138 } 139 140 /** 141 * 142 * @return a set of identified {@link ModifiedCompound}s from 143 * the last parse result. 144 * @see ModifiedCompound 145 */ 146 public Set<ModifiedCompound> getIdentifiedModifiedCompound() { 147 if (identifiedModifiedCompounds==null) { 148 throw new IllegalStateException("No result available. Please call parse() first."); 149 } 150 151 return identifiedModifiedCompounds; 152 } 153 154 /** 155 * 156 * @return a set of atom linkages, which represent the 157 * atom bonds that were not covered by the identified 158 * {@link ModifiedCompound}s from the last parse result. 159 * Each element of the list is a array containing two atoms. 160 * @see StructureAtomLinkage 161 * @see #setRecordUnidentifiableCompounds 162 */ 163 public Set<StructureAtomLinkage> getUnidentifiableAtomLinkages() { 164 if (!recordUnidentifiableModifiedCompounds) { 165 throw new UnsupportedOperationException("Recording unidentified atom linkages" + 166 "is not supported. Please setRecordUnidentifiableCompounds(true) first."); 167 } 168 169 if (identifiedModifiedCompounds==null) { 170 throw new IllegalStateException("No result available. Please call parse() first."); 171 } 172 173 return unidentifiableAtomLinkages; 174 } 175 176 /** 177 * 178 * @return a set of modified residues that were not covered by 179 * the identified ModifiedCompounds from the last parse 180 * result. 181 * @see StructureGroup 182 * @see #setRecordUnidentifiableCompounds 183 * @see #getIdentifiedModifiedCompound 184 */ 185 public Set<StructureGroup> getUnidentifiableModifiedResidues() { 186 if (!recordUnidentifiableModifiedCompounds) { 187 throw new UnsupportedOperationException("Recording unidentified atom linkages" + 188 "is not supported. Please setRecordUnidentifiableCompounds(true) first."); 189 } 190 191 if (identifiedModifiedCompounds==null) { 192 throw new IllegalStateException("No result available. Please call parse() first."); 193 } 194 195 return unidentifiableModifiedResidues; 196 } 197 198 /** 199 * Identify all registered modifications in a structure. 200 * @param structure 201 */ 202 public void identify(final Structure structure) { 203 identify(structure, ProteinModificationRegistry.allModifications()); 204 } 205 206 /** 207 * Identify a set of modifications in a structure. 208 * @param structure query {@link Structure}. 209 * @param potentialModifications query {@link ProteinModification}s. 210 */ 211 public void identify(final Structure structure, 212 final Set<ProteinModification> potentialModifications) { 213 if (structure==null) { 214 throw new IllegalArgumentException("Null structure."); 215 } 216 217 identify(structure.getChains(), potentialModifications); 218 } 219 220 /** 221 * Identify all registered modifications in a chain. 222 * @param chain query {@link Chain}. 223 */ 224 public void identify(final Chain chain) { 225 identify(Collections.singletonList(chain)); 226 } 227 228 /** 229 * Identify all registered modifications in chains. 230 * @param chains query {@link Chain}s. 231 */ 232 public void identify(final List<Chain> chains) { 233 identify(chains, ProteinModificationRegistry.allModifications()); 234 } 235 236 /** 237 * Identify a set of modifications in a a chains. 238 * @param chain query {@link Chain}. 239 * @param potentialModifications query {@link ProteinModification}s. 240 */ 241 public void identify(final Chain chain, 242 final Set<ProteinModification> potentialModifications) { 243 identify(Collections.singletonList(chain), potentialModifications); 244 } 245 246 /** 247 * Identify a set of modifications in a a list of chains. 248 * @param chains query {@link Chain}s. 249 * @param potentialModifications query {@link ProteinModification}s. 250 */ 251 public void identify(final List<Chain> chains, 252 final Set<ProteinModification> potentialModifications) { 253 254 if (chains==null) { 255 throw new IllegalArgumentException("Null structure."); 256 } 257 258 if (potentialModifications==null) { 259 throw new IllegalArgumentException("Null potentialModifications."); 260 } 261 262 263 reset(); 264 265 if (potentialModifications.isEmpty()) { 266 return; 267 } 268 269 Map<String, Chain> mapChainIdChain = new HashMap<String, Chain>(chains.size()); 270 residues = new ArrayList<Group>(); 271 List<Group> ligands = new ArrayList<Group>(); 272 Map<Component, Set<Group>> mapCompGroups = new HashMap<Component, Set<Group>>(); 273 274 for (Chain chain : chains) { 275 mapChainIdChain.put(chain.getChainID(), chain); 276 277 List<Group> ress = StructureUtil.getAminoAcids(chain); 278 279 //List<Group> ligs = chain.getAtomLigands(); 280 List<Group> ligs = StructureTools.filterLigands(chain.getAtomGroups()); 281 residues.addAll(ress); 282 residues.removeAll(ligs); 283 ligands.addAll(ligs); 284 addModificationGroups(potentialModifications, ress, ligs, mapCompGroups); 285 } 286 287 if (residues.isEmpty()) { 288 String pdbId = "?"; 289 if ( chains.size() > 0) { 290 Structure struc = chains.get(0).getParent(); 291 if ( struc != null) 292 pdbId = struc.getPDBCode(); 293 } 294 logger.warn("No amino acids found for {}. Either you did not parse the PDB file with alignSEQRES records, or this record does not contain any amino acids.", pdbId); 295 } 296 List<ModifiedCompound> modComps = new ArrayList<ModifiedCompound>(); 297 298 for (ProteinModification mod : potentialModifications) { 299 ModificationCondition condition = mod.getCondition(); 300 List<Component> components = condition.getComponents(); 301 if (!mapCompGroups.keySet().containsAll(components)) { 302 // not all components exist for this mod. 303 continue; 304 } 305 306 int sizeComps = components.size(); 307 if (sizeComps==1) { 308 309 processCrosslink1(mapCompGroups, modComps, mod, components); 310 311 } else { 312 313 processMultiCrosslink(mapCompGroups, modComps, mod, condition); 314 } 315 } 316 317 if (recordAdditionalAttachments) { 318 // identify additional groups that are not directly attached to amino acids. 319 for (ModifiedCompound mc : modComps) { 320 identifyAdditionalAttachments(mc, ligands, mapChainIdChain); 321 } 322 } 323 324 mergeModComps(modComps); 325 326 identifiedModifiedCompounds.addAll(modComps); 327 328 329 // record unidentifiable linkage 330 if (recordUnidentifiableModifiedCompounds) { 331 recordUnidentifiableAtomLinkages(modComps, ligands); 332 recordUnidentifiableModifiedResidues(modComps); 333 } 334 } 335 336 private void reset() { 337 identifiedModifiedCompounds = new LinkedHashSet<ModifiedCompound>(); 338 if (recordUnidentifiableModifiedCompounds) { 339 unidentifiableAtomLinkages = new LinkedHashSet<StructureAtomLinkage>(); 340 unidentifiableModifiedResidues = new LinkedHashSet<StructureGroup>(); 341 } 342 343 } 344 345 private void processMultiCrosslink( 346 Map<Component, Set<Group>> mapCompGroups, 347 List<ModifiedCompound> modComps, ProteinModification mod, 348 ModificationCondition condition) { 349 // for multiple components 350 351 // find linkages first 352 List<List<Atom[]>> matchedAtomsOfLinkages = 353 getMatchedAtomsOfLinkages(condition, mapCompGroups); 354 355 if (matchedAtomsOfLinkages.size() != condition.getLinkages().size()) { 356 return; 357 } 358 359 assembleLinkages(matchedAtomsOfLinkages, mod, modComps); 360 361 } 362 363 private void processCrosslink1(Map<Component, Set<Group>> mapCompGroups, 364 List<ModifiedCompound> modComps, ProteinModification mod, 365 List<Component> components) { 366 // modified residue 367 // TODO: is this the correct logic for CROSS_LINK_1? 368 Set<Group> modifiedResidues = mapCompGroups.get(components.get(0)); 369 if (modifiedResidues != null) { 370 for (Group residue : modifiedResidues) { 371 StructureGroup strucGroup = StructureUtil.getStructureGroup(residue, true); 372 ModifiedCompound modRes = new ModifiedCompoundImpl(mod, strucGroup); 373 modComps.add(modRes); 374 } 375 } 376 } 377 378 /** 379 * identify additional groups that are not directly attached to amino acids. 380 * @param mc {@link ModifiedCompound} 381 * @param ligands {@link Group} 382 * @param chains List of {@link Chain}s 383 * @return a list of added groups 384 */ 385 private void identifyAdditionalAttachments(ModifiedCompound mc, 386 List<Group> ligands, Map<String, Chain> mapChainIdChain) { 387 if (ligands.isEmpty()) { 388 return; 389 } 390 391 // TODO: should the additional groups only be allowed to the identified 392 // ligands or both amino acids and ligands? Currently only on ligands 393 // ligands to amino acid bonds for same modification of unknown category 394 // will be combined in mergeModComps() 395 // TODO: how about chain-chain links? 396 List<Group> identifiedGroups = new ArrayList<Group>(); 397 for (StructureGroup num : mc.getGroups(false)) { 398 Group group; 399 try { 400 //String numIns = "" + num.getResidueNumber(); 401 //if (num.getInsCode() != null) { 402 // numIns += num.getInsCode(); 403 //} 404 ResidueNumber resNum = new ResidueNumber(); 405 resNum.setChainId(num.getChainId()); 406 resNum.setSeqNum(num.getResidueNumber()); 407 resNum.setInsCode(num.getInsCode()); 408 //group = chain.getGroupByPDB(numIns); 409 410 group = mapChainIdChain.get(num.getChainId()).getGroupByPDB(resNum); 411 //group = mapChainIdChain.get(num.getChainId()).getGroupByPDB(resNum); 412 } catch (StructureException e) { 413 logger.error("Exception: ", e); 414 // should not happen 415 continue; 416 } 417 identifiedGroups.add(group); 418 } 419 420 int start = 0; 421 422 int n = identifiedGroups.size(); 423 while (n > start) { 424 for (Group group1 : ligands) { 425 for (int i=start; i<n; i++) { 426 Group group2 = identifiedGroups.get(i); 427 if (!identifiedGroups.contains(group1)) { 428 List<Atom[]> linkedAtoms = StructureUtil.findAtomLinkages( 429 group1, group2, false, bondLengthTolerance); 430 if (!linkedAtoms.isEmpty()) { 431 for (Atom[] atoms : linkedAtoms) { 432 mc.addAtomLinkage(StructureUtil.getStructureAtomLinkage(atoms[0], 433 false, atoms[1], false)); 434 } 435 identifiedGroups.add(group1); 436 break; 437 } 438 } 439 } 440 } 441 442 start = n; 443 n = identifiedGroups.size(); 444 } 445 } 446 447 private Group getGroup(StructureGroup num, List<Chain> chains) throws StructureException { 448 for (Chain c : chains){ 449 if ( c.getId().equals(num.getChainId())){ 450 451 ResidueNumber resNum = new ResidueNumber(); 452 453 resNum.setSeqNum(num.getResidueNumber()); 454 resNum.setInsCode(num.getInsCode()); 455 456 457 return c.getGroupByPDB(resNum); 458 } 459 } 460 461 throw new StructureException("Could not find residue " + num); 462 } 463 464 /** 465 * Merge identified modified compounds if linked. 466 */ 467 private void mergeModComps(List<ModifiedCompound> modComps) { 468 TreeSet<Integer> remove = new TreeSet<Integer>(); 469 int n = modComps.size(); 470 for (int icurr=1; icurr<n; icurr++) { 471 ModifiedCompound curr = modComps.get(icurr); 472 473 String id = curr.getModification().getId(); 474 if (ProteinModificationRegistry.getById(id).getCategory() 475 !=ModificationCategory.UNDEFINED) 476 continue; 477 478 // find linked compounds that before curr 479 //List<Integer> merging = new ArrayList<Integer>(); 480 int ipre = 0; 481 for (; ipre<icurr; ipre++) { 482 if (remove.contains(ipre)) continue; 483 ModifiedCompound pre = modComps.get(ipre); 484 if (!Collections.disjoint(pre.getGroups(false), 485 curr.getGroups(false))) { 486 break; 487 } 488 } 489 490 if (ipre<icurr) { 491 ModifiedCompound mcKeep = modComps.get(ipre); 492 493 // merge modifications of the same type 494 if (mcKeep.getModification().getId().equals(id)) { 495 // merging the current one to the previous one 496 mcKeep.addAtomLinkages(curr.getAtomLinkages()); 497 remove.add(icurr); 498 } 499 } 500 } 501 502 Iterator<Integer> it = remove.descendingIterator(); 503 while (it.hasNext()) { 504 modComps.remove(it.next().intValue()); 505 } 506 } 507 508 /** 509 * Record unidentifiable atom linkages in a chain. Only linkages between two 510 * residues or one residue and one ligand will be recorded. 511 */ 512 private void recordUnidentifiableAtomLinkages(List<ModifiedCompound> modComps, 513 List<Group> ligands) { 514 515 // first put identified linkages in a map for fast query 516 Set<StructureAtomLinkage> identifiedLinkages = new HashSet<StructureAtomLinkage>(); 517 for (ModifiedCompound mc : modComps) { 518 identifiedLinkages.addAll(mc.getAtomLinkages()); 519 } 520 521 // record 522 // cross link 523 int nRes = residues.size(); 524 for (int i=0; i<nRes-1; i++) { 525 Group group1 = residues.get(i); 526 for (int j=i+1; j<nRes; j++) { 527 Group group2 = residues.get(j); 528 List<Atom[]> linkages = StructureUtil.findAtomLinkages( 529 group1, group2, true, bondLengthTolerance); 530 for (Atom[] atoms : linkages) { 531 StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0], 532 true, atoms[1], true); 533 unidentifiableAtomLinkages.add(link); 534 } 535 } 536 } 537 538 // attachment 539 int nLig = ligands.size(); 540 for (int i=0; i<nRes; i++) { 541 Group group1 = residues.get(i); 542 for (int j=0; j<nLig; j++) { 543 Group group2 = ligands.get(j); 544 if (group1.equals(group2)) { // overlap between residues and ligands 545 continue; 546 } 547 List<Atom[]> linkages = StructureUtil.findAtomLinkages( 548 group1, group2, false, bondLengthTolerance); 549 for (Atom[] atoms : linkages) { 550 StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0], 551 true, atoms[1], false); 552 unidentifiableAtomLinkages.add(link); 553 } 554 } 555 } 556 } 557 558 private void recordUnidentifiableModifiedResidues(List<ModifiedCompound> modComps) { 559 Set<StructureGroup> identifiedComps = new HashSet<StructureGroup>(); 560 for (ModifiedCompound mc : modComps) { 561 identifiedComps.addAll(mc.getGroups(true)); 562 } 563 564 // TODO: use the ModifiedAminoAcid after Andreas add that. 565 for (Group group : residues) { 566 if (group.getType().equals(GroupType.HETATM)) { 567 StructureGroup strucGroup = StructureUtil.getStructureGroup( 568 group, true); 569 //strucGroup.setChainId(group.getChainId()); 570 571 if (!identifiedComps.contains(strucGroup)) { 572 unidentifiableModifiedResidues.add(strucGroup); 573 } 574 } 575 } 576 } 577 578 /** 579 * 580 * @param modifications a set of {@link ProteinModification}s. 581 * @param residues 582 * @param ligands 583 * @param saveTo save result to 584 * @return map from component to list of corresponding residues 585 * in the chain. 586 */ 587 private void addModificationGroups( 588 final Set<ProteinModification> modifications, 589 final List<Group> residues, 590 final List<Group> ligands, 591 final Map<Component, Set<Group>> saveTo) { 592 if (residues==null || ligands==null || modifications==null) { 593 throw new IllegalArgumentException("Null argument(s)."); 594 } 595 596 Map<Component,Set<Component>> mapSingleMultiComps = new HashMap<Component,Set<Component>>(); 597 for (ProteinModification mod : modifications) { 598 ModificationCondition condition = mod.getCondition(); 599 for (Component comp : condition.getComponents()) { 600 for (String pdbccId : comp.getPdbccIds()) { 601 Component single = Component.of(Collections.singleton(pdbccId), 602 comp.isNTerminal(), comp.isCTerminal()); 603 Set<Component> mult = mapSingleMultiComps.get(single); 604 if (mult == null) { 605 mult = new HashSet<Component>(); 606 mapSingleMultiComps.put(single, mult); 607 } 608 mult.add(comp); 609 } 610 } 611 } 612 613 { 614 // ligands 615 Set<Component> ligandsWildCard = mapSingleMultiComps.get( 616 Component.of("*")); 617 for (Group group : ligands) { 618 String pdbccId = group.getPDBName().trim(); 619 Set<Component> comps = mapSingleMultiComps.get( 620 Component.of(pdbccId)); 621 622 for (Component comp : unionComponentSet(ligandsWildCard, comps)) { 623 Set<Group> gs = saveTo.get(comp); 624 if (gs==null) { 625 gs = new LinkedHashSet<Group>(); 626 saveTo.put(comp, gs); 627 } 628 gs.add(group); 629 } 630 } 631 } 632 633 { 634 // residues 635 if (residues.isEmpty()) { 636 return; 637 } 638 639 Set<Component> residuesWildCard = mapSingleMultiComps.get( 640 Component.of("*")); 641 642 // for all residues 643 for (Group group : residues) { 644 String pdbccId = group.getPDBName().trim(); 645 Set<Component> comps = mapSingleMultiComps.get( 646 Component.of(pdbccId)); 647 648 for (Component comp : unionComponentSet(residuesWildCard, comps)) { 649 Set<Group> gs = saveTo.get(comp); 650 if (gs==null) { 651 gs = new LinkedHashSet<Group>(); 652 saveTo.put(comp, gs); 653 } 654 gs.add(group); 655 } 656 } 657 658 // for N-terminal 659 int nRes = residues.size(); 660 int iRes = 0; 661 Group res; 662 do { 663 // for all ligands on N terminal and the first residue 664 res = residues.get(iRes++); 665 666 Set<Component> nTermWildCard = mapSingleMultiComps.get( 667 Component.of("*", true, false)); 668 669 Set<Component> comps = mapSingleMultiComps.get( 670 Component.of(res.getPDBName(), true, false)); 671 672 for (Component comp : unionComponentSet(nTermWildCard, comps)) { 673 Set<Group> gs = saveTo.get(comp); 674 if (gs==null) { 675 gs = new LinkedHashSet<Group>(); 676 saveTo.put(comp, gs); 677 } 678 gs.add(res); 679 } 680 } while (iRes<nRes && ligands.contains(res)); 681 682 // for C-terminal 683 iRes = residues.size()-1; 684 do { 685 // for all ligands on C terminal and the last residue 686 res = residues.get(iRes--); 687 688 Set<Component> cTermWildCard = mapSingleMultiComps.get( 689 Component.of("*", false, true)); 690 691 Set<Component> comps = mapSingleMultiComps.get( 692 Component.of(res.getPDBName(), false, true)); 693 694 for (Component comp : unionComponentSet(cTermWildCard, comps)) { 695 Set<Group> gs = saveTo.get(comp); 696 if (gs==null) { 697 gs = new LinkedHashSet<Group>(); 698 saveTo.put(comp, gs); 699 } 700 gs.add(res); 701 } 702 } while (iRes>=0 && ligands.contains(res)); 703 } 704 } 705 706 private Set<Component> unionComponentSet(Set<Component> set1, Set<Component> set2) { 707 if (set1 == null && set2 == null) 708 return Collections.emptySet(); 709 710 if (set1 == null) 711 return set2; 712 713 if (set2 == null) 714 return set1; 715 716 Set<Component> set = new HashSet<Component>(set1.size()+set2.size()); 717 set.addAll(set1); 718 set.addAll(set2); 719 720 return set; 721 } 722 723 /** 724 * Get matched atoms for all linkages. 725 */ 726 private List<List<Atom[]>> getMatchedAtomsOfLinkages( 727 ModificationCondition condition, Map<Component, Set<Group>> mapCompGroups) { 728 List<ModificationLinkage> linkages = condition.getLinkages(); 729 int nLink = linkages.size(); 730 731 List<List<Atom[]>> matchedAtomsOfLinkages = 732 new ArrayList<List<Atom[]>>(nLink); 733 734 for (int iLink=0; iLink<nLink; iLink++) { 735 ModificationLinkage linkage = linkages.get(iLink); 736 Component comp1 = linkage.getComponent1(); 737 Component comp2 = linkage.getComponent2(); 738 739// boolean isAA1 = comp1.; 740// boolean isAA2 = comp2.getType()==true; 741 742 Set<Group> groups1 = mapCompGroups.get(comp1); 743 Set<Group> groups2 = mapCompGroups.get(comp2); 744 745 List<Atom[]> list = new ArrayList<Atom[]>(); 746 747 List<String> potentialNamesOfAtomOnGroup1 = linkage.getPDBNameOfPotentialAtomsOnComponent1(); 748 for (String name : potentialNamesOfAtomOnGroup1) { 749 if (name.equals("*")) { 750 // wildcard 751 potentialNamesOfAtomOnGroup1 = null; // search all atoms 752 break; 753 } 754 } 755 756 List<String> potentialNamesOfAtomOnGroup2 = linkage.getPDBNameOfPotentialAtomsOnComponent2(); 757 for (String name : potentialNamesOfAtomOnGroup2) { 758 if (name.equals("*")) { 759 // wildcard 760 potentialNamesOfAtomOnGroup2 = null; // search all atoms 761 break; 762 } 763 } 764 765 for (Group g1 : groups1) { 766 for (Group g2 : groups2) { 767 if (g1.equals(g2)) { 768 continue; 769 } 770 771 // only for wildcard match of two residues 772 boolean ignoreNCLinkage = 773 potentialNamesOfAtomOnGroup1 == null && 774 potentialNamesOfAtomOnGroup2 == null && 775 residues.contains(g1) && 776 residues.contains(g2); 777 778 Atom[] atoms = StructureUtil.findNearestAtomLinkage( 779 g1, g2, 780 potentialNamesOfAtomOnGroup1, 781 potentialNamesOfAtomOnGroup2, 782 ignoreNCLinkage, 783 bondLengthTolerance); 784 if (atoms!=null) { 785 list.add(atoms); 786 } 787 } 788 } 789 790 if (list.isEmpty()) { 791 // broken linkage 792 break; 793 } 794 795 matchedAtomsOfLinkages.add(list); 796 } 797 798 return matchedAtomsOfLinkages; 799 } 800 801 /** Assembly the matched linkages 802 * 803 * @param matchedAtomsOfLinkages 804 * @param mod 805 * @param ret ModifiedCompound will be stored here 806 */ 807 private void assembleLinkages(List<List<Atom[]>> matchedAtomsOfLinkages, 808 ProteinModification mod, List<ModifiedCompound> ret) { 809 ModificationCondition condition = mod.getCondition(); 810 List<ModificationLinkage> modLinks = condition.getLinkages(); 811 812 int nLink = matchedAtomsOfLinkages.size(); 813 int[] indices = new int[nLink]; 814 Set<ModifiedCompound> identifiedCompounds = new HashSet<ModifiedCompound>(); 815 while (indices[0]<matchedAtomsOfLinkages.get(0).size()) { 816 List<Atom[]> atomLinkages = new ArrayList<Atom[]>(nLink); 817 for (int iLink=0; iLink<nLink; iLink++) { 818 Atom[] atoms = matchedAtomsOfLinkages.get(iLink).get(indices[iLink]); 819 atomLinkages.add(atoms); 820 } 821 if (matchLinkages(modLinks, atomLinkages)) { 822 // matched 823 824 int n = atomLinkages.size(); 825 List<StructureAtomLinkage> linkages = new ArrayList<StructureAtomLinkage>(n); 826 for (int i=0; i<n; i++) { 827 Atom[] linkage = atomLinkages.get(i); 828 StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage( 829 linkage[0], residues.contains(linkage[0].getGroup()), 830 linkage[1], residues.contains(linkage[1].getGroup())); 831 linkages.add(link); 832 } 833 834 ModifiedCompound mc = new ModifiedCompoundImpl(mod, linkages); 835 if (!identifiedCompounds.contains(mc)) { 836 ret.add(mc); 837 identifiedCompounds.add(mc); 838 } 839 } 840 841 // indices++ (e.g. [0,0,1]=>[0,0,2]=>[1,2,0]) 842 int i = nLink-1; 843 while (i>=0) { 844 if (i==0 || indices[i]<matchedAtomsOfLinkages.get(i).size()-1) { 845 indices[i]++; 846 break; 847 } else { 848 indices[i] = 0; 849 i--; 850 } 851 } 852 } 853 } 854 855 /** 856 * 857 * @param linkages 858 * @param atomLinkages 859 * @return true if atomLinkages satisfy the condition; false, otherwise. 860 */ 861 private boolean matchLinkages(List<ModificationLinkage> linkages, 862 List<Atom[]> atomLinkages) { 863 int nLink = linkages.size(); 864 if (nLink != atomLinkages.size()) { 865 return false; 866 } 867 for (int i=0; i<nLink-1; i++) { 868 ModificationLinkage link1 = linkages.get(i); 869 Atom[] atoms1 = atomLinkages.get(i); 870 for (int j=i+1; j<nLink; j++) { 871 ModificationLinkage link2 = linkages.get(j); 872 Atom[] atoms2 = atomLinkages.get(j); 873 874 // check components 875 if (((link1.getIndexOfComponent1()==link2.getIndexOfComponent1()) 876 != (atoms1[0].getGroup().equals(atoms2[0].getGroup()))) 877 || ((link1.getIndexOfComponent1()==link2.getIndexOfComponent2()) 878 != (atoms1[0].getGroup().equals(atoms2[1].getGroup()))) 879 || ((link1.getIndexOfComponent2()==link2.getIndexOfComponent1()) 880 != (atoms1[1].getGroup().equals(atoms2[0].getGroup()))) 881 || ((link1.getIndexOfComponent2()==link2.getIndexOfComponent2()) 882 != (atoms1[1].getGroup().equals(atoms2[1].getGroup())))) { 883 return false; 884 } 885 886 // check atoms 887 String label11 = link1.getLabelOfAtomOnComponent1(); 888 String label12 = link1.getLabelOfAtomOnComponent2(); 889 String label21 = link2.getLabelOfAtomOnComponent1(); 890 String label22 = link2.getLabelOfAtomOnComponent2(); 891 if ((label11!=null && label21!=null && label11.equals(label21)) 892 != (atoms1[0].equals(atoms2[0])) 893 || (label11!=null && label22!=null && label11.equals(label22)) 894 != (atoms1[0].equals(atoms2[1])) 895 || (label12!=null && label21!=null && label12.equals(label21)) 896 != (atoms1[1].equals(atoms2[0])) 897 || (label12!=null && label22!=null && label12.equals(label22)) 898 != (atoms1[1].equals(atoms2[1]))) { 899 return false; 900 } 901 } 902 } 903 904 return true; 905 } 906}