001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Jun 12, 2010
021 * Author: Jianjiong Gao
022 *
023 */
024
025package org.biojava.nbio.protmod.structure;
026
027import org.biojava.nbio.protmod.*;
028import org.biojava.nbio.structure.*;
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031
032import java.util.*;
033
034/**
035 * Identify attachment modification in a 3-D structure.
036 *
037 * @author Jianjiong Gao
038 * @since 3.0
039 */
040public class ProteinModificationIdentifier {
041
042        private static final Logger logger = LoggerFactory.getLogger(ProteinModificationIdentifier.class);
043
044        private double bondLengthTolerance ;
045        private boolean recordUnidentifiableModifiedCompounds ;
046        private boolean recordAdditionalAttachments ;
047
048        private Set<ModifiedCompound> identifiedModifiedCompounds = null;
049        private Set<StructureAtomLinkage> unidentifiableAtomLinkages = null;
050        private Set<StructureGroup> unidentifiableModifiedResidues = null;
051
052        /**
053         * Temporary save the amino acids for each call of identify().
054         */
055        private List<Group> residues;
056
057
058        public ProteinModificationIdentifier(){
059
060                bondLengthTolerance =  0.4;
061                recordUnidentifiableModifiedCompounds = false;
062                recordAdditionalAttachments = true;
063
064                reset();
065        }
066
067
068        public void destroy(){
069                if ( identifiedModifiedCompounds != null)
070                        identifiedModifiedCompounds.clear();
071                if ( unidentifiableAtomLinkages != null)
072                        unidentifiableAtomLinkages.clear();
073                if ( unidentifiableModifiedResidues != null)
074                        unidentifiableModifiedResidues.clear();
075
076                unidentifiableAtomLinkages = null;
077                unidentifiableAtomLinkages = null;
078                unidentifiableModifiedResidues = null;
079
080
081        }
082
083        /**
084         *
085         * @param bondLengthTolerance tolerance of error (in Angstroms) of the
086         *  covalent bond length, when calculating the atom distance threshold.
087         */
088        public void setbondLengthTolerance(final double bondLengthTolerance) {
089                if (bondLengthTolerance<0) {
090                        throw new IllegalArgumentException("bondLengthTolerance " +
091                                        "must be positive.");
092                }
093                this.bondLengthTolerance = bondLengthTolerance;
094        }
095
096        /**
097         *
098         * @param recordUnidentifiableModifiedCompounds true if choosing to record unidentifiable
099         *  atoms; false, otherwise.
100         * @see #getRecordUnidentifiableCompounds
101         * @see #getUnidentifiableModifiedResidues
102         * @see #getUnidentifiableAtomLinkages
103         */
104        public void setRecordUnidentifiableCompounds(boolean recordUnidentifiableModifiedCompounds) {
105                this.recordUnidentifiableModifiedCompounds = recordUnidentifiableModifiedCompounds;
106        }
107
108        /**
109         *
110         * @return true if choosing to record unidentifiable
111         *  atoms; false, otherwise.
112         * @see #setRecordUnidentifiableCompounds
113         * @see #getUnidentifiableModifiedResidues
114         * @see #getUnidentifiableAtomLinkages
115         */
116        public boolean getRecordUnidentifiableCompounds() {
117                return recordUnidentifiableModifiedCompounds;
118        }
119
120        /**
121         *
122         * @param recordAdditionalAttachments true if choosing to record additional attachments
123         *  that are not directly attached to a modified residue.
124         * @see #getRecordAdditionalAttachments
125         */
126        public void setRecordAdditionalAttachments(boolean recordAdditionalAttachments) {
127                this.recordAdditionalAttachments = recordAdditionalAttachments;
128        }
129
130        /**
131         *
132         * @return true if choosing to record additional attachments
133         *  that are not directly attached to a modified residue.
134         * @see #setRecordAdditionalAttachments
135         */
136        public boolean getRecordAdditionalAttachments() {
137                return recordAdditionalAttachments;
138        }
139
140        /**
141         *
142         * @return a set of identified {@link ModifiedCompound}s from
143         *  the last parse result.
144         * @see ModifiedCompound
145         */
146        public Set<ModifiedCompound> getIdentifiedModifiedCompound() {
147                if (identifiedModifiedCompounds==null) {
148                        throw new IllegalStateException("No result available. Please call parse() first.");
149                }
150
151                return identifiedModifiedCompounds;
152        }
153
154        /**
155         *
156         * @return a set of atom linkages, which represent the
157         *  atom bonds that were not covered by the identified
158         *  {@link ModifiedCompound}s from the last parse result.
159         *  Each element of the list is a array containing two atoms.
160         * @see StructureAtomLinkage
161         * @see #setRecordUnidentifiableCompounds
162         */
163        public Set<StructureAtomLinkage> getUnidentifiableAtomLinkages() {
164                if (!recordUnidentifiableModifiedCompounds) {
165                        throw new UnsupportedOperationException("Recording unidentified atom linkages" +
166                                        "is not supported. Please setRecordUnidentifiableCompounds(true) first.");
167                }
168
169                if (identifiedModifiedCompounds==null) {
170                        throw new IllegalStateException("No result available. Please call parse() first.");
171                }
172
173                return unidentifiableAtomLinkages;
174        }
175
176        /**
177         *
178         * @return a set of modified residues that were not covered by
179         *  the identified ModifiedCompounds from the last parse
180         *  result.
181         *  @see StructureGroup
182         *  @see #setRecordUnidentifiableCompounds
183         *  @see #getIdentifiedModifiedCompound
184         */
185        public Set<StructureGroup> getUnidentifiableModifiedResidues() {
186                if (!recordUnidentifiableModifiedCompounds) {
187                        throw new UnsupportedOperationException("Recording unidentified atom linkages" +
188                                        "is not supported. Please setRecordUnidentifiableCompounds(true) first.");
189                }
190
191                if (identifiedModifiedCompounds==null) {
192                        throw new IllegalStateException("No result available. Please call parse() first.");
193                }
194
195                return unidentifiableModifiedResidues;
196        }
197
198        /**
199         * Identify all registered modifications in a structure.
200         * @param structure
201         */
202        public void identify(final Structure structure) {
203                identify(structure, ProteinModificationRegistry.allModifications());
204        }
205
206        /**
207         * Identify a set of modifications in a structure.
208         * @param structure query {@link Structure}.
209         * @param potentialModifications query {@link ProteinModification}s.
210         */
211        public void identify(final Structure structure,
212                        final Set<ProteinModification> potentialModifications) {
213                if (structure==null) {
214                        throw new IllegalArgumentException("Null structure.");
215                }
216
217                identify(structure.getChains(), potentialModifications);
218        }
219
220        /**
221         * Identify all registered modifications in a chain.
222         * @param chain query {@link Chain}.
223         */
224        public void identify(final Chain chain) {
225                identify(Collections.singletonList(chain));
226        }
227
228        /**
229         * Identify all registered modifications in chains.
230         * @param chains query {@link Chain}s.
231         */
232        public void identify(final List<Chain> chains) {
233                identify(chains, ProteinModificationRegistry.allModifications());
234        }
235
236        /**
237         * Identify a set of modifications in a a chains.
238         * @param chain query {@link Chain}.
239         * @param potentialModifications query {@link ProteinModification}s.
240         */
241        public void identify(final Chain chain,
242                        final Set<ProteinModification> potentialModifications)  {
243                identify(Collections.singletonList(chain), potentialModifications);
244        }
245
246        /**
247         * Identify a set of modifications in a a list of chains.
248         * @param chains query {@link Chain}s.
249         * @param potentialModifications query {@link ProteinModification}s.
250         */
251        public void identify(final List<Chain> chains,
252                        final Set<ProteinModification> potentialModifications) {
253
254                if (chains==null) {
255                        throw new IllegalArgumentException("Null structure.");
256                }
257
258                if (potentialModifications==null) {
259                        throw new IllegalArgumentException("Null potentialModifications.");
260                }
261
262
263                reset();
264
265                if (potentialModifications.isEmpty()) {
266                        return;
267                }
268
269
270                residues = new ArrayList<Group>();
271                List<Group> ligands = new ArrayList<Group>();
272                Map<Component, Set<Group>> mapCompGroups = new HashMap<Component, Set<Group>>();
273
274                for (Chain chain : chains) {
275
276                        List<Group> ress = StructureUtil.getAminoAcids(chain);
277
278                        //List<Group> ligs = chain.getAtomLigands();
279                        List<Group> ligs = StructureTools.filterLigands(chain.getAtomGroups());
280                        residues.addAll(ress);
281                        residues.removeAll(ligs);
282                        ligands.addAll(ligs);
283                        addModificationGroups(potentialModifications, ress, ligs, mapCompGroups);
284                }
285
286                if (residues.isEmpty()) {
287                        String pdbId = "?";
288                        if ( chains.size() > 0) {
289                                Structure struc = chains.get(0).getStructure();
290                                if ( struc != null)
291                                        pdbId = struc.getPDBCode();
292                        }
293                        logger.warn("No amino acids found for {}. Either you did not parse the PDB file with alignSEQRES records, or this record does not contain any amino acids.", pdbId);
294                }
295                List<ModifiedCompound> modComps = new ArrayList<ModifiedCompound>();
296
297                for (ProteinModification mod : potentialModifications) {
298                        ModificationCondition condition = mod.getCondition();
299                        List<Component> components = condition.getComponents();
300                        if (!mapCompGroups.keySet().containsAll(components)) {
301                                // not all components exist for this mod.
302                                continue;
303                        }
304
305                        int sizeComps = components.size();
306                        if (sizeComps==1) {
307
308                                processCrosslink1(mapCompGroups, modComps, mod, components);
309
310                        } else {
311
312                                processMultiCrosslink(mapCompGroups, modComps, mod, condition);
313                        }
314                }
315
316                if (recordAdditionalAttachments) {
317                        // identify additional groups that are not directly attached to amino acids.
318                        for (ModifiedCompound mc : modComps) {
319                                identifyAdditionalAttachments(mc, ligands, chains);
320                        }
321                }
322
323                mergeModComps(modComps);
324
325                identifiedModifiedCompounds.addAll(modComps);
326
327
328                // record unidentifiable linkage
329                if (recordUnidentifiableModifiedCompounds) {
330                        recordUnidentifiableAtomLinkages(modComps, ligands);
331                        recordUnidentifiableModifiedResidues(modComps);
332                }
333        }
334
335        private void reset() {
336                identifiedModifiedCompounds = new LinkedHashSet<ModifiedCompound>();
337                if (recordUnidentifiableModifiedCompounds) {
338                        unidentifiableAtomLinkages = new LinkedHashSet<StructureAtomLinkage>();
339                        unidentifiableModifiedResidues = new LinkedHashSet<StructureGroup>();
340                }
341
342        }
343
344        private void processMultiCrosslink(
345                        Map<Component, Set<Group>> mapCompGroups,
346                        List<ModifiedCompound> modComps, ProteinModification mod,
347                        ModificationCondition condition) {
348                // for multiple components
349
350                // find linkages first
351                List<List<Atom[]>> matchedAtomsOfLinkages =
352                                getMatchedAtomsOfLinkages(condition, mapCompGroups);
353
354                if (matchedAtomsOfLinkages.size() != condition.getLinkages().size()) {
355                        return;
356                }
357
358                assembleLinkages(matchedAtomsOfLinkages, mod, modComps);
359
360        }
361
362        private void processCrosslink1(Map<Component, Set<Group>> mapCompGroups,
363                        List<ModifiedCompound> modComps, ProteinModification mod,
364                        List<Component> components) {
365                // modified residue
366                // TODO: is this the correct logic for CROSS_LINK_1?
367                Set<Group> modifiedResidues = mapCompGroups.get(components.get(0));
368                if (modifiedResidues != null) {
369                        for (Group residue : modifiedResidues) {
370                                StructureGroup strucGroup = StructureUtil.getStructureGroup(residue, true);
371                                ModifiedCompound modRes = new ModifiedCompoundImpl(mod, strucGroup);
372                                modComps.add(modRes);
373                        }
374                }
375        }
376
377        /**
378         * identify additional groups that are not directly attached to amino acids.
379         * @param mc {@link ModifiedCompound}
380         * @param ligands {@link Group}
381         * @param chains List of {@link Chain}s
382         * @return a list of added groups
383         */
384        private void identifyAdditionalAttachments(ModifiedCompound mc,
385                        List<Group> ligands, List<Chain> chains) {
386                if (ligands.isEmpty()) {
387                        return;
388                }
389
390                // TODO: should the additional groups only be allowed to the identified
391                // ligands or both amino acids and ligands? Currently only on ligands
392                // ligands to amino acid bonds for same modification of unknown category
393                // will be combined in mergeModComps()
394                // TODO: how about chain-chain links?
395                List<Group> identifiedGroups = new ArrayList<Group>();
396                for (StructureGroup num : mc.getGroups(false)) {
397                        Group group;
398                        try {
399                                //String numIns = "" + num.getResidueNumber();
400                                //if (num.getInsCode() != null) {
401                                //      numIns += num.getInsCode();
402                                //}
403                                ResidueNumber resNum = new ResidueNumber();
404                                resNum.setChainName(num.getChainId());
405                                resNum.setSeqNum(num.getResidueNumber());
406                                resNum.setInsCode(num.getInsCode());
407                                //group = chain.getGroupByPDB(numIns);
408
409                                group = getGroup(num,chains);
410                                //group = mapChainIdChain.get(num.getChainId()).getGroupByPDB(resNum);
411                        } catch (StructureException e) {
412                                logger.error("Exception: ", e);
413                                // should not happen
414                                continue;
415                        }
416                        identifiedGroups.add(group);
417                }
418
419                int start = 0;
420
421                int n = identifiedGroups.size();
422                while (n > start) {
423                        for (Group group1 : ligands) {
424                                for (int i=start; i<n; i++) {
425                                        Group group2 = identifiedGroups.get(i);
426                                        if (!identifiedGroups.contains(group1)) {
427                                                List<Atom[]> linkedAtoms = StructureUtil.findAtomLinkages(
428                                                                group1, group2, false, bondLengthTolerance);
429                                                if (!linkedAtoms.isEmpty()) {
430                                                        for (Atom[] atoms : linkedAtoms) {
431                                                                mc.addAtomLinkage(StructureUtil.getStructureAtomLinkage(atoms[0],
432                                                                                false, atoms[1], false));
433                                                        }
434                                                        identifiedGroups.add(group1);
435                                                        break;
436                                                }
437                                        }
438                                }
439                        }
440
441                        start = n;
442                        n = identifiedGroups.size();
443                }
444        }
445
446        private Group getGroup(StructureGroup num, List<Chain> chains) throws StructureException {
447                for (Chain c : chains){
448                        if ( c.getId().equals(num.getChainId())){
449
450                                ResidueNumber resNum = new ResidueNumber();
451
452                                resNum.setSeqNum(num.getResidueNumber());
453                                resNum.setInsCode(num.getInsCode());
454
455
456                                return c.getGroupByPDB(resNum);
457                        }
458                }
459
460                throw new StructureException("Could not find residue " + num);
461        }
462
463        /**
464         * Merge identified modified compounds if linked.
465         */
466        private void mergeModComps(List<ModifiedCompound> modComps) {
467                TreeSet<Integer> remove = new TreeSet<Integer>();
468                int n = modComps.size();
469                for (int icurr=1; icurr<n; icurr++) {
470                        ModifiedCompound curr = modComps.get(icurr);
471
472                        String id = curr.getModification().getId();
473                        if (ProteinModificationRegistry.getById(id).getCategory()
474                                        !=ModificationCategory.UNDEFINED)
475                                continue;
476
477                        // find linked compounds that before curr
478                        //List<Integer> merging = new ArrayList<Integer>();
479                        int ipre = 0;
480                        for (; ipre<icurr; ipre++) {
481                                if (remove.contains(ipre))      continue;
482                                ModifiedCompound pre = modComps.get(ipre);
483                                if (!Collections.disjoint(pre.getGroups(false),
484                                                curr.getGroups(false))) {
485                                        break;
486                                }
487                        }
488
489                        if (ipre<icurr) {
490                                ModifiedCompound mcKeep = modComps.get(ipre);
491
492                                // merge modifications of the same type
493                                if (mcKeep.getModification().getId().equals(id)) {
494                                        // merging the current one to the previous one
495                                        mcKeep.addAtomLinkages(curr.getAtomLinkages());
496                                        remove.add(icurr);
497                                }
498                        }
499                }
500
501                Iterator<Integer> it = remove.descendingIterator();
502                while (it.hasNext()) {
503                        modComps.remove(it.next().intValue());
504                }
505        }
506
507        /**
508         * Record unidentifiable atom linkages in a chain. Only linkages between two
509         * residues or one residue and one ligand will be recorded.
510         */
511        private void recordUnidentifiableAtomLinkages(List<ModifiedCompound> modComps,
512                        List<Group> ligands) {
513
514                // first put identified linkages in a map for fast query
515                Set<StructureAtomLinkage> identifiedLinkages = new HashSet<StructureAtomLinkage>();
516                for (ModifiedCompound mc : modComps) {
517                        identifiedLinkages.addAll(mc.getAtomLinkages());
518                }
519
520                // record
521                // cross link
522                int nRes = residues.size();
523                for (int i=0; i<nRes-1; i++) {
524                        Group group1 = residues.get(i);
525                        for (int j=i+1; j<nRes; j++) {
526                                Group group2 = residues.get(j);
527                                List<Atom[]> linkages = StructureUtil.findAtomLinkages(
528                                                group1, group2, true, bondLengthTolerance);
529                                for (Atom[] atoms : linkages) {
530                                        StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0],
531                                                        true, atoms[1], true);
532                                        unidentifiableAtomLinkages.add(link);
533                                }
534                        }
535                }
536
537                // attachment
538                int nLig = ligands.size();
539                for (int i=0; i<nRes; i++) {
540                        Group group1 = residues.get(i);
541                        for (int j=0; j<nLig; j++) {
542                                Group group2 = ligands.get(j);
543                                if (group1.equals(group2)) { // overlap between residues and ligands
544                                        continue;
545                                }
546                                List<Atom[]> linkages = StructureUtil.findAtomLinkages(
547                                                group1, group2, false, bondLengthTolerance);
548                                for (Atom[] atoms : linkages) {
549                                        StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0],
550                                                        true, atoms[1], false);
551                                        unidentifiableAtomLinkages.add(link);
552                                }
553                        }
554                }
555        }
556
557        private void recordUnidentifiableModifiedResidues(List<ModifiedCompound> modComps) {
558                Set<StructureGroup> identifiedComps = new HashSet<StructureGroup>();
559                for (ModifiedCompound mc : modComps) {
560                        identifiedComps.addAll(mc.getGroups(true));
561                }
562
563                // TODO: use the ModifiedAminoAcid after Andreas add that.
564                for (Group group : residues) {
565                        if (group.getType().equals(GroupType.HETATM)) {
566                                StructureGroup strucGroup = StructureUtil.getStructureGroup(
567                                                group, true);
568                                strucGroup.setChainId(group.getChainId());
569
570                                if (!identifiedComps.contains(strucGroup)) {
571                                        unidentifiableModifiedResidues.add(strucGroup);
572                                }
573                        }
574                }
575        }
576
577        /**
578         *
579         * @param modifications a set of {@link ProteinModification}s.
580         * @param residues
581         * @param ligands
582         * @param saveTo save result to
583         * @return map from component to list of corresponding residues
584         *  in the chain.
585         */
586        private void addModificationGroups(
587                        final Set<ProteinModification> modifications,
588                        final List<Group> residues,
589                        final List<Group> ligands,
590                        final Map<Component, Set<Group>> saveTo) {
591                if (residues==null || ligands==null || modifications==null) {
592                        throw new IllegalArgumentException("Null argument(s).");
593                }
594
595                Map<Component,Set<Component>> mapSingleMultiComps = new HashMap<Component,Set<Component>>();
596                for (ProteinModification mod : modifications) {
597                        ModificationCondition condition = mod.getCondition();
598                        for (Component comp : condition.getComponents()) {
599                                for (String pdbccId : comp.getPdbccIds()) {
600                                        Component single = Component.of(Collections.singleton(pdbccId),
601                                                        comp.isNTerminal(), comp.isCTerminal());
602                                        Set<Component> mult = mapSingleMultiComps.get(single);
603                                        if (mult == null) {
604                                                mult = new HashSet<Component>();
605                                                mapSingleMultiComps.put(single, mult);
606                                        }
607                                        mult.add(comp);
608                                }
609                        }
610                }
611
612                {
613                        // ligands
614                        Set<Component> ligandsWildCard = mapSingleMultiComps.get(
615                                        Component.of("*"));
616                        for (Group group : ligands) {
617                                String pdbccId = group.getPDBName().trim();
618                                Set<Component> comps = mapSingleMultiComps.get(
619                                                Component.of(pdbccId));
620
621                                for (Component comp : unionComponentSet(ligandsWildCard, comps)) {
622                                        Set<Group> gs = saveTo.get(comp);
623                                        if (gs==null) {
624                                                gs = new LinkedHashSet<Group>();
625                                                saveTo.put(comp, gs);
626                                        }
627                                        gs.add(group);
628                                }
629                        }
630                }
631
632                {
633                        // residues
634                        if (residues.isEmpty()) {
635                                return;
636                        }
637
638                        Set<Component> residuesWildCard = mapSingleMultiComps.get(
639                                        Component.of("*"));
640
641                        // for all residues
642                        for (Group group : residues) {
643                                String pdbccId = group.getPDBName().trim();
644                                Set<Component> comps = mapSingleMultiComps.get(
645                                                Component.of(pdbccId));
646
647                                for (Component comp : unionComponentSet(residuesWildCard, comps)) {
648                                        Set<Group> gs = saveTo.get(comp);
649                                        if (gs==null) {
650                                                gs = new LinkedHashSet<Group>();
651                                                saveTo.put(comp, gs);
652                                        }
653                                        gs.add(group);
654                                }
655                        }
656
657                        // for N-terminal
658                        int nRes = residues.size();
659                        int iRes = 0;
660                        Group res;
661                        do {
662                                // for all ligands on N terminal and the first residue
663                                res = residues.get(iRes++);
664
665                                Set<Component> nTermWildCard = mapSingleMultiComps.get(
666                                                Component.of("*", true, false));
667
668                                Set<Component> comps = mapSingleMultiComps.get(
669                                                Component.of(res.getPDBName(), true, false));
670
671                                for (Component comp : unionComponentSet(nTermWildCard, comps)) {
672                                        Set<Group> gs = saveTo.get(comp);
673                                        if (gs==null) {
674                                                gs = new LinkedHashSet<Group>();
675                                                saveTo.put(comp, gs);
676                                        }
677                                        gs.add(res);
678                                }
679                        } while (iRes<nRes && ligands.contains(res));
680
681                        // for C-terminal
682                        iRes = residues.size()-1;
683                        do {
684                                // for all ligands on C terminal and the last residue
685                                res = residues.get(iRes--);
686
687                                Set<Component> cTermWildCard = mapSingleMultiComps.get(
688                                                Component.of("*", false, true));
689
690                                Set<Component> comps = mapSingleMultiComps.get(
691                                                Component.of(res.getPDBName(), false, true));
692
693                                for (Component comp : unionComponentSet(cTermWildCard, comps)) {
694                                        Set<Group> gs = saveTo.get(comp);
695                                        if (gs==null) {
696                                                gs = new LinkedHashSet<Group>();
697                                                saveTo.put(comp, gs);
698                                        }
699                                        gs.add(res);
700                                }
701                        } while (iRes>=0 && ligands.contains(res));
702                }
703        }
704
705        private Set<Component> unionComponentSet(Set<Component> set1, Set<Component> set2) {
706                if (set1 == null && set2 == null)
707                        return Collections.emptySet();
708
709                if (set1 == null)
710                        return set2;
711
712                if (set2 == null)
713                        return set1;
714
715                Set<Component> set = new HashSet<Component>(set1.size()+set2.size());
716                set.addAll(set1);
717                set.addAll(set2);
718
719                return set;
720        }
721
722        /**
723         * Get matched atoms for all linkages.
724         */
725        private List<List<Atom[]>> getMatchedAtomsOfLinkages(
726                        ModificationCondition condition, Map<Component, Set<Group>> mapCompGroups) {
727                List<ModificationLinkage> linkages = condition.getLinkages();
728                int nLink = linkages.size();
729
730                List<List<Atom[]>> matchedAtomsOfLinkages =
731                                new ArrayList<List<Atom[]>>(nLink);
732
733                for (int iLink=0; iLink<nLink; iLink++) {
734                        ModificationLinkage linkage = linkages.get(iLink);
735                        Component comp1 = linkage.getComponent1();
736                        Component comp2 = linkage.getComponent2();
737
738//                      boolean isAA1 = comp1.;
739//                      boolean isAA2 = comp2.getType()==true;
740
741                        Set<Group> groups1 = mapCompGroups.get(comp1);
742                        Set<Group> groups2 = mapCompGroups.get(comp2);
743
744                        List<Atom[]> list = new ArrayList<Atom[]>();
745
746                        List<String> potentialNamesOfAtomOnGroup1 = linkage.getPDBNameOfPotentialAtomsOnComponent1();
747                        for (String name : potentialNamesOfAtomOnGroup1) {
748                                if (name.equals("*")) {
749                                        // wildcard
750                                        potentialNamesOfAtomOnGroup1 = null; // search all atoms
751                                        break;
752                                }
753                        }
754
755                        List<String> potentialNamesOfAtomOnGroup2 = linkage.getPDBNameOfPotentialAtomsOnComponent2();
756                        for (String name : potentialNamesOfAtomOnGroup2) {
757                                if (name.equals("*")) {
758                                        // wildcard
759                                        potentialNamesOfAtomOnGroup2 = null; // search all atoms
760                                        break;
761                                }
762                        }
763
764                        for (Group g1 : groups1) {
765                                for (Group g2 : groups2) {
766                                        if (g1.equals(g2)) {
767                                                continue;
768                                        }
769
770                                        // only for wildcard match of two residues
771                                        boolean ignoreNCLinkage =
772                                                potentialNamesOfAtomOnGroup1 == null &&
773                                                potentialNamesOfAtomOnGroup2 == null &&
774                                                residues.contains(g1) &&
775                                                residues.contains(g2);
776
777                                        Atom[] atoms = StructureUtil.findNearestAtomLinkage(
778                                                        g1, g2,
779                                                        potentialNamesOfAtomOnGroup1,
780                                                        potentialNamesOfAtomOnGroup2,
781                                                        ignoreNCLinkage,
782                                                        bondLengthTolerance);
783                                        if (atoms!=null) {
784                                                list.add(atoms);
785                                        }
786                                }
787                        }
788
789                        if (list.isEmpty()) {
790                                // broken linkage
791                                break;
792                        }
793
794                        matchedAtomsOfLinkages.add(list);
795                }
796
797                return matchedAtomsOfLinkages;
798        }
799
800        /** Assembly the matched linkages
801         *
802         * @param matchedAtomsOfLinkages
803         * @param mod
804         * @param ret ModifiedCompound will be stored here
805         */
806        private void assembleLinkages(List<List<Atom[]>> matchedAtomsOfLinkages,
807                        ProteinModification mod, List<ModifiedCompound> ret) {
808                ModificationCondition condition = mod.getCondition();
809                List<ModificationLinkage> modLinks = condition.getLinkages();
810
811                int nLink = matchedAtomsOfLinkages.size();
812                int[] indices = new int[nLink];
813                Set<ModifiedCompound> identifiedCompounds = new HashSet<ModifiedCompound>();
814                while (indices[0]<matchedAtomsOfLinkages.get(0).size()) {
815                        List<Atom[]> atomLinkages = new ArrayList<Atom[]>(nLink);
816                        for (int iLink=0; iLink<nLink; iLink++) {
817                                Atom[] atoms = matchedAtomsOfLinkages.get(iLink).get(indices[iLink]);
818                                atomLinkages.add(atoms);
819                        }
820                        if (matchLinkages(modLinks, atomLinkages)) {
821                                // matched
822
823                                int n = atomLinkages.size();
824                                List<StructureAtomLinkage> linkages = new ArrayList<StructureAtomLinkage>(n);
825                                for (int i=0; i<n; i++) {
826                                        Atom[] linkage = atomLinkages.get(i);
827                                        StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(
828                                                        linkage[0], residues.contains(linkage[0].getGroup()),
829                                                        linkage[1], residues.contains(linkage[1].getGroup()));
830                                        linkages.add(link);
831                                }
832
833                                ModifiedCompound mc = new ModifiedCompoundImpl(mod, linkages);
834                                if (!identifiedCompounds.contains(mc)) {
835                                        ret.add(mc);
836                                        identifiedCompounds.add(mc);
837                                }
838                        }
839
840                        // indices++ (e.g. [0,0,1]=>[0,0,2]=>[1,2,0])
841                        int i = nLink-1;
842                        while (i>=0) {
843                                if (i==0 || indices[i]<matchedAtomsOfLinkages.get(i).size()-1) {
844                                        indices[i]++;
845                                        break;
846                                } else {
847                                        indices[i] = 0;
848                                        i--;
849                                }
850                        }
851                }
852        }
853
854        /**
855         *
856         * @param linkages
857         * @param atomLinkages
858         * @return true if atomLinkages satisfy the condition; false, otherwise.
859         */
860        private boolean matchLinkages(List<ModificationLinkage> linkages,
861                        List<Atom[]> atomLinkages) {
862                int nLink = linkages.size();
863                if (nLink != atomLinkages.size()) {
864                        return false;
865                }
866                for (int i=0; i<nLink-1; i++) {
867                        ModificationLinkage link1 = linkages.get(i);
868                        Atom[] atoms1 = atomLinkages.get(i);
869                        for (int j=i+1; j<nLink; j++) {
870                                ModificationLinkage link2 = linkages.get(j);
871                                Atom[] atoms2 = atomLinkages.get(j);
872
873                                // check components
874                                if (((link1.getIndexOfComponent1()==link2.getIndexOfComponent1())
875                                                        != (atoms1[0].getGroup().equals(atoms2[0].getGroup())))
876                                        || ((link1.getIndexOfComponent1()==link2.getIndexOfComponent2())
877                                                        != (atoms1[0].getGroup().equals(atoms2[1].getGroup())))
878                                        || ((link1.getIndexOfComponent2()==link2.getIndexOfComponent1())
879                                                        != (atoms1[1].getGroup().equals(atoms2[0].getGroup())))
880                                        || ((link1.getIndexOfComponent2()==link2.getIndexOfComponent2())
881                                                        != (atoms1[1].getGroup().equals(atoms2[1].getGroup())))) {
882                                        return false;
883                                }
884
885                                // check atoms
886                                String label11 = link1.getLabelOfAtomOnComponent1();
887                                String label12 = link1.getLabelOfAtomOnComponent2();
888                                String label21 = link2.getLabelOfAtomOnComponent1();
889                                String label22 = link2.getLabelOfAtomOnComponent2();
890                                if ((label11!=null && label21!=null && label11.equals(label21))
891                                                        != (atoms1[0].equals(atoms2[0]))
892                                         || (label11!=null && label22!=null && label11.equals(label22))
893                                                        != (atoms1[0].equals(atoms2[1]))
894                                         || (label12!=null && label21!=null && label12.equals(label21))
895                                                        != (atoms1[1].equals(atoms2[0]))
896                                         || (label12!=null && label22!=null && label12.equals(label22))
897                                                        != (atoms1[1].equals(atoms2[1]))) {
898                                        return false;
899                                }
900                        }
901                }
902
903                return true;
904        }
905}