001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Jun 12, 2010
021 * Author: Jianjiong Gao
022 *
023 */
024
025package org.biojava.nbio.protmod.structure;
026
027import org.biojava.nbio.protmod.*;
028import org.biojava.nbio.structure.*;
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031
032import java.util.*;
033
034/**
035 * Identify attachment modification in a 3-D structure.
036 *
037 * @author Jianjiong Gao
038 * @since 3.0
039 */
040public class ProteinModificationIdentifier {
041
042        private static final Logger logger = LoggerFactory.getLogger(ProteinModificationIdentifier.class);
043
044        private double bondLengthTolerance ;
045        private boolean recordUnidentifiableModifiedCompounds ;
046        private boolean recordAdditionalAttachments ;
047
048        private Set<ModifiedCompound> identifiedModifiedCompounds = null;
049        private Set<StructureAtomLinkage> unidentifiableAtomLinkages = null;
050        private Set<StructureGroup> unidentifiableModifiedResidues = null;
051
052        /**
053         * Temporary save the amino acids for each call of identify().
054         */
055        private List<Group> residues;
056
057
058        public ProteinModificationIdentifier(){
059
060                bondLengthTolerance =  0.4;
061                recordUnidentifiableModifiedCompounds = false;
062                recordAdditionalAttachments = true;
063
064                reset();
065        }
066
067
068        public void destroy(){
069                if ( identifiedModifiedCompounds != null)
070                        identifiedModifiedCompounds.clear();
071                if ( unidentifiableAtomLinkages != null)
072                        unidentifiableAtomLinkages.clear();
073                if ( unidentifiableModifiedResidues != null)
074                        unidentifiableModifiedResidues.clear();
075
076                unidentifiableAtomLinkages = null;
077                unidentifiableAtomLinkages = null;
078                unidentifiableModifiedResidues = null;
079
080
081        }
082
083        /**
084         *
085         * @param bondLengthTolerance tolerance of error (in Angstroms) of the
086         *  covalent bond length, when calculating the atom distance threshold.
087         */
088        public void setbondLengthTolerance(final double bondLengthTolerance) {
089                if (bondLengthTolerance<0) {
090                        throw new IllegalArgumentException("bondLengthTolerance " +
091                                        "must be positive.");
092                }
093                this.bondLengthTolerance = bondLengthTolerance;
094        }
095
096        /**
097         *
098         * @param recordUnidentifiableModifiedCompounds true if choosing to record unidentifiable
099         *  atoms; false, otherwise.
100         * @see #getRecordUnidentifiableCompounds
101         * @see #getUnidentifiableModifiedResidues
102         * @see #getUnidentifiableAtomLinkages
103         */
104        public void setRecordUnidentifiableCompounds(boolean recordUnidentifiableModifiedCompounds) {
105                this.recordUnidentifiableModifiedCompounds = recordUnidentifiableModifiedCompounds;
106        }
107
108        /**
109         *
110         * @return true if choosing to record unidentifiable
111         *  atoms; false, otherwise.
112         * @see #setRecordUnidentifiableCompounds
113         * @see #getUnidentifiableModifiedResidues
114         * @see #getUnidentifiableAtomLinkages
115         */
116        public boolean getRecordUnidentifiableCompounds() {
117                return recordUnidentifiableModifiedCompounds;
118        }
119
120        /**
121         *
122         * @param recordAdditionalAttachments true if choosing to record additional attachments
123         *  that are not directly attached to a modified residue.
124         * @see #getRecordAdditionalAttachments
125         */
126        public void setRecordAdditionalAttachments(boolean recordAdditionalAttachments) {
127                this.recordAdditionalAttachments = recordAdditionalAttachments;
128        }
129
130        /**
131         *
132         * @return true if choosing to record additional attachments
133         *  that are not directly attached to a modified residue.
134         * @see #setRecordAdditionalAttachments
135         */
136        public boolean getRecordAdditionalAttachments() {
137                return recordAdditionalAttachments;
138        }
139
140        /**
141         *
142         * @return a set of identified {@link ModifiedCompound}s from
143         *  the last parse result.
144         * @see ModifiedCompound
145         */
146        public Set<ModifiedCompound> getIdentifiedModifiedCompound() {
147                if (identifiedModifiedCompounds==null) {
148                        throw new IllegalStateException("No result available. Please call parse() first.");
149                }
150
151                return identifiedModifiedCompounds;
152        }
153
154        /**
155         *
156         * @return a set of atom linkages, which represent the
157         *  atom bonds that were not covered by the identified
158         *  {@link ModifiedCompound}s from the last parse result.
159         *  Each element of the list is a array containing two atoms.
160         * @see StructureAtomLinkage
161         * @see #setRecordUnidentifiableCompounds
162         */
163        public Set<StructureAtomLinkage> getUnidentifiableAtomLinkages() {
164                if (!recordUnidentifiableModifiedCompounds) {
165                        throw new UnsupportedOperationException("Recording unidentified atom linkages" +
166                                        "is not supported. Please setRecordUnidentifiableCompounds(true) first.");
167                }
168
169                if (identifiedModifiedCompounds==null) {
170                        throw new IllegalStateException("No result available. Please call parse() first.");
171                }
172
173                return unidentifiableAtomLinkages;
174        }
175
176        /**
177         *
178         * @return a set of modified residues that were not covered by
179         *  the identified ModifiedCompounds from the last parse
180         *  result.
181         *  @see StructureGroup
182         *  @see #setRecordUnidentifiableCompounds
183         *  @see #getIdentifiedModifiedCompound
184         */
185        public Set<StructureGroup> getUnidentifiableModifiedResidues() {
186                if (!recordUnidentifiableModifiedCompounds) {
187                        throw new UnsupportedOperationException("Recording unidentified atom linkages" +
188                                        "is not supported. Please setRecordUnidentifiableCompounds(true) first.");
189                }
190
191                if (identifiedModifiedCompounds==null) {
192                        throw new IllegalStateException("No result available. Please call parse() first.");
193                }
194
195                return unidentifiableModifiedResidues;
196        }
197
198        /**
199         * Identify all registered modifications in a structure.
200         * @param structure
201         */
202        public void identify(final Structure structure) {
203                identify(structure, ProteinModificationRegistry.allModifications());
204        }
205
206        /**
207         * Identify a set of modifications in a structure.
208         * @param structure query {@link Structure}.
209         * @param potentialModifications query {@link ProteinModification}s.
210         */
211        public void identify(final Structure structure,
212                        final Set<ProteinModification> potentialModifications) {
213                if (structure==null) {
214                        throw new IllegalArgumentException("Null structure.");
215                }
216
217                identify(structure.getChains(), potentialModifications);
218        }
219
220        /**
221         * Identify all registered modifications in a chain.
222         * @param chain query {@link Chain}.
223         */
224        public void identify(final Chain chain) {
225                identify(Collections.singletonList(chain));
226        }
227
228        /**
229         * Identify all registered modifications in chains.
230         * @param chains query {@link Chain}s.
231         */
232        public void identify(final List<Chain> chains) {
233                identify(chains, ProteinModificationRegistry.allModifications());
234        }
235
236        /**
237         * Identify a set of modifications in a a chains.
238         * @param chain query {@link Chain}.
239         * @param potentialModifications query {@link ProteinModification}s.
240         */
241        public void identify(final Chain chain,
242                        final Set<ProteinModification> potentialModifications)  {
243                identify(Collections.singletonList(chain), potentialModifications);
244        }
245
246        /**
247         * Identify a set of modifications in a a list of chains.
248         * @param chains query {@link Chain}s.
249         * @param potentialModifications query {@link ProteinModification}s.
250         */
251        public void identify(final List<Chain> chains,
252                        final Set<ProteinModification> potentialModifications) {
253
254                if (chains==null) {
255                        throw new IllegalArgumentException("Null structure.");
256                }
257
258                if (potentialModifications==null) {
259                        throw new IllegalArgumentException("Null potentialModifications.");
260                }
261
262
263                reset();
264
265                if (potentialModifications.isEmpty()) {
266                        return;
267                }
268
269                Map<String, Chain> mapChainIdChain = new HashMap<String, Chain>(chains.size());
270                residues = new ArrayList<Group>();
271                List<Group> ligands = new ArrayList<Group>();
272                Map<Component, Set<Group>> mapCompGroups = new HashMap<Component, Set<Group>>();
273
274                for (Chain chain : chains) {
275                        mapChainIdChain.put(chain.getChainID(), chain);
276
277                        List<Group> ress = StructureUtil.getAminoAcids(chain);
278
279                        //List<Group> ligs = chain.getAtomLigands();
280                        List<Group> ligs = StructureTools.filterLigands(chain.getAtomGroups());
281                        residues.addAll(ress);
282                        residues.removeAll(ligs);
283                        ligands.addAll(ligs);
284                        addModificationGroups(potentialModifications, ress, ligs, mapCompGroups);
285                }
286
287                if (residues.isEmpty()) {
288                        String pdbId = "?";
289                        if ( chains.size() > 0) {
290                                Structure struc = chains.get(0).getParent();
291                                if ( struc != null)
292                                        pdbId = struc.getPDBCode();
293                        }
294                        logger.warn("No amino acids found for {}. Either you did not parse the PDB file with alignSEQRES records, or this record does not contain any amino acids.", pdbId);
295                }
296                List<ModifiedCompound> modComps = new ArrayList<ModifiedCompound>();
297
298                for (ProteinModification mod : potentialModifications) {
299                        ModificationCondition condition = mod.getCondition();
300                        List<Component> components = condition.getComponents();
301                        if (!mapCompGroups.keySet().containsAll(components)) {
302                                // not all components exist for this mod.
303                                continue;
304                        }
305
306                        int sizeComps = components.size();
307                        if (sizeComps==1) {
308
309                                processCrosslink1(mapCompGroups, modComps, mod, components);
310
311                        } else {
312
313                                processMultiCrosslink(mapCompGroups, modComps, mod, condition);
314                        }
315                }
316
317                if (recordAdditionalAttachments) {
318                        // identify additional groups that are not directly attached to amino acids.
319                        for (ModifiedCompound mc : modComps) {
320                                identifyAdditionalAttachments(mc, ligands, mapChainIdChain);
321                        }
322                }
323
324                mergeModComps(modComps);
325
326                identifiedModifiedCompounds.addAll(modComps);
327
328
329                // record unidentifiable linkage
330                if (recordUnidentifiableModifiedCompounds) {
331                        recordUnidentifiableAtomLinkages(modComps, ligands);
332                        recordUnidentifiableModifiedResidues(modComps);
333                }
334        }
335
336        private void reset() {
337                identifiedModifiedCompounds = new LinkedHashSet<ModifiedCompound>();
338                if (recordUnidentifiableModifiedCompounds) {
339                        unidentifiableAtomLinkages = new LinkedHashSet<StructureAtomLinkage>();
340                        unidentifiableModifiedResidues = new LinkedHashSet<StructureGroup>();
341                }
342
343        }
344
345        private void processMultiCrosslink(
346                        Map<Component, Set<Group>> mapCompGroups,
347                        List<ModifiedCompound> modComps, ProteinModification mod,
348                        ModificationCondition condition) {
349                // for multiple components
350
351                // find linkages first
352                List<List<Atom[]>> matchedAtomsOfLinkages =
353                                getMatchedAtomsOfLinkages(condition, mapCompGroups);
354
355                if (matchedAtomsOfLinkages.size() != condition.getLinkages().size()) {
356                        return;
357                }
358
359                assembleLinkages(matchedAtomsOfLinkages, mod, modComps);
360
361        }
362
363        private void processCrosslink1(Map<Component, Set<Group>> mapCompGroups,
364                        List<ModifiedCompound> modComps, ProteinModification mod,
365                        List<Component> components) {
366                // modified residue
367                // TODO: is this the correct logic for CROSS_LINK_1?
368                Set<Group> modifiedResidues = mapCompGroups.get(components.get(0));
369                if (modifiedResidues != null) {
370                        for (Group residue : modifiedResidues) {
371                                StructureGroup strucGroup = StructureUtil.getStructureGroup(residue, true);
372                                ModifiedCompound modRes = new ModifiedCompoundImpl(mod, strucGroup);
373                                modComps.add(modRes);
374                        }
375                }
376        }
377
378        /**
379         * identify additional groups that are not directly attached to amino acids.
380         * @param mc {@link ModifiedCompound}
381         * @param ligands {@link Group}
382         * @param chains List of {@link Chain}s
383         * @return a list of added groups
384         */
385        private void identifyAdditionalAttachments(ModifiedCompound mc,
386                        List<Group> ligands, Map<String, Chain> mapChainIdChain) {
387                if (ligands.isEmpty()) {
388                        return;
389                }
390
391                // TODO: should the additional groups only be allowed to the identified
392                // ligands or both amino acids and ligands? Currently only on ligands
393                // ligands to amino acid bonds for same modification of unknown category
394                // will be combined in mergeModComps()
395                // TODO: how about chain-chain links?
396                List<Group> identifiedGroups = new ArrayList<Group>();
397                for (StructureGroup num : mc.getGroups(false)) {
398                        Group group;
399                        try {
400                                //String numIns = "" + num.getResidueNumber();
401                                //if (num.getInsCode() != null) {
402                                //      numIns += num.getInsCode();
403                                //}
404                                ResidueNumber resNum = new ResidueNumber();
405                                resNum.setChainId(num.getChainId());
406                                resNum.setSeqNum(num.getResidueNumber());
407                                resNum.setInsCode(num.getInsCode());
408                                //group = chain.getGroupByPDB(numIns);
409
410                                group = mapChainIdChain.get(num.getChainId()).getGroupByPDB(resNum);
411                                //group = mapChainIdChain.get(num.getChainId()).getGroupByPDB(resNum);
412                        } catch (StructureException e) {
413                                logger.error("Exception: ", e);
414                                // should not happen
415                                continue;
416                        }
417                        identifiedGroups.add(group);
418                }
419
420                int start = 0;
421
422                int n = identifiedGroups.size();
423                while (n > start) {
424                        for (Group group1 : ligands) {
425                                for (int i=start; i<n; i++) {
426                                        Group group2 = identifiedGroups.get(i);
427                                        if (!identifiedGroups.contains(group1)) {
428                                                List<Atom[]> linkedAtoms = StructureUtil.findAtomLinkages(
429                                                                group1, group2, false, bondLengthTolerance);
430                                                if (!linkedAtoms.isEmpty()) {
431                                                        for (Atom[] atoms : linkedAtoms) {
432                                                                mc.addAtomLinkage(StructureUtil.getStructureAtomLinkage(atoms[0],
433                                                                                false, atoms[1], false));
434                                                        }
435                                                        identifiedGroups.add(group1);
436                                                        break;
437                                                }
438                                        }
439                                }
440                        }
441
442                        start = n;
443                        n = identifiedGroups.size();
444                }
445        }
446
447        private Group getGroup(StructureGroup num, List<Chain> chains) throws StructureException {
448                for (Chain c : chains){
449                        if ( c.getId().equals(num.getChainId())){
450
451                                ResidueNumber resNum = new ResidueNumber();
452
453                                resNum.setSeqNum(num.getResidueNumber());
454                                resNum.setInsCode(num.getInsCode());
455
456
457                                return c.getGroupByPDB(resNum);
458                        }
459                }
460
461                throw new StructureException("Could not find residue " + num);
462        }
463
464        /**
465         * Merge identified modified compounds if linked.
466         */
467        private void mergeModComps(List<ModifiedCompound> modComps) {
468                TreeSet<Integer> remove = new TreeSet<Integer>();
469                int n = modComps.size();
470                for (int icurr=1; icurr<n; icurr++) {
471                        ModifiedCompound curr = modComps.get(icurr);
472
473                        String id = curr.getModification().getId();
474                        if (ProteinModificationRegistry.getById(id).getCategory()
475                                        !=ModificationCategory.UNDEFINED)
476                                continue;
477
478                        // find linked compounds that before curr
479                        //List<Integer> merging = new ArrayList<Integer>();
480                        int ipre = 0;
481                        for (; ipre<icurr; ipre++) {
482                                if (remove.contains(ipre))      continue;
483                                ModifiedCompound pre = modComps.get(ipre);
484                                if (!Collections.disjoint(pre.getGroups(false),
485                                                curr.getGroups(false))) {
486                                        break;
487                                }
488                        }
489
490                        if (ipre<icurr) {
491                                ModifiedCompound mcKeep = modComps.get(ipre);
492
493                                // merge modifications of the same type
494                                if (mcKeep.getModification().getId().equals(id)) {
495                                        // merging the current one to the previous one
496                                        mcKeep.addAtomLinkages(curr.getAtomLinkages());
497                                        remove.add(icurr);
498                                }
499                        }
500                }
501
502                Iterator<Integer> it = remove.descendingIterator();
503                while (it.hasNext()) {
504                        modComps.remove(it.next().intValue());
505                }
506        }
507
508        /**
509         * Record unidentifiable atom linkages in a chain. Only linkages between two
510         * residues or one residue and one ligand will be recorded.
511         */
512        private void recordUnidentifiableAtomLinkages(List<ModifiedCompound> modComps,
513                        List<Group> ligands) {
514
515                // first put identified linkages in a map for fast query
516                Set<StructureAtomLinkage> identifiedLinkages = new HashSet<StructureAtomLinkage>();
517                for (ModifiedCompound mc : modComps) {
518                        identifiedLinkages.addAll(mc.getAtomLinkages());
519                }
520
521                // record
522                // cross link
523                int nRes = residues.size();
524                for (int i=0; i<nRes-1; i++) {
525                        Group group1 = residues.get(i);
526                        for (int j=i+1; j<nRes; j++) {
527                                Group group2 = residues.get(j);
528                                List<Atom[]> linkages = StructureUtil.findAtomLinkages(
529                                                group1, group2, true, bondLengthTolerance);
530                                for (Atom[] atoms : linkages) {
531                                        StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0],
532                                                        true, atoms[1], true);
533                                        unidentifiableAtomLinkages.add(link);
534                                }
535                        }
536                }
537
538                // attachment
539                int nLig = ligands.size();
540                for (int i=0; i<nRes; i++) {
541                        Group group1 = residues.get(i);
542                        for (int j=0; j<nLig; j++) {
543                                Group group2 = ligands.get(j);
544                                if (group1.equals(group2)) { // overlap between residues and ligands
545                                        continue;
546                                }
547                                List<Atom[]> linkages = StructureUtil.findAtomLinkages(
548                                                group1, group2, false, bondLengthTolerance);
549                                for (Atom[] atoms : linkages) {
550                                        StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0],
551                                                        true, atoms[1], false);
552                                        unidentifiableAtomLinkages.add(link);
553                                }
554                        }
555                }
556        }
557
558        private void recordUnidentifiableModifiedResidues(List<ModifiedCompound> modComps) {
559                Set<StructureGroup> identifiedComps = new HashSet<StructureGroup>();
560                for (ModifiedCompound mc : modComps) {
561                        identifiedComps.addAll(mc.getGroups(true));
562                }
563
564                // TODO: use the ModifiedAminoAcid after Andreas add that.
565                for (Group group : residues) {
566                        if (group.getType().equals(GroupType.HETATM)) {
567                                StructureGroup strucGroup = StructureUtil.getStructureGroup(
568                                                group, true);
569                                //strucGroup.setChainId(group.getChainId());
570
571                                if (!identifiedComps.contains(strucGroup)) {
572                                        unidentifiableModifiedResidues.add(strucGroup);
573                                }
574                        }
575                }
576        }
577
578        /**
579         *
580         * @param modifications a set of {@link ProteinModification}s.
581         * @param residues
582         * @param ligands
583         * @param saveTo save result to
584         * @return map from component to list of corresponding residues
585         *  in the chain.
586         */
587        private void addModificationGroups(
588                        final Set<ProteinModification> modifications,
589                        final List<Group> residues,
590                        final List<Group> ligands,
591                        final Map<Component, Set<Group>> saveTo) {
592                if (residues==null || ligands==null || modifications==null) {
593                        throw new IllegalArgumentException("Null argument(s).");
594                }
595
596                Map<Component,Set<Component>> mapSingleMultiComps = new HashMap<Component,Set<Component>>();
597                for (ProteinModification mod : modifications) {
598                        ModificationCondition condition = mod.getCondition();
599                        for (Component comp : condition.getComponents()) {
600                                for (String pdbccId : comp.getPdbccIds()) {
601                                        Component single = Component.of(Collections.singleton(pdbccId),
602                                                        comp.isNTerminal(), comp.isCTerminal());
603                                        Set<Component> mult = mapSingleMultiComps.get(single);
604                                        if (mult == null) {
605                                                mult = new HashSet<Component>();
606                                                mapSingleMultiComps.put(single, mult);
607                                        }
608                                        mult.add(comp);
609                                }
610                        }
611                }
612
613                {
614                        // ligands
615                        Set<Component> ligandsWildCard = mapSingleMultiComps.get(
616                                        Component.of("*"));
617                        for (Group group : ligands) {
618                                String pdbccId = group.getPDBName().trim();
619                                Set<Component> comps = mapSingleMultiComps.get(
620                                                Component.of(pdbccId));
621
622                                for (Component comp : unionComponentSet(ligandsWildCard, comps)) {
623                                        Set<Group> gs = saveTo.get(comp);
624                                        if (gs==null) {
625                                                gs = new LinkedHashSet<Group>();
626                                                saveTo.put(comp, gs);
627                                        }
628                                        gs.add(group);
629                                }
630                        }
631                }
632
633                {
634                        // residues
635                        if (residues.isEmpty()) {
636                                return;
637                        }
638
639                        Set<Component> residuesWildCard = mapSingleMultiComps.get(
640                                        Component.of("*"));
641
642                        // for all residues
643                        for (Group group : residues) {
644                                String pdbccId = group.getPDBName().trim();
645                                Set<Component> comps = mapSingleMultiComps.get(
646                                                Component.of(pdbccId));
647
648                                for (Component comp : unionComponentSet(residuesWildCard, comps)) {
649                                        Set<Group> gs = saveTo.get(comp);
650                                        if (gs==null) {
651                                                gs = new LinkedHashSet<Group>();
652                                                saveTo.put(comp, gs);
653                                        }
654                                        gs.add(group);
655                                }
656                        }
657
658                        // for N-terminal
659                        int nRes = residues.size();
660                        int iRes = 0;
661                        Group res;
662                        do {
663                                // for all ligands on N terminal and the first residue
664                                res = residues.get(iRes++);
665
666                                Set<Component> nTermWildCard = mapSingleMultiComps.get(
667                                                Component.of("*", true, false));
668
669                                Set<Component> comps = mapSingleMultiComps.get(
670                                                Component.of(res.getPDBName(), true, false));
671
672                                for (Component comp : unionComponentSet(nTermWildCard, comps)) {
673                                        Set<Group> gs = saveTo.get(comp);
674                                        if (gs==null) {
675                                                gs = new LinkedHashSet<Group>();
676                                                saveTo.put(comp, gs);
677                                        }
678                                        gs.add(res);
679                                }
680                        } while (iRes<nRes && ligands.contains(res));
681
682                        // for C-terminal
683                        iRes = residues.size()-1;
684                        do {
685                                // for all ligands on C terminal and the last residue
686                                res = residues.get(iRes--);
687
688                                Set<Component> cTermWildCard = mapSingleMultiComps.get(
689                                                Component.of("*", false, true));
690
691                                Set<Component> comps = mapSingleMultiComps.get(
692                                                Component.of(res.getPDBName(), false, true));
693
694                                for (Component comp : unionComponentSet(cTermWildCard, comps)) {
695                                        Set<Group> gs = saveTo.get(comp);
696                                        if (gs==null) {
697                                                gs = new LinkedHashSet<Group>();
698                                                saveTo.put(comp, gs);
699                                        }
700                                        gs.add(res);
701                                }
702                        } while (iRes>=0 && ligands.contains(res));
703                }
704        }
705
706        private Set<Component> unionComponentSet(Set<Component> set1, Set<Component> set2) {
707                if (set1 == null && set2 == null)
708                        return Collections.emptySet();
709
710                if (set1 == null)
711                        return set2;
712
713                if (set2 == null)
714                        return set1;
715
716                Set<Component> set = new HashSet<Component>(set1.size()+set2.size());
717                set.addAll(set1);
718                set.addAll(set2);
719
720                return set;
721        }
722
723        /**
724         * Get matched atoms for all linkages.
725         */
726        private List<List<Atom[]>> getMatchedAtomsOfLinkages(
727                        ModificationCondition condition, Map<Component, Set<Group>> mapCompGroups) {
728                List<ModificationLinkage> linkages = condition.getLinkages();
729                int nLink = linkages.size();
730
731                List<List<Atom[]>> matchedAtomsOfLinkages =
732                                new ArrayList<List<Atom[]>>(nLink);
733
734                for (int iLink=0; iLink<nLink; iLink++) {
735                        ModificationLinkage linkage = linkages.get(iLink);
736                        Component comp1 = linkage.getComponent1();
737                        Component comp2 = linkage.getComponent2();
738
739//                      boolean isAA1 = comp1.;
740//                      boolean isAA2 = comp2.getType()==true;
741
742                        Set<Group> groups1 = mapCompGroups.get(comp1);
743                        Set<Group> groups2 = mapCompGroups.get(comp2);
744
745                        List<Atom[]> list = new ArrayList<Atom[]>();
746
747                        List<String> potentialNamesOfAtomOnGroup1 = linkage.getPDBNameOfPotentialAtomsOnComponent1();
748                        for (String name : potentialNamesOfAtomOnGroup1) {
749                                if (name.equals("*")) {
750                                        // wildcard
751                                        potentialNamesOfAtomOnGroup1 = null; // search all atoms
752                                        break;
753                                }
754                        }
755
756                        List<String> potentialNamesOfAtomOnGroup2 = linkage.getPDBNameOfPotentialAtomsOnComponent2();
757                        for (String name : potentialNamesOfAtomOnGroup2) {
758                                if (name.equals("*")) {
759                                        // wildcard
760                                        potentialNamesOfAtomOnGroup2 = null; // search all atoms
761                                        break;
762                                }
763                        }
764
765                        for (Group g1 : groups1) {
766                                for (Group g2 : groups2) {
767                                        if (g1.equals(g2)) {
768                                                continue;
769                                        }
770
771                                        // only for wildcard match of two residues
772                                        boolean ignoreNCLinkage =
773                                                potentialNamesOfAtomOnGroup1 == null &&
774                                                potentialNamesOfAtomOnGroup2 == null &&
775                                                residues.contains(g1) &&
776                                                residues.contains(g2);
777
778                                        Atom[] atoms = StructureUtil.findNearestAtomLinkage(
779                                                        g1, g2,
780                                                        potentialNamesOfAtomOnGroup1,
781                                                        potentialNamesOfAtomOnGroup2,
782                                                        ignoreNCLinkage,
783                                                        bondLengthTolerance);
784                                        if (atoms!=null) {
785                                                list.add(atoms);
786                                        }
787                                }
788                        }
789
790                        if (list.isEmpty()) {
791                                // broken linkage
792                                break;
793                        }
794
795                        matchedAtomsOfLinkages.add(list);
796                }
797
798                return matchedAtomsOfLinkages;
799        }
800
801        /** Assembly the matched linkages
802         *
803         * @param matchedAtomsOfLinkages
804         * @param mod
805         * @param ret ModifiedCompound will be stored here
806     */
807        private void assembleLinkages(List<List<Atom[]>> matchedAtomsOfLinkages,
808                        ProteinModification mod, List<ModifiedCompound> ret) {
809                ModificationCondition condition = mod.getCondition();
810                List<ModificationLinkage> modLinks = condition.getLinkages();
811
812                int nLink = matchedAtomsOfLinkages.size();
813                int[] indices = new int[nLink];
814                Set<ModifiedCompound> identifiedCompounds = new HashSet<ModifiedCompound>();
815                while (indices[0]<matchedAtomsOfLinkages.get(0).size()) {
816                        List<Atom[]> atomLinkages = new ArrayList<Atom[]>(nLink);
817                        for (int iLink=0; iLink<nLink; iLink++) {
818                                Atom[] atoms = matchedAtomsOfLinkages.get(iLink).get(indices[iLink]);
819                                atomLinkages.add(atoms);
820                        }
821                        if (matchLinkages(modLinks, atomLinkages)) {
822                                // matched
823
824                                int n = atomLinkages.size();
825                                List<StructureAtomLinkage> linkages = new ArrayList<StructureAtomLinkage>(n);
826                                for (int i=0; i<n; i++) {
827                                        Atom[] linkage = atomLinkages.get(i);
828                                        StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(
829                                                        linkage[0], residues.contains(linkage[0].getGroup()),
830                                                        linkage[1], residues.contains(linkage[1].getGroup()));
831                                        linkages.add(link);
832                                }
833
834                                ModifiedCompound mc = new ModifiedCompoundImpl(mod, linkages);
835                                if (!identifiedCompounds.contains(mc)) {
836                                        ret.add(mc);
837                                        identifiedCompounds.add(mc);
838                                }
839                        }
840
841                        // indices++ (e.g. [0,0,1]=>[0,0,2]=>[1,2,0])
842                        int i = nLink-1;
843                        while (i>=0) {
844                                if (i==0 || indices[i]<matchedAtomsOfLinkages.get(i).size()-1) {
845                                        indices[i]++;
846                                        break;
847                                } else {
848                                        indices[i] = 0;
849                                        i--;
850                                }
851                        }
852                }
853        }
854
855        /**
856         *
857         * @param linkages
858         * @param atomLinkages
859         * @return true if atomLinkages satisfy the condition; false, otherwise.
860         */
861        private boolean matchLinkages(List<ModificationLinkage> linkages,
862                        List<Atom[]> atomLinkages) {
863                int nLink = linkages.size();
864                if (nLink != atomLinkages.size()) {
865                        return false;
866                }
867                for (int i=0; i<nLink-1; i++) {
868                        ModificationLinkage link1 = linkages.get(i);
869                        Atom[] atoms1 = atomLinkages.get(i);
870                        for (int j=i+1; j<nLink; j++) {
871                                ModificationLinkage link2 = linkages.get(j);
872                                Atom[] atoms2 = atomLinkages.get(j);
873
874                                // check components
875                                if (((link1.getIndexOfComponent1()==link2.getIndexOfComponent1())
876                                                        != (atoms1[0].getGroup().equals(atoms2[0].getGroup())))
877                                        || ((link1.getIndexOfComponent1()==link2.getIndexOfComponent2())
878                                                        != (atoms1[0].getGroup().equals(atoms2[1].getGroup())))
879                                        || ((link1.getIndexOfComponent2()==link2.getIndexOfComponent1())
880                                                        != (atoms1[1].getGroup().equals(atoms2[0].getGroup())))
881                                        || ((link1.getIndexOfComponent2()==link2.getIndexOfComponent2())
882                                                        != (atoms1[1].getGroup().equals(atoms2[1].getGroup())))) {
883                                        return false;
884                                }
885
886                                // check atoms
887                                String label11 = link1.getLabelOfAtomOnComponent1();
888                                String label12 = link1.getLabelOfAtomOnComponent2();
889                                String label21 = link2.getLabelOfAtomOnComponent1();
890                                String label22 = link2.getLabelOfAtomOnComponent2();
891                                if ((label11!=null && label21!=null && label11.equals(label21))
892                                                        != (atoms1[0].equals(atoms2[0]))
893                                         || (label11!=null && label22!=null && label11.equals(label22))
894                                                        != (atoms1[0].equals(atoms2[1]))
895                                         || (label12!=null && label21!=null && label12.equals(label21))
896                                                        != (atoms1[1].equals(atoms2[0]))
897                                         || (label12!=null && label22!=null && label12.equals(label22))
898                                                        != (atoms1[1].equals(atoms2[1]))) {
899                                        return false;
900                                }
901                        }
902                }
903
904                return true;
905        }
906}