Source code

001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.cluster;
022
023import org.biojava.nbio.alignment.Alignments;
024import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType;
025import org.biojava.nbio.alignment.SimpleGapPenalty;
026import org.biojava.nbio.alignment.template.GapPenalty;
027import org.biojava.nbio.alignment.template.PairwiseSequenceAligner;
028import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
029import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
030import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
031import org.biojava.nbio.core.sequence.ProteinSequence;
032import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
033import org.biojava.nbio.structure.Atom;
034import org.biojava.nbio.structure.Chain;
035import org.biojava.nbio.structure.EntityInfo;
036import org.biojava.nbio.structure.Group;
037import org.biojava.nbio.structure.Structure;
038import org.biojava.nbio.structure.StructureException;
039import org.biojava.nbio.structure.align.StructureAlignment;
040import org.biojava.nbio.structure.align.StructureAlignmentFactory;
041import org.biojava.nbio.structure.align.ce.ConfigStrucAligParams;
042import org.biojava.nbio.structure.align.model.AFPChain;
043import org.biojava.nbio.structure.align.multiple.Block;
044import org.biojava.nbio.structure.align.multiple.BlockImpl;
045import org.biojava.nbio.structure.align.multiple.BlockSet;
046import org.biojava.nbio.structure.align.multiple.BlockSetImpl;
047import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
048import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsembleImpl;
049import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl;
050import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer;
051import org.biojava.nbio.structure.align.multiple.util.ReferenceSuperimposer;
052import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
053import org.biojava.nbio.structure.symmetry.core.QuatSymmetrySubunits;
054import org.biojava.nbio.structure.symmetry.internal.CESymmParameters;
055import org.biojava.nbio.structure.symmetry.internal.CeSymm;
056import org.biojava.nbio.structure.symmetry.internal.CeSymmResult;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060import java.lang.reflect.InvocationTargetException;
061import java.lang.reflect.Method;
062import java.util.*;
063import java.util.stream.Collectors;
064
065/**
066 * A SubunitCluster contains a set of equivalent {@link QuatSymmetrySubunits},
067 * the set of equivalent residues (EQR) between {@link Subunit} and a
068 * {@link Subunit} representative. It also stores the method used for
069 * clustering.
070 * <p>
071 * This class allows the comparison and merging of SubunitClusters.
072 *
073 * @author Aleix Lafita
074 * @since 5.0.0
075 *
076 */
077public class SubunitCluster {
078
079        private static final Logger logger = LoggerFactory.getLogger(SubunitCluster.class);
080
081        private List<Subunit> subunits = new ArrayList<>();
082        private List<List<Integer>> subunitEQR = new ArrayList<>();
083        private int representative = -1;
084
085        private SubunitClustererMethod method = SubunitClustererMethod.SEQUENCE;
086        private boolean pseudoStoichiometric = false;
087
088        /**
089         * A letter that is assigned to this cluster in stoichiometry.
090        */
091        private String alpha = "";
092
093        /**
094         * A letter that is assigned to this cluster in stoichiometry.
095         *
096         * @return alpha
097         *          String
098         */
099
100        public String getAlpha() {
101                return alpha;
102        }
103
104        /**
105         * A letter that is assigned to this cluster in stoichiometry.
106         *
107         * @param  alpha
108         *          String
109         */
110        public void setAlpha(String alpha) {
111                this.alpha = alpha;
112        }
113
114        /**
115         * A constructor from a single Subunit. To obtain a
116         * SubunitCluster with multiple Subunits, initialize different
117         * SubunitClusters and merge them.
118         *
119         * @param subunit
120         *            initial Subunit
121         */
122        public SubunitCluster(Subunit subunit) {
123
124                subunits.add(subunit);
125
126                List<Integer> identity = new ArrayList<>();
127                for (int i = 0; i < subunit.size(); i++)
128                        identity.add(i);
129                subunitEQR.add(identity);
130
131                representative = 0;
132        }
133
134        /**
135         * A copy constructor with the possibility of removing subunits.
136         * No re-clustering is done.
137         *
138         * @param other
139         *            reference SubunitCluster
140         * @param subunitsToRetain
141         *            which subunits to copy to this cluster
142         */
143        public SubunitCluster(SubunitCluster other, List<Integer> subunitsToRetain) {
144                method = other.method;
145                pseudoStoichiometric = other.pseudoStoichiometric;
146                for (int i = 0; i < other.subunits.size(); i++) {
147                        if(subunitsToRetain.contains(i)) {
148                                subunits.add(other.subunits.get(i));
149                                subunitEQR.add(other.subunitEQR.get(i));
150                        }
151                }
152                representative = 0;
153                for (int i=1; i<subunits.size(); i++) {
154                        if (subunits.get(i).size() > subunits.get(representative).size()) {
155                                representative = i;
156                        }
157                }
158                setAlpha(other.getAlpha());
159        }
160
161        /**
162         * Subunits contained in the SubunitCluster.
163         *
164         * @return an unmodifiable view of the original List
165         */
166        public List<Subunit> getSubunits() {
167                return Collections.unmodifiableList(subunits);
168        }
169
170        /**
171         * Tells whether the other SubunitCluster contains exactly the same Subunit.
172         * This is checked by String equality of their residue one-letter sequences.
173         *
174         * @param other
175         *            SubunitCluster
176         * @return true if the SubunitClusters are identical, false otherwise
177         */
178        public boolean isIdenticalTo(SubunitCluster other) {
179                String thisSequence = this.subunits.get(this.representative)
180                                .getProteinSequenceString();
181                String otherSequence = other.subunits.get(other.representative)
182                                .getProteinSequenceString();
183                return thisSequence.equals(otherSequence);
184        }
185
186        /**
187         * Tells whether the other SubunitCluster contains exactly the same Subunit.
188         * This is checked by equality of their entity identifiers if they are present.
189         *
190         * @param other
191         *            SubunitCluster
192         * @return true if the SubunitClusters are identical, false otherwise
193         */
194        public boolean isIdenticalByEntityIdTo(SubunitCluster other) {
195                Subunit thisSub = this.subunits.get(this.representative);
196                Subunit otherSub = other.subunits.get(other.representative);
197                String thisName = thisSub.getName();
198                String otherName = otherSub.getName();
199
200                Structure thisStruct = thisSub.getStructure();
201                Structure otherStruct = otherSub.getStructure();
202                if (thisStruct == null || otherStruct == null) {
203                        logger.info("SubunitClusters {}-{} have no referenced structures. Ignoring identity check by entity id",
204                                        thisName,
205                                        otherName);
206                        return false;
207                }
208                if (thisStruct != otherStruct) {
209                        // different object references: will not cluster even if entity id is same
210                        return false;
211                }
212                Chain thisChain = thisStruct.getChain(thisName);
213                Chain otherChain = otherStruct.getChain(otherName);
214                if (thisChain == null || otherChain == null) {
215                        logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id",
216                                        thisName,
217                                        otherName);
218                        return false;
219                }
220                if (thisChain.getEntityInfo() == null || otherChain.getEntityInfo() == null) {
221                        logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id",
222                                        thisName,
223                                        otherName);
224                        return false;
225                }
226                int thisEntityId = thisChain.getEntityInfo().getMolId();
227                int otherEntityId = otherChain.getEntityInfo().getMolId();
228                return thisEntityId == otherEntityId;
229        }
230
231        /**
232         * Merges the other SubunitCluster into this one if it contains exactly the
233         * same Subunit. This is checked by {@link #isIdenticalTo(SubunitCluster)}.
234         *
235         * @param other
236         *            SubunitCluster
237         * @return true if the SubunitClusters were merged, false otherwise
238         */
239        public boolean mergeIdentical(SubunitCluster other) {
240
241                if (!isIdenticalTo(other))
242                        return false;
243
244                logger.info("SubunitClusters {}-{} are identical in sequence",
245                                this.subunits.get(this.representative).getName(),
246                                other.subunits.get(other.representative).getName());
247
248                this.subunits.addAll(other.subunits);
249                this.subunitEQR.addAll(other.subunitEQR);
250
251                return true;
252        }
253
254        /**
255         * Merges the other SubunitCluster into this one if it contains exactly the
256         * same Subunit. This is checked by comparing the entity identifiers of the subunits
257         * if one can be found.
258         * Thus this only makes sense when the subunits are complete chains of a
259         * deposited PDB entry.
260         *
261         * @param other
262         *            SubunitCluster
263         * @return true if the SubunitClusters were merged, false otherwise
264         */
265        public boolean mergeIdenticalByEntityId(SubunitCluster other) {
266
267                if (!isIdenticalByEntityIdTo(other))
268                        return false;
269
270                Subunit thisSub = this.subunits.get(this.representative);
271                Subunit otherSub = other.subunits.get(other.representative);
272                String thisName = thisSub.getName();
273                String otherName = otherSub.getName();
274
275                logger.info("SubunitClusters {}-{} belong to same entity. Assuming they are identical",
276                                thisName,
277                                otherName);
278
279                List<Integer> thisAligned = new ArrayList<>();
280                List<Integer> otherAligned = new ArrayList<>();
281
282                // we've merged by entity id, we can assume structure, chain and entity are available (checked in isIdenticalByEntityIdTo())
283                Structure thisStruct = thisSub.getStructure();
284                Structure otherStruct = otherSub.getStructure();
285                Chain thisChain = thisStruct.getChain(thisName);
286                Chain otherChain = otherStruct.getChain(otherName);
287                EntityInfo entityInfo = thisChain.getEntityInfo();
288
289                // Extract the aligned residues of both Subunits
290                for (int thisIndex=0; thisIndex < thisSub.size(); thisIndex++) {
291
292                        Group g = thisSub.getRepresentativeAtoms()[thisIndex].getGroup();
293
294                        int seqresIndex = entityInfo.getAlignedResIndex(g, thisChain);
295
296                        if (seqresIndex == -1) {
297                                // this might mean that FileParsingParameters.setAlignSeqRes() wasn't set to true during parsing
298                                continue;
299                        }
300
301                        // note the seqresindex is 1-based
302                        Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1);
303
304                        int otherIndex = otherChain.getAtomGroups().indexOf(otherG);
305                        if (otherIndex == -1) {
306                                // skip residues that are unobserved in other sequence ("gaps" in the entity SEQRES alignment)
307                                continue;
308                        }
309
310                        // Only consider residues that are part of the SubunitCluster
311                        if (this.subunitEQR.get(this.representative).contains(thisIndex)
312                                        && other.subunitEQR.get(other.representative).contains(otherIndex)) {
313                                thisAligned.add(thisIndex);
314                                otherAligned.add(otherIndex);
315                        }
316                }
317
318                if (thisAligned.size() == 0 && otherAligned.size() == 0) {
319                        logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity SEQRES alignment. Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName);
320                }
321
322                updateEquivResidues(other, thisAligned, otherAligned);
323
324                return true;
325        }
326
327        /**
328         * Merges the other SubunitCluster into this one if their representatives
329         * sequences are similar (according to the criteria in params).
330         * <p>
331         * The sequence alignment is performed using linear {@link SimpleGapPenalty} and
332         * BLOSUM62 as scoring matrix.
333         *
334         * @param other
335         *            SubunitCluster
336         * @param params
337         *            SubunitClustererParameters, with information whether to use local
338         *            or global alignment, sequence identity and coverage thresholds.
339         *            Threshold values lower than 0.7 are not recommended.
340         *            Use {@link #mergeStructure} for lower values.
341         * @return true if the SubunitClusters were merged, false otherwise
342         * @throws CompoundNotFoundException
343         */
344
345        public boolean mergeSequence(SubunitCluster other, SubunitClustererParameters params) throws CompoundNotFoundException {
346                PairwiseSequenceAlignerType alignerType = PairwiseSequenceAlignerType.LOCAL;
347                if (params.isUseGlobalMetrics()) {
348                        alignerType = PairwiseSequenceAlignerType.GLOBAL;
349                }
350                return mergeSequence(other, params,alignerType
351                                , new SimpleGapPenalty(),
352                                SubstitutionMatrixHelper.getBlosum62());
353        }
354
355        /**
356         * Merges the other SubunitCluster into this one if their representatives
357         * sequences are similar (according to the criteria in params).
358         * <p>
359         * The sequence alignment is performed using linear {@link SimpleGapPenalty} and
360         * BLOSUM62 as scoring matrix.
361         *
362         * @param other
363         *            SubunitCluster
364         * @param params
365         *            {@link SubunitClustererParameters}, with information whether to use local
366         *            or global alignment, sequence identity and coverage thresholds.
367         *            Threshold values lower than 0.7 are not recommended.
368         *            Use {@link #mergeStructure} for lower values.
369         * @param alignerType
370         *            parameter for the sequence alignment algorithm
371         * @param gapPenalty
372         *            parameter for the sequence alignment algorithm
373         * @param subsMatrix
374         *            parameter for the sequence alignment algorithm
375         * @return true if the SubunitClusters were merged, false otherwise
376         * @throws CompoundNotFoundException
377         */
378
379        public boolean mergeSequence(SubunitCluster other, SubunitClustererParameters params,
380                                                                 PairwiseSequenceAlignerType alignerType,
381                                                                 GapPenalty gapPenalty,
382                                                                 SubstitutionMatrix<AminoAcidCompound> subsMatrix)
383                        throws CompoundNotFoundException {
384
385                // Extract the protein sequences as BioJava alignment objects
386                ProteinSequence thisSequence = this.subunits.get(this.representative)
387                                .getProteinSequence();
388                ProteinSequence otherSequence = other.subunits
389                                .get(other.representative).getProteinSequence();
390
391                // Perform the alignment with provided parameters
392                PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> aligner = Alignments
393                                .getPairwiseAligner(thisSequence, otherSequence, alignerType,
394                                                gapPenalty, subsMatrix);
395
396                double sequenceIdentity;
397                if(params.isUseGlobalMetrics()) {
398                        sequenceIdentity = aligner.getPair().getPercentageOfIdentity(true);
399                } else {
400                        sequenceIdentity = aligner.getPair().getPercentageOfIdentity(false);
401                }
402
403                if (sequenceIdentity < params.getSequenceIdentityThreshold())
404                        return false;
405
406                double sequenceCoverage = 0;
407                if(params.isUseSequenceCoverage()) {
408                        // Calculate real coverage (subtract gaps in both sequences)
409                        double gaps1 = aligner.getPair().getAlignedSequence(1)
410                                        .getNumGapPositions();
411                        double gaps2 = aligner.getPair().getAlignedSequence(2)
412                                        .getNumGapPositions();
413                        double lengthAlignment = aligner.getPair().getLength();
414                        double lengthThis = aligner.getQuery().getLength();
415                        double lengthOther = aligner.getTarget().getLength();
416                        sequenceCoverage = (lengthAlignment - gaps1 - gaps2)
417                                        / Math.max(lengthThis, lengthOther);
418
419                        if (sequenceCoverage < params.getSequenceCoverageThreshold())
420                                return false;
421                }
422
423                logger.info(String.format("SubunitClusters %s-%s are similar in sequence "
424                                                + "with %.2f sequence identity and %.2f coverage",
425                                this.subunits.get(this.representative).getName(),
426                                other.subunits.get(other.representative).getName(),
427                                sequenceIdentity, sequenceCoverage));
428
429                // If coverage and sequence identity sufficient, merge other and this
430                List<Integer> thisAligned = new ArrayList<>();
431                List<Integer> otherAligned = new ArrayList<>();
432
433                // Extract the aligned residues of both Subunit
434                for (int p = 1; p < aligner.getPair().getLength() + 1; p++) {
435
436                        // Skip gaps in any of the two sequences
437                        if (aligner.getPair().getAlignedSequence(1).isGap(p))
438                                continue;
439                        if (aligner.getPair().getAlignedSequence(2).isGap(p))
440                                continue;
441
442                        int thisIndex = aligner.getPair().getIndexInQueryAt(p) - 1;
443                        int otherIndex = aligner.getPair().getIndexInTargetAt(p) - 1;
444
445                        // Only consider residues that are part of the SubunitCluster
446                        if (this.subunitEQR.get(this.representative).contains(thisIndex)
447                                        && other.subunitEQR.get(other.representative).contains(otherIndex)) {
448                                thisAligned.add(thisIndex);
449                                otherAligned.add(otherIndex);
450                        }
451                }
452
453                updateEquivResidues(other, thisAligned, otherAligned);
454
455                this.method = SubunitClustererMethod.SEQUENCE;
456                pseudoStoichiometric = !params.isHighConfidenceScores(sequenceIdentity,sequenceCoverage);
457
458                return true;
459        }
460
461        /**
462         * Merges the other SubunitCluster into this one if their representative
463         * Atoms are structurally similar (according to the criteria in params).
464         * <p>
465         *
466         * @param other
467         *            SubunitCluster
468         * @param params
469         *            {@link SubunitClustererParameters}, with information on what alignment
470         *            algorithm to use, RMSD/TMScore and structure coverage thresholds.
471         * @return true if the SubunitClusters were merged, false otherwise
472         * @throws StructureException
473         */
474
475        public boolean mergeStructure(SubunitCluster other, SubunitClustererParameters params) throws StructureException {
476
477                StructureAlignment aligner = StructureAlignmentFactory.getAlgorithm(params.getSuperpositionAlgorithm());
478                ConfigStrucAligParams aligner_params = aligner.getParameters();
479
480                Method setOptimizeAlignment = null;
481                try {
482                        setOptimizeAlignment = aligner_params.getClass().getMethod("setOptimizeAlignment", boolean.class);
483                } catch (NoSuchMethodException e) {
484                        //alignment algorithm does not have an optimization switch, moving on
485                }
486                if (setOptimizeAlignment != null) {
487                        try {
488                                setOptimizeAlignment.invoke(aligner_params, params.isOptimizeAlignment());
489                        } catch (IllegalAccessException|InvocationTargetException e) {
490                                logger.warn("Could not set alignment optimisation switch");
491                        }
492                }
493
494                AFPChain afp = aligner.align(this.subunits.get(this.representative)
495                                .getRepresentativeAtoms(),
496                                other.subunits.get(other.representative)
497                                                .getRepresentativeAtoms());
498
499                // Convert AFPChain to MultipleAlignment for convenience
500                MultipleAlignment msa = new MultipleAlignmentEnsembleImpl(
501                                afp,
502                                this.subunits.get(this.representative).getRepresentativeAtoms(),
503                                other.subunits.get(other.representative)
504                                                .getRepresentativeAtoms(), false)
505                                .getMultipleAlignment(0);
506
507                double structureCoverage = Math.min(msa.getCoverages().get(0), msa
508                                .getCoverages().get(1));
509
510                if(params.isUseStructureCoverage() && structureCoverage < params.getStructureCoverageThreshold()) {
511                        return false;
512                }
513
514                double rmsd = afp.getTotalRmsdOpt();
515                if (params.isUseRMSD() && rmsd > params.getRMSDThreshold()) {
516                        return false;
517                }
518
519                double tmScore = afp.getTMScore();
520                if (params.isUseTMScore() && tmScore < params.getTMThreshold()) {
521                        return false;
522                }
523
524                logger.info(String.format("SubunitClusters are structurally similar with "
525                                + "%.2f RMSD %.2f coverage", rmsd, structureCoverage));
526
527                // Merge clusters
528                List<List<Integer>> alignedRes = msa.getBlock(0).getAlignRes();
529                List<Integer> thisAligned = new ArrayList<>();
530                List<Integer> otherAligned = new ArrayList<>();
531
532                // Extract the aligned residues of both Subunit
533                for (int p = 0; p < msa.length(); p++) {
534
535                        // Skip gaps in any of the two sequences
536                        if (alignedRes.get(0).get(p) == null)
537                                continue;
538                        if (alignedRes.get(1).get(p) == null)
539                                continue;
540
541                        int thisIndex = alignedRes.get(0).get(p);
542                        int otherIndex = alignedRes.get(1).get(p);
543
544                        // Only consider residues that are part of the SubunitCluster
545                        if (this.subunitEQR.get(this.representative).contains(thisIndex)
546                                        && other.subunitEQR.get(other.representative).contains(
547                                                        otherIndex)) {
548                                thisAligned.add(thisIndex);
549                                otherAligned.add(otherIndex);
550                        }
551                }
552
553                updateEquivResidues(other, thisAligned, otherAligned);
554
555                this.method = SubunitClustererMethod.STRUCTURE;
556                pseudoStoichiometric = true;
557
558                return true;
559        }
560
561        private void updateEquivResidues(SubunitCluster other, List<Integer> thisAligned, List<Integer> otherAligned) {
562                // Do a List intersection to find out which EQR columns to remove
563                List<Integer> thisRemove = new ArrayList<>();
564                List<Integer> otherRemove = new ArrayList<>();
565
566                for (int t = 0; t < this.subunitEQR.get(this.representative).size(); t++) {
567                        // If the index is aligned do nothing, otherwise mark as removing
568                        if (!thisAligned.contains(this.subunitEQR.get(this.representative).get(t)))
569                                thisRemove.add(t);
570                }
571
572                for (int t = 0; t < other.subunitEQR.get(other.representative).size(); t++) {
573                        // If the index is aligned do nothing, otherwise mark as removing
574                        if (!otherAligned.contains(other.subunitEQR.get(other.representative).get(t)))
575                                otherRemove.add(t);
576                }
577                // Now remove unaligned columns, from end to start
578                Collections.sort(thisRemove);
579                Collections.reverse(thisRemove);
580                Collections.sort(otherRemove);
581                Collections.reverse(otherRemove);
582
583                for (int t = 0; t < thisRemove.size(); t++) {
584                        for (List<Integer> eqr : this.subunitEQR) {
585                                int column = thisRemove.get(t);
586                                eqr.remove(column);
587                        }
588                }
589
590                for (int t = 0; t < otherRemove.size(); t++) {
591                        for (List<Integer> eqr : other.subunitEQR) {
592                                int column = otherRemove.get(t);
593                                eqr.remove(column);
594                        }
595                }
596
597                // The representative is the longest sequence
598                if (this.subunits.get(this.representative).size() < other.subunits.get(other.representative).size())
599                        this.representative = other.representative + subunits.size();
600
601                this.subunits.addAll(other.subunits);
602                this.subunitEQR.addAll(other.subunitEQR);
603
604        }
605
606        /**
607         * Analyze the internal symmetry of the SubunitCluster and divide its
608         * {@link Subunit} into the internal repeats (domains) if they are
609         * internally symmetric.
610         *
611         * @param clusterParams {@link SubunitClustererParameters} with fields used as follows:
612         * structureCoverageThreshold
613         *            the minimum coverage of all repeats in the Subunit
614         * rmsdThreshold
615         *            the maximum allowed RMSD between the repeats
616         * minimumSequenceLength
617         *            the minimum length of the repeating units
618         * @return true if the cluster was internally symmetric, false otherwise
619         * @throws StructureException
620         */
621        public boolean divideInternally(SubunitClustererParameters clusterParams)
622                        throws StructureException {
623
624                CESymmParameters cesym_params = new CESymmParameters();
625                cesym_params.setMinCoreLength(clusterParams.getMinimumSequenceLength());
626                cesym_params.setGaps(false); // We want no gaps between the repeats
627
628                // Analyze the internal symmetry of the representative subunit
629                CeSymmResult result = CeSymm.analyze(subunits.get(representative)
630                                .getRepresentativeAtoms(), cesym_params);
631
632                if (!result.isSignificant())
633                        return false;
634
635                double rmsd = result.getMultipleAlignment().getScore(
636                                MultipleAlignmentScorer.RMSD);
637                if (rmsd > clusterParams.getRMSDThreshold())
638                        return false;
639
640                double coverage = result.getMultipleAlignment().getCoverages().get(0)
641                                * result.getNumRepeats();
642                if (coverage < clusterParams.getStructureCoverageThreshold())
643                        return false;
644
645                logger.info("SubunitCluster is internally symmetric with {} repeats, "
646                                + "{} RMSD and {} coverage", result.getNumRepeats(), rmsd,
647                                coverage);
648
649                // Divide if symmety was significant with RMSD and coverage sufficient
650                List<List<Integer>> alignedRes = result.getMultipleAlignment()
651                                .getBlock(0).getAlignRes();
652
653                List<List<Integer>> columns = new ArrayList<>();
654                for (int s = 0; s < alignedRes.size(); s++)
655                        columns.add(new ArrayList<>(alignedRes.get(s).size()));
656
657                // Extract the aligned columns of each repeat in the Subunit
658                for (int col = 0; col < alignedRes.get(0).size(); col++) {
659
660                        // Check that all aligned residues are part of the Cluster
661                        boolean missing = false;
662                        for (int s = 0; s < alignedRes.size(); s++) {
663                                if (!subunitEQR.get(representative).contains(
664                                                alignedRes.get(s).get(col))) {
665                                        missing = true;
666                                        break;
667                                }
668                        }
669
670                        // Skip the column if any residue was not part of the cluster
671                        if (missing)
672                                continue;
673
674                        for (int s = 0; s < alignedRes.size(); s++) {
675                                columns.get(s).add(
676                                                subunitEQR.get(representative).indexOf(
677                                                                alignedRes.get(s).get(col)));
678                        }
679                }
680
681                // Divide the Subunits in their repeats
682                List<Subunit> newSubunits = new ArrayList<Subunit>(subunits.size()
683                                * columns.size());
684                List<List<Integer>> newSubunitEQR = new ArrayList<List<Integer>>(
685                                subunits.size() * columns.size());
686
687                for (int s = 0; s < subunits.size(); s++) {
688                        for (int r = 0; r < columns.size(); r++) {
689
690                                // Calculate start and end residues of the new Subunit
691                                int start = subunitEQR.get(s).get(columns.get(r).get(0));
692                                int end = subunitEQR.get(s).get(
693                                                columns.get(r).get(columns.get(r).size() - 1));
694
695                                Atom[] reprAtoms = Arrays.copyOfRange(subunits.get(s)
696                                                .getRepresentativeAtoms(), start, end + 1);
697
698                                newSubunits.add(new Subunit(reprAtoms, subunits.get(s)
699                                                .getName(), subunits.get(s).getIdentifier(), subunits
700                                                .get(s).getStructure()));
701
702                                // Recalculate equivalent residues
703                                List<Integer> eqr = new ArrayList<Integer>();
704                                for (int p = 0; p < columns.get(r).size(); p++) {
705                                        eqr.add(subunitEQR.get(s).get(columns.get(r).get(p))
706                                                        - start);
707                                }
708                                newSubunitEQR.add(eqr);
709                        }
710                }
711
712                subunits = newSubunits;
713                subunitEQR = newSubunitEQR;
714
715                // Update representative
716                for (int s = 0; s < subunits.size(); s++) {
717                        if (subunits.get(s).size() > subunits.get(representative).size())
718                                representative = s;
719                }
720
721                method = SubunitClustererMethod.STRUCTURE;
722                pseudoStoichiometric = true;
723                return true;
724        }
725
726        /**
727         * @return the number of Subunits in the cluster
728         */
729        public int size() {
730                return subunits.size();
731        }
732
733        /**
734         * @return the number of aligned residues between Subunits of the cluster
735         */
736        public int length() {
737                return subunitEQR.get(representative).size();
738        }
739
740        /**
741         * @return the {@link SubunitClustererMethod} used for clustering the
742         *         Subunits
743         */
744        public SubunitClustererMethod getClustererMethod() {
745                return method;
746        }
747
748        /**
749         * @return A List of size {@link #size()} of Atom arrays of length
750         *         {@link #length()} with the aligned Atoms for each Subunit in the
751         *         cluster
752         */
753        public List<Atom[]> getAlignedAtomsSubunits() {
754
755                List<Atom[]> alignedAtoms = new ArrayList<>();
756
757                // Loop through all subunits and add the aligned positions
758                for (int s = 0; s < subunits.size(); s++)
759                        alignedAtoms.add(getAlignedAtomsSubunit(s));
760
761                return alignedAtoms;
762        }
763
764        /**
765         * @param index
766         *            Subunit index in the Cluster
767         * @return An Atom array of length {@link #length()} with the aligned Atoms
768         *         from the selected Subunit in the Cluster
769         */
770        public Atom[] getAlignedAtomsSubunit(int index) {
771
772                Atom[] aligned = new Atom[subunitEQR.get(index).size()];
773
774                // Add only the aligned positions of the Subunit in the Cluster
775                for (int p = 0; p < subunitEQR.get(index).size(); p++) {
776                        aligned[p] = subunits.get(index).getRepresentativeAtoms()[subunitEQR
777                                        .get(index).get(p)];
778                }
779
780                return aligned;
781        }
782
783        /**
784         * The multiple alignment is calculated from the equivalent residues in the
785         * SubunitCluster. The alignment is recalculated every time the method is
786         * called (no caching).
787         *
788         * @return MultipleAlignment representation of the aligned residues in this
789         *         Subunit Cluster
790         * @throws StructureException
791         */
792        public MultipleAlignment getMultipleAlignment() throws StructureException {
793
794                // Create a multiple alignment with the atom arrays of the Subunits
795                MultipleAlignment msa = new MultipleAlignmentImpl();
796                msa.setEnsemble(new MultipleAlignmentEnsembleImpl());
797                msa.getEnsemble().setAtomArrays(
798                                subunits.stream().map(s -> s.getRepresentativeAtoms())
799                                                .collect(Collectors.toList()));
800
801                // Fill in the alignment information
802                BlockSet bs = new BlockSetImpl(msa);
803                Block b = new BlockImpl(bs);
804                b.setAlignRes(subunitEQR);
805
806                // Fill in the transformation matrices
807                new ReferenceSuperimposer(representative).superimpose(msa);
808
809                // Calculate some scores
810                MultipleAlignmentScorer.calculateScores(msa);
811
812                return msa;
813
814        }
815
816        @Override
817        public String toString() {
818                return "SubunitCluster [Size=" + size() + ", Length=" + length()
819                                + ", Representative=" + representative + ", Method=" + method
820                                + "]";
821        }
822
823        /**
824         * @return true if this cluster is considered pseudo-stoichiometric (i.e.,
825         *                 was either clustered by structure, or by sequence with low scores),
826         *         false otherwise.
827         */
828        public boolean isPseudoStoichiometric() {
829                return pseudoStoichiometric;
830        }
831
832}