Source code

001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.cluster;
022
023import org.biojava.nbio.alignment.Alignments;
024import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType;
025import org.biojava.nbio.alignment.SimpleGapPenalty;
026import org.biojava.nbio.alignment.template.GapPenalty;
027import org.biojava.nbio.alignment.template.PairwiseSequenceAligner;
028import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
029import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
030import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
031import org.biojava.nbio.core.sequence.ProteinSequence;
032import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
033import org.biojava.nbio.structure.Atom;
034import org.biojava.nbio.structure.StructureException;
035import org.biojava.nbio.structure.align.StructureAlignment;
036import org.biojava.nbio.structure.align.StructureAlignmentFactory;
037import org.biojava.nbio.structure.align.ce.ConfigStrucAligParams;
038import org.biojava.nbio.structure.align.model.AFPChain;
039import org.biojava.nbio.structure.align.multiple.Block;
040import org.biojava.nbio.structure.align.multiple.BlockImpl;
041import org.biojava.nbio.structure.align.multiple.BlockSet;
042import org.biojava.nbio.structure.align.multiple.BlockSetImpl;
043import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
044import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsembleImpl;
045import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl;
046import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer;
047import org.biojava.nbio.structure.align.multiple.util.ReferenceSuperimposer;
048import org.biojava.nbio.structure.symmetry.core.QuatSymmetrySubunits;
049import org.biojava.nbio.structure.symmetry.internal.CESymmParameters;
050import org.biojava.nbio.structure.symmetry.internal.CeSymm;
051import org.biojava.nbio.structure.symmetry.internal.CeSymmResult;
052import org.slf4j.Logger;
053import org.slf4j.LoggerFactory;
054
055import java.lang.reflect.InvocationTargetException;
056import java.lang.reflect.Method;
057import java.util.*;
058import java.util.stream.Collectors;
059
060/**
061 * A SubunitCluster contains a set of equivalent {@link QuatSymmetrySubunits},
062 * the set of equivalent residues (EQR) between {@link Subunit} and a
063 * {@link Subunit} representative. It also stores the method used for
064 * clustering.
065 * <p>
066 * This class allows the comparison and merging of SubunitClusters.
067 *
068 * @author Aleix Lafita
069 * @since 5.0.0
070 *
071 */
072public class SubunitCluster {
073
074        private static final Logger logger = LoggerFactory
075                        .getLogger(SubunitCluster.class);
076
077        private List<Subunit> subunits = new ArrayList<Subunit>();
078        private List<List<Integer>> subunitEQR = new ArrayList<List<Integer>>();
079        private int representative = -1;
080
081        private SubunitClustererMethod method = SubunitClustererMethod.SEQUENCE;
082        private boolean pseudoStoichiometric = false;
083
084        /**
085         * A letter that is assigned to this cluster in stoichiometry.
086        */
087        private String alpha = "";
088
089        /**
090         * A letter that is assigned to this cluster in stoichiometry.
091         *
092         * @return alpha
093         *          String
094         */
095
096        public String getAlpha() {
097                return alpha;
098        }
099
100        /**
101         * A letter that is assigned to this cluster in stoichiometry.
102         *
103         * @param  alpha
104         *          String
105         */
106        public void setAlpha(String alpha) {
107                this.alpha = alpha;
108        }
109
110        /**
111         * A constructor from a single Subunit. To obtain a
112         * SubunitCluster with multiple Subunits, initialize different
113         * SubunitClusters and merge them.
114         *
115         * @param subunit
116         *            initial Subunit
117         */
118        public SubunitCluster(Subunit subunit) {
119
120                subunits.add(subunit);
121
122                List<Integer> identity = new ArrayList<Integer>();
123                for (int i = 0; i < subunit.size(); i++)
124                        identity.add(i);
125                subunitEQR.add(identity);
126
127                representative = 0;
128        }
129
130        /**
131         * A copy constructor with the possibility of removing subunits.
132         * No re-clustering is done.
133         *
134         * @param other
135         *            reference SubunitCluster
136         * @param subunitsToRetain
137         *            which subunits to copy to this cluster
138         */
139        public SubunitCluster(SubunitCluster other, List<Integer> subunitsToRetain) {
140                method = other.method;
141                pseudoStoichiometric = other.pseudoStoichiometric;
142                for (int i = 0; i < other.subunits.size(); i++) {
143                        if(subunitsToRetain.contains(i)) {
144                                subunits.add(other.subunits.get(i));
145                                subunitEQR.add(other.subunitEQR.get(i));
146                        }
147                }
148                representative = 0;
149                for (int i=1; i<subunits.size(); i++) {
150                        if (subunits.get(i).size() > subunits.get(representative).size()) {
151                                representative = i;
152                        }
153                }
154                setAlpha(other.getAlpha());
155        }
156
157        /**
158         * Subunits contained in the SubunitCluster.
159         *
160         * @return an unmodifiable view of the original List
161         */
162        public List<Subunit> getSubunits() {
163                return Collections.unmodifiableList(subunits);
164        }
165
166        /**
167         * Tells whether the other SubunitCluster contains exactly the same Subunit.
168         * This is checked by String equality of their residue one-letter sequences.
169         *
170         * @param other
171         *            SubunitCluster
172         * @return true if the SubunitClusters are identical, false otherwise
173         */
174        public boolean isIdenticalTo(SubunitCluster other) {
175                String thisSequence = this.subunits.get(this.representative)
176                                .getProteinSequenceString();
177                String otherSequence = other.subunits.get(other.representative)
178                                .getProteinSequenceString();
179                return thisSequence.equals(otherSequence);
180        }
181
182        /**
183         * Merges the other SubunitCluster into this one if it contains exactly the
184         * same Subunit. This is checked by {@link #isIdenticalTo(SubunitCluster)}.
185         *
186         * @param other
187         *            SubunitCluster
188         * @return true if the SubunitClusters were merged, false otherwise
189         */
190        public boolean mergeIdentical(SubunitCluster other) {
191
192                if (!isIdenticalTo(other))
193                        return false;
194
195                logger.info("SubunitClusters are identical");
196
197                this.subunits.addAll(other.subunits);
198                this.subunitEQR.addAll(other.subunitEQR);
199
200                return true;
201        }
202
203        /**
204         * Merges the other SubunitCluster into this one if their representatives
205         * sequences are similar (according to the criteria in params).
206         * <p>
207         * The sequence alignment is performed using linear {@link SimpleGapPenalty} and
208         * BLOSUM62 as scoring matrix.
209         *
210         * @param other
211         *            SubunitCluster
212         * @param params
213         *            SubunitClustererParameters, with information whether to use local
214         *            or global alignment, sequence identity and coverage thresholds.
215         *            Threshold values lower than 0.7 are not recommended.
216         *            Use {@link #mergeStructure} for lower values.
217         * @return true if the SubunitClusters were merged, false otherwise
218         * @throws CompoundNotFoundException
219         */
220
221        public boolean mergeSequence(SubunitCluster other, SubunitClustererParameters params) throws CompoundNotFoundException {
222                PairwiseSequenceAlignerType alignerType = PairwiseSequenceAlignerType.LOCAL;
223                if (params.isUseGlobalMetrics()) {
224                        alignerType = PairwiseSequenceAlignerType.GLOBAL;
225                }
226                return mergeSequence(other, params,alignerType
227                                , new SimpleGapPenalty(),
228                                SubstitutionMatrixHelper.getBlosum62());
229        }
230
231        /**
232         * Merges the other SubunitCluster into this one if their representatives
233         * sequences are similar (according to the criteria in params).
234         * <p>
235         * The sequence alignment is performed using linear {@link SimpleGapPenalty} and
236         * BLOSUM62 as scoring matrix.
237         *
238         * @param other
239         *            SubunitCluster
240         * @param params
241         *            {@link SubunitClustererParameters}, with information whether to use local
242         *            or global alignment, sequence identity and coverage thresholds.
243         *            Threshold values lower than 0.7 are not recommended.
244         *            Use {@link #mergeStructure} for lower values.
245         * @param alignerType
246         *            parameter for the sequence alignment algorithm
247         * @param gapPenalty
248         *            parameter for the sequence alignment algorithm
249         * @param subsMatrix
250         *            parameter for the sequence alignment algorithm
251         * @return true if the SubunitClusters were merged, false otherwise
252         * @throws CompoundNotFoundException
253         */
254
255        public boolean mergeSequence(SubunitCluster other, SubunitClustererParameters params,
256                                                                 PairwiseSequenceAlignerType alignerType,
257                                                                 GapPenalty gapPenalty,
258                                                                 SubstitutionMatrix<AminoAcidCompound> subsMatrix)
259                        throws CompoundNotFoundException {
260
261                // Extract the protein sequences as BioJava alignment objects
262                ProteinSequence thisSequence = this.subunits.get(this.representative)
263                                .getProteinSequence();
264                ProteinSequence otherSequence = other.subunits
265                                .get(other.representative).getProteinSequence();
266
267                // Perform the alignment with provided parameters
268                PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> aligner = Alignments
269                                .getPairwiseAligner(thisSequence, otherSequence, alignerType,
270                                                gapPenalty, subsMatrix);
271
272                double sequenceIdentity;
273                if(params.isUseGlobalMetrics()) {
274                        sequenceIdentity = aligner.getPair().getPercentageOfIdentity(true);
275                } else {
276                        sequenceIdentity = aligner.getPair().getPercentageOfIdentity(false);
277                }
278
279                if (sequenceIdentity < params.getSequenceIdentityThreshold())
280                        return false;
281
282                double sequenceCoverage = 0;
283                if(params.isUseSequenceCoverage()) {
284                        // Calculate real coverage (subtract gaps in both sequences)
285                        double gaps1 = aligner.getPair().getAlignedSequence(1)
286                                        .getNumGapPositions();
287                        double gaps2 = aligner.getPair().getAlignedSequence(2)
288                                        .getNumGapPositions();
289                        double lengthAlignment = aligner.getPair().getLength();
290                        double lengthThis = aligner.getQuery().getLength();
291                        double lengthOther = aligner.getTarget().getLength();
292                        sequenceCoverage = (lengthAlignment - gaps1 - gaps2)
293                                        / Math.max(lengthThis, lengthOther);
294
295                        if (sequenceCoverage < params.getSequenceCoverageThreshold())
296                                return false;
297                }
298
299                logger.info(String.format("SubunitClusters are similar in sequence "
300                                                + "with %.2f sequence identity and %.2f coverage", sequenceIdentity,
301                                sequenceCoverage));
302
303                // If coverage and sequence identity sufficient, merge other and this
304                List<Integer> thisAligned = new ArrayList<Integer>();
305                List<Integer> otherAligned = new ArrayList<Integer>();
306
307                // Extract the aligned residues of both Subunit
308                for (int p = 1; p < aligner.getPair().getLength() + 1; p++) {
309
310                        // Skip gaps in any of the two sequences
311                        if (aligner.getPair().getAlignedSequence(1).isGap(p))
312                                continue;
313                        if (aligner.getPair().getAlignedSequence(2).isGap(p))
314                                continue;
315
316                        int thisIndex = aligner.getPair().getIndexInQueryAt(p) - 1;
317                        int otherIndex = aligner.getPair().getIndexInTargetAt(p) - 1;
318
319                        // Only consider residues that are part of the SubunitCluster
320                        if (this.subunitEQR.get(this.representative).contains(thisIndex)
321                                        && other.subunitEQR.get(other.representative).contains(
322                                                        otherIndex)) {
323                                thisAligned.add(thisIndex);
324                                otherAligned.add(otherIndex);
325                        }
326                }
327
328                // Do a List intersection to find out which EQR columns to remove
329                List<Integer> thisRemove = new ArrayList<Integer>();
330                List<Integer> otherRemove = new ArrayList<Integer>();
331
332                for (int t = 0; t < this.subunitEQR.get(this.representative).size(); t++) {
333                        // If the index is aligned do nothing, otherwise mark as removing
334                        if (!thisAligned.contains(this.subunitEQR.get(this.representative)
335                                        .get(t)))
336                                thisRemove.add(t);
337                }
338
339                for (int t = 0; t < other.subunitEQR.get(other.representative).size(); t++) {
340                        // If the index is aligned do nothing, otherwise mark as removing
341                        if (!otherAligned.contains(other.subunitEQR.get(
342                                        other.representative).get(t)))
343                                otherRemove.add(t);
344                }
345                // Now remove unaligned columns, from end to start
346                Collections.sort(thisRemove);
347                Collections.reverse(thisRemove);
348                Collections.sort(otherRemove);
349                Collections.reverse(otherRemove);
350
351                for (int t = 0; t < thisRemove.size(); t++) {
352                        for (List<Integer> eqr : this.subunitEQR) {
353                                int column = thisRemove.get(t);
354                                eqr.remove(column);
355                        }
356                }
357
358                for (int t = 0; t < otherRemove.size(); t++) {
359                        for (List<Integer> eqr : other.subunitEQR) {
360                                int column = otherRemove.get(t);
361                                eqr.remove(column);
362                        }
363                }
364
365                // The representative is the longest sequence
366                if (this.subunits.get(this.representative).size() < other.subunits.get(
367                                other.representative).size())
368                        this.representative = other.representative + subunits.size();
369
370                this.subunits.addAll(other.subunits);
371                this.subunitEQR.addAll(other.subunitEQR);
372
373                this.method = SubunitClustererMethod.SEQUENCE;
374
375                pseudoStoichiometric = !params.isHighConfidenceScores(sequenceIdentity,sequenceCoverage);
376
377                return true;
378        }
379
380        /**
381         * Merges the other SubunitCluster into this one if their representative
382         * Atoms are structurally similar (according to the criteria in params).
383         * <p>
384         *
385         * @param other
386         *            SubunitCluster
387         * @param params
388         *            {@link SubunitClustererParameters}, with information on what alignment
389         *            algorithm to use, RMSD/TMScore and structure coverage thresholds.
390         * @return true if the SubunitClusters were merged, false otherwise
391         * @throws StructureException
392         */
393
394        public boolean mergeStructure(SubunitCluster other, SubunitClustererParameters params) throws StructureException {
395
396                StructureAlignment aligner = StructureAlignmentFactory.getAlgorithm(params.getSuperpositionAlgorithm());
397                ConfigStrucAligParams aligner_params = aligner.getParameters();
398
399                Method setOptimizeAlignment = null;
400                try {
401                        setOptimizeAlignment = aligner_params.getClass().getMethod("setOptimizeAlignment", boolean.class);
402                } catch (NoSuchMethodException e) {
403                        //alignment algorithm does not have an optimization switch, moving on
404                }
405                if (setOptimizeAlignment != null) {
406                        try {
407                                setOptimizeAlignment.invoke(aligner_params, params.isOptimizeAlignment());
408                        } catch (IllegalAccessException|InvocationTargetException e) {
409                                logger.warn("Could not set alignment optimisation switch");
410                        }
411                }
412
413                AFPChain afp = aligner.align(this.subunits.get(this.representative)
414                                .getRepresentativeAtoms(),
415                                other.subunits.get(other.representative)
416                                                .getRepresentativeAtoms());
417
418                // Convert AFPChain to MultipleAlignment for convenience
419                MultipleAlignment msa = new MultipleAlignmentEnsembleImpl(
420                                afp,
421                                this.subunits.get(this.representative).getRepresentativeAtoms(),
422                                other.subunits.get(other.representative)
423                                                .getRepresentativeAtoms(), false)
424                                .getMultipleAlignment(0);
425
426                double structureCoverage = Math.min(msa.getCoverages().get(0), msa
427                                .getCoverages().get(1));
428
429                if(params.isUseStructureCoverage() && structureCoverage < params.getStructureCoverageThreshold()) {
430                        return false;
431                }
432
433                double rmsd = afp.getTotalRmsdOpt();
434                if (params.isUseRMSD() && rmsd > params.getRMSDThreshold()) {
435                        return false;
436                }
437
438                double tmScore = afp.getTMScore();
439                if (params.isUseTMScore() && tmScore < params.getTMThreshold()) {
440                        return false;
441                }
442
443                logger.info(String.format("SubunitClusters are structurally similar with "
444                                + "%.2f RMSD %.2f coverage", rmsd, structureCoverage));
445
446                // Merge clusters
447                List<List<Integer>> alignedRes = msa.getBlock(0).getAlignRes();
448                List<Integer> thisAligned = new ArrayList<Integer>();
449                List<Integer> otherAligned = new ArrayList<Integer>();
450
451                // Extract the aligned residues of both Subunit
452                for (int p = 0; p < msa.length(); p++) {
453
454                        // Skip gaps in any of the two sequences
455                        if (alignedRes.get(0).get(p) == null)
456                                continue;
457                        if (alignedRes.get(1).get(p) == null)
458                                continue;
459
460                        int thisIndex = alignedRes.get(0).get(p);
461                        int otherIndex = alignedRes.get(1).get(p);
462
463                        // Only consider residues that are part of the SubunitCluster
464                        if (this.subunitEQR.get(this.representative).contains(thisIndex)
465                                        && other.subunitEQR.get(other.representative).contains(
466                                                        otherIndex)) {
467                                thisAligned.add(thisIndex);
468                                otherAligned.add(otherIndex);
469                        }
470                }
471
472                // Do a List intersection to find out which EQR columns to remove
473                List<Integer> thisRemove = new ArrayList<Integer>();
474                List<Integer> otherRemove = new ArrayList<Integer>();
475
476                for (int t = 0; t < this.subunitEQR.get(this.representative).size(); t++) {
477                        // If the index is aligned do nothing, otherwise mark as removing
478                        if (!thisAligned.contains(this.subunitEQR.get(this.representative)
479                                        .get(t)))
480                                thisRemove.add(t);
481                }
482
483                for (int t = 0; t < other.subunitEQR.get(other.representative).size(); t++) {
484                        // If the index is aligned do nothing, otherwise mark as removing
485                        if (!otherAligned.contains(other.subunitEQR.get(
486                                        other.representative).get(t)))
487                                otherRemove.add(t);
488                }
489
490                // Now remove unaligned columns, from end to start
491                Collections.sort(thisRemove);
492                Collections.reverse(thisRemove);
493                Collections.sort(otherRemove);
494                Collections.reverse(otherRemove);
495
496                for (int t = 0; t < thisRemove.size(); t++) {
497                        for (List<Integer> eqr : this.subunitEQR) {
498                                int column = thisRemove.get(t);
499                                eqr.remove(column);
500                        }
501                }
502
503                for (int t = 0; t < otherRemove.size(); t++) {
504                        for (List<Integer> eqr : other.subunitEQR) {
505                                int column = otherRemove.get(t);
506                                eqr.remove(column);
507                        }
508                }
509
510                // The representative is the longest sequence
511                if (this.subunits.get(this.representative).size() < other.subunits.get(
512                                other.representative).size())
513                        this.representative = other.representative + subunits.size();
514
515                this.subunits.addAll(other.subunits);
516                this.subunitEQR.addAll(other.subunitEQR);
517
518                this.method = SubunitClustererMethod.STRUCTURE;
519                pseudoStoichiometric = true;
520
521                return true;
522        }
523
524        /**
525         * Analyze the internal symmetry of the SubunitCluster and divide its
526         * {@link Subunit} into the internal repeats (domains) if they are
527         * internally symmetric.
528         *
529         * @param clusterParams {@link SubunitClustererParameters} with fields used as follows:
530         * structureCoverageThreshold
531         *            the minimum coverage of all repeats in the Subunit
532         * rmsdThreshold
533         *            the maximum allowed RMSD between the repeats
534         * minimumSequenceLength
535         *            the minimum length of the repeating units
536         * @return true if the cluster was internally symmetric, false otherwise
537         * @throws StructureException
538         */
539        public boolean divideInternally(SubunitClustererParameters clusterParams)
540                        throws StructureException {
541
542                CESymmParameters cesym_params = new CESymmParameters();
543                cesym_params.setMinCoreLength(clusterParams.getMinimumSequenceLength());
544                cesym_params.setGaps(false); // We want no gaps between the repeats
545
546                // Analyze the internal symmetry of the representative subunit
547                CeSymmResult result = CeSymm.analyze(subunits.get(representative)
548                                .getRepresentativeAtoms(), cesym_params);
549
550                if (!result.isSignificant())
551                        return false;
552
553                double rmsd = result.getMultipleAlignment().getScore(
554                                MultipleAlignmentScorer.RMSD);
555                if (rmsd > clusterParams.getRMSDThreshold())
556                        return false;
557
558                double coverage = result.getMultipleAlignment().getCoverages().get(0)
559                                * result.getNumRepeats();
560                if (coverage < clusterParams.getStructureCoverageThreshold())
561                        return false;
562
563                logger.info("SubunitCluster is internally symmetric with {} repeats, "
564                                + "{} RMSD and {} coverage", result.getNumRepeats(), rmsd,
565                                coverage);
566
567                // Divide if symmety was significant with RMSD and coverage sufficient
568                List<List<Integer>> alignedRes = result.getMultipleAlignment()
569                                .getBlock(0).getAlignRes();
570
571                List<List<Integer>> columns = new ArrayList<List<Integer>>();
572                for (int s = 0; s < alignedRes.size(); s++)
573                        columns.add(new ArrayList<Integer>(alignedRes.get(s).size()));
574
575                // Extract the aligned columns of each repeat in the Subunit
576                for (int col = 0; col < alignedRes.get(0).size(); col++) {
577
578                        // Check that all aligned residues are part of the Cluster
579                        boolean missing = false;
580                        for (int s = 0; s < alignedRes.size(); s++) {
581                                if (!subunitEQR.get(representative).contains(
582                                                alignedRes.get(s).get(col))) {
583                                        missing = true;
584                                        break;
585                                }
586                        }
587
588                        // Skip the column if any residue was not part of the cluster
589                        if (missing)
590                                continue;
591
592                        for (int s = 0; s < alignedRes.size(); s++) {
593                                columns.get(s).add(
594                                                subunitEQR.get(representative).indexOf(
595                                                                alignedRes.get(s).get(col)));
596                        }
597                }
598
599                // Divide the Subunits in their repeats
600                List<Subunit> newSubunits = new ArrayList<Subunit>(subunits.size()
601                                * columns.size());
602                List<List<Integer>> newSubunitEQR = new ArrayList<List<Integer>>(
603                                subunits.size() * columns.size());
604
605                for (int s = 0; s < subunits.size(); s++) {
606                        for (int r = 0; r < columns.size(); r++) {
607
608                                // Calculate start and end residues of the new Subunit
609                                int start = subunitEQR.get(s).get(columns.get(r).get(0));
610                                int end = subunitEQR.get(s).get(
611                                                columns.get(r).get(columns.get(r).size() - 1));
612
613                                Atom[] reprAtoms = Arrays.copyOfRange(subunits.get(s)
614                                                .getRepresentativeAtoms(), start, end + 1);
615
616                                newSubunits.add(new Subunit(reprAtoms, subunits.get(s)
617                                                .getName(), subunits.get(s).getIdentifier(), subunits
618                                                .get(s).getStructure()));
619
620                                // Recalculate equivalent residues
621                                List<Integer> eqr = new ArrayList<Integer>();
622                                for (int p = 0; p < columns.get(r).size(); p++) {
623                                        eqr.add(subunitEQR.get(s).get(columns.get(r).get(p))
624                                                        - start);
625                                }
626                                newSubunitEQR.add(eqr);
627                        }
628                }
629
630                subunits = newSubunits;
631                subunitEQR = newSubunitEQR;
632
633                // Update representative
634                for (int s = 0; s < subunits.size(); s++) {
635                        if (subunits.get(s).size() > subunits.get(representative).size())
636                                representative = s;
637                }
638
639                method = SubunitClustererMethod.STRUCTURE;
640                pseudoStoichiometric = true;
641                return true;
642        }
643
644        /**
645         * @return the number of Subunits in the cluster
646         */
647        public int size() {
648                return subunits.size();
649        }
650
651        /**
652         * @return the number of aligned residues between Subunits of the cluster
653         */
654        public int length() {
655                return subunitEQR.get(representative).size();
656        }
657
658        /**
659         * @return the {@link SubunitClustererMethod} used for clustering the
660         *         Subunits
661         */
662        public SubunitClustererMethod getClustererMethod() {
663                return method;
664        }
665
666        /**
667         * @return A List of size {@link #size()} of Atom arrays of length
668         *         {@link #length()} with the aligned Atoms for each Subunit in the
669         *         cluster
670         */
671        public List<Atom[]> getAlignedAtomsSubunits() {
672
673                List<Atom[]> alignedAtoms = Collections.emptyList();
674
675                // Loop through all subunits and add the aligned positions
676                for (int s = 0; s < subunits.size(); s++)
677                        alignedAtoms.add(getAlignedAtomsSubunit(s));
678
679                return alignedAtoms;
680        }
681
682        /**
683         * @param index
684         *            Subunit index in the Cluster
685         * @return An Atom array of length {@link #length()} with the aligned Atoms
686         *         from the selected Subunit in the Cluster
687         */
688        public Atom[] getAlignedAtomsSubunit(int index) {
689
690                Atom[] aligned = new Atom[subunitEQR.get(index).size()];
691
692                // Add only the aligned positions of the Subunit in the Cluster
693                for (int p = 0; p < subunitEQR.get(index).size(); p++) {
694                        aligned[p] = subunits.get(index).getRepresentativeAtoms()[subunitEQR
695                                        .get(index).get(p)];
696                }
697
698                return aligned;
699        }
700
701        /**
702         * The multiple alignment is calculated from the equivalent residues in the
703         * SubunitCluster. The alignment is recalculated every time the method is
704         * called (no caching).
705         *
706         * @return MultipleAlignment representation of the aligned residues in this
707         *         Subunit Cluster
708         * @throws StructureException
709         */
710        public MultipleAlignment getMultipleAlignment() throws StructureException {
711
712                // Create a multiple alignment with the atom arrays of the Subunits
713                MultipleAlignment msa = new MultipleAlignmentImpl();
714                msa.setEnsemble(new MultipleAlignmentEnsembleImpl());
715                msa.getEnsemble().setAtomArrays(
716                                subunits.stream().map(s -> s.getRepresentativeAtoms())
717                                                .collect(Collectors.toList()));
718
719                // Fill in the alignment information
720                BlockSet bs = new BlockSetImpl(msa);
721                Block b = new BlockImpl(bs);
722                b.setAlignRes(subunitEQR);
723
724                // Fill in the transformation matrices
725                new ReferenceSuperimposer(representative).superimpose(msa);
726
727                // Calculate some scores
728                MultipleAlignmentScorer.calculateScores(msa);
729
730                return msa;
731
732        }
733
734        @Override
735        public String toString() {
736                return "SubunitCluster [Size=" + size() + ", Length=" + length()
737                                + ", Representative=" + representative + ", Method=" + method
738                                + "]";
739        }
740
741        /**
742         * @return true if this cluster is considered pseudo-stoichiometric (i.e.,
743         *                 was either clustered by structure, or by sequence with low scores),
744         *         false otherwise.
745         */
746        public boolean isPseudoStoichiometric() {
747                return pseudoStoichiometric;
748        }
749
750}