001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.cluster;
022
023import org.biojava.nbio.alignment.Alignments;
024import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType;
025import org.biojava.nbio.alignment.SimpleGapPenalty;
026import org.biojava.nbio.alignment.template.GapPenalty;
027import org.biojava.nbio.alignment.template.PairwiseSequenceAligner;
028import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
029import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
030import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
031import org.biojava.nbio.core.sequence.ProteinSequence;
032import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
033import org.biojava.nbio.structure.Atom;
034import org.biojava.nbio.structure.Chain;
035import org.biojava.nbio.structure.EntityInfo;
036import org.biojava.nbio.structure.Group;
037import org.biojava.nbio.structure.Structure;
038import org.biojava.nbio.structure.StructureException;
039import org.biojava.nbio.structure.align.StructureAlignment;
040import org.biojava.nbio.structure.align.StructureAlignmentFactory;
041import org.biojava.nbio.structure.align.ce.ConfigStrucAligParams;
042import org.biojava.nbio.structure.align.model.AFPChain;
043import org.biojava.nbio.structure.align.multiple.Block;
044import org.biojava.nbio.structure.align.multiple.BlockImpl;
045import org.biojava.nbio.structure.align.multiple.BlockSet;
046import org.biojava.nbio.structure.align.multiple.BlockSetImpl;
047import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
048import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsembleImpl;
049import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl;
050import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer;
051import org.biojava.nbio.structure.align.multiple.util.ReferenceSuperimposer;
052import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
053import org.biojava.nbio.structure.symmetry.core.QuatSymmetrySubunits;
054import org.biojava.nbio.structure.symmetry.internal.CESymmParameters;
055import org.biojava.nbio.structure.symmetry.internal.CeSymm;
056import org.biojava.nbio.structure.symmetry.internal.CeSymmResult;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060import java.lang.reflect.InvocationTargetException;
061import java.lang.reflect.Method;
062import java.util.*;
063import java.util.stream.Collectors;
064
065/**
066 * A SubunitCluster contains a set of equivalent {@link QuatSymmetrySubunits},
067 * the set of equivalent residues (EQR) between {@link Subunit} and a
068 * {@link Subunit} representative. It also stores the method used for
069 * clustering.
070 * <p>
071 * This class allows the comparison and merging of SubunitClusters.
072 *
073 * @author Aleix Lafita
074 * @since 5.0.0
075 *
076 */
077public class SubunitCluster {
078
079        private static final Logger logger = LoggerFactory.getLogger(SubunitCluster.class);
080
081        private List<Subunit> subunits = new ArrayList<>();
082        private List<List<Integer>> subunitEQR = new ArrayList<>();
083        private int representative = -1;
084
085        private SubunitClustererMethod method = SubunitClustererMethod.SEQUENCE;
086        private boolean pseudoStoichiometric = false;
087
088        /**
089         * A letter that is assigned to this cluster in stoichiometry.
090        */
091        private String alpha = "";
092
093        /**
094         * A letter that is assigned to this cluster in stoichiometry.
095         *
096         * @return alpha
097         *          String
098         */
099
100        public String getAlpha() {
101                return alpha;
102        }
103
104        /**
105         * A letter that is assigned to this cluster in stoichiometry.
106         *
107         * @param  alpha
108         *          String
109         */
110        public void setAlpha(String alpha) {
111                this.alpha = alpha;
112        }
113
114        /**
115         * A constructor from a single Subunit. To obtain a
116         * SubunitCluster with multiple Subunits, initialize different
117         * SubunitClusters and merge them.
118         *
119         * @param subunit
120         *            initial Subunit
121         */
122        public SubunitCluster(Subunit subunit) {
123
124                subunits.add(subunit);
125
126                List<Integer> identity = new ArrayList<>();
127                for (int i = 0; i < subunit.size(); i++)
128                        identity.add(i);
129                subunitEQR.add(identity);
130
131                representative = 0;
132        }
133
134        /**
135         * A copy constructor with the possibility of removing subunits.
136         * No re-clustering is done.
137         *
138         * @param other
139         *            reference SubunitCluster
140         * @param subunitsToRetain
141         *            which subunits to copy to this cluster
142         */
143        public SubunitCluster(SubunitCluster other, List<Integer> subunitsToRetain) {
144                method = other.method;
145                pseudoStoichiometric = other.pseudoStoichiometric;
146                for (int i = 0; i < other.subunits.size(); i++) {
147                        if(subunitsToRetain.contains(i)) {
148                                subunits.add(other.subunits.get(i));
149                                subunitEQR.add(other.subunitEQR.get(i));
150                        }
151                }
152                representative = 0;
153                for (int i=1; i<subunits.size(); i++) {
154                        if (subunits.get(i).size() > subunits.get(representative).size()) {
155                                representative = i;
156                        }
157                }
158                setAlpha(other.getAlpha());
159        }
160
161        /**
162         * Create the cluster manually by specifying subunits and the equivalent residues
163         * @param subunits List of aligned subunits
164         * @param subunitEQR Double list giving the aligned residue indices in each subunit
165         */
166        public SubunitCluster(List<Subunit> subunits, List<List<Integer>> subunitEQR) {
167                if(subunits.size() != subunitEQR.size()) {
168                        throw new IllegalArgumentException("Mismatched subunit length");
169                }
170                this.subunits = subunits;
171                this.subunitEQR = subunitEQR;
172                this.representative = 0;
173                this.method = SubunitClustererMethod.MANUAL;
174                this.pseudoStoichiometric = false;
175        }
176
177        /**
178         * Subunits contained in the SubunitCluster.
179         *
180         * @return an unmodifiable view of the original List
181         */
182        public List<Subunit> getSubunits() {
183                return Collections.unmodifiableList(subunits);
184        }
185
186        /**
187         * Tells whether the other SubunitCluster contains exactly the same Subunit.
188         * This is checked by String equality of their residue one-letter sequences.
189         *
190         * @param other
191         *            SubunitCluster
192         * @return true if the SubunitClusters are identical, false otherwise
193         */
194        public boolean isIdenticalTo(SubunitCluster other) {
195                String thisSequence = this.subunits.get(this.representative)
196                                .getProteinSequenceString();
197                String otherSequence = other.subunits.get(other.representative)
198                                .getProteinSequenceString();
199                return thisSequence.equals(otherSequence);
200        }
201
202        /**
203         * Tells whether the other SubunitCluster contains exactly the same Subunit.
204         * This is checked by equality of their entity identifiers if they are present.
205         *
206         * @param other
207         *            SubunitCluster
208         * @return true if the SubunitClusters are identical, false otherwise
209         */
210        public boolean isIdenticalByEntityIdTo(SubunitCluster other) {
211                Subunit thisSub = this.subunits.get(this.representative);
212                Subunit otherSub = other.subunits.get(other.representative);
213                String thisName = thisSub.getName();
214                String otherName = otherSub.getName();
215
216                Structure thisStruct = thisSub.getStructure();
217                Structure otherStruct = otherSub.getStructure();
218                if (thisStruct == null || otherStruct == null) {
219                        logger.info("SubunitClusters {}-{} have no referenced structures. Ignoring identity check by entity id",
220                                        thisName,
221                                        otherName);
222                        return false;
223                }
224                if (thisStruct != otherStruct) {
225                        // different object references: will not cluster even if entity id is same
226                        return false;
227                }
228                Chain thisChain = thisStruct.getChain(thisName);
229                Chain otherChain = otherStruct.getChain(otherName);
230                if (thisChain == null || otherChain == null) {
231                        logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id",
232                                        thisName,
233                                        otherName);
234                        return false;
235                }
236                if (thisChain.getEntityInfo() == null || otherChain.getEntityInfo() == null) {
237                        logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id",
238                                        thisName,
239                                        otherName);
240                        return false;
241                }
242                int thisEntityId = thisChain.getEntityInfo().getMolId();
243                int otherEntityId = otherChain.getEntityInfo().getMolId();
244                return thisEntityId == otherEntityId;
245        }
246
247        /**
248         * Merges the other SubunitCluster into this one if it contains exactly the
249         * same Subunit. This is checked by {@link #isIdenticalTo(SubunitCluster)}.
250         *
251         * @param other
252         *            SubunitCluster
253         * @return true if the SubunitClusters were merged, false otherwise
254         */
255        public boolean mergeIdentical(SubunitCluster other) {
256
257                if (!isIdenticalTo(other))
258                        return false;
259
260                logger.info("SubunitClusters {}-{} are identical in sequence",
261                                this.subunits.get(this.representative).getName(),
262                                other.subunits.get(other.representative).getName());
263
264                this.subunits.addAll(other.subunits);
265                this.subunitEQR.addAll(other.subunitEQR);
266
267                return true;
268        }
269
270        /**
271         * Merges the other SubunitCluster into this one if it contains exactly the
272         * same Subunit. This is checked by comparing the entity identifiers of the subunits
273         * if one can be found.
274         * Thus this only makes sense when the subunits are complete chains of a
275         * deposited PDB entry.
276         *
277         * @param other
278         *            SubunitCluster
279         * @return true if the SubunitClusters were merged, false otherwise
280         */
281        public boolean mergeIdenticalByEntityId(SubunitCluster other) {
282
283                if (!isIdenticalByEntityIdTo(other))
284                        return false;
285
286                Subunit thisSub = this.subunits.get(this.representative);
287                Subunit otherSub = other.subunits.get(other.representative);
288                String thisName = thisSub.getName();
289                String otherName = otherSub.getName();
290
291                logger.info("SubunitClusters {}-{} belong to same entity. Assuming they are identical",
292                                thisName,
293                                otherName);
294
295                List<Integer> thisAligned = new ArrayList<>();
296                List<Integer> otherAligned = new ArrayList<>();
297
298                // we've merged by entity id, we can assume structure, chain and entity are available (checked in isIdenticalByEntityIdTo())
299                Structure thisStruct = thisSub.getStructure();
300                Structure otherStruct = otherSub.getStructure();
301                Chain thisChain = thisStruct.getChain(thisName);
302                Chain otherChain = otherStruct.getChain(otherName);
303                EntityInfo entityInfo = thisChain.getEntityInfo();
304
305                // Extract the aligned residues of both Subunits
306                for (int thisIndex=0; thisIndex < thisSub.size(); thisIndex++) {
307
308                        Group g = thisSub.getRepresentativeAtoms()[thisIndex].getGroup();
309
310                        int seqresIndex = entityInfo.getAlignedResIndex(g, thisChain);
311
312                        if (seqresIndex == -1) {
313                                // this might mean that FileParsingParameters.setAlignSeqRes() wasn't set to true during parsing
314                                continue;
315                        }
316
317                        // note the seqresindex is 1-based
318                        Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1);
319
320                        int otherIndex = otherChain.getAtomGroups().indexOf(otherG);
321                        if (otherIndex == -1) {
322                                // skip residues that are unobserved in other sequence ("gaps" in the entity SEQRES alignment)
323                                continue;
324                        }
325
326                        // Only consider residues that are part of the SubunitCluster
327                        if (this.subunitEQR.get(this.representative).contains(thisIndex)
328                                        && other.subunitEQR.get(other.representative).contains(otherIndex)) {
329                                thisAligned.add(thisIndex);
330                                otherAligned.add(otherIndex);
331                        }
332                }
333
334                if (thisAligned.size() == 0 && otherAligned.size() == 0) {
335                        logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity SEQRES alignment. Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName);
336                }
337
338                updateEquivResidues(other, thisAligned, otherAligned);
339
340                return true;
341        }
342
343        /**
344         * Merges the other SubunitCluster into this one if their representatives
345         * sequences are similar (according to the criteria in params).
346         * <p>
347         * The sequence alignment is performed using linear {@link SimpleGapPenalty} and
348         * BLOSUM62 as scoring matrix.
349         *
350         * @param other
351         *            SubunitCluster
352         * @param params
353         *            SubunitClustererParameters, with information whether to use local
354         *            or global alignment, sequence identity and coverage thresholds.
355         *            Threshold values lower than 0.7 are not recommended.
356         *            Use {@link #mergeStructure} for lower values.
357         * @return true if the SubunitClusters were merged, false otherwise
358         * @throws CompoundNotFoundException
359         */
360
361        public boolean mergeSequence(SubunitCluster other, SubunitClustererParameters params) throws CompoundNotFoundException {
362                PairwiseSequenceAlignerType alignerType = PairwiseSequenceAlignerType.LOCAL;
363                if (params.isUseGlobalMetrics()) {
364                        alignerType = PairwiseSequenceAlignerType.GLOBAL;
365                }
366                return mergeSequence(other, params,alignerType
367                                , new SimpleGapPenalty(),
368                                SubstitutionMatrixHelper.getBlosum62());
369        }
370
371        /**
372         * Merges the other SubunitCluster into this one if their representatives
373         * sequences are similar (according to the criteria in params).
374         * <p>
375         * The sequence alignment is performed using linear {@link SimpleGapPenalty} and
376         * BLOSUM62 as scoring matrix.
377         *
378         * @param other
379         *            SubunitCluster
380         * @param params
381         *            {@link SubunitClustererParameters}, with information whether to use local
382         *            or global alignment, sequence identity and coverage thresholds.
383         *            Threshold values lower than 0.7 are not recommended.
384         *            Use {@link #mergeStructure} for lower values.
385         * @param alignerType
386         *            parameter for the sequence alignment algorithm
387         * @param gapPenalty
388         *            parameter for the sequence alignment algorithm
389         * @param subsMatrix
390         *            parameter for the sequence alignment algorithm
391         * @return true if the SubunitClusters were merged, false otherwise
392         * @throws CompoundNotFoundException
393         */
394
395        public boolean mergeSequence(SubunitCluster other, SubunitClustererParameters params,
396                                                                 PairwiseSequenceAlignerType alignerType,
397                                                                 GapPenalty gapPenalty,
398                                                                 SubstitutionMatrix<AminoAcidCompound> subsMatrix)
399                        throws CompoundNotFoundException {
400
401                // Extract the protein sequences as BioJava alignment objects
402                ProteinSequence thisSequence = this.subunits.get(this.representative)
403                                .getProteinSequence();
404                ProteinSequence otherSequence = other.subunits
405                                .get(other.representative).getProteinSequence();
406
407                // Perform the alignment with provided parameters
408                PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> aligner = Alignments
409                                .getPairwiseAligner(thisSequence, otherSequence, alignerType,
410                                                gapPenalty, subsMatrix);
411
412                double sequenceIdentity;
413                if(params.isUseGlobalMetrics()) {
414                        sequenceIdentity = aligner.getPair().getPercentageOfIdentity(true);
415                } else {
416                        sequenceIdentity = aligner.getPair().getPercentageOfIdentity(false);
417                }
418
419                if (sequenceIdentity < params.getSequenceIdentityThreshold())
420                        return false;
421
422                double sequenceCoverage = 0;
423                if(params.isUseSequenceCoverage()) {
424                        // Calculate real coverage (subtract gaps in both sequences)
425                        double gaps1 = aligner.getPair().getAlignedSequence(1)
426                                        .getNumGapPositions();
427                        double gaps2 = aligner.getPair().getAlignedSequence(2)
428                                        .getNumGapPositions();
429                        double lengthAlignment = aligner.getPair().getLength();
430                        double lengthThis = aligner.getQuery().getLength();
431                        double lengthOther = aligner.getTarget().getLength();
432                        sequenceCoverage = (lengthAlignment - gaps1 - gaps2)
433                                        / Math.max(lengthThis, lengthOther);
434
435                        if (sequenceCoverage < params.getSequenceCoverageThreshold())
436                                return false;
437                }
438
439                logger.info(String.format("SubunitClusters %s-%s are similar in sequence "
440                                                + "with %.2f sequence identity and %.2f coverage",
441                                this.subunits.get(this.representative).getName(),
442                                other.subunits.get(other.representative).getName(),
443                                sequenceIdentity, sequenceCoverage));
444
445                // If coverage and sequence identity sufficient, merge other and this
446                List<Integer> thisAligned = new ArrayList<>();
447                List<Integer> otherAligned = new ArrayList<>();
448
449                // Extract the aligned residues of both Subunit
450                for (int p = 1; p < aligner.getPair().getLength() + 1; p++) {
451
452                        // Skip gaps in any of the two sequences
453                        if (aligner.getPair().getAlignedSequence(1).isGap(p))
454                                continue;
455                        if (aligner.getPair().getAlignedSequence(2).isGap(p))
456                                continue;
457
458                        int thisIndex = aligner.getPair().getIndexInQueryAt(p) - 1;
459                        int otherIndex = aligner.getPair().getIndexInTargetAt(p) - 1;
460
461                        // Only consider residues that are part of the SubunitCluster
462                        if (this.subunitEQR.get(this.representative).contains(thisIndex)
463                                        && other.subunitEQR.get(other.representative).contains(otherIndex)) {
464                                thisAligned.add(thisIndex);
465                                otherAligned.add(otherIndex);
466                        }
467                }
468
469                updateEquivResidues(other, thisAligned, otherAligned);
470
471                this.method = SubunitClustererMethod.SEQUENCE;
472                pseudoStoichiometric = !params.isHighConfidenceScores(sequenceIdentity,sequenceCoverage);
473
474                return true;
475        }
476
477        /**
478         * Merges the other SubunitCluster into this one if their representative
479         * Atoms are structurally similar (according to the criteria in params).
480         * <p>
481         *
482         * @param other
483         *            SubunitCluster
484         * @param params
485         *            {@link SubunitClustererParameters}, with information on what alignment
486         *            algorithm to use, RMSD/TMScore and structure coverage thresholds.
487         * @return true if the SubunitClusters were merged, false otherwise
488         * @throws StructureException
489         */
490
491        public boolean mergeStructure(SubunitCluster other, SubunitClustererParameters params) throws StructureException {
492
493                StructureAlignment aligner = StructureAlignmentFactory.getAlgorithm(params.getSuperpositionAlgorithm());
494                ConfigStrucAligParams aligner_params = aligner.getParameters();
495
496                Method setOptimizeAlignment = null;
497                try {
498                        setOptimizeAlignment = aligner_params.getClass().getMethod("setOptimizeAlignment", boolean.class);
499                } catch (NoSuchMethodException e) {
500                        //alignment algorithm does not have an optimization switch, moving on
501                }
502                if (setOptimizeAlignment != null) {
503                        try {
504                                setOptimizeAlignment.invoke(aligner_params, params.isOptimizeAlignment());
505                        } catch (IllegalAccessException|InvocationTargetException e) {
506                                logger.warn("Could not set alignment optimisation switch");
507                        }
508                }
509
510                AFPChain afp = aligner.align(this.subunits.get(this.representative)
511                                .getRepresentativeAtoms(),
512                                other.subunits.get(other.representative)
513                                                .getRepresentativeAtoms());
514
515                if (afp.getOptLength() < 1) {
516                        // alignment failed (eg if chains were too short)
517                        throw new StructureException(
518                                        String.format("Subunits failed to align using %s", params.getSuperpositionAlgorithm()));
519                }
520
521                // Convert AFPChain to MultipleAlignment for convenience
522                MultipleAlignment msa = new MultipleAlignmentEnsembleImpl(
523                                afp,
524                                this.subunits.get(this.representative).getRepresentativeAtoms(),
525                                other.subunits.get(other.representative)
526                                                .getRepresentativeAtoms(), false)
527                                .getMultipleAlignment(0);
528
529                double structureCoverage = Math.min(msa.getCoverages().get(0), msa
530                                .getCoverages().get(1));
531
532                if(params.isUseStructureCoverage() && structureCoverage < params.getStructureCoverageThreshold()) {
533                        return false;
534                }
535
536                double rmsd = afp.getTotalRmsdOpt();
537                if (params.isUseRMSD() && rmsd > params.getRMSDThreshold()) {
538                        return false;
539                }
540
541                double tmScore = afp.getTMScore();
542                if (params.isUseTMScore() && tmScore < params.getTMThreshold()) {
543                        return false;
544                }
545
546                logger.info(String.format("SubunitClusters are structurally similar with "
547                                + "%.2f RMSD %.2f coverage", rmsd, structureCoverage));
548
549                // Merge clusters
550                List<List<Integer>> alignedRes = msa.getBlock(0).getAlignRes();
551                List<Integer> thisAligned = new ArrayList<>();
552                List<Integer> otherAligned = new ArrayList<>();
553
554                // Extract the aligned residues of both Subunit
555                for (int p = 0; p < msa.length(); p++) {
556
557                        // Skip gaps in any of the two sequences
558                        if (alignedRes.get(0).get(p) == null)
559                                continue;
560                        if (alignedRes.get(1).get(p) == null)
561                                continue;
562
563                        int thisIndex = alignedRes.get(0).get(p);
564                        int otherIndex = alignedRes.get(1).get(p);
565
566                        // Only consider residues that are part of the SubunitCluster
567                        if (this.subunitEQR.get(this.representative).contains(thisIndex)
568                                        && other.subunitEQR.get(other.representative).contains(
569                                                        otherIndex)) {
570                                thisAligned.add(thisIndex);
571                                otherAligned.add(otherIndex);
572                        }
573                }
574
575                updateEquivResidues(other, thisAligned, otherAligned);
576
577                this.method = SubunitClustererMethod.STRUCTURE;
578                pseudoStoichiometric = true;
579
580                return true;
581        }
582
583        private void updateEquivResidues(SubunitCluster other, List<Integer> thisAligned, List<Integer> otherAligned) {
584                // Do a List intersection to find out which EQR columns to remove
585                List<Integer> thisRemove = new ArrayList<>();
586                List<Integer> otherRemove = new ArrayList<>();
587
588                for (int t = 0; t < this.subunitEQR.get(this.representative).size(); t++) {
589                        // If the index is aligned do nothing, otherwise mark as removing
590                        if (!thisAligned.contains(this.subunitEQR.get(this.representative).get(t)))
591                                thisRemove.add(t);
592                }
593
594                for (int t = 0; t < other.subunitEQR.get(other.representative).size(); t++) {
595                        // If the index is aligned do nothing, otherwise mark as removing
596                        if (!otherAligned.contains(other.subunitEQR.get(other.representative).get(t)))
597                                otherRemove.add(t);
598                }
599                // Now remove unaligned columns, from end to start
600                Collections.sort(thisRemove);
601                Collections.reverse(thisRemove);
602                Collections.sort(otherRemove);
603                Collections.reverse(otherRemove);
604
605                for (int t = 0; t < thisRemove.size(); t++) {
606                        for (List<Integer> eqr : this.subunitEQR) {
607                                int column = thisRemove.get(t);
608                                eqr.remove(column);
609                        }
610                }
611
612                for (int t = 0; t < otherRemove.size(); t++) {
613                        for (List<Integer> eqr : other.subunitEQR) {
614                                int column = otherRemove.get(t);
615                                eqr.remove(column);
616                        }
617                }
618
619                // The representative is the longest sequence
620                if (this.subunits.get(this.representative).size() < other.subunits.get(other.representative).size())
621                        this.representative = other.representative + subunits.size();
622
623                this.subunits.addAll(other.subunits);
624                this.subunitEQR.addAll(other.subunitEQR);
625
626        }
627
628        /**
629         * Analyze the internal symmetry of the SubunitCluster and divide its
630         * {@link Subunit} into the internal repeats (domains) if they are
631         * internally symmetric.
632         *
633         * @param clusterParams {@link SubunitClustererParameters} with fields used as follows:
634         * structureCoverageThreshold
635         *            the minimum coverage of all repeats in the Subunit
636         * rmsdThreshold
637         *            the maximum allowed RMSD between the repeats
638         * minimumSequenceLength
639         *            the minimum length of the repeating units
640         * @return true if the cluster was internally symmetric, false otherwise
641         * @throws StructureException
642         */
643        public boolean divideInternally(SubunitClustererParameters clusterParams)
644                        throws StructureException {
645
646                CESymmParameters cesym_params = new CESymmParameters();
647                cesym_params.setMinCoreLength(clusterParams.getMinimumSequenceLength());
648                cesym_params.setGaps(false); // We want no gaps between the repeats
649
650                // Analyze the internal symmetry of the representative subunit
651                CeSymmResult result = CeSymm.analyze(subunits.get(representative)
652                                .getRepresentativeAtoms(), cesym_params);
653
654                if (!result.isSignificant())
655                        return false;
656
657                double rmsd = result.getMultipleAlignment().getScore(
658                                MultipleAlignmentScorer.RMSD);
659                if (rmsd > clusterParams.getRMSDThreshold())
660                        return false;
661
662                double coverage = result.getMultipleAlignment().getCoverages().get(0)
663                                * result.getNumRepeats();
664                if (coverage < clusterParams.getStructureCoverageThreshold())
665                        return false;
666
667                logger.info("SubunitCluster is internally symmetric with {} repeats, "
668                                + "{} RMSD and {} coverage", result.getNumRepeats(), rmsd,
669                                coverage);
670
671                // Divide if symmety was significant with RMSD and coverage sufficient
672                List<List<Integer>> alignedRes = result.getMultipleAlignment()
673                                .getBlock(0).getAlignRes();
674
675                List<List<Integer>> columns = new ArrayList<>();
676                for (int s = 0; s < alignedRes.size(); s++)
677                        columns.add(new ArrayList<>(alignedRes.get(s).size()));
678
679                // Extract the aligned columns of each repeat in the Subunit
680                for (int col = 0; col < alignedRes.get(0).size(); col++) {
681
682                        // Check that all aligned residues are part of the Cluster
683                        boolean missing = false;
684                        for (int s = 0; s < alignedRes.size(); s++) {
685                                if (!subunitEQR.get(representative).contains(
686                                                alignedRes.get(s).get(col))) {
687                                        missing = true;
688                                        break;
689                                }
690                        }
691
692                        // Skip the column if any residue was not part of the cluster
693                        if (missing)
694                                continue;
695
696                        for (int s = 0; s < alignedRes.size(); s++) {
697                                columns.get(s).add(
698                                                subunitEQR.get(representative).indexOf(
699                                                                alignedRes.get(s).get(col)));
700                        }
701                }
702
703                // Divide the Subunits in their repeats
704                List<Subunit> newSubunits = new ArrayList<>(subunits.size()
705                                * columns.size());
706                List<List<Integer>> newSubunitEQR = new ArrayList<>(
707                                subunits.size() * columns.size());
708
709                for (int s = 0; s < subunits.size(); s++) {
710                        for (int r = 0; r < columns.size(); r++) {
711
712                                // Calculate start and end residues of the new Subunit
713                                int start = subunitEQR.get(s).get(columns.get(r).get(0));
714                                int end = subunitEQR.get(s).get(
715                                                columns.get(r).get(columns.get(r).size() - 1));
716
717                                Atom[] reprAtoms = Arrays.copyOfRange(subunits.get(s)
718                                                .getRepresentativeAtoms(), start, end + 1);
719
720                                newSubunits.add(new Subunit(reprAtoms, subunits.get(s)
721                                                .getName(), subunits.get(s).getIdentifier(), subunits
722                                                .get(s).getStructure()));
723
724                                // Recalculate equivalent residues
725                                List<Integer> eqr = new ArrayList<>();
726                                for (int p = 0; p < columns.get(r).size(); p++) {
727                                        eqr.add(subunitEQR.get(s).get(columns.get(r).get(p))
728                                                        - start);
729                                }
730                                newSubunitEQR.add(eqr);
731                        }
732                }
733
734                subunits = newSubunits;
735                subunitEQR = newSubunitEQR;
736
737                // Update representative
738                for (int s = 0; s < subunits.size(); s++) {
739                        if (subunits.get(s).size() > subunits.get(representative).size())
740                                representative = s;
741                }
742
743                method = SubunitClustererMethod.STRUCTURE;
744                pseudoStoichiometric = true;
745                return true;
746        }
747
748        /**
749         * @return the number of Subunits in the cluster
750         */
751        public int size() {
752                return subunits.size();
753        }
754
755        /**
756         * @return the number of aligned residues between Subunits of the cluster
757         */
758        public int length() {
759                return subunitEQR.get(representative).size();
760        }
761
762        /**
763         * @return the {@link SubunitClustererMethod} used for clustering the
764         *         Subunits
765         */
766        public SubunitClustererMethod getClustererMethod() {
767                return method;
768        }
769
770        /**
771         * @return A List of size {@link #size()} of Atom arrays of length
772         *         {@link #length()} with the aligned Atoms for each Subunit in the
773         *         cluster
774         */
775        public List<Atom[]> getAlignedAtomsSubunits() {
776
777                List<Atom[]> alignedAtoms = new ArrayList<>();
778
779                // Loop through all subunits and add the aligned positions
780                for (int s = 0; s < subunits.size(); s++)
781                        alignedAtoms.add(getAlignedAtomsSubunit(s));
782
783                return alignedAtoms;
784        }
785
786        /**
787         * @param index
788         *            Subunit index in the Cluster
789         * @return An Atom array of length {@link #length()} with the aligned Atoms
790         *         from the selected Subunit in the Cluster
791         */
792        public Atom[] getAlignedAtomsSubunit(int index) {
793
794                Atom[] aligned = new Atom[subunitEQR.get(index).size()];
795
796                // Add only the aligned positions of the Subunit in the Cluster
797                for (int p = 0; p < subunitEQR.get(index).size(); p++) {
798                        aligned[p] = subunits.get(index).getRepresentativeAtoms()[subunitEQR
799                                        .get(index).get(p)];
800                }
801
802                return aligned;
803        }
804
805        /**
806         * The multiple alignment is calculated from the equivalent residues in the
807         * SubunitCluster. The alignment is recalculated every time the method is
808         * called (no caching).
809         *
810         * @return MultipleAlignment representation of the aligned residues in this
811         *         Subunit Cluster
812         * @throws StructureException
813         */
814        public MultipleAlignment getMultipleAlignment() throws StructureException {
815
816                // Create a multiple alignment with the atom arrays of the Subunits
817                MultipleAlignment msa = new MultipleAlignmentImpl();
818                msa.setEnsemble(new MultipleAlignmentEnsembleImpl());
819                msa.getEnsemble().setAtomArrays(
820                                subunits.stream().map(s -> s.getRepresentativeAtoms())
821                                                .collect(Collectors.toList()));
822
823                // Fill in the alignment information
824                BlockSet bs = new BlockSetImpl(msa);
825                Block b = new BlockImpl(bs);
826                b.setAlignRes(subunitEQR);
827
828                // Fill in the transformation matrices
829                new ReferenceSuperimposer(representative).superimpose(msa);
830
831                // Calculate some scores
832                MultipleAlignmentScorer.calculateScores(msa);
833
834                return msa;
835
836        }
837
838        @Override
839        public String toString() {
840                return "SubunitCluster [Size=" + size() + ", Length=" + length()
841                                + ", Representative=" + representative + ", Method=" + method
842                                + "]";
843        }
844
845        /**
846         * @return true if this cluster is considered pseudo-stoichiometric (i.e.,
847         *                 was either clustered by structure, or by sequence with low scores),
848         *         false otherwise.
849         */
850        public boolean isPseudoStoichiometric() {
851                return pseudoStoichiometric;
852        }
853
854}