001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.cluster;
022
023import java.util.ArrayList;
024import java.util.List;
025
026import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
027import org.biojava.nbio.structure.Structure;
028import org.biojava.nbio.structure.StructureException;
029import org.biojava.nbio.structure.symmetry.core.Stoichiometry;
030import org.slf4j.Logger;
031import org.slf4j.LoggerFactory;
032
033/**
034 * The SubunitClusterer takes as input a collection of {@link Subunit} and
035 * returns a collection of {@link SubunitCluster}.
036 *
037 * @author Aleix Lafita
038 * @since 5.0.0
039 *
040 */
041public class SubunitClusterer {
042
043        private static final Logger logger = LoggerFactory
044                        .getLogger(SubunitClusterer.class);
045
046        /** Prevent instantiation **/
047        private SubunitClusterer() {
048        }
049
050        public static Stoichiometry cluster(Structure structure,
051                        SubunitClustererParameters params) {
052                List<Subunit> subunits = SubunitExtractor.extractSubunits(structure,
053                                params.getAbsoluteMinimumSequenceLength(),
054                                params.getMinimumSequenceLengthFraction(),
055                                params.getMinimumSequenceLength());
056                return cluster(subunits, params);
057        }
058
059        public static Stoichiometry cluster(List<Subunit> subunits, SubunitClustererParameters params) {
060                List<SubunitCluster> clusters = new ArrayList<>();
061                if (subunits.size() == 0)
062                        return new Stoichiometry(clusters);
063
064                // First generate a new cluster for each Subunit
065                for (Subunit s : subunits)
066                        clusters.add(new SubunitCluster(s));
067
068                if (params.getClustererMethod() == SubunitClustererMethod.SEQUENCE ||
069                                params.getClustererMethod() == SubunitClustererMethod.SEQUENCE_STRUCTURE) {
070                        // Now merge clusters by SEQUENCE
071                        for (int c1 = 0; c1 < clusters.size(); c1++) {
072                                for (int c2 = clusters.size() - 1; c2 > c1; c2--) {
073                                        try {
074                                                if (params.isUseEntityIdForSeqIdentityDetermination() &&
075                                                                clusters.get(c1).mergeIdenticalByEntityId(clusters.get(c2))) {
076                                                        // This we will only do if the switch is for entity id comparison is on.
077                                                        // In some cases it can save enormous amounts of time, e.g. for clustering full
078                                                        // chains of deposited PDB entries. For instance for 6NHJ: with pure alignments it
079                                                        // takes ~ 6 hours, with entity id comparisons it takes 2 minutes.
080                                                        clusters.remove(c2);
081                                                } else if (clusters.get(c1).mergeSequence(clusters.get(c2), params)) {
082                                                        clusters.remove(c2);
083                                                }
084
085                                        } catch (CompoundNotFoundException e) {
086                                                logger.warn("Could not merge by Sequence. {}",
087                                                                e.getMessage());
088                                        }
089                                }
090                        }
091                }
092
093                if (params.getClustererMethod() == SubunitClustererMethod.STRUCTURE ||
094                                params.getClustererMethod() == SubunitClustererMethod.SEQUENCE_STRUCTURE) {
095                        // Now merge clusters by STRUCTURE
096                        for (int c1 = 0; c1 < clusters.size(); c1++) {
097                                for (int c2 = clusters.size() - 1; c2 > c1; c2--) {
098                                        try {
099                                                if (clusters.get(c1).mergeStructure(clusters.get(c2), params)) {
100                                                        clusters.remove(c2);
101                                                }
102                                        } catch (StructureException e) {
103                                                logger.warn("Could not merge by Structure. {}", e.getMessage());
104                                        }
105                                }
106                        }
107                }
108
109                if (params.isInternalSymmetry()) {
110                        // Now divide clusters by their INTERNAL SYMMETRY
111                        for (int c = 0; c < clusters.size(); c++) {
112                                try {
113                                        clusters.get(c).divideInternally(params);
114                                } catch (StructureException e) {
115                                        logger.warn("Error analyzing internal symmetry. {}",
116                                                        e.getMessage());
117                                }
118                        }
119
120                        // After internal symmetry merge again by structural similarity
121                        // Use case: C8 propeller with 3 chains with 3+3+2 repeats each
122                        for (int c1 = 0; c1 < clusters.size(); c1++) {
123                                for (int c2 = clusters.size() - 1; c2 > c1; c2--) {
124                                        try {
125                                                if (clusters.get(c1).mergeStructure(clusters.get(c2), params))
126                                                        clusters.remove(c2);
127                                        } catch (StructureException e) {
128                                                logger.warn("Could not merge by Structure. {}",
129                                                                e.getMessage());
130                                        }
131                                }
132                        }
133                }
134
135                return new Stoichiometry(clusters);
136        }
137}