001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.symmetry.core;
022
023import org.biojava.nbio.structure.cluster.SubunitCluster;
024import org.biojava.nbio.structure.cluster.SubunitClustererMethod;
025
026import java.util.*;
027import java.util.function.Function;
028import java.util.stream.Collectors;
029
030/**
031 * A utility object that describes Stoichiometry (composition of a protein assembly),
032 * determined via clustering procedure {@link org.biojava.nbio.structure.cluster.SubunitClusterer},
033 * and implements human-readable representation using various strategies.
034 *
035 * @author Dmytro Guzenko
036 * @since 5.0.0
037 */
038
039public class Stoichiometry {
040
041        /**
042         * What to do when the number of {@link SubunitCluster} exceeds the length of the alphabet.
043         */
044        public enum StringOverflowStrategy {
045                /**
046                 * Put '?' symbol for every (alphabet.length+i)-th cluster
047                 */
048                QUESTIONMARK,
049                /**
050                 * Cycle through the alphabet (e.g., ...xyzABC...)
051                 */
052                CYCLE,
053                /**
054                 * Represent every cluster with two symbols from the alphabet,
055                 * this forces us to specify number of subunits for every subunit (e.g., AA1AB1AC1...).
056                 * This strategy will not work correctly if there are more than alphabet.length^2 subunit clusters.
057                 */
058                DOUBLE,
059                /**
060                 * The strategy is defined via an external function, we do not have to do anything.
061                 */
062                CUSTOM
063        }
064
065        /**
066         * Alphabet (a sequence of characters) used in this stoichiometry to construct human-readable representation.
067         */
068        private String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
069
070        /**
071         * Strategy determines how this stoichiometry will construct human-readable representation in case number
072         * of clusters exceeds number of letters in the alphabet.
073         */
074        private StringOverflowStrategy strategy = StringOverflowStrategy.CYCLE;
075
076        /**
077         * Full customisation of the string generation is supported via an external function
078         */
079        private Function<List<SubunitCluster>,String> customStringGenerator = null;
080        /**
081         * Subunit clusters that define this stoichiometry.
082         */
083        private List<SubunitCluster> orderedClusters = new ArrayList<>();
084
085        /** Prevent instantiation **/
086        @SuppressWarnings("unused")
087        private Stoichiometry() {
088        }
089
090        /**
091         * Constructor for Stoichiometry. The default strategy is CYCLE,
092         * the letters assigned for each cluster will be reset.
093         *
094         * @param clusters
095         *            List of {@link SubunitCluster} that defines assembly composition.
096         */
097        public Stoichiometry(List<SubunitCluster> clusters) {
098                this(clusters,StringOverflowStrategy.CYCLE,true);
099        }
100
101        /**
102         * Constructor for Stoichiometry. The default strategy is CYCLE.
103         *
104         * @param clusters
105         *            List of {@link SubunitCluster} that defines assembly composition.
106         * @param resetAlphas
107         *            Whether to keep alphas assigned to {@link SubunitCluster} object (useful for local symmetry detection)
108         *            or to generate them anew.
109         */
110        public Stoichiometry(List<SubunitCluster> clusters, boolean resetAlphas) {
111                this(clusters,StringOverflowStrategy.CYCLE,resetAlphas);
112        }
113
114        /**
115         * Constructor for Stoichiometry. The alphas assigned to {@link SubunitCluster} objects will be reset.
116         *
117         * @param clusters
118         *            List of {@link SubunitCluster} that defines assembly composition.
119         * @param strategy
120         *            What to do if number of {@link SubunitCluster} exceeds the alphabet length.
121         */
122        public Stoichiometry(List<SubunitCluster> clusters, StringOverflowStrategy strategy) {
123                this(clusters,strategy,true);
124        }
125
126        /**
127         * Constructor for Stoichiometry.
128         *
129         * @param clusters
130         *            List of {@link SubunitCluster} that defines assembly composition.
131         * @param strategy
132         *            What to do if number of {@link SubunitCluster} exceeds the alphabet length.
133         * @param resetAlphas
134         *            Whether to keep alphas assigned to {@link SubunitCluster} object (useful for local symmetry detection)
135         *            or to generate them anew.
136         */
137        public Stoichiometry(List<SubunitCluster> clusters, StringOverflowStrategy strategy, boolean resetAlphas) {
138                this.strategy = strategy;
139                this.orderedClusters =
140                                clusters.stream().
141                                        sorted(Comparator.
142                                                comparing(SubunitCluster::size).
143                                                reversed()).
144                                        collect(Collectors.toList());
145                if (resetAlphas) {
146                        doResetAlphas();
147                }
148        }
149
150        /**
151         * Constructor for Stoichiometry.
152         *
153         * @param clusters
154         *            List of {@link SubunitCluster} that defines assembly composition.
155         * @param customStringGenerator
156         *            A function which produces a string for a composition (list of subunit clusters).
157         */
158        public Stoichiometry(List<SubunitCluster> clusters, Function<List<SubunitCluster>,String> customStringGenerator) {
159                this(clusters,StringOverflowStrategy.CUSTOM,false);
160                this.customStringGenerator = customStringGenerator;
161        }
162
163
164        /**
165         * Reassign alpha-strings for each cluster according to the current strategy.
166         * Has no effect if custom string generator is used.
167         */
168        public void resetAlphas() {
169                doResetAlphas();
170        }
171
172        private void doResetAlphas() {
173                if(strategy == StringOverflowStrategy.CUSTOM) {
174                        return;
175                }
176                for (int i = 0; i < this.orderedClusters.size(); i++) {
177                        this.orderedClusters.get(i).setAlpha(generateAlpha(i));
178                }
179        }
180
181        /**
182         * Produce a string ("alpha") that describes each component depending on the current strategy.
183         * @param clusterInd
184         *          component index
185         * @return alphanumeric string.
186         */
187        private String generateAlpha(int clusterInd) {
188                String key;
189                int alphabetInd;
190                switch (strategy) {
191                        case CYCLE:
192                                alphabetInd = clusterInd % alphabet.length();
193                                key = alphabet.substring(alphabetInd, alphabetInd + 1);
194                                break;
195
196                        case DOUBLE:
197                                if (orderedClusters.size()>alphabet.length()) {
198                                        int alphabetInd1 = clusterInd / alphabet.length();
199                                        int alphabetInd2 = clusterInd % alphabet.length();
200                                        key = alphabet.substring(alphabetInd1, alphabetInd1 + 1);
201                                        key+=alphabet.substring(alphabetInd2, alphabetInd2 + 1);
202                                } else {
203                                        key = alphabet.substring(clusterInd, clusterInd + 1);
204                                }
205                                break;
206
207                        case QUESTIONMARK:
208                                key = "?";
209                                if(clusterInd<alphabet.length()) {
210                                        key = alphabet.substring(clusterInd, clusterInd + 1);
211                                }
212                                break;
213
214                        case CUSTOM:
215                                throw new IllegalStateException("Alphas should be handled by the custom generator function.");
216
217                        default:
218                                key = "?";
219                                if(clusterInd<alphabet.length()) {
220                                        key = alphabet.substring(clusterInd, clusterInd + 1);
221                                }
222                                break;
223                }
224                return key;
225        }
226        /**
227         * @return list of {@link SubunitCluster}, ordered by the number of subunits (decreasing).
228         */
229        public List<SubunitCluster> getClusters() {
230                return orderedClusters;
231        }
232
233        /**
234         * @return Number of distinct components in this stoichiometry.
235         */
236        public int numberOfComponents() {
237                return orderedClusters.size();
238        }
239
240        /**
241         * Make a combined Stoichiometry object of <i>this</> and the <i>other</>.
242         * The combined list of clusters will be ordered by the number of subunits.
243         * @return new {@link Stoichiometry} object.
244         */
245        public Stoichiometry combineWith(Stoichiometry other) {
246                Set<SubunitCluster> combinedClusters = new LinkedHashSet<>();
247                combinedClusters.addAll(this.orderedClusters);
248                combinedClusters.addAll(other.orderedClusters);
249
250                Stoichiometry combinedStoichiometry;
251                if (this.strategy == StringOverflowStrategy.CUSTOM) {
252                        combinedStoichiometry = new Stoichiometry(new ArrayList<>(combinedClusters),this.customStringGenerator);
253                } else {
254                        combinedStoichiometry = new Stoichiometry(new ArrayList<>(combinedClusters),this.strategy,false);
255                }
256                return combinedStoichiometry;
257        }
258
259        /**
260         * Make a Stoichiometry object that corresponds to a single component.
261         * @param i component index
262         * @return new {@link Stoichiometry} object.
263         */
264        public Stoichiometry getComponent(int i) {
265                return new Stoichiometry(Collections.singletonList(orderedClusters.get(i)),this.strategy,false);
266        }
267
268        /**
269         * @return {@link StringOverflowStrategy} used in this stoichiometry
270         *          to construct human-readable representation in case number
271         *          of clusters exceeds number of letters in the alphabet.
272         */
273        public StringOverflowStrategy getStrategy() {
274                return strategy;
275        }
276
277        /**
278         * Change string representation of a stoichiometry in case number of clusters exceeds number of letters in the alphabet.
279         * This action may invalidate alphas already assigned to the clusters.
280         * @param strategy
281         *          {@link StringOverflowStrategy} used in this stoichiometry
282         *          to construct human-readable representation in case number
283         *          of clusters exceeds number of letters in the alphabet.
284         */
285        public void setStrategy(StringOverflowStrategy strategy) {
286                if(strategy==StringOverflowStrategy.CUSTOM) {
287                        throw new IllegalArgumentException("Set this strategy by providing a function of the type Function<List<SubunitCluster>,String>.");
288                }
289
290                if(this.strategy != strategy) {
291                        this.strategy = strategy;
292                        if(orderedClusters.size()>alphabet.length())
293                                doResetAlphas();
294                }
295        }
296
297
298        /**
299         * Let a user-defined function handle the entire string representation of a stoichiometry.
300         * @param customStringGenerator
301         *          A function which accepts a list of subunit clusters and returns a string.
302         */
303        public void setCustomStringGenerator(Function<List<SubunitCluster>,String> customStringGenerator) {
304                this.strategy = StringOverflowStrategy.CUSTOM;
305                this.customStringGenerator = customStringGenerator;
306        }
307
308        /**
309         * @return Alphabet (a sequence of characters) used in this stoichiometry to construct human-readable representation.
310         */
311        public String getAlphabet() {
312                return alphabet;
313        }
314
315        /**
316         * Change alphabet used for string representation of a stoichiometry.
317         * This action invalidates alphas already assigned to the clusters.
318         * @param alphabet
319         *          a sequence of characters used in this stoichiometry to construct human-readable representation.
320         */
321        public void setAlphabet(String alphabet) {
322                this.alphabet = alphabet;
323                doResetAlphas();
324        }
325
326        /**
327         * @return Human-readable representation of this stoichiometry.
328         */
329        @Override
330        public String toString() {
331
332                if(strategy == StringOverflowStrategy.CUSTOM) {
333                        if(customStringGenerator == null) {
334                                throw new IllegalStateException("The strategy is CUSTOM, yet the string generator function is not defined.");
335                        }
336                        return customStringGenerator.apply(orderedClusters);
337                }
338
339                StringBuilder formula = new StringBuilder();
340
341                orderedClusters.forEach((SubunitCluster r) -> {
342                        formula.append(r.getAlpha());
343                        if(r.getAlpha().length()>1 || r.size()>1)
344                                formula.append(r.size());
345                });
346
347                return formula.toString();
348        }
349
350        /**
351         * A pseudostoichiometric {@link SubunitCluster} was obtained using the
352         * {@link SubunitClustererMethod#STRUCTURE} similarity,
353         * or {@link SubunitClustererMethod#SEQUENCE} similarity with low scores.
354         *
355         * @return true if any of the clusters is pseudostoichiometric, false
356         *         otherwise
357         */
358        public boolean isPseudoStoichiometric() {
359                for (SubunitCluster c : orderedClusters) {
360                        if(c.isPseudoStoichiometric())
361                                return true;
362                }
363                return false;
364        }
365
366}