001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 2013-05-23
021 *
022 */
023package org.biojava.nbio.structure.symmetry.core;
024
025import org.biojava.nbio.structure.Structure;
026import org.biojava.nbio.structure.symmetry.utils.CombinationGenerator;
027import org.jgrapht.UndirectedGraph;
028import org.jgrapht.alg.ConnectivityInspector;
029import org.jgrapht.graph.DefaultEdge;
030import org.jgrapht.graph.UndirectedSubgraph;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034import javax.vecmath.Point3d;
035
036import java.math.BigInteger;
037import java.util.*;
038
039/**
040 * Detects global and local quaternary protein structure symmetry in a structure.
041 *
042 * The QuatSymmetryParameter settings affect the calculated results.
043 *
044 * @author Peter Rose
045 *
046 */
047public class QuatSymmetryDetector {
048
049        private static final Logger logger = LoggerFactory
050                        .getLogger(QuatSymmetryDetector.class);
051
052        private Structure structure = null;
053        private QuatSymmetryParameters parameters = null;
054
055        private List<QuatSymmetryResults> globalSymmetry = new ArrayList<QuatSymmetryResults>();
056        private List<List<QuatSymmetryResults>> localSymmetries = new ArrayList<List<QuatSymmetryResults>>();
057        private int proteinChainCount = 0;
058        private boolean complete = false;
059
060        public QuatSymmetryDetector(Structure structure, QuatSymmetryParameters parameters) {
061                this.structure = structure;
062                this.parameters = parameters;
063        }
064
065        /**
066         * Returns true if structure contains protein subunits. The other methods
067         * in this class will only return data if protein subunits are present.
068         * Always use this method first, before retrieving global or local symmetry
069         * results.
070         *
071         * @return true if protein subunits are present
072         */
073        public boolean hasProteinSubunits() {
074                run();
075                return proteinChainCount > 0;
076        }
077
078        /**
079         * Returns list of global quaternary structure symmetry results
080         *
081         * @return list of global quaternary structure symmetry results
082         */
083        public List<QuatSymmetryResults> getGlobalSymmetry() {
084                run();
085                return globalSymmetry;
086        }
087
088        /**
089         * Returns a list of lists of local quaternary structure symmetry results
090         *
091         * @return list of lists of local quaternary structure symmetry results
092         */
093        public List<List<QuatSymmetryResults>> getLocalSymmetries() {
094                run();
095                return localSymmetries;
096        }
097
098        private void run() {
099                if (complete) {
100                        return;
101                }
102                complete = true;
103                //Cluster chains by sequence
104                ClusterProteinChains clusterer = new ClusterProteinChains(structure, parameters);
105                proteinChainCount = clusterer.getProteinChainCount();
106
107                if (! hasProteinSubunits()) {
108                        return;
109                }
110
111                int nucleicAcidChainCount = clusterer.getNucleicAcidChainCount();
112
113                // sort seq. identity thresholds from smallest to largest. This reduces the total number of calculations necessary.
114                double[] thresholds = parameters.getSequenceIdentityThresholds().clone();
115                Arrays.sort(thresholds);
116
117                for (int index = 0; index < thresholds.length; index++) {
118                        // Map structure info to the sequence cluster
119                        ChainClusterer chainClusterer = new ChainClusterer(clusterer.getSequenceAlignmentClusters(thresholds[index]));
120
121                        // determine global symmetry
122                        Subunits globalSubunits = createGlobalSubunits(chainClusterer, nucleicAcidChainCount);
123                        QuatSymmetryResults gSymmetry = calcQuatSymmetry(globalSubunits);
124                        gSymmetry.setSequenceIdentityThreshold(thresholds[index]);
125                        globalSymmetry.add(gSymmetry);
126//                      SymmetryDeviation sd = new SymmetryDeviation(globalSubunits, gSymmetry.getRotationGroup());
127
128
129                        // determine local symmetry if global structure is
130                        // (1) asymmetric (C1)
131                        // (2) heteromeric (belongs to more than 1 sequence cluster)
132                        // (3) more than 2 chains (heteromers with just 2 chains cannot have local symmetry)
133
134                        // TODO example 2PT7: global C2, but local C6 symm., should that be included here ...?
135                        // i.e., include all heteromers here, for example if higher symmetry is possible by stoichiometry? A6B2 -> local A6  can have higher symmetry
136                        if (parameters.isLocalSymmetry() && globalSubunits.getSubunitCount() <= parameters.getMaximumLocalSubunits()) {
137                                if (gSymmetry.getSymmetry().equals("C1") && proteinChainCount > 2) {
138                                        List<QuatSymmetryResults> lSymmetry = new ArrayList<QuatSymmetryResults>();
139
140                                        long start = System.nanoTime();
141
142                                        for (Subunits subunits: createLocalSubunits(chainClusterer)) {
143                                                QuatSymmetryResults result = calcQuatSymmetry(subunits);
144                                                addToLocalSymmetry(result, lSymmetry);
145
146                                                double time = (System.nanoTime()- start)/1000000000;
147                                                if (time > parameters.getLocalTimeLimit()) {
148                                                        logger.warn("Exceeded time limit for local symmetry calculations: " + time +
149                                                                        " seconds. Quat symmetry results may be incomplete");
150                                                        break;
151                                                }
152                                        }
153                                        localSymmetries.add(lSymmetry);
154                                }
155                        }
156
157                        if (! gSymmetry.getSubunits().isPseudoStoichiometric()) {
158                                break;
159                        }
160                }
161
162
163                trimGlobalSymmetryResults();
164                trimLocalSymmetryResults();
165                setPseudoSymmetry();
166                setPreferredResults();
167        }
168
169
170        /**
171         * trims asymmetric global symmetry results that are C1 and have pseudoStoichiometry
172         */
173        private void trimGlobalSymmetryResults() {
174                for (Iterator<QuatSymmetryResults> iter = globalSymmetry.iterator(); iter.hasNext();) {
175                        QuatSymmetryResults result = iter.next();
176                        if (result.getSymmetry().equals("C1") && result.getSubunits().isPseudoStoichiometric()) {
177                                iter.remove();
178                        }
179                }
180        }
181
182        /**
183         * trims local symmetry results if any global symmetry is found. This only happens in special cases.
184         */
185        private void trimLocalSymmetryResults() {
186                boolean hasGlobalSymmetry = false;
187                for (QuatSymmetryResults result: globalSymmetry) {
188                        if (! result.getSymmetry().equals("C1")) {
189                                hasGlobalSymmetry = true;
190                                break;
191                        }
192                }
193
194                if (hasGlobalSymmetry) {
195                        localSymmetries.clear();
196                }
197        }
198
199        /**
200         * Sets pseudosymmetry flag for results that have pseudosymmetry
201         * @param thresholds sequence identity thresholds
202         */
203        private void setPseudoSymmetry() {
204                setPseudoSymmetry(globalSymmetry);
205                for (List<QuatSymmetryResults> localSymmetry: localSymmetries) {
206                        setPseudoSymmetry(localSymmetry);
207                }
208        }
209
210        /**
211         * Sets pseudosymmetry based on the analysis of all symmetry results
212         */
213        private void setPseudoSymmetry(List<QuatSymmetryResults> results) {
214                // find maximum symmetry order for cases of non-pseudostoichiometry
215                int maxOrder = 0;
216                for (QuatSymmetryResults result: results) {
217                        if (result.getRotationGroup() != null && !result.getSubunits().isPseudoStoichiometric()) {
218                                if (result.getRotationGroup().getOrder() > maxOrder) {
219                                        maxOrder = result.getRotationGroup().getOrder();
220                                }
221                        }
222                }
223
224                // if the order of symmetry for a pseudstoichiometric case is higher
225                // than the order for a non-pseudostoichiometry, set the pseudoSymmetry flag to true (i.e. 4HHB)
226                for (QuatSymmetryResults result: results) {
227                        if (result.getRotationGroup() != null) {
228                                if (result.getRotationGroup().getOrder() > maxOrder) {
229                                        result.getSubunits().setPseudoSymmetric(true);
230                                }
231                        }
232                }
233        }
234
235        /**
236         * Sets preferred results flag for symmetry result that should be shown by default in visualization programs
237         * @param thresholds sequence identity thresholds
238         */
239        private void setPreferredResults() {
240                int[] score = new int[globalSymmetry.size()];
241
242                // check global symmetry
243                for (int i = 0; i < globalSymmetry.size(); i++) {
244                        QuatSymmetryResults result = globalSymmetry.get(i);
245                        if (! result.getSymmetry().equals("C1")) {
246                                score[i] += 2;
247                        }
248                        if (! result.getSubunits().isPseudoStoichiometric()) {
249                                score[i]++;
250                        }
251                }
252
253                int bestGlobal = 0;
254                int bestScore = 0;
255                for (int i = 0; i < score.length; i++) {
256                        if (score[i] > bestScore) {
257                                bestScore = score[i];
258                                bestGlobal = i;
259                        }
260                }
261                if (bestScore >= 2) {
262                        QuatSymmetryResults g = globalSymmetry.get(bestGlobal);
263                        g.setPreferredResult(true);
264                        return;
265                }
266
267                // check local symmetry
268                score = new int[localSymmetries.size()];
269
270                for (int i = 0; i < localSymmetries.size(); i++) {
271                        List<QuatSymmetryResults> results = localSymmetries.get(i);
272                        for (QuatSymmetryResults result: results) {
273                                if (! result.getSymmetry().equals("C1")) {
274                                        score[i] += 2;
275                                }
276                                if (! result.getSubunits().isPseudoStoichiometric()) {
277                                        score[i]++;
278                                }
279                        }
280                }
281
282                int bestLocal = 0;
283                bestScore = 0;
284                for (int i = 0; i < score.length; i++) {
285                        if (score[i] > bestScore) {
286                                bestScore = score[i];
287                                bestLocal = i;
288                        }
289                }
290                if (bestScore > 0) {
291                        List<QuatSymmetryResults> results = localSymmetries.get(bestLocal);
292                        for (QuatSymmetryResults result: results) {
293                                result.setPreferredResult(true);
294                        }
295                } else {
296                        QuatSymmetryResults g = globalSymmetry.get(bestGlobal);
297                        g.setPreferredResult(true);
298                }
299        }
300
301        private void addToLocalSymmetry(QuatSymmetryResults testResults, List<QuatSymmetryResults> localSymmetry) {
302                if (testResults.getSymmetry().equals("C1")) {
303                        return;
304                }
305
306                for (QuatSymmetryResults results: localSymmetry) {
307                        if (results.getSubunits().overlaps(testResults.getSubunits())) {
308                                return;
309                        }
310                }
311                testResults.setLocal(true);
312                localSymmetry.add(testResults);
313        }
314
315        private Subunits createGlobalSubunits(ChainClusterer chainClusterer, int nucleicAcidChainCount) {
316                Subunits subunits = new Subunits(chainClusterer.getCalphaCoordinates(),
317                                chainClusterer.getSequenceClusterIds(),
318                                chainClusterer.getPseudoStoichiometry(),
319                                chainClusterer.getMinSequenceIdentity(),
320                                chainClusterer.getMaxSequenceIdentity(),
321                                chainClusterer.getFolds(),
322                                chainClusterer.getChainIds(),
323                                chainClusterer.getModelNumbers());
324                subunits.setNucleicAcidChainCount(nucleicAcidChainCount);
325                return subunits;
326        }
327
328        private List<Subunits> createLocalSubunits(ChainClusterer chainClusterer) {
329                List<Subunits> subunits = new ArrayList<Subunits>();
330                List<List<Integer>> subClusters = decomposeClusters(chainClusterer.getCalphaCoordinates(), chainClusterer.getSequenceClusterIds());
331                for (List<Integer> subCluster: subClusters) {
332                        subunits.add(createLocalSubunit(subCluster, chainClusterer));
333                }
334                return subunits;
335        }
336
337        private Subunits createLocalSubunit(List<Integer> subCluster, ChainClusterer chainClusterer) {
338              List<Point3d[]> subCalphaCoordinates = new ArrayList<Point3d[]>(subCluster.size());
339              List<Integer> subSequenceIds = new ArrayList<Integer>(subCluster.size());
340              List<Boolean> subPseudoStoichiometry = new ArrayList<Boolean>(subCluster.size());
341              List<Double> subMinSequenceIdentity = new ArrayList<Double>(subCluster.size());
342              List<Double> subMaxSequenceIdentity = new ArrayList<Double>(subCluster.size());
343              List<String> subChainIds = new ArrayList<String>(subCluster.size());
344              List<Integer> subModelNumbers = new ArrayList<Integer>(subCluster.size());
345
346              for (int index: subCluster) {
347                  subCalphaCoordinates.add(chainClusterer.getCalphaCoordinates().get(index));
348                  subSequenceIds.add(chainClusterer.getSequenceClusterIds().get(index));
349                  subPseudoStoichiometry.add(chainClusterer.getPseudoStoichiometry().get(index));
350                  subMinSequenceIdentity.add(chainClusterer.getMinSequenceIdentity().get(index));
351                  subMaxSequenceIdentity.add(chainClusterer.getMaxSequenceIdentity().get(index));
352                  subChainIds.add(chainClusterer.getChainIds().get(index));
353                  subModelNumbers.add(chainClusterer.getModelNumbers().get(index));
354              }
355
356              standardizeSequenceIds(subSequenceIds);
357
358              Integer[] array = subSequenceIds.toArray(new Integer[subSequenceIds.size()]);
359              List<Integer> subFolds = getFolds(array, subSequenceIds.size());
360              Subunits subunits = new Subunits(subCalphaCoordinates,
361                                        subSequenceIds,
362                                        subPseudoStoichiometry,
363                                        subMinSequenceIdentity,
364                                        subMaxSequenceIdentity,
365                                subFolds,
366                                        subChainIds,
367                                        subModelNumbers);
368                        return subunits;
369        }
370
371        /**
372         * Resets list of arbitrary sequence ids into integer order: 0, 1, ...
373         * @param subSequenceIds
374         */
375        private static void standardizeSequenceIds(List<Integer> subSequenceIds) {
376                int count = 0;
377              int current = subSequenceIds.get(0);
378              for (int i = 0; i < subSequenceIds.size(); i++) {
379                  if (subSequenceIds.get(i) > current) {
380                          current = subSequenceIds.get(i);
381                          count++;
382                  }
383                  subSequenceIds.set(i, count);
384              }
385        }
386
387        private List<List<Integer>> decomposeClusters(List<Point3d[]> caCoords, List<Integer> clusterIds) {
388                List<List<Integer>> subClusters = new ArrayList<List<Integer>>();
389
390                int last = getLastMultiSubunit(clusterIds);
391                List<Point3d[]> subList = caCoords;
392                if (last < caCoords.size()) {
393                        subList = caCoords.subList(0, last);
394                } else {
395                        last = caCoords.size();
396                }
397
398                SubunitGraph subunitGraph = new SubunitGraph(subList);
399                UndirectedGraph<Integer, DefaultEdge> graph = subunitGraph.getProteinGraph();
400                logger.debug("Graph: {}", graph.toString());
401
402                for (int i = last; i > 1; i--) {
403                        CombinationGenerator generator = new CombinationGenerator(last, i);
404                        int[] indices = null;
405                        Integer[] subCluster = new Integer[i];
406
407                        // avoid combinatorial explosion, i.e. for 1FNT
408                        BigInteger maxCombinations = BigInteger.valueOf(parameters.getMaximumLocalCombinations());
409                        logger.debug("Number of combinations: {}", generator.getTotal());
410                    if (generator.getTotal().compareTo(maxCombinations) > 0) {
411                        logger.warn("Number of combinations exceeds limit for biounit with {} subunits in groups of {} subunits. Will not check local symmetry for them", last, i);
412                        continue;
413                    }
414
415                        while (generator.hasNext()) {
416                                indices = generator.getNext();
417
418
419                                // only consider sub clusters that can have rotational symmetry based on the number of subunits
420                                // TODO this however may exclude a few cases of helical symmetry. Checking all combinations for
421                                // helical symmetry would be prohibitively slow.
422                                for (int j = 0; j < indices.length; j++) {
423                                        subCluster[j] = clusterIds.get(indices[j]);
424                                }
425                                List<Integer> folds = getFolds(subCluster, last);
426
427                                if (folds.size() < 2) {
428                                        continue;
429                                }
430
431                                List<Integer> subSet = new ArrayList<Integer>(indices.length);
432                                for (int index: indices) {
433                                        subSet.add(index);
434                                }
435
436                                // check if this subset of subunits interact with each other
437                                UndirectedGraph<Integer, DefaultEdge> subGraph = new UndirectedSubgraph<Integer, DefaultEdge>(graph, new HashSet<Integer>(subSet), null);
438                                if (isConnectedGraph(subGraph)) {
439                                        subClusters.add(subSet);
440                                        if (subClusters.size() > parameters.getMaximumLocalResults()) {
441                                                return subClusters;
442                                        }
443                                }
444                        }
445                }
446
447                return subClusters;
448        }
449
450        private static int getLastMultiSubunit(List<Integer> clusterIds) {
451                for (int i = 0, n = clusterIds.size(); i < n; i++) {
452                        if (i < n-2) {
453                                if (clusterIds.get(i)!=clusterIds.get(i+1) &&
454                                                clusterIds.get(i+1) != clusterIds.get(i+2)) {
455                                        return i+1;
456                                }
457                        }
458                        if (i == n-2) {
459                                if (clusterIds.get(i)!=clusterIds.get(i+1)) {
460                                        return i+1;
461                                }
462                        }
463                }
464                return clusterIds.size();
465        }
466
467        private static boolean isConnectedGraph(UndirectedGraph<Integer, DefaultEdge> graph) {
468                ConnectivityInspector<Integer, DefaultEdge> inspector = new ConnectivityInspector<Integer, DefaultEdge>(graph);
469                return inspector.isGraphConnected();
470        }
471
472        private static List<Integer> getFolds(Integer[] subCluster, int size) {
473                List<Integer> denominators = new ArrayList<Integer>();
474                int[] counts = new int[size];
475                for (int element: subCluster) {
476                        counts[element]++;
477                }
478
479                for (int d = 1; d <= subCluster.length; d++) {
480                        int count = 0;
481                        for (int i = 0; i < size; i++) {
482                                if (counts[i] > 0 && (counts[i] % d == 0)) {
483                                        count += counts[i];
484                                }
485                        }
486                        if (count == subCluster.length) {
487                                denominators.add(d);
488                        }
489                }
490
491                Collections.sort(denominators);
492                return denominators;
493        }
494
495        private QuatSymmetryResults calcQuatSymmetry(Subunits subunits){
496                return calcQuatSymmetry(subunits, parameters);
497        }
498
499        public static QuatSymmetryResults calcQuatSymmetry(Subunits subunits,
500                        QuatSymmetryParameters parameters) {
501                if (subunits.getSubunitCount() == 0) {
502                        return null;
503                }
504
505                RotationGroup rotationGroup = null;
506                String method = null;
507                if (subunits.getFolds().size() == 1) {
508                        // no symmetry possible, create empty ("C1") rotation group
509                        method = "norotation";
510                        rotationGroup =  new RotationGroup();
511                        rotationGroup.setC1(subunits.getSubunitCount());
512                } else if (subunits.getSubunitCount() == 2 && subunits.getFolds().contains(2)) {
513                        method = "C2rotation";
514                        QuatSymmetrySolver solver = new C2RotationSolver(subunits, parameters);
515                        rotationGroup = solver.getSymmetryOperations();
516                } else {
517                        method = "rotation";
518                        QuatSymmetrySolver solver = new RotationSolver(subunits, parameters);
519                        rotationGroup = solver.getSymmetryOperations();
520                }
521
522                QuatSymmetryResults results = new QuatSymmetryResults(subunits, rotationGroup, method);
523
524                // asymmetric structures cannot be pseudosymmetric
525                String symmetry = results.getSymmetry();
526                if (symmetry.equals("C1")) {
527                        subunits.setPseudoSymmetric(false);
528                }
529
530                // Check structures with Cn symmetry (n = 1, ...) for helical symmetry
531                if (symmetry.startsWith("C")) {
532                        HelixSolver hc = new HelixSolver(subunits, rotationGroup.getOrder(), parameters);
533                        HelixLayers helixLayers = hc.getSymmetryOperations();
534
535                        if (helixLayers.size() > 0) {
536                                // if the complex has no symmetry (c1) or has Cn (n>=2) symmetry and the
537                                // helical symmetry has a lower RMSD than the cyclic symmetry, set helical symmetry
538                                // If the RMSD for helical and cyclic symmetry is similar, a slight preference is
539                                // given to the helical symmetry by the helixRmsdThreshold parameter.
540                                double cRmsd = rotationGroup.getScores().getRmsd();
541                                double hRmsd = helixLayers.getScores().getRmsd();
542//                              System.out.println("cRMSD: " + cRmsd + " hRMSD: " + hRmsd);
543                                double deltaRmsd = hRmsd - cRmsd;
544                                if (symmetry.equals("C1") ||
545                                                (!symmetry.equals("C1") && deltaRmsd <= parameters.getHelixRmsdThreshold())) {
546                                        method = "rottranslation";
547                                        results = new QuatSymmetryResults(subunits, helixLayers, method);
548                                }
549                        }
550                }
551
552                return results;
553        }
554}