001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.symmetry.core;
022
023import java.util.*;
024
025/**
026 * Merges clusters based on their sequence identity. This class does the actual
027 * agglomerative clustering calculation, while {@link SequenceAlignmentCluster}
028 * stores the results.
029 */
030public class ClusterMerger {
031        private List<SequenceAlignmentCluster> clusters = null;
032        private QuatSymmetryParameters parameters = null;
033
034        List<PairwiseAlignment> pairwiseAlignments = Collections.emptyList();
035
036        public ClusterMerger(List<SequenceAlignmentCluster> clusters, QuatSymmetryParameters parameters) {
037                this.clusters = clusters;
038                this.parameters = parameters;
039        }
040
041        /**
042         * Aligns all pairs of input clusters, calculating their pairwise alignments
043         */
044        public void calcPairwiseAlignments() {
045                pairwiseAlignments = new ArrayList<PairwiseAlignment>();
046
047                boolean[] merged = new boolean[clusters.size()];
048                Arrays.fill(merged, false);
049
050                for (int i = 0, n = clusters.size(); i < n-1; i++) {
051                        if (! merged[i]) {
052                                SequenceAlignmentCluster c1 = clusters.get(i);
053                                for (int j = i + 1; j < n; j++) {
054                                        SequenceAlignmentCluster c2 = clusters.get(j);
055                                        PairwiseAlignment alignment = c1.getPairwiseAlignment(c2);
056                                        if (alignment != null &&
057                                                        alignment.getAlignmentLengthFraction() >= parameters.getAlignmentFractionThreshold() &&
058                                                        alignment.getRmsd() <= parameters.getRmsdThreshold()) {
059                                                merged[j] = true;
060                                                pairwiseAlignments.add(alignment);
061                                                if (parameters.isVerbose()) {
062                                                        System.out.println("ClusterMerger: pairwise cluster alignment: " + i + "-" + j + " seq. identity: " + alignment.getSequenceIdentity());
063                                                        System.out.println(alignment);
064                                                }
065                                        }
066                                }
067                        }
068                }
069        }
070
071        /**
072         * Combine clusters based on the given sequence identity
073         * @param sequenceIdentityCutoff
074         * @return
075         */
076        public List<SequenceAlignmentCluster> getMergedClusters(double sequenceIdentityCutoff) {
077                List<SequenceAlignmentCluster> mergedClusters = new ArrayList<SequenceAlignmentCluster>();
078                Map<SequenceAlignmentCluster, Integer> map = getClusterMap();
079
080                boolean[] processed = new boolean[clusters.size()];
081                Arrays.fill(processed, false);
082
083                for (int i = 0, n = clusters.size(); i < n; i++) {
084                        SequenceAlignmentCluster cluster = clusters.get(i);
085                        SequenceAlignmentCluster clone = null;
086                        if (! processed[i]) {
087                                clone = (SequenceAlignmentCluster) cluster.clone();
088                                mergedClusters.add(clone);
089                                processed[i] = true;
090                        }
091
092                        for (PairwiseAlignment alignment: pairwiseAlignments) {
093                                if (alignment.getCluster1() == cluster && alignment.getSequenceIdentity() >= sequenceIdentityCutoff) {
094                                        clone.setMinSequenceIdentity(Math.min(clone.getMinSequenceIdentity(), alignment.getSequenceIdentity()));
095                                        clone.setMaxSequenceIdentity(Math.max(clone.getMaxSequenceIdentity(), alignment.getSequenceIdentity()));
096                                        combineClusters(clone, alignment);
097                                        int index = map.get(alignment.getCluster2());
098                                        processed[index] = true;
099                                }
100                        }
101                }
102
103                ProteinSequenceClusterer.sortSequenceClustersBySize(mergedClusters);
104                return mergedClusters;
105        }
106
107
108        private Map<SequenceAlignmentCluster, Integer> getClusterMap() {
109                 Map<SequenceAlignmentCluster, Integer> map = new HashMap<SequenceAlignmentCluster, Integer>();
110                 for (int i = 0, n = clusters.size(); i < n; i++) {
111                         map.put(clusters.get(i), i);
112                 }
113                 return map;
114        }
115
116        private void combineClusters(SequenceAlignmentCluster c1, PairwiseAlignment alignment) {
117                SequenceAlignmentCluster c2 = (SequenceAlignmentCluster) alignment.getCluster2().clone();
118                int[][][] align = alignment.getAlignment();
119
120                // add alignment for reference sequence
121                UniqueSequenceList u =c2.getUniqueSequenceList().get(0);
122                // set new sequence alignment
123                List<Integer> align1 = new ArrayList<Integer>(align[0][0].length);
124                for (Integer a1: align[0][0]) {
125                        align1.add(a1);
126                }
127                u.setAlignment1(align1);
128
129                List<Integer> align2 = new ArrayList<Integer>(align[0][1].length);
130                for (Integer a2: align[0][1]) {
131                        align2.add(a2);
132                }
133                u.setAlignment2(align2);
134                c1.addUniqueSequenceList(u);
135
136                // note, i starts at 1 (ONE), since i = 0 corresponds to reference sequence,
137                // which has already been processed above
138                for (int i = 1; i < c2.getUniqueSequenceList().size(); i++) {
139                        u =c2.getUniqueSequenceList().get(i);
140                        List<Integer> oldAlign1 = u.getAlignment1();
141                        List<Integer> oldAlign2 = u.getAlignment2();
142                        List<Integer> newAlign1 = new ArrayList<Integer>();
143                        List<Integer> newAlign2 = new ArrayList<Integer>();
144                        for (int j = 0; j < align2.size(); j++) {
145                                Integer element = align2.get(j);
146                                Integer index = oldAlign1.indexOf(element);
147                                // map alignment to first reference alignment
148                                if (index != null && index >= 0) {
149                                        newAlign1.add(align1.get(j));
150                                        newAlign2.add(oldAlign2.get(index));
151                                }
152                        }
153                        u.setAlignment1(newAlign1);
154                        u.setAlignment2(newAlign2);
155                        c1.addUniqueSequenceList(u);
156                }
157        }
158}