001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on July 1, 2010
021 * Author: Mark Chapman
022 */
023
024package org.biojava.nbio.alignment;
025
026import org.biojava.nbio.core.alignment.SimpleProfile;
027import org.biojava.nbio.alignment.template.GuideTreeNode;
028import org.biojava.nbio.alignment.template.PairwiseSequenceScorer;
029import org.biojava.nbio.core.alignment.template.Profile;
030import org.biojava.nbio.core.alignment.template.ProfilePair;
031import org.biojava.nbio.core.sequence.AccessionID;
032import org.biojava.nbio.core.sequence.template.Compound;
033import org.biojava.nbio.core.sequence.template.Sequence;
034import org.biojava.nbio.phylo.ForesterWrapper;
035import org.biojava.nbio.phylo.TreeConstructor;
036import org.biojava.nbio.phylo.TreeConstructorType;
037import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
038import org.forester.phylogeny.Phylogeny;
039import org.forester.phylogeny.PhylogenyNode;
040
041import javax.swing.tree.TreeNode;
042
043import java.util.*;
044import java.util.concurrent.Future;
045
046/**
047 * Implements a data structure for a guide tree used during progressive multiple sequence alignment.  Leaf
048 * {@link Node}s correspond to single {@link Sequence}s.  Internal {@link Node}s correspond to multiple sequence
049 * alignments.  The root {@link Node} corresponds to the full multiple sequence alignment.
050 *
051 * @author Mark Chapman
052 * @param <S> each {@link Sequence} in the tree is of type S
053 * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
054 */
055public class GuideTree<S extends Sequence<C>, C extends Compound> implements Iterable<GuideTreeNode<S, C>> {
056
057        private List<S> sequences;
058        private List<PairwiseSequenceScorer<S, C>> scorers;
059        private BasicSymmetricalDistanceMatrix distances;
060        private String newick;
061        private Node root;
062
063        /**
064         * Creates a guide tree for use during progressive multiple sequence alignment.
065         *
066         * @param sequences the {@link List} of {@link Sequence}s to align
067         * @param scorers list of sequence pair scorers, one for each pair of sequences given
068         */
069        public GuideTree(List<S> sequences, List<PairwiseSequenceScorer<S, C>> scorers) {
070                this.sequences = Collections.unmodifiableList(sequences);
071                this.scorers = Collections.unmodifiableList(scorers);
072                distances = new BasicSymmetricalDistanceMatrix(sequences.size());
073                for (int i = 0, n = 0; i < sequences.size(); i++) {
074                        AccessionID id = sequences.get(i).getAccession();
075                        String str = (id == null) ? Integer.toString(i + 1) : id.getID();
076                        distances.setIdentifier(i, str);
077                        for (int j = i+1; j < sequences.size(); j++) {
078                                double dist = scorers.get(n++).getDistance();
079                                distances.setValue(i, j, dist);
080                        }
081                }
082                BasicSymmetricalDistanceMatrix distclone = ForesterWrapper.cloneDM(distances);
083                Phylogeny phylogeny = TreeConstructor.distanceTree(distclone, TreeConstructorType.NJ);
084                newick = phylogeny.toString();
085                root = new Node(phylogeny.getRoot(), null);
086        }
087
088        /**
089         * Returns a sequence pair score for all {@link Sequence} pairs in the given {@link List}.
090         *
091         * @return list of sequence pair scores
092         */
093        public double[] getAllPairsScores() {
094                double[] scores = new double[scorers.size()];
095                int n = 0;
096                for (PairwiseSequenceScorer<S, C> scorer : scorers) {
097                        scores[n++] = scorer.getScore();
098                }
099                return scores;
100        }
101
102        /**
103         * Returns the distance matrix used to construct this guide tree.  The scores have been normalized.
104         *
105         * @return the distance matrix used to construct this guide tree
106         */
107        public double[][] getDistanceMatrix() {
108                double[][] matrix = new double[distances.getSize()][distances.getSize()];
109                for (int i = 0; i < matrix.length; i++) {
110                        for (int j = i+1; j < matrix.length; j++) {
111                                matrix[i][j] = matrix[j][i] = distances.getValue(i, j);
112                        }
113                }
114                return matrix;
115        }
116
117        /**
118         * Returns the root {@link Node} which corresponds to the full multiple sequence alignment.
119         *
120         * @return the root node
121         */
122        public Node getRoot() {
123                return root;
124        }
125
126        /**
127         * Returns the similarity matrix used to construct this guide tree.  The scores have not been normalized.
128         *
129         * @return the similarity matrix used to construct this guide tree
130         */
131        public double[][] getScoreMatrix() {
132                double[][] matrix = new double[sequences.size()][sequences.size()];
133                for (int i = 0, n = 0; i < matrix.length; i++) {
134                        matrix[i][i] = scorers.get(i).getMaxScore();
135                        for (int j = i+1; j < matrix.length; j++) {
136                                matrix[i][j] = matrix[j][i] = scorers.get(n++).getScore();
137                        }
138                }
139                return matrix;
140        }
141
142        /**
143         * Returns the {@link Sequence}s which make up the leaves of this tree.
144         *
145         * @return the sequences which make up the leaves of this tree
146         */
147        public List<S> getSequences() {
148                return sequences;
149        }
150
151        // method for Iterable
152
153        /**
154         * Returns a post-order {@link Iterator} that traverses the tree from leaves to root.
155         */
156        @Override
157        public Iterator<GuideTreeNode<S, C>> iterator() {
158                return new PostOrderIterator();
159        }
160
161        // method from Object
162
163        @Override
164        public String toString() {
165                return newick;
166        }
167
168        /**
169         * Implements a data structure for the node in a guide tree used during progressive multiple sequence alignment.
170         */
171        public class Node implements GuideTreeNode<S, C> {
172
173                private GuideTreeNode<S, C> parent, child1, child2;
174                private double distance;
175                private String name;
176                private boolean isLeaf, isVisited;
177                private Profile<S, C> profile;
178                private Future<ProfilePair<S, C>> profileFuture;
179
180                private Node(PhylogenyNode node, Node parent) {
181                        this.parent = parent;
182                        distance = node.getDistanceToParent();
183                        name = node.getName();
184                        if(isLeaf = node.isExternal()) {
185                                profile = new SimpleProfile<>(sequences.get(distances.getIndex(name)));
186                        } else {
187                                child1 = new Node(node.getChildNode1(), this);
188                                child2 = new Node(node.getChildNode2(), this);
189                        }
190                }
191
192                // methods for GuideTreeNode
193
194                @Override
195                public GuideTreeNode<S, C> getChild1() {
196                        return child1;
197                }
198
199                @Override
200                public GuideTreeNode<S, C> getChild2() {
201                        return child2;
202                }
203
204                @Override
205                public double getDistanceToParent() {
206                        return distance;
207                }
208
209                @Override
210                public String getName() {
211                        return name;
212                }
213
214                @Override
215                public Profile<S, C> getProfile() {
216                        return profile;
217                }
218
219                @Override
220                public Future<ProfilePair<S, C>> getProfileFuture() {
221                        return profileFuture;
222                }
223
224                @Override
225                public void setProfile(Profile<S, C> profile) {
226                        this.profile = profile;
227                        profileFuture = null;
228                }
229
230                @Override
231                public void setProfileFuture(Future<ProfilePair<S, C>> profileFuture) {
232                        this.profileFuture = profileFuture;
233                        profile = null;
234                }
235
236                // methods for TreeNode
237
238                @Override
239                public Enumeration<GuideTreeNode<S, C>> children() {
240                        Vector<GuideTreeNode<S, C>> children = new Vector<>();
241                        children.add(getChild1());
242                        children.add(getChild2());
243                        return children.elements();
244                }
245
246                @Override
247                public boolean getAllowsChildren() {
248                        return !isLeaf();
249                }
250
251                @Override
252                public GuideTreeNode<S, C> getChildAt(int childIndex) {
253                        if (childIndex == 1) {
254                                return getChild1();
255                        } else if (childIndex == 2) {
256                                return getChild2();
257                        }
258                        throw new IndexOutOfBoundsException();
259                }
260
261                @Override
262                public int getChildCount() {
263                        return 2;
264                }
265
266                @Override
267                public int getIndex(TreeNode child) {
268                        return getChildAt(1) == child ? 1 : (getChildAt(2) == child ? 2 : -1);
269                }
270
271                @Override
272                public GuideTreeNode<S, C> getParent() {
273                        return parent;
274                }
275
276                @Override
277                public boolean isLeaf() {
278                        return isLeaf;
279                }
280
281                // helper methods for iterator
282
283                private void clearVisited() {
284                        isVisited = false;
285                        if (!isLeaf()) {
286                                ((Node) getChild1()).clearVisited();
287                                ((Node) getChild2()).clearVisited();
288                        }
289                }
290
291                private boolean isVisited() {
292                        return isVisited;
293                }
294
295                private void visit() {
296                        isVisited = true;
297                }
298
299        }
300
301        // helper class that defines the default post-order (leaves to root) traversal
302        private class PostOrderIterator implements Iterator<GuideTreeNode<S, C>> {
303
304                private Stack<Node> nodes;
305
306                private PostOrderIterator() {
307                        getRoot().clearVisited();
308                        nodes = new Stack<>();
309                        nodes.push(getRoot());
310                }
311
312                // methods for Iterator
313
314                @Override
315                public boolean hasNext() {
316                        return !nodes.isEmpty();
317                }
318
319                @Override
320                public GuideTreeNode<S, C> next() {
321                        if(!hasNext()){
322                                throw new NoSuchElementException();
323                        }
324
325                        while (hasNext()) {
326                                Node next = nodes.peek(), child1 = (Node) next.getChild1(), child2 = (Node) next.getChild2();
327                                if (child1 != null && !child1.isVisited()) {
328                                        nodes.push(child1);
329                                } else if (child2 != null && !child2.isVisited()) {
330                                        nodes.push(child2);
331                                } else {
332                                        next.visit();
333                                        return nodes.pop();
334                                }
335                        }
336                        return null;
337                }
338
339                @Override
340                public void remove() {
341                        throw new UnsupportedOperationException();
342                }
343
344        }
345
346}