001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on July 1, 2010 021 * Author: Mark Chapman 022 */ 023 024package org.biojava.nbio.alignment; 025 026import org.biojava.nbio.core.alignment.SimpleProfile; 027import org.biojava.nbio.alignment.template.GuideTreeNode; 028import org.biojava.nbio.alignment.template.PairwiseSequenceScorer; 029import org.biojava.nbio.core.alignment.template.Profile; 030import org.biojava.nbio.core.alignment.template.ProfilePair; 031import org.biojava.nbio.core.sequence.AccessionID; 032import org.biojava.nbio.core.sequence.template.Compound; 033import org.biojava.nbio.core.sequence.template.Sequence; 034import org.biojava.nbio.phylo.ForesterWrapper; 035import org.biojava.nbio.phylo.TreeConstructor; 036import org.biojava.nbio.phylo.TreeConstructorType; 037import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; 038import org.forester.phylogeny.Phylogeny; 039import org.forester.phylogeny.PhylogenyNode; 040 041import javax.swing.tree.TreeNode; 042 043import java.util.*; 044import java.util.concurrent.Future; 045 046/** 047 * Implements a data structure for a guide tree used during progressive multiple sequence alignment. Leaf 048 * {@link Node}s correspond to single {@link Sequence}s. Internal {@link Node}s correspond to multiple sequence 049 * alignments. The root {@link Node} corresponds to the full multiple sequence alignment. 050 * 051 * @author Mark Chapman 052 * @param <S> each {@link Sequence} in the tree is of type S 053 * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C 054 */ 055public class GuideTree<S extends Sequence<C>, C extends Compound> implements Iterable<GuideTreeNode<S, C>> { 056 057 private List<S> sequences; 058 private List<PairwiseSequenceScorer<S, C>> scorers; 059 private BasicSymmetricalDistanceMatrix distances; 060 private String newick; 061 private Node root; 062 063 /** 064 * Creates a guide tree for use during progressive multiple sequence alignment. 065 * 066 * @param sequences the {@link List} of {@link Sequence}s to align 067 * @param scorers list of sequence pair scorers, one for each pair of sequences given 068 */ 069 public GuideTree(List<S> sequences, List<PairwiseSequenceScorer<S, C>> scorers) { 070 this.sequences = Collections.unmodifiableList(sequences); 071 this.scorers = Collections.unmodifiableList(scorers); 072 distances = new BasicSymmetricalDistanceMatrix(sequences.size()); 073 for (int i = 0, n = 0; i < sequences.size(); i++) { 074 AccessionID id = sequences.get(i).getAccession(); 075 String str = (id == null) ? Integer.toString(i + 1) : id.getID(); 076 distances.setIdentifier(i, str); 077 for (int j = i+1; j < sequences.size(); j++) { 078 double dist = scorers.get(n++).getDistance(); 079 distances.setValue(i, j, dist); 080 } 081 } 082 BasicSymmetricalDistanceMatrix distclone = ForesterWrapper.cloneDM(distances); 083 Phylogeny phylogeny = TreeConstructor.distanceTree(distclone, TreeConstructorType.NJ); 084 newick = phylogeny.toString(); 085 root = new Node(phylogeny.getRoot(), null); 086 } 087 088 /** 089 * Returns a sequence pair score for all {@link Sequence} pairs in the given {@link List}. 090 * 091 * @return list of sequence pair scores 092 */ 093 public double[] getAllPairsScores() { 094 double[] scores = new double[scorers.size()]; 095 int n = 0; 096 for (PairwiseSequenceScorer<S, C> scorer : scorers) { 097 scores[n++] = scorer.getScore(); 098 } 099 return scores; 100 } 101 102 /** 103 * Returns the distance matrix used to construct this guide tree. The scores have been normalized. 104 * 105 * @return the distance matrix used to construct this guide tree 106 */ 107 public double[][] getDistanceMatrix() { 108 double[][] matrix = new double[distances.getSize()][distances.getSize()]; 109 for (int i = 0; i < matrix.length; i++) { 110 for (int j = i+1; j < matrix.length; j++) { 111 matrix[i][j] = matrix[j][i] = distances.getValue(i, j); 112 } 113 } 114 return matrix; 115 } 116 117 /** 118 * Returns the root {@link Node} which corresponds to the full multiple sequence alignment. 119 * 120 * @return the root node 121 */ 122 public Node getRoot() { 123 return root; 124 } 125 126 /** 127 * Returns the similarity matrix used to construct this guide tree. The scores have not been normalized. 128 * 129 * @return the similarity matrix used to construct this guide tree 130 */ 131 public double[][] getScoreMatrix() { 132 double[][] matrix = new double[sequences.size()][sequences.size()]; 133 for (int i = 0, n = 0; i < matrix.length; i++) { 134 matrix[i][i] = scorers.get(i).getMaxScore(); 135 for (int j = i+1; j < matrix.length; j++) { 136 matrix[i][j] = matrix[j][i] = scorers.get(n++).getScore(); 137 } 138 } 139 return matrix; 140 } 141 142 /** 143 * Returns the {@link Sequence}s which make up the leaves of this tree. 144 * 145 * @return the sequences which make up the leaves of this tree 146 */ 147 public List<S> getSequences() { 148 return sequences; 149 } 150 151 // method for Iterable 152 153 /** 154 * Returns a post-order {@link Iterator} that traverses the tree from leaves to root. 155 */ 156 @Override 157 public Iterator<GuideTreeNode<S, C>> iterator() { 158 return new PostOrderIterator(); 159 } 160 161 // method from Object 162 163 @Override 164 public String toString() { 165 return newick; 166 } 167 168 /** 169 * Implements a data structure for the node in a guide tree used during progressive multiple sequence alignment. 170 */ 171 public class Node implements GuideTreeNode<S, C> { 172 173 private GuideTreeNode<S, C> parent, child1, child2; 174 private double distance; 175 private String name; 176 private boolean isLeaf, isVisited; 177 private Profile<S, C> profile; 178 private Future<ProfilePair<S, C>> profileFuture; 179 180 private Node(PhylogenyNode node, Node parent) { 181 this.parent = parent; 182 distance = node.getDistanceToParent(); 183 name = node.getName(); 184 if(isLeaf = node.isExternal()) { 185 profile = new SimpleProfile<S, C>(sequences.get(distances.getIndex(name))); 186 } else { 187 child1 = new Node(node.getChildNode1(), this); 188 child2 = new Node(node.getChildNode2(), this); 189 } 190 } 191 192 // methods for GuideTreeNode 193 194 @Override 195 public GuideTreeNode<S, C> getChild1() { 196 return child1; 197 } 198 199 @Override 200 public GuideTreeNode<S, C> getChild2() { 201 return child2; 202 } 203 204 @Override 205 public double getDistanceToParent() { 206 return distance; 207 } 208 209 @Override 210 public String getName() { 211 return name; 212 } 213 214 @Override 215 public Profile<S, C> getProfile() { 216 return profile; 217 } 218 219 @Override 220 public Future<ProfilePair<S, C>> getProfileFuture() { 221 return profileFuture; 222 } 223 224 @Override 225 public void setProfile(Profile<S, C> profile) { 226 this.profile = profile; 227 profileFuture = null; 228 } 229 230 @Override 231 public void setProfileFuture(Future<ProfilePair<S, C>> profileFuture) { 232 this.profileFuture = profileFuture; 233 profile = null; 234 } 235 236 // methods for TreeNode 237 238 @Override 239 public Enumeration<GuideTreeNode<S, C>> children() { 240 Vector<GuideTreeNode<S, C>> children = new Vector<GuideTreeNode<S, C>>(); 241 children.add(getChild1()); 242 children.add(getChild2()); 243 return children.elements(); 244 } 245 246 @Override 247 public boolean getAllowsChildren() { 248 return !isLeaf(); 249 } 250 251 @Override 252 public GuideTreeNode<S, C> getChildAt(int childIndex) { 253 if (childIndex == 1) { 254 return getChild1(); 255 } else if (childIndex == 2) { 256 return getChild2(); 257 } 258 throw new IndexOutOfBoundsException(); 259 } 260 261 @Override 262 public int getChildCount() { 263 return 2; 264 } 265 266 @Override 267 public int getIndex(TreeNode child) { 268 return getChildAt(1) == child ? 1 : (getChildAt(2) == child ? 2 : -1); 269 } 270 271 @Override 272 public GuideTreeNode<S, C> getParent() { 273 return parent; 274 } 275 276 @Override 277 public boolean isLeaf() { 278 return isLeaf; 279 } 280 281 // helper methods for iterator 282 283 private void clearVisited() { 284 isVisited = false; 285 if (!isLeaf()) { 286 ((Node) getChild1()).clearVisited(); 287 ((Node) getChild2()).clearVisited(); 288 } 289 } 290 291 private boolean isVisited() { 292 return isVisited; 293 } 294 295 private void visit() { 296 isVisited = true; 297 } 298 299 } 300 301 // helper class that defines the default post-order (leaves to root) traversal 302 private class PostOrderIterator implements Iterator<GuideTreeNode<S, C>> { 303 304 private Stack<Node> nodes; 305 306 private PostOrderIterator() { 307 getRoot().clearVisited(); 308 nodes = new Stack<Node>(); 309 nodes.push(getRoot()); 310 } 311 312 // methods for Iterator 313 314 @Override 315 public boolean hasNext() { 316 return !nodes.isEmpty(); 317 } 318 319 @Override 320 public GuideTreeNode<S, C> next() { 321 if(!hasNext()){ 322 throw new NoSuchElementException(); 323 } 324 325 while (hasNext()) { 326 Node next = nodes.peek(), child1 = (Node) next.getChild1(), child2 = (Node) next.getChild2(); 327 if (child1 != null && !child1.isVisited()) { 328 nodes.push(child1); 329 } else if (child2 != null && !child2.isVisited()) { 330 nodes.push(child2); 331 } else { 332 next.visit(); 333 return nodes.pop(); 334 } 335 } 336 return null; 337 } 338 339 @Override 340 public void remove() { 341 throw new UnsupportedOperationException(); 342 } 343 344 } 345 346}