001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojavax.bio.phylo.io.nexus; 022 023import java.io.IOException; 024import java.io.Writer; 025import java.util.ArrayList; 026import java.util.Iterator; 027import java.util.LinkedHashMap; 028import java.util.List; 029import java.util.Map; 030import java.util.Set; 031import java.util.UUID; 032import java.util.Vector; 033 034import org.jgrapht.WeightedGraph; 035import org.jgrapht.UndirectedGraph; 036import org.jgrapht.graph.DefaultEdge; 037import org.jgrapht.graph.DefaultWeightedEdge; 038import org.jgrapht.graph.SimpleWeightedGraph; 039 040import org.biojava.bio.seq.io.ParseException; 041 042 043/** 044 * Represents Nexus trees blocks. 045 * 046 * @author Richard Holland 047 * @author Tobias Thierer 048 * @author Jim Balhoff 049 * @author Tiago Antao 050 * @since 1.6 051 */ 052public class TreesBlock extends NexusBlock.Abstract { 053 054 /** 055 * A constant representing the name of Trees blocks. 056 */ 057 public static final String TREES_BLOCK = "TREES"; 058 059 private Map translations = new LinkedHashMap(); 060 061 private List comments = new ArrayList(); 062 063 private Map trees = new LinkedHashMap(); 064 065 private WeightedGraph<String, DefaultWeightedEdge> weighted = new SimpleWeightedGraph<String, DefaultWeightedEdge>(DefaultWeightedEdge.class); 066 067 private String topNode = null; 068 069 private String nodePrefix = "p"; 070 071 private int pValue = 0; 072 private Vector<String> uuids; 073 074 /** 075 * A simple representation of a Newick tree as a single string. 076 */ 077 public static class NewickTreeString { 078 private String rootType; 079 080 private String treeString; 081 082 private boolean starred; 083 084 /** 085 * Make the tree (un)rooted. 086 * 087 * @param rootType 088 * 'U' for unrooted, 'R' for rooted, <tt>null</tt> for 089 * unsure. 090 */ 091 public void setRootType(final String rootType) { 092 this.rootType = rootType; 093 } 094 095 /** 096 * Set the Newick string describing the tree. 097 */ 098 public void setTreeString(final String treeString) { 099 this.treeString = treeString; 100 } 101 102 /** 103 * Sets whether this tree has a star before it's name. 104 * 105 * @param starred 106 * <tt>true</tt> if it has one. 107 */ 108 public void setStarred(boolean starred) { 109 this.starred = starred; 110 } 111 112 /** 113 * Tests whether this tree has a star before it's name. 114 * 115 * @return starred <tt>true</tt> if it has one. 116 */ 117 public boolean isStarred() { 118 return this.starred; 119 } 120 121 /** 122 * See if the tree is rooted. 123 * 124 * @return 'U' for unrooted, 'R' for rooted, <tt>null</tt> for unsure. 125 */ 126 public String getRootType() { 127 return this.rootType; 128 } 129 130 /** 131 * Get the Newick string describing the tree. 132 * 133 * @return the tree string. 134 */ 135 public String getTreeString() { 136 return this.treeString; 137 } 138 } 139 140 /** 141 * Delegates to NexusBlock.Abstract constructor using TreesBlock.TREES_BLOCK 142 * as the name. 143 */ 144 public TreesBlock() { 145 super(TreesBlock.TREES_BLOCK); 146 } 147 148 /** 149 * Add a translation. 150 * 151 * @param label 152 * the label to add. 153 * @param taxa 154 * the taxa name this label will represent. 155 */ 156 public void addTranslation(final String label, final String taxa) { 157 this.translations.put(label, taxa); 158 } 159 160 /** 161 * Removes the given translation. 162 * 163 * @param label 164 * the label to remove. 165 */ 166 public void removeTranslation(final String label) { 167 this.translations.remove(label); 168 } 169 170 /** 171 * Checks to see if we contain the given translation. 172 * 173 * @param label 174 * the label to check for. 175 * @return <tt>true</tt> if we already contain it. 176 */ 177 public boolean containsTranslation(final String label) { 178 return this.translations.containsKey(label); 179 } 180 181 /** 182 * Get the translations added so far. 183 * 184 * @return the translations added so far. 185 */ 186 public Map getTranslations() { 187 return this.translations; 188 } 189 190 /** 191 * Adds a tree. 192 * 193 * @param label 194 * the label to give the tree. 195 * @param tree 196 * the tree to add. 197 */ 198 public void addTree(final String label, final NewickTreeString tree) { 199 this.trees.put(label, tree); 200 } 201 202 /** 203 * Removes a tree. 204 * 205 * @param label 206 * the label to remove. 207 */ 208 public void removeTree(final String label) { 209 this.trees.remove(label); 210 } 211 212 /** 213 * Checks to see if we contain the given tree. 214 * 215 * @param label 216 * the label to check for. 217 * @return <tt>true</tt> if we already contain it. 218 */ 219 public boolean containsTree(final String label) { 220 return this.trees.containsKey(label); 221 } 222 223 /** 224 * Returns all trees. 225 * 226 * @return all the selected trees. 227 */ 228 public Map getTrees() { 229 return this.trees; 230 } 231 232 /** 233 * Returns a tree for given label 234 * @param label 235 * the label to select. 236 * 237 * @return selected tree. 238 */ 239 public Object getTree(final String label) { 240 return this.trees.get(label); 241 } 242 243 public String getTreeText(UndirectedGraph<String, DefaultEdge> treegraph, String node, String parent){ 244 Set<DefaultEdge> edges = treegraph.edgesOf(node); 245 int numOffspring = 0; 246 StringBuffer nodeText = new StringBuffer(""); 247 for(DefaultEdge e : edges) { 248 String child; 249 if (treegraph.getEdgeSource(e).equals(node)) { 250 child = treegraph.getEdgeTarget(e); 251 } 252 else { 253 child = treegraph.getEdgeSource(e); 254 } 255 if (child.equals(parent)) { 256 break; //We dont want to go up the tree... 257 } 258 if (numOffspring>0) { 259 nodeText.append(", "); 260 } 261 nodeText.append(getTreeText(treegraph, child, node)); 262 numOffspring++; 263 } 264 if (numOffspring==0) { 265 return node; 266 } 267 else { 268 return "(" + nodeText + ")"; 269 } 270 } 271 272 273 274 275 /** 276 * Add a tree, converting weighted graph (JGraphT) to NewickString 277 * 278 * This will assume a (arbitrary) root node using the old convention 279 * of labeling intermediate nodes as p*. 280 * 281 * 282 * @deprecated 283 * @param label 284 * the label to add 285 * 286 * @param treegraph 287 * the treegraph to convert. 288 */ 289 public void addTree(final String label, WeightedGraph<String, DefaultWeightedEdge> treegraph) { 290 addTree(label, treegraph, "p0"); 291 } 292 293 public String getTreeText(WeightedGraph<String, DefaultWeightedEdge> treegraph, String node, String parent){ 294 Set<DefaultWeightedEdge> edges = treegraph.edgesOf(node); 295 int numOffspring = 0; 296 StringBuffer nodeText = new StringBuffer(""); 297 for(DefaultWeightedEdge e : edges) { 298 String child; 299 if (treegraph.getEdgeSource(e).equals(node)) { 300 child = treegraph.getEdgeTarget(e); 301 } 302 else { 303 child = treegraph.getEdgeSource(e); 304 } 305 if (child.equals(parent)) { 306 break; //We dont want to go up the tree... 307 } 308 if (numOffspring>0) { 309 nodeText.append(", "); 310 } 311 nodeText.append(getTreeText(treegraph, child, node)); 312 nodeText.append(":"+treegraph.getEdgeWeight(e)); 313 numOffspring++; 314 } 315 if (numOffspring==0) { 316 return node; 317 } 318 else { 319 return "(" + nodeText + ")"; 320 } 321 } 322 323 /** 324 * Add a tree, converting weighted graph (JGraphT) to NewickString. 325 * 326 * @param label 327 * the label to add 328 * 329 * @param treegraph 330 * the treegraph to convert. 331 * 332 * @param topLabel 333 * the label of the top (root if rooted tree) node. 334 */ 335 public void addTree(final String label, 336 WeightedGraph<String, DefaultWeightedEdge> treegraph, String topLabel) { 337 final NewickTreeString tree = new NewickTreeString(); 338 String temp; 339 340 for(String vertex : treegraph.vertexSet()) { 341 if (vertex.equals(topLabel)) { 342 topLabel = vertex; //String equality is not enough, has to be the same object 343 } 344 } 345 temp = getTreeText(treegraph, topLabel, null); 346 tree.setTreeString(temp); 347 this.trees.put(label, tree); 348 } 349 350 351 /** 352 * Renames a vertex of the weighted graph. 353 * 354 * @param oldName old name. 355 * @param newName new name. 356 */ 357 private void renameVertex(String oldName, String newName) { 358 Set<DefaultWeightedEdge> oldEdges = this.weighted.edgesOf(oldName); 359 360 this.weighted.addVertex(newName); 361 for (DefaultWeightedEdge e : oldEdges) { 362 DefaultWeightedEdge tempEdge; 363 if (this.weighted.getEdgeSource(e).equals(oldName)) { 364 tempEdge = this.weighted.addEdge(newName, 365 this.weighted.getEdgeTarget(e)); 366 } 367 else { 368 tempEdge = this.weighted.addEdge(this.weighted.getEdgeSource(e), 369 newName); 370 } 371 this.weighted.setEdgeWeight(tempEdge,this.weighted.getEdgeWeight(e)); 372 //this.unweighted.removeEdge(e); Not needed 373 } 374 this.weighted.removeVertex(oldName); 375 } 376 377 378 379 /** 380 * Tokenizes a string representing a newick tree. 381 * 382 * Simple tokenizing, removing spaces and so on. 383 * 384 * @param text the string representing the tree 385 * @return An array of tokens 386 */ 387 Vector<String> tokenize(String text) { 388 Vector<String> toks = new Vector<String>(); 389 while (!text.equals("")) { 390 text = text.trim(); 391 String delims = "(),: \t"; 392 String c = text.substring(0, 1); 393 if (delims.contains(c)) { 394 toks.add(c); 395 text = text.substring(1); 396 } 397 else { 398 String currTok = ""; 399 int pos = 0; 400 while(!delims.contains(c)) { 401 //StringBuffer here might be faster.... 402 currTok += c; 403 if (text.length()>1) { 404 text = text.substring(1); 405 c = text.substring(0, 1); 406 } 407 else { 408 toks.add(currTok); 409 return toks; 410 } 411 } 412 toks.add(currTok); 413 } 414 } 415 416 return toks; 417 } 418 419 /* 420 * Checks if the graph has a name as vertex. 421 */ 422 private boolean hasVertex(String name) { 423 for(String vertex : this.weighted.vertexSet()) { 424 if (vertex.equals(name)) { 425 return true; 426 } 427 } 428 return false; 429 } 430 431 /** 432 * Processes the weight part (if it exists).# 433 * 434 * @param tokens 435 */ 436 void processWeight(Vector<String> tokens, String parent, String myNode) { 437 if (tokens.size() == 0) { 438 return; 439 } 440 if (tokens.get(0).equals(":")) { 441 tokens.remove(0); 442 this.weighted.setEdgeWeight( 443 this.weighted.getEdge(parent, myNode), 444 Double.parseDouble(tokens.get(0))); 445 tokens.remove(0); 446 } 447 } 448 449 /** 450 * Parses a Newick tree. 451 * 452 * Alters this.weighted! 453 * 454 * The tree is passed as a set of tokens. 455 * 456 * If some tokens are not processed, these are maintained in the vector. 457 * All consumed ones are removed. 458 * 459 * @param tokens Stream of tokens to be parser 460 */ 461 void parseTree(Vector<String> tokens, String parent) throws ParseException { 462 String myNode; 463 if (parent == null) { 464 pValue = 0; 465 uuids = new Vector<String>(); 466 } 467 //System.out.println("Top: " + parent + " "); 468 //for(String tok: tokens) { 469 // System.out.print(" " + tok); 470 //} 471 //System.out.println(); 472 473 if (tokens.get(0).equals("(")) { 474 tokens.remove(0); 475 myNode = UUID.randomUUID().toString(); 476 uuids.add(myNode); 477 this.weighted.addVertex(myNode); 478 if (parent != null) { 479 this.weighted.addEdge(parent, myNode); 480 } 481 while (!tokens.get(0).equals(")")) { 482 parseTree(tokens, myNode); 483 if (tokens.get(0).equals(",")) { 484 tokens.remove(0); 485 } 486 else if (!tokens.get(0).equals(")") ) { 487 throw new ParseException ("Expecting ), got " + tokens.get(0)); 488 } 489 } 490 tokens.remove(0); 491 if (tokens.size() > 0) { 492 String nextToken = tokens.get(0); 493 char firstChar = nextToken.charAt(0); 494 if (Character.isLetter(firstChar)) { 495 uuids.remove(myNode); 496 renameVertex(myNode, nextToken); 497 myNode = nextToken; 498 tokens.remove(0); 499 } 500 processWeight(tokens, parent, myNode); 501 } 502 if (parent == null) { 503 String finalName; 504 //Lets solve clashes 505 for (String uuid: uuids) { 506 do { 507 finalName = this.nodePrefix + (this.pValue++); 508 } while (hasVertex(finalName)); 509 if (uuid.equals(myNode)) { 510 this.topNode = finalName; 511 } 512 renameVertex(uuid, finalName); 513 } 514 515 uuids = null; //for gc 516 } 517 518 } 519 else { 520 myNode = tokens.get(0); 521 tokens.remove(0); 522 this.weighted.addVertex(myNode); 523 this.weighted.addEdge(parent, myNode); 524 if (tokens.get(0).equals(":")) { 525 tokens.remove(0); 526 this.weighted.setEdgeWeight( 527 this.weighted.getEdge(parent, myNode), 528 Double.parseDouble(tokens.get(0))); 529 tokens.remove(0); 530 } 531 } 532 533 } 534 535 /** 536 * Get given (NewickString) tree by label, converts it to weighted graph (JGraphT). 537 * 538 * Alters this.weighted! 539 * 540 * @param label 541 * label for tree selection 542 * 543 * @return converted tree as undirectedGraph 544 */ 545 public WeightedGraph<String, DefaultWeightedEdge> getTreeAsWeightedJGraphT(final String label) 546 throws ParseException { 547 String temp; 548 TreesBlock.NewickTreeString t = new TreesBlock.NewickTreeString(); 549 550 this.weighted = new SimpleWeightedGraph<String, DefaultWeightedEdge>(DefaultWeightedEdge.class); 551 t = (TreesBlock.NewickTreeString) this.trees.get(label); 552 temp = t.getTreeString(); 553 parseTree(tokenize(temp), null); 554 555 return this.weighted; 556 } 557 558 /** 559 * Adds a comment. 560 * 561 * @param comment 562 * the comment to add. 563 */ 564 public void addComment(final NexusComment comment) { 565 this.comments.add(comment); 566 } 567 568 /** 569 * Removes a comment. 570 * 571 * @param comment 572 * the comment to remove. 573 */ 574 public void removeComment(final NexusComment comment) { 575 this.comments.remove(comment); 576 } 577 578 /** 579 * Returns all comments. 580 * 581 * @return all the selected comments. 582 */ 583 public List getComments() { 584 return this.comments; 585 } 586 587 protected void writeBlockContents(Writer writer) throws IOException { 588 for (final Iterator i = this.comments.iterator(); i.hasNext();) { 589 ((NexusComment) i.next()).writeObject(writer); 590 writer.write(NexusFileFormat.NEW_LINE); 591 } 592 writer.write(" TRANSLATE" + NexusFileFormat.NEW_LINE); 593 for (final Iterator i = this.translations.entrySet().iterator(); i 594 .hasNext();) { 595 final Map.Entry entry = (Map.Entry) i.next(); 596 writer.write('\t'); 597 this.writeToken(writer, "" + entry.getKey()); 598 writer.write('\t'); 599 this.writeToken(writer, "" + entry.getValue()); 600 if (i.hasNext()) 601 writer.write(','); 602 else 603 writer.write(';'); 604 writer.write(NexusFileFormat.NEW_LINE); 605 } 606 for (final Iterator i = this.trees.entrySet().iterator(); i.hasNext();) { 607 final Map.Entry entry = (Map.Entry) i.next(); 608 final NewickTreeString treeStr = (NewickTreeString) entry 609 .getValue(); 610 writer.write(" TREE "); 611 if (treeStr.isStarred()) 612 writer.write("* "); 613 this.writeToken(writer, "" + entry.getKey()); 614 writer.write('='); 615 if (treeStr.getRootType() != null) 616 writer.write("[" + treeStr.getRootType() + "]"); 617 this.writeToken(writer, treeStr.getTreeString()); 618 writer.write(";" + NexusFileFormat.NEW_LINE); 619 } 620 } 621 622 /** 623 * Returns the top node of the previously requested graph. 624 * 625 * The topNode will be the root node if the tree is rooted, if not 626 * then it will just be the top most node of the newick string with 627 * no biological meaning. 628 * 629 * The top node from the {@link #getTreeAsJGraphT(java.lang.String)} 630 * and {@link #getTreeAsWeightedJGraphT(java.lang.String)} might vary, 631 * and this function will return the top node of the previously called 632 * method only. If no method was called, null is returned. 633 * 634 * Note: the top node between graphs, probably does not vary, but, 635 * just in case, the note is here and the user should get a different 636 * top for each type of graph. 637 * 638 * @return the top node. 639 */ 640 public String getTopNode() { 641 return topNode; 642 } 643 644 /** 645 * Sets the node prefix of intermediate nodes for returned graphs. 646 * 647 * The intermediate nodes of graphs have to be named, the conventions 648 * being: 649 * 1. Generate a new node name using a prefix plus an integer 650 * 2. If the node name clashes with any taxon name, use another integer 651 * 652 * @param prefix The prefix 653 */ 654 public void setNodePrefix(String prefix) { 655 this.nodePrefix = prefix; 656 } 657 658 /** 659 * Returns the node prefix. 660 * 661 * @return the node prefix 662 */ 663 public String getNodePrefix() { 664 return this.nodePrefix; 665 } 666 667}