001package org.biojava.bio.symbol; 002 003import java.util.ArrayList; 004import java.util.HashMap; 005import java.util.HashSet; 006import java.util.Iterator; 007import java.util.List; 008import java.util.Map; 009import java.util.Set; 010 011import org.biojava.bio.BioError; 012import org.biojava.bio.BioException; 013import org.biojava.bio.BioRuntimeException; 014import org.biojava.bio.seq.io.CharacterTokenization; 015import org.biojava.bio.seq.io.SymbolListCharSequence; 016import org.biojava.bio.seq.io.SymbolTokenization; 017import org.biojava.utils.AssertionFailure; 018import org.biojava.utils.ChangeListener; 019import org.biojava.utils.ChangeType; 020import org.biojava.utils.ChangeVetoException; 021 022 023/** 024 * <p> 025 * A suffix tree is an efficient method for encoding the frequencies 026 * of motifs in a sequence. They are sometimes used to quickly screen 027 * for similar sequences. For instance, all motifs of length up to 028 * 2 in the sequence <code>AAGT</code> could be encoded as: 029 * </p> 030 * 031 * <pre> 032 * root(4) 033 * | 034 * A(2)--------G(1)-----T(1) 035 * | | 036 * A(1)--G(1) T(1) 037 * </pre> 038 *<p> 039 * It supports addition of elements both as String and SymbolList. They should 040 * not be mixed together. The strings are also terminated internally, so it is 041 * possible to go and add more than one string to the suffix tree. 042 *</p> 043 *<p> 044 * Some more work need to be done on how data should be generated from this 045 * class. If you need something that's not in there, please e-mail the list at 046 * biojava-dev@biojava.org and I'll add it in there. 047 *<\p> 048 * @author Francois Pepin 049 * @version 1.3 050 */ 051public class UkkonenSuffixTree{ 052 053 public static final char DEFAULT_TERM_CHAR ='$'; 054 private char terminationChar; 055 056 SuffixNode root; 057 058 public static final int TO_A_LEAF = -1; 059 private int e; 060 061 private CharSequence sequences; 062 063 064 /** Describes the rule that needs to be applied after walking down a tree. Put as a class variable because it can only return a single object (and I don't want to extend Node any further. 065 * rule 1: ended up at a leaf. 066 * rule 2: need to extend an internalNode. 067 * rule 3: would split an edge. 068 * rule 4: ended up in the middle of an edge. 069 * rule 5: ended up at an InternalNode 070 * 071 * Production 5 counts as rule 4 when adding a sequence, but the rules are also used to when searching the tree. 072 */ 073 private int rule; 074 075 076 /** Initializes a new <code>UkkonenSuffixTree</code> instance. 077 */ 078 public UkkonenSuffixTree(){ 079 terminationChar = DEFAULT_TERM_CHAR; 080 root = new SimpleNode(); 081 e=0; 082 sequences = ""; 083 } 084 085 public UkkonenSuffixTree(String seqs){ 086 this(); 087 addSequence(seqs, "unnamed", false); 088 } 089 090 public UkkonenSuffixTree(FiniteAlphabet alpha){ 091 this(); 092 } 093 094 public void addSymbolList(SymbolList list, String name, boolean doNotTerminate) throws IllegalSymbolException{ 095 if (!doNotTerminate) 096 list = new TerminatedSymbolList(list); 097 098 CharSequence seq =new SymbolListCharSequence(list); 099 System.out.println("Adding symbol list " + seq.toString()); 100 //if (!doNotTerminate) 101 102 //if(!doNotTerminate) 103 // seq = seq+terminationChar; 104 addPreppedSequence(seq); 105 } 106 107 108 109 /** 110 * Add a sequence into the tree. If there are more sequences, they should be separated by a terminationChar ($ by default). If none exist, it is assumed that there is only 1 continuous sequence to be added. 111 * @param seq the sequence/sequences to be added into the tree. 112 * @param doNotTerminate whether we should terminate the sequence if it's non-terminated. 113 */ 114 public void addSequence(String seq, String name, boolean doNotTerminate){ 115 int i; 116 int start, end; 117 ArrayList toBeAdded = new ArrayList(); 118 Iterator iterator; 119 String subseq; 120 121 if (seq==null||seq.length()==0) 122 return; 123 124 //terminate the String if it's not terminated. 125 if(!doNotTerminate&&seq.charAt(seq.length()-1)!=terminationChar) 126 seq = seq+terminationChar; 127 128 //count how many termination Chars in in. 129 start =0; 130 for (i=0;seq.indexOf(terminationChar, i)!=-1;i=seq.indexOf(terminationChar, i)+1){ 131 start=i; 132 end=seq.indexOf(terminationChar, i); 133 toBeAdded.add(seq.substring(start,end+1)); 134 } 135 136 iterator = toBeAdded.iterator(); 137 i=0; 138 while (iterator.hasNext()){ 139 subseq=(String)iterator.next(); 140 addPreppedSequence(subseq); 141 i++; 142 } 143 } 144 145 /** Add a single sequence into the tree. 146 * 147 * @param seq a <code>String</code> value 148 */ 149 private void addPreppedSequence(CharSequence seq){ 150 int i, gammaStart; 151 int j=0; 152 SuffixNode oldNode=null, newNode; 153 SuffixNode currentNode; 154 boolean canLinkJump = false; 155 156 //Puts i at the end of the previous sequences 157 i = sequences.length(); 158 j=i; 159 160 sequences= sequences.toString()+seq.toString(); 161 162 currentNode = root; 163 //phase i 164 for (; i<sequences.length();i++){ 165 //System.out.println("Phase "+i); 166 167 e+=1; 168 //extension j; 169 for (;j<=i;j++){ 170 //System.out.println("extension "+j); 171 172 //reset a couple of things... 173 newNode = null; 174 175 //find first node v at or above s[j-1,i] that is root or has a suffixLink 176 while (currentNode!=root&¤tNode.suffixLink==null&&canLinkJump) 177 currentNode = currentNode.parent; 178 179 if (root==currentNode){ 180 currentNode=jumpTo(root,sequences,j,i+1); 181 }else{ 182 if (canLinkJump) 183 currentNode = currentNode.suffixLink; 184 gammaStart = j+getPathLength(currentNode); 185 186 currentNode = jumpTo(currentNode,sequences,gammaStart,i+1); 187 } 188 189 if (rule==1) 190 addPositionToLeaf(j, currentNode); 191 if (rule==2) 192 doRule2(currentNode,i,j); 193 if (rule==3){ 194 newNode=doRule3(currentNode,i,j); 195 currentNode=newNode; 196 } 197 198 if (rule==1||rule==4||rule==5) 199 currentNode = currentNode.parent; 200 201 if (oldNode!=null){ 202 if (currentNode.isTerminal()) 203 currentNode=currentNode.parent; 204 205 oldNode.suffixLink=currentNode; 206 207 } 208 oldNode=newNode; 209 newNode=null; 210 211 if (rule==1||rule==4||rule==5){ 212 oldNode=null; 213 canLinkJump=false; 214 break; 215 }else 216 canLinkJump=true; 217 218 219 }//for phase i 220 }//for extension j 221 finishAddition(); 222 } 223 224 /** This method is used to walk down the tree, from a given node. The 225 * <code>rule</code> variable can be used to check where the walk 226 * stopped. Note that rule 3 means that the string used to walk down the 227 * tree does not match (which is a bit different from the construction 228 * where rule 3 implies that only the last character doesn't match. 229 *<p> 230 * The String is encoded as a substring of a given source. This is done to 231 * avoid replicating the string. To send walk down the string 232 * <code>x</code> from the root, one would call walkTo(root,x,0,x.length()). 233 * 234 * @param starting the root of the subtree we're walking down form. 235 * @param source a superstring that contains the string we're using to 236 * walking down. source.subtring(from,to) should give the string we're 237 * walking down from. 238 * @param from the start position (inclusive) of the target string in the 239 * source. 240 * @param to the end position (exclusive) of the target string in the node. 241 * @return a <code>SuffixNode</code> that the walk stopped at. If the walk 242 * stopped inside an edge. (check the rule variable to see where it stopped). 243 */ 244 public SuffixNode walkTo(SuffixNode starting, String source, int from, int to){ 245 SuffixNode currentNode; 246 SuffixNode arrivedAt; 247 CharSequence edgeLabel; 248 249 250 currentNode=starting; 251 arrivedAt=starting; 252 while (from<to){ 253 arrivedAt=(SuffixNode)currentNode.children.get( 254 new Character(source.charAt(from))); 255 if (arrivedAt==null){ 256 from=to; 257 arrivedAt=currentNode; 258 rule=2; 259 break; 260 } 261 262 edgeLabel = getEdgeLabel(arrivedAt); 263 if (edgeLabel.length()>=to-from){ 264 if (edgeLabel.equals(source.substring(from,to))){ 265 //rule 1 or 5, 266 if (arrivedAt.isTerminal()) 267 rule=1; 268 else 269 rule=5; 270 } 271 if (edgeLabel.subSequence(0, to-from).equals(source.substring(from,to))) 272 rule=4; 273 else 274 rule=3; 275 from=to; 276 } else if (source.subSequence(from,from+edgeLabel.length()) 277 .equals(edgeLabel)) { 278 from+=edgeLabel.length(); 279 currentNode=arrivedAt; 280 } 281 282 else{ 283 rule=3; 284 from=to; 285 } 286 } 287 288 return arrivedAt; 289 290 } 291 292 293 294 295 /** 296 * Just like walkTo, but faster when used during tree construction, as it 297 * assumes that a mismatch can only occurs with the last character of the 298 * target string. 299 * 300 * @param starting the root of the subtree we're walking down form. 301 * @param source a superstring that contains the string we're using to 302 * walking down. source.subtring(from,to) should give the string we're 303 * walking down from. 304 * @param from the start position (inclusive) of the target string in the 305 * source. 306 * @param to the end position (exclusive) of the target string in the node. 307 * @return a <code>SuffixNode</code> that the walk stopped at. If the walk 308 * stopped inside an edge. (check the rule variable to see where it 309 * stopped). 310 */ 311 public SuffixNode jumpTo(SuffixNode starting, CharSequence source, int from, int to){ 312 SuffixNode currentNode; 313 SuffixNode arrivedAt; 314 boolean canGoDown = true; 315 int edgeLength; 316 int original=from; 317 SuffixNode originalNode=starting; 318 319 320 currentNode=starting; 321 arrivedAt=starting; 322 323 rule=0; 324 325 if (from==to){ 326 rule=5; 327 return starting; 328 } 329 330 331 while (canGoDown){ 332 // if (source.substring(from, to).equals("CAGCG")) 333 // System.out.println(to+" here to "+source.substring(from, to)+" "+(i++)); 334 335 if (currentNode.isTerminal()){ 336 System.out.println("ARRGH! at "+source.subSequence(original, to)+ 337 "("+from+","+original+","+to+ 338 ") from "+getLabel(originalNode)); 339 //Something truly awful happened if this line is ever reached. 340 //This bug should be dead, but it it came back from the dead a couple 341 //of times already. 342 } 343 344 arrivedAt=(SuffixNode)currentNode.children.get( 345 new Character(source.charAt(from))); 346 if (arrivedAt==null){ 347 canGoDown=false; 348 arrivedAt=currentNode; 349 rule=2; 350 break; 351 } 352 353 edgeLength = getEdgeLength(arrivedAt); 354 if (edgeLength>=to-from){ 355 int after = getPathEnd(arrivedAt)-getEdgeLength(arrivedAt)+to-from-1; 356 if (sequences.charAt(after)== 357 source.charAt(to-1)){ 358 if (getEdgeLength(arrivedAt)==to-from){ 359 if (arrivedAt.isTerminal()) 360 rule=1; 361 else 362 rule=5; 363 } 364 else 365 rule=4; 366 } 367 else 368 rule=3; 369 canGoDown=false; 370 break; 371 } 372 from+=edgeLength; 373 currentNode=arrivedAt; 374 375 }//while canGoDOwn 376 377 return arrivedAt; 378 } 379 380 /****************************************************************** 381 * Tree navigation methods 382 ******************************************************************/ 383 384 protected int getEdgeLength(SuffixNode child){ 385 int parentLength, childLength; 386 SuffixNode parent; 387 if (child==root) 388 return 0; 389 parent=child.parent; 390 parentLength = getPathLength(parent); 391 childLength = getPathLength(child); 392 if (childLength-parentLength<=0){ 393 394 System.out.println("negative length "+(childLength-parentLength)); 395 396 System.out.println(getLabel(child)+","+getLabel(parent)); 397 } 398 399 return childLength-parentLength; 400 } 401 402 protected CharSequence getEdgeLabel(SuffixNode child){ 403 return sequences.subSequence( 404 child.labelStart+ 405 (getPathLength(child)-getEdgeLength(child)), 406 (child.labelEnd==TO_A_LEAF)? 407 e:child.labelEnd); 408 } 409 410 411 protected int getPathLength(SuffixNode node){ 412 return getPathEnd(node)-node.labelStart; 413 } 414 415 protected int getPathEnd(SuffixNode node){ 416 return node.labelEnd==TO_A_LEAF?e:node.labelEnd; 417 } 418 419 protected CharSequence getLabel(SuffixNode node){ 420 if (node==root) 421 return "root"; 422 else 423 return sequences.subSequence( 424 node.labelStart, 425 (node.labelEnd==TO_A_LEAF)?e:node.labelEnd).toString(); 426 } 427 428 429 protected ArrayList getAllNodes(SuffixNode root, ArrayList list, boolean leavesOnly){ 430 Iterator iterator; 431 if (list==null) 432 list= new ArrayList(); 433 if (!leavesOnly||(leavesOnly&&root.isTerminal())) 434 list.add(root); 435 if (!root.isTerminal()){ 436 iterator = root.children.values().iterator(); 437 while (iterator.hasNext()) 438 list=getAllNodes((SuffixNode)iterator.next(), list, leavesOnly); 439 } 440 441 return list; 442 } 443 444 public void printTree(){ 445 ArrayList allNodes = getAllNodes(root, null, false); 446 for (int i=0;i<allNodes.size();i++){ 447 SuffixNode node = (SuffixNode)allNodes.get(i); 448 if (node==root) 449 System.out.println("root"); 450 else 451 System.out.println("node "+i+" label "+getLabel(node)+" attached to "+getLabel(node.parent)); 452 } 453 } 454 455 456 public SuffixNode getRoot(){return root;} 457 458 /****************************************************************** 459 * End Tree Navigation Methods 460 ******************************************************************/ 461 462 463 /****************************************************************** 464 * Tree modification methods 465 ******************************************************************/ 466 private void addPositionToLeaf(int pos, SuffixNode leaf){ 467 int[] moreLabels; 468 if (leaf.additionalLabels==null) 469 leaf.additionalLabels = new int[]{pos}; 470 else{ 471 moreLabels = new int[leaf.additionalLabels.length+1]; 472 System.arraycopy(leaf.additionalLabels, 0, moreLabels, 0, leaf.additionalLabels.length); 473 moreLabels[moreLabels.length-1]=pos; 474 leaf.additionalLabels=moreLabels; 475 } 476 477 } 478 479 private void doRule2(SuffixNode parent, int splittingPos, int suffixStart){ 480 //int number = getAllNodes(root, null, false).size(); 481 SuffixNode leaf = new SimpleNode (parent, suffixStart); 482 483 parent.children.put(new Character(sequences.charAt(splittingPos)), leaf); 484 //System.out.println("rule 2: "+sequences.charAt(splittingPos)+" from "+getLabel(parent)+ " to "+getLabel(leaf)); 485 486 } 487 488 private SuffixNode doRule3(SuffixNode child, int splittingPos, int suffixStart){ 489 // return toBeSplit.splitEdge(endOfSubSeq, sequences.charAt(endOfSubSeq), 490 // toBeSplit.getStart()+endOfSubSeq-rule3Position, 491 // suffixStart); 492 //int number = getAllNodes(root, null, false).size(); 493 SuffixNode parent = child.parent; 494 SuffixNode middle= new SimpleNode(parent,suffixStart,splittingPos); 495 Character x=new Character( 496 sequences.charAt(child.labelStart+getPathLength(child)-getEdgeLength(child))); 497 498 //System.out.println(parent.children.get(x)==child); 499 500 Character y=new Character(sequences.charAt( 501 child.labelStart 502 +getPathLength(child)-getEdgeLength(child) 503 +getEdgeLength(middle) 504 )); 505 506 parent.children.remove(x); 507 parent.children.put(x,middle); 508 509 middle.children.put(y,child); 510 child.parent=middle; 511 //System.out.println("rule 3: "+sequences.charAt(splittingPos)+" between "+getLabel(parent)+" and "+getLabel(child) + " Addition made:"+(number==getAllNodes(root, null,false).size()-1)); 512 doRule2(middle,splittingPos,suffixStart); 513 return middle; 514 } 515 516 private void finishAddition(){ 517 SuffixNode leaf; 518 ArrayList leaves = getAllNodes(root, null, true); 519 for (int i=0;i<leaves.size();i++){ 520 leaf = (SuffixNode)leaves.get(i); 521 if (leaf.labelEnd==TO_A_LEAF) 522 leaf.labelEnd=e; 523 } 524 525 } 526 /****************************************************************** 527 * end Tree modification methods 528 ******************************************************************/ 529 530 public static abstract class SuffixNode { 531 532 static final int A_LEAF=-1; 533 SuffixNode parent; 534 SuffixNode suffixLink; 535 int labelStart, labelEnd; 536 HashMap children; 537 int[] additionalLabels; 538 539 /** 540 * Determine is this node is terminal (has no children). 541 *<p> 542 * Note that this only happens at the terminated node (if the sequences 543 * have been terminated. 544 * 545 * @return <code>true</code> if and only if this node has no children. 546 */ 547 548 abstract public boolean isTerminal(); 549 550 /** 551 * Determine if this node has a child corresponding to a given character 552 * 553 * @param i the first <code>Character</code> of the edge coming down this node. 554 * @return <code>true</code> if the node has a child going down from that character, 555 * false otherwise 556 */ 557 abstract public boolean hasChild(Character i); 558 559 /** Gets the child of of a node that is coming down from that particular 560 * node. It returns null if no child exists or if no child exists starting 561 * on that particular character. 562 * 563 * @param i the first <code>Character</code> of the edge coming down this node 564 * @return the appropriate child <code>SuffixNode</code>, or null if it 565 * doesn't exists. 566 */ 567 abstract SuffixNode getChild(Character i); 568 //abstract void addChild(SuffixTree tree, int i, SuffixNode n); 569 570 /** 571 * Returns the parent of this node, null if it's the root. 572 * 573 * @return the parent of this node, null if it's the root. 574 */ 575 abstract SuffixNode getParent(); 576 577 } 578 579 580 class SimpleNode extends SuffixNode{ 581 582 //static final int A_LEAF=-1; 583 //SuffixNode parent; 584 //SuffixNode suffixLink; 585 //int labelStart, labelEnd; 586 //HashMap children; 587 //int[] additionalLabels; 588 589 /** Creates a root 590 */ 591 public SimpleNode(){ 592 parent=null; 593 suffixLink=null; 594 labelStart=0; 595 labelEnd=0; 596 children=new HashMap(); 597 additionalLabels=null; 598 } 599 600 /** creates a leaf 601 * @param parent the parent node 602 * @param position the starting value of the suffix 603 */ 604 public SimpleNode(SuffixNode parent, int position){ 605 this(); 606 this.parent=parent; 607 labelStart=position; 608 labelEnd = A_LEAF; 609 children=null; 610 //checkParent(this); 611 } 612 613 /** creates an internal node 614 * @param parent the parent of this node 615 * @param labelStart the starting point of the path label 616 * @param labelStop the ending point of the path label 617 */ 618 public SimpleNode(SuffixNode parent, int labelStart, int labelStop){ 619 this(); 620 this.parent=parent; 621 this.labelStart=labelStart; 622 this.labelEnd=labelStop; 623 //checkParent(this); 624 } 625 626 627 public boolean isTerminal(){return children==null;} 628 public boolean hasChild(Character x){return getChild(x)!=null;} 629 public SuffixNode getChild(Character x){ 630 return (children==null)?null:(SuffixNode)children.get(x); 631 } 632 public SuffixNode getParent(){return parent;} 633 } 634 635 public boolean subStringExists(String str) 636 { 637 walkTo(root, str, 0, str.length()); 638 return (rule==1||rule==4||rule==5); 639 } 640 641 /** 642 * Stupid little wrapper to put a termination symbol at the end. A sublist 643 * that doesn't include the termination symbol will have the same alphabet as 644 * the original, one that does will have an alphabet that includes the 645 * termination symbol. 646 * 647 * Now includes some ugly hacks by Thomas Down to make ambiguities work 648 * nicer. 649 * 650 */ 651 private class TerminatedSymbolList implements SymbolList 652 { 653 private SymbolList unterminated; 654 final Symbol TERMINATION_SYMBOL; 655 private AbstractAlphabet alpha; 656 private Map translationTable = new HashMap(); 657 658 public TerminatedSymbolList(SymbolList unterminated) 659 { 660 this.unterminated=unterminated; 661 TERMINATION_SYMBOL = AlphabetManager.createSymbol("Termination"); 662 FiniteAlphabet oldAlphabet = (FiniteAlphabet)unterminated.getAlphabet(); 663 // Set set = AlphabetManager.getAllSymbols(oldAlphabet); 664 Set set = new HashSet(); 665 for (Iterator i = oldAlphabet.iterator(); i.hasNext(); ) { 666 set.add(i.next()); 667 } 668 set.add(TERMINATION_SYMBOL); 669 alpha = new SimpleAlphabet(set); 670 CharacterTokenization tokenizer =new CharacterTokenization(alpha, true); 671 tokenizer.bindSymbol(TERMINATION_SYMBOL, DEFAULT_TERM_CHAR); 672 SymbolTokenization sToke; 673 try{ 674 sToke = oldAlphabet.getTokenization("token"); 675 } 676 catch (BioException be){ 677 throw new BioError("Internal error: failed to get SymbolTokenization for SymbolList alphabet", be); 678 } 679 if (sToke.getTokenType() != SymbolTokenization.CHARACTER) 680 throw new IllegalArgumentException("Only FiniteAlphabets using a char token are supported by UkkonenSuffixTree"); 681 try{ 682 for (Iterator i= AlphabetManager.getAllSymbols(oldAlphabet).iterator();i.hasNext();){ 683 Symbol oldSymbol = (Symbol) i.next(); 684 Symbol newSymbol; 685 if (oldSymbol instanceof AtomicSymbol) { 686 // The atomic symbols are identical between the two alphabets 687 newSymbol = oldSymbol; 688 } else { 689 // Port ambiguous symbols across 690 Set s = new HashSet(); 691 for (Iterator si = ((FiniteAlphabet) oldSymbol.getMatches()).iterator(); si.hasNext(); ) { 692 s.add(si.next()); 693 } 694 newSymbol = alpha.getAmbiguity(s); 695 } 696 //takes first char of String, should work because we checked 697 //getTokenType above 698 699 char c = sToke.tokenizeSymbol(oldSymbol).charAt(0); 700 // System.err.println("Binding " + c); 701 tokenizer.bindSymbol(newSymbol, c); 702 translationTable.put(oldSymbol, newSymbol); 703 } 704 //This is really hacky, ambiguous symbols containing 705 //TERMINATION_SYMBOL are impossible at this point, so I just define 706 //the Tokenization to treat them as TERMINATION_SYMBOL so that the 707 //code that likes to loop through doesn't go titty up. 708 for (Iterator i= AlphabetManager.getAllSymbols(alpha).iterator();i.hasNext();){ 709 Symbol s = (Symbol) i.next(); 710 Alphabet mathes = s.getMatches(); 711 if (mathes.contains(TERMINATION_SYMBOL)) 712 tokenizer.bindSymbol(s,DEFAULT_TERM_CHAR); 713 } 714 715 }catch(IllegalSymbolException ise){ 716 throw new AssertionFailure("Assertion Failure: This alphabet has been custom made so this doesn't happen",ise); 717 } 718 719 alpha.putTokenization("token", tokenizer); 720 } 721 722 723 // Implementation of org.biojava.utils.Changeable 724 725 public void addChangeListener(ChangeListener changeListener, ChangeType changeType) { 726 unterminated.addChangeListener(changeListener,changeType); 727 } 728 729 public void addChangeListener(ChangeListener changeListener) { 730 unterminated.addChangeListener(changeListener); 731 } 732 733 public void removeChangeListener(ChangeListener changeListener, ChangeType changeType) { 734 unterminated.removeChangeListener(changeListener,changeType); 735 } 736 737 public void removeChangeListener(ChangeListener changeListener) { 738 unterminated.removeChangeListener(changeListener); 739 } 740 741 public boolean isUnchanging(ChangeType changeType) { 742 return unterminated.isUnchanging(changeType); 743 } 744 745 // Implementation of org.biojava.bio.symbol.SymbolList 746 747 public int length() { 748 return unterminated.length()+1; 749 } 750 751 public Iterator iterator() { 752 return unterminated.iterator(); 753 } 754 755 public SymbolList subList(int n, int n1) throws IndexOutOfBoundsException { 756 List list; 757 if (n1!=unterminated.length()+1) 758 return unterminated.subList(n,n1); 759 else{ 760 list = unterminated.subList(n,n1-1).toList(); 761 list.add(TERMINATION_SYMBOL); 762 try{ 763 return new SimpleSymbolList(getAlphabet(),list); 764 } 765 catch(IllegalSymbolException e){ 766 throw new AssertionFailure("Assertion Failure: This alphabet was created just so it doesn't do this",e); 767 } 768 } 769 } 770 771 public Alphabet getAlphabet() { 772 return alpha; 773 } 774 775 public Symbol symbolAt(int n) throws IndexOutOfBoundsException { 776 if (n!=length()) 777 return (Symbol) translationTable.get(unterminated.symbolAt(n)); 778 else 779 return TERMINATION_SYMBOL; 780 } 781 782 public List toList() { 783 List answer = unterminated.toList(); 784 answer.add(TERMINATION_SYMBOL); 785 return answer; 786 } 787 788 public String seqString() { 789 try{ 790 SymbolTokenization toke = getAlphabet().getTokenization("token"); 791 return unterminated.seqString()+toke.tokenizeSymbol(TERMINATION_SYMBOL); 792 } catch (BioException ex) { 793 throw new BioRuntimeException("Couldn't tokenize sequence", ex); 794 } 795 } 796 797 public String subStr(int n, int n1) throws IndexOutOfBoundsException { 798 return subList(n, n1).seqString(); 799 } 800 801 /** 802 * Describe <code>edit</code> method here. 803 * 804 * @param edit an <code>Edit</code> value 805 * @exception IndexOutOfBoundsException if an error occurs 806 * @exception IllegalAlphabetException if an error occurs 807 * @exception ChangeVetoException if an error occurs 808 */ 809 public void edit(Edit edit) throws IndexOutOfBoundsException, IllegalAlphabetException, ChangeVetoException { 810 throw new ChangeVetoException("TerminatedSymbolList is immutable"); 811 } 812 } 813}