001package org.biojava.bio.symbol;
002
003import java.util.ArrayList;
004import java.util.HashMap;
005import java.util.HashSet;
006import java.util.Iterator;
007import java.util.List;
008import java.util.Map;
009import java.util.Set;
010
011import org.biojava.bio.BioError;
012import org.biojava.bio.BioException;
013import org.biojava.bio.BioRuntimeException;
014import org.biojava.bio.seq.io.CharacterTokenization;
015import org.biojava.bio.seq.io.SymbolListCharSequence;
016import org.biojava.bio.seq.io.SymbolTokenization;
017import org.biojava.utils.AssertionFailure;
018import org.biojava.utils.ChangeListener;
019import org.biojava.utils.ChangeType;
020import org.biojava.utils.ChangeVetoException;
021
022
023/**
024 * <p>
025 * A suffix tree is an efficient method for encoding the frequencies
026 * of motifs in a sequence.  They are sometimes used to quickly screen
027 * for similar sequences.  For instance, all motifs of length up to
028 * 2 in the sequence <code>AAGT</code> could be encoded as:
029 * </p>
030 *
031 * <pre>
032 * root(4)
033 * |
034 * A(2)--------G(1)-----T(1)
035 * |           |
036 * A(1)--G(1)  T(1)
037 * </pre>
038 *<p>
039 * It supports addition of elements both as String and SymbolList. They should
040 * not be mixed together. The strings are also terminated internally, so it is
041 * possible to go and add more than one string to the suffix tree.
042 *</p>
043 *<p>
044 * Some more work need to be done on how data should be generated from this
045 * class. If you need something that's not in there, please e-mail the list at
046 * biojava-dev@biojava.org and I'll add it in there.
047 *<\p>
048 * @author Francois Pepin
049 * @version 1.3
050 */
051public class UkkonenSuffixTree{
052
053  public static final char DEFAULT_TERM_CHAR ='$';
054  private char terminationChar;
055
056  SuffixNode root;
057
058  public static final int TO_A_LEAF = -1;
059  private int e;
060
061  private CharSequence sequences;
062
063
064  /** Describes the rule that needs to be applied after walking down a tree. Put as a class variable because it can only return a single object (and I don't want to extend Node any further.
065   * rule 1: ended up at a leaf.
066   * rule 2: need to extend an internalNode.
067   * rule 3: would split an edge.
068   * rule 4: ended up in the middle of an edge.
069   * rule 5: ended up at an InternalNode
070   *
071   * Production 5 counts as rule 4 when adding a sequence, but the rules are also used to when searching the tree.
072   */
073  private int rule;
074
075
076  /** Initializes a new <code>UkkonenSuffixTree</code> instance.
077   */
078  public UkkonenSuffixTree(){
079    terminationChar = DEFAULT_TERM_CHAR;
080    root = new SimpleNode();
081    e=0;
082    sequences = "";
083  }
084
085  public UkkonenSuffixTree(String seqs){
086    this();
087    addSequence(seqs, "unnamed", false);
088  }
089
090  public UkkonenSuffixTree(FiniteAlphabet alpha){
091    this();
092  }
093
094  public void addSymbolList(SymbolList list, String name, boolean doNotTerminate) throws IllegalSymbolException{
095    if (!doNotTerminate)
096      list = new TerminatedSymbolList(list);
097
098    CharSequence seq =new SymbolListCharSequence(list);
099    System.out.println("Adding symbol list " + seq.toString());
100    //if (!doNotTerminate)
101
102    //if(!doNotTerminate)
103    //  seq = seq+terminationChar;
104    addPreppedSequence(seq);
105  }
106
107
108
109  /**
110   * Add a sequence into the tree. If there are more sequences, they should be separated by a terminationChar ($ by default). If none exist, it is assumed that there is only 1 continuous sequence to be added.
111   * @param seq the sequence/sequences to be added into the tree.
112   * @param doNotTerminate whether we should terminate the sequence if it's non-terminated.
113   */
114  public void addSequence(String seq, String name, boolean doNotTerminate){
115    int i;
116    int start, end;
117    ArrayList toBeAdded = new ArrayList();
118    Iterator iterator;
119    String subseq;
120
121    if (seq==null||seq.length()==0)
122      return;
123
124    //terminate the String if it's not terminated.
125    if(!doNotTerminate&&seq.charAt(seq.length()-1)!=terminationChar)
126      seq = seq+terminationChar;
127
128    //count how many termination Chars in in.
129    start =0;
130    for (i=0;seq.indexOf(terminationChar, i)!=-1;i=seq.indexOf(terminationChar, i)+1){
131      start=i;
132      end=seq.indexOf(terminationChar, i);
133      toBeAdded.add(seq.substring(start,end+1));
134    }
135
136    iterator = toBeAdded.iterator();
137    i=0;
138    while (iterator.hasNext()){
139      subseq=(String)iterator.next();
140      addPreppedSequence(subseq);
141      i++;
142    }
143  }
144
145  /** Add a single sequence into the tree.
146   *
147   * @param seq a <code>String</code> value
148   */
149  private void addPreppedSequence(CharSequence seq){
150    int i, gammaStart;
151    int j=0;
152    SuffixNode oldNode=null, newNode;
153    SuffixNode currentNode;
154    boolean canLinkJump = false;
155
156    //Puts i at the end of the previous sequences
157    i = sequences.length();
158    j=i;
159
160    sequences= sequences.toString()+seq.toString();
161
162    currentNode = root;
163    //phase i
164    for (; i<sequences.length();i++){
165      //System.out.println("Phase "+i);
166
167      e+=1;
168      //extension j;
169      for (;j<=i;j++){
170        //System.out.println("extension "+j);
171
172        //reset a couple of things...
173        newNode = null;
174
175        //find first node v at or above s[j-1,i] that is root or has a suffixLink
176        while (currentNode!=root&&currentNode.suffixLink==null&&canLinkJump)
177          currentNode = currentNode.parent;
178
179        if (root==currentNode){
180          currentNode=jumpTo(root,sequences,j,i+1);
181        }else{
182          if (canLinkJump)
183            currentNode = currentNode.suffixLink;
184          gammaStart = j+getPathLength(currentNode);
185
186          currentNode = jumpTo(currentNode,sequences,gammaStart,i+1);
187        }
188
189        if (rule==1)
190          addPositionToLeaf(j, currentNode);
191        if (rule==2)
192          doRule2(currentNode,i,j);
193        if (rule==3){
194          newNode=doRule3(currentNode,i,j);
195          currentNode=newNode;
196        }
197
198        if (rule==1||rule==4||rule==5)
199          currentNode = currentNode.parent;
200
201        if (oldNode!=null){
202          if (currentNode.isTerminal())
203            currentNode=currentNode.parent;
204
205          oldNode.suffixLink=currentNode;
206
207        }
208        oldNode=newNode;
209        newNode=null;
210
211        if (rule==1||rule==4||rule==5){
212          oldNode=null;
213          canLinkJump=false;
214          break;
215        }else
216          canLinkJump=true;
217
218
219      }//for phase i
220    }//for extension j
221    finishAddition();
222  }
223
224  /** This method is used to walk down the tree, from a given node. The
225   * <code>rule</code> variable can be used to check where the walk
226   *  stopped. Note that rule 3 means that the string used to walk down the
227   *  tree does not match (which is a bit different from the construction
228   *  where rule 3 implies that only the last character doesn't match.
229   *<p>
230   *  The String is encoded as a substring of a given source. This is done to
231   *  avoid replicating the string. To send walk down the string
232   *  <code>x</code> from the root, one would call walkTo(root,x,0,x.length()).
233   *
234   * @param starting the root of the subtree we're walking down form.
235   * @param source a superstring that contains the string we're using to
236   * walking down. source.subtring(from,to) should give the string we're
237   * walking down from.
238   * @param from the start position (inclusive) of the target string in the
239   * source.
240   * @param to the end position (exclusive) of the target string in the node.
241   * @return a <code>SuffixNode</code> that the walk stopped at. If the walk
242   * stopped inside an edge. (check the rule variable to see where it stopped).
243   */
244  public SuffixNode walkTo(SuffixNode starting, String source, int from, int to){
245    SuffixNode currentNode;
246    SuffixNode arrivedAt;
247    CharSequence edgeLabel;
248
249
250    currentNode=starting;
251    arrivedAt=starting;
252    while (from<to){
253      arrivedAt=(SuffixNode)currentNode.children.get(
254                                                     new Character(source.charAt(from)));
255      if (arrivedAt==null){
256        from=to;
257        arrivedAt=currentNode;
258        rule=2;
259        break;
260      }
261
262      edgeLabel = getEdgeLabel(arrivedAt);
263      if (edgeLabel.length()>=to-from){
264        if (edgeLabel.equals(source.substring(from,to))){
265          //rule 1 or 5,
266          if (arrivedAt.isTerminal())
267            rule=1;
268          else
269            rule=5;
270        }
271        if (edgeLabel.subSequence(0, to-from).equals(source.substring(from,to)))
272          rule=4;
273        else
274          rule=3;
275        from=to;
276      } else if (source.subSequence(from,from+edgeLabel.length())
277                 .equals(edgeLabel)) {
278        from+=edgeLabel.length();
279        currentNode=arrivedAt;
280      }
281
282      else{
283        rule=3;
284        from=to;
285      }
286    }
287
288    return arrivedAt;
289
290  }
291
292
293
294
295  /**
296   * Just like walkTo, but faster when used during tree construction, as it
297   * assumes that a mismatch can only occurs with the last character of the
298   * target string.
299   *
300   * @param starting the root of the subtree we're walking down form.
301   * @param source a superstring that contains the string we're using to
302   * walking down. source.subtring(from,to) should give the string we're
303   * walking down from.
304   * @param from the start position (inclusive) of the target string in the
305   * source.
306   * @param to the end position (exclusive) of the target string in the node.
307   * @return a <code>SuffixNode</code> that the walk stopped at. If the walk
308   * stopped inside an edge. (check the rule variable to see where it
309   * stopped).
310   */
311  public SuffixNode jumpTo(SuffixNode starting, CharSequence source, int from, int to){
312    SuffixNode currentNode;
313    SuffixNode arrivedAt;
314    boolean canGoDown = true;
315    int edgeLength;
316    int original=from;
317    SuffixNode originalNode=starting;
318
319
320    currentNode=starting;
321    arrivedAt=starting;
322
323    rule=0;
324
325    if (from==to){
326      rule=5;
327      return starting;
328    }
329
330
331    while (canGoDown){
332      //    if (source.substring(from, to).equals("CAGCG"))
333      //  System.out.println(to+" here to "+source.substring(from, to)+" "+(i++));
334
335      if (currentNode.isTerminal()){
336        System.out.println("ARRGH! at "+source.subSequence(original, to)+
337                           "("+from+","+original+","+to+
338                           ") from "+getLabel(originalNode));
339        //Something truly awful happened if this line is ever reached.
340        //This bug should be dead, but it it came back from the dead a couple
341        //of times already.
342      }
343
344      arrivedAt=(SuffixNode)currentNode.children.get(
345                                                     new Character(source.charAt(from)));
346      if (arrivedAt==null){
347        canGoDown=false;
348        arrivedAt=currentNode;
349        rule=2;
350        break;
351      }
352
353      edgeLength = getEdgeLength(arrivedAt);
354      if (edgeLength>=to-from){
355        int after = getPathEnd(arrivedAt)-getEdgeLength(arrivedAt)+to-from-1;
356        if (sequences.charAt(after)==
357            source.charAt(to-1)){
358          if (getEdgeLength(arrivedAt)==to-from){
359            if (arrivedAt.isTerminal())
360              rule=1;
361            else
362              rule=5;
363          }
364          else
365            rule=4;
366        }
367        else
368          rule=3;
369        canGoDown=false;
370        break;
371      }
372      from+=edgeLength;
373      currentNode=arrivedAt;
374
375    }//while canGoDOwn
376
377    return arrivedAt;
378  }
379
380  /******************************************************************
381   * Tree navigation methods
382   ******************************************************************/
383
384  protected int getEdgeLength(SuffixNode child){
385    int parentLength, childLength;
386    SuffixNode parent;
387    if (child==root)
388      return 0;
389    parent=child.parent;
390    parentLength = getPathLength(parent);
391    childLength = getPathLength(child);
392    if (childLength-parentLength<=0){
393
394      System.out.println("negative length "+(childLength-parentLength));
395
396      System.out.println(getLabel(child)+","+getLabel(parent));
397    }
398
399    return childLength-parentLength;
400  }
401
402  protected CharSequence getEdgeLabel(SuffixNode child){
403    return sequences.subSequence(
404                               child.labelStart+
405                               (getPathLength(child)-getEdgeLength(child)),
406                               (child.labelEnd==TO_A_LEAF)?
407                               e:child.labelEnd);
408  }
409
410
411  protected int getPathLength(SuffixNode node){
412    return getPathEnd(node)-node.labelStart;
413  }
414
415  protected int getPathEnd(SuffixNode node){
416    return node.labelEnd==TO_A_LEAF?e:node.labelEnd;
417  }
418
419  protected CharSequence getLabel(SuffixNode node){
420    if (node==root)
421      return "root";
422    else
423      return sequences.subSequence(
424                                 node.labelStart,
425                                 (node.labelEnd==TO_A_LEAF)?e:node.labelEnd).toString();
426  }
427
428
429  protected ArrayList getAllNodes(SuffixNode root, ArrayList list, boolean leavesOnly){
430    Iterator iterator;
431    if (list==null)
432      list= new ArrayList();
433    if (!leavesOnly||(leavesOnly&&root.isTerminal()))
434      list.add(root);
435    if (!root.isTerminal()){
436      iterator = root.children.values().iterator();
437      while (iterator.hasNext())
438        list=getAllNodes((SuffixNode)iterator.next(), list, leavesOnly);
439    }
440
441    return list;
442  }
443
444  public void printTree(){
445    ArrayList allNodes = getAllNodes(root, null, false);
446    for (int i=0;i<allNodes.size();i++){
447      SuffixNode node = (SuffixNode)allNodes.get(i);
448      if (node==root)
449        System.out.println("root");
450      else
451        System.out.println("node "+i+" label "+getLabel(node)+" attached to "+getLabel(node.parent));
452    }
453  }
454
455
456  public SuffixNode getRoot(){return root;}
457
458  /******************************************************************
459   * End Tree Navigation Methods
460   ******************************************************************/
461
462
463  /******************************************************************
464   * Tree modification methods
465   ******************************************************************/
466  private void addPositionToLeaf(int pos, SuffixNode leaf){
467    int[] moreLabels;
468    if (leaf.additionalLabels==null)
469      leaf.additionalLabels = new int[]{pos};
470    else{
471      moreLabels = new int[leaf.additionalLabels.length+1];
472      System.arraycopy(leaf.additionalLabels, 0, moreLabels, 0, leaf.additionalLabels.length);
473      moreLabels[moreLabels.length-1]=pos;
474      leaf.additionalLabels=moreLabels;
475    }
476
477  }
478
479  private void doRule2(SuffixNode parent, int splittingPos, int suffixStart){
480    //int number = getAllNodes(root, null, false).size();
481    SuffixNode leaf = new SimpleNode (parent, suffixStart);
482
483    parent.children.put(new Character(sequences.charAt(splittingPos)), leaf);
484    //System.out.println("rule 2: "+sequences.charAt(splittingPos)+" from "+getLabel(parent)+ " to "+getLabel(leaf));
485
486  }
487
488  private SuffixNode doRule3(SuffixNode child, int splittingPos, int suffixStart){
489    //      return toBeSplit.splitEdge(endOfSubSeq, sequences.charAt(endOfSubSeq),
490    //                         toBeSplit.getStart()+endOfSubSeq-rule3Position,
491    //                         suffixStart);
492    //int number = getAllNodes(root, null, false).size();
493    SuffixNode parent = child.parent;
494    SuffixNode middle= new SimpleNode(parent,suffixStart,splittingPos);
495    Character x=new Character(
496                              sequences.charAt(child.labelStart+getPathLength(child)-getEdgeLength(child)));
497
498    //System.out.println(parent.children.get(x)==child);
499
500    Character y=new Character(sequences.charAt(
501                                               child.labelStart
502                                               +getPathLength(child)-getEdgeLength(child)
503                                               +getEdgeLength(middle)
504                                               ));
505
506    parent.children.remove(x);
507    parent.children.put(x,middle);
508
509    middle.children.put(y,child);
510    child.parent=middle;
511    //System.out.println("rule 3: "+sequences.charAt(splittingPos)+" between "+getLabel(parent)+" and "+getLabel(child) + " Addition made:"+(number==getAllNodes(root, null,false).size()-1));
512    doRule2(middle,splittingPos,suffixStart);
513    return middle;
514  }
515
516  private void finishAddition(){
517    SuffixNode leaf;
518    ArrayList leaves = getAllNodes(root, null, true);
519    for (int i=0;i<leaves.size();i++){
520      leaf = (SuffixNode)leaves.get(i);
521      if (leaf.labelEnd==TO_A_LEAF)
522        leaf.labelEnd=e;
523    }
524
525  }
526  /******************************************************************
527   * end Tree modification methods
528   ******************************************************************/
529
530  public static abstract class SuffixNode {
531
532    static final int A_LEAF=-1;
533    SuffixNode parent;
534    SuffixNode suffixLink;
535    int labelStart, labelEnd;
536    HashMap children;
537    int[] additionalLabels;
538
539    /**
540       * Determine is this node is terminal (has no children).
541       *<p>
542       * Note that this only happens at the terminated node (if the sequences
543       * have been terminated.
544       *
545       * @return <code>true</code> if and only if this node has no children.
546       */
547
548    abstract public boolean isTerminal();
549
550    /**
551     * Determine if this node has a child corresponding to a given character
552     *
553     * @param i the first <code>Character</code> of the edge coming down this node.
554     * @return <code>true</code> if the node has a child going down from that character,
555     * false otherwise
556     */
557    abstract public boolean hasChild(Character i);
558
559    /** Gets the child of of a node that is coming down from that particular
560     * node. It returns null if no child exists or if no child exists starting
561     * on that particular character.
562     *
563     * @param i the first <code>Character</code> of the edge coming down this node
564     * @return the appropriate child <code>SuffixNode</code>, or null if it
565     * doesn't exists.
566     */
567    abstract SuffixNode getChild(Character i);
568    //abstract void addChild(SuffixTree tree, int i, SuffixNode n);
569
570    /**
571     * Returns the parent of this node, null if it's the root.
572     *
573     * @return the parent of this node, null if it's the root.
574     */
575    abstract SuffixNode getParent();
576
577  }
578
579
580  class SimpleNode extends SuffixNode{
581
582    //static final int A_LEAF=-1;
583    //SuffixNode parent;
584    //SuffixNode suffixLink;
585    //int labelStart, labelEnd;
586    //HashMap children;
587    //int[] additionalLabels;
588
589    /** Creates a root
590     */
591    public SimpleNode(){
592      parent=null;
593      suffixLink=null;
594      labelStart=0;
595      labelEnd=0;
596      children=new HashMap();
597      additionalLabels=null;
598    }
599
600    /** creates a leaf
601     * @param parent the parent node
602     * @param position the starting value of the suffix
603     */
604    public SimpleNode(SuffixNode parent, int position){
605      this();
606      this.parent=parent;
607      labelStart=position;
608      labelEnd = A_LEAF;
609      children=null;
610      //checkParent(this);
611    }
612
613    /** creates an internal node
614     * @param parent the parent of this node
615     * @param labelStart the starting point of the path label
616     * @param labelStop the ending point of the path label
617     */
618    public SimpleNode(SuffixNode parent, int labelStart, int labelStop){
619      this();
620      this.parent=parent;
621      this.labelStart=labelStart;
622      this.labelEnd=labelStop;
623      //checkParent(this);
624    }
625
626
627    public boolean isTerminal(){return children==null;}
628    public boolean hasChild(Character x){return getChild(x)!=null;}
629    public SuffixNode getChild(Character x){
630      return (children==null)?null:(SuffixNode)children.get(x);
631    }
632    public SuffixNode getParent(){return parent;}
633  }
634
635  public boolean subStringExists(String str)
636  {
637    walkTo(root, str, 0, str.length());
638    return (rule==1||rule==4||rule==5);
639  }
640
641  /**
642   * Stupid little wrapper to put a termination symbol at the end. A sublist
643   * that doesn't include the termination symbol will have the same alphabet as
644   * the original, one that does will have an alphabet that includes the
645   * termination symbol.
646   *
647   * Now includes some ugly hacks by Thomas Down to make ambiguities work
648   * nicer.
649   *
650   */
651  private class TerminatedSymbolList implements SymbolList
652  {
653    private SymbolList unterminated;
654    final Symbol TERMINATION_SYMBOL;
655    private AbstractAlphabet alpha;
656    private Map translationTable = new HashMap();
657
658    public TerminatedSymbolList(SymbolList unterminated)
659    {
660      this.unterminated=unterminated;
661      TERMINATION_SYMBOL = AlphabetManager.createSymbol("Termination");
662      FiniteAlphabet oldAlphabet = (FiniteAlphabet)unterminated.getAlphabet();
663      // Set set = AlphabetManager.getAllSymbols(oldAlphabet);
664      Set set = new HashSet();
665      for (Iterator i = oldAlphabet.iterator(); i.hasNext(); ) {
666          set.add(i.next());
667      }
668      set.add(TERMINATION_SYMBOL);
669      alpha = new SimpleAlphabet(set);
670      CharacterTokenization tokenizer =new CharacterTokenization(alpha, true);
671      tokenizer.bindSymbol(TERMINATION_SYMBOL, DEFAULT_TERM_CHAR);
672      SymbolTokenization sToke;
673      try{
674        sToke = oldAlphabet.getTokenization("token");
675      }
676      catch (BioException be){
677          throw new BioError("Internal error: failed to get SymbolTokenization for SymbolList alphabet", be);
678      }
679      if (sToke.getTokenType() != SymbolTokenization.CHARACTER)
680        throw new IllegalArgumentException("Only FiniteAlphabets using a char token are supported by UkkonenSuffixTree");
681      try{
682        for (Iterator i= AlphabetManager.getAllSymbols(oldAlphabet).iterator();i.hasNext();){
683          Symbol oldSymbol = (Symbol) i.next();
684          Symbol newSymbol;
685          if (oldSymbol instanceof AtomicSymbol) {
686              // The atomic symbols are identical between the two alphabets
687              newSymbol = oldSymbol;
688          } else {
689              // Port ambiguous symbols across
690              Set s = new HashSet();
691              for (Iterator si = ((FiniteAlphabet) oldSymbol.getMatches()).iterator(); si.hasNext(); ) {
692                  s.add(si.next());
693              }
694              newSymbol = alpha.getAmbiguity(s);
695          }
696          //takes first char of String, should work because we checked
697          //getTokenType above
698
699          char c = sToke.tokenizeSymbol(oldSymbol).charAt(0);
700          // System.err.println("Binding " + c);
701          tokenizer.bindSymbol(newSymbol, c);
702          translationTable.put(oldSymbol, newSymbol);
703        }
704        //This is really hacky, ambiguous symbols containing
705        //TERMINATION_SYMBOL are impossible at this point, so I just define
706        //the Tokenization to treat them as TERMINATION_SYMBOL so that the
707        //code that likes to loop through doesn't go titty up.
708        for (Iterator i= AlphabetManager.getAllSymbols(alpha).iterator();i.hasNext();){
709          Symbol s = (Symbol) i.next();
710          Alphabet mathes = s.getMatches();
711          if (mathes.contains(TERMINATION_SYMBOL))
712              tokenizer.bindSymbol(s,DEFAULT_TERM_CHAR);
713        }
714
715      }catch(IllegalSymbolException ise){
716        throw new AssertionFailure("Assertion Failure: This alphabet has been custom made so this doesn't happen",ise);
717      }
718
719      alpha.putTokenization("token", tokenizer);
720    }
721
722
723      // Implementation of org.biojava.utils.Changeable
724
725    public void addChangeListener(ChangeListener changeListener, ChangeType changeType) {
726      unterminated.addChangeListener(changeListener,changeType);
727    }
728
729    public void addChangeListener(ChangeListener changeListener) {
730      unterminated.addChangeListener(changeListener);
731    }
732
733    public void removeChangeListener(ChangeListener changeListener, ChangeType changeType) {
734      unterminated.removeChangeListener(changeListener,changeType);
735    }
736
737    public void removeChangeListener(ChangeListener changeListener) {
738      unterminated.removeChangeListener(changeListener);
739    }
740
741    public boolean isUnchanging(ChangeType changeType) {
742      return unterminated.isUnchanging(changeType);
743    }
744
745    // Implementation of org.biojava.bio.symbol.SymbolList
746
747    public int length() {
748      return unterminated.length()+1;
749    }
750
751    public Iterator iterator() {
752      return unterminated.iterator();
753    }
754
755    public SymbolList subList(int n, int n1) throws IndexOutOfBoundsException {
756      List list;
757      if (n1!=unterminated.length()+1)
758        return unterminated.subList(n,n1);
759      else{
760        list = unterminated.subList(n,n1-1).toList();
761        list.add(TERMINATION_SYMBOL);
762        try{
763          return new SimpleSymbolList(getAlphabet(),list);
764        }
765        catch(IllegalSymbolException e){
766          throw new AssertionFailure("Assertion Failure: This alphabet was created just so it doesn't do this",e);
767        }
768      }
769    }
770
771    public Alphabet getAlphabet() {
772      return alpha;
773    }
774
775    public Symbol symbolAt(int n) throws IndexOutOfBoundsException {
776      if (n!=length())
777        return (Symbol) translationTable.get(unterminated.symbolAt(n));
778      else
779        return TERMINATION_SYMBOL;
780    }
781
782    public List toList() {
783      List answer = unterminated.toList();
784      answer.add(TERMINATION_SYMBOL);
785      return answer;
786    }
787
788    public String seqString() {
789      try{
790        SymbolTokenization toke = getAlphabet().getTokenization("token");
791        return unterminated.seqString()+toke.tokenizeSymbol(TERMINATION_SYMBOL);
792      } catch (BioException ex) {
793        throw new BioRuntimeException("Couldn't tokenize sequence", ex);
794      }
795    }
796
797      public String subStr(int n, int n1) throws IndexOutOfBoundsException {
798      return subList(n, n1).seqString();
799    }
800
801    /**
802     * Describe <code>edit</code> method here.
803     *
804     * @param edit an <code>Edit</code> value
805     * @exception IndexOutOfBoundsException if an error occurs
806     * @exception IllegalAlphabetException if an error occurs
807     * @exception ChangeVetoException if an error occurs
808     */
809    public void edit(Edit edit) throws IndexOutOfBoundsException, IllegalAlphabetException, ChangeVetoException {
810      throw new ChangeVetoException("TerminatedSymbolList is immutable");
811    }
812  }
813}