001/*
002 * RichSequenceHandler.java
003 *
004 * Created on March 7, 2006, 3:12 PM
005 */
006
007package org.biojavax.bio.seq;
008
009import java.util.Iterator;
010import java.util.List;
011
012import org.biojava.bio.symbol.Edit;
013import org.biojava.bio.symbol.IllegalAlphabetException;
014import org.biojava.bio.symbol.Symbol;
015import org.biojava.bio.symbol.SymbolList;
016import org.biojava.utils.ChangeVetoException;
017
018/**
019 * An interface for classes that know how to handle subsequence operations.
020 * Implementations may be optimized so that they perform more efficiently in
021 * certain conditions. For example a subsequence operation on a huge BioSQL
022 * backed <code>RichSequence</code> could be optimized so that the operation
023 * is performed more efficiently than dragging the whole sequence to memory and
024 * then doing the operation.
025 *
026 * Implementations of <code>RichSequence</code> should generally delegate
027 * <code>symbolAt(int index)</code>, <code>subStr(int start, int end)</code>,
028 * <code>subList(int start, int end)</code> and subSequence(int start, int end)
029 * to some implementation of this interface.
030 *
031 * @author Mark Schreiber
032 * @author Richard Holland
033 * @since 1.5
034 */
035public interface RichSequenceHandler {
036    
037  /**
038   * Apply an edit to the Sequence as specified by the edit object.
039   *
040   * <h2>Description</h2>
041   *
042   * <p>
043   * All edits can be broken down into a series of operations that change
044   * contiguous blocks of the sequence. This represent a one of those operations.
045   * </p>
046   *
047   * <p>
048   * When applied, this Edit will replace 'length' number of symbols starting a
049   * position 'pos' by the SymbolList 'replacement'. This allow to do insertions
050   * (length=0), deletions (replacement=SymbolList.EMPTY_LIST) and replacements
051   * (length>=1 and replacement.length()>=1).
052   * </p>
053   *
054   * <p>
055   * The pos and pos+length should always be valid positions on the SymbolList
056   * to:
057   * <ul>
058   * <li>be edited (between 0 and symL.length()+1).</li>
059   * <li>To append to a sequence, pos=symL.length()+1, pos=0.</li>
060   * <li>To insert something at the beginning of the sequence, set pos=1 and
061   * length=0.</li>
062   * </ul>
063   * </p>
064   *
065   * <h2>Examples</h2>
066   *
067   * <code><pre>
068   * RichSequence seq = //code to initialize RichSequence
069   * System.out.println(seq.seqString());
070   *
071   * // delete 5 bases from position 4
072   * Edit ed = new Edit(4, 5, SymbolList.EMPTY_LIST);
073   * seq.edit(ed);
074   * System.out.println(seq.seqString());
075   *
076   * // delete one base from the start
077   * ed = new Edit(1, 1, SymbolList.EMPTY_LIST);
078   * seq.edit(ed);
079   *
080   * // delete one base from the end
081   * ed = new Edit(seq.length(), 1, SymbolList.EMPTY_LIST);
082   * seq.edit(ed);
083   * System.out.println(seq.seqString());
084   *
085   * // overwrite 2 bases from position 3 with "tt"
086   * ed = new Edit(3, 2, DNATools.createDNA("tt"));
087   * seq.edit(ed);
088   * System.out.println(seq.seqString());
089   *
090   * // add 6 bases to the start
091   * ed = new Edit(1, 0, DNATools.createDNA("aattgg");
092   * seq.edit(ed);
093   * System.out.println(seq.seqString());
094   *
095   * // add 4 bases to the end
096   * ed = new Edit(seq.length() + 1, 0, DNATools.createDNA("tttt"));
097   * seq.edit(ed);
098   * System.out.println(seq.seqString());
099   *
100   * // full edit
101   * ed = new Edit(3, 2, DNATools.createDNA("aatagaa");
102   * seq.edit(ed);
103   * System.out.println(seq.seqString());
104   * </pre></code>
105   *
106   * @param edit the Edit to perform
107   * @throws IndexOutOfBoundsException if the edit does not lie within the
108   *         SymbolList
109   * @throws IllegalAlphabetException if the SymbolList to insert has an
110   *         incompatible alphabet
111   * @throws ChangeVetoException  if either the SymboList does not support the
112   *         edit, or if the change was vetoed
113   */
114    public void edit(RichSequence seq, Edit edit) throws IndexOutOfBoundsException, IllegalAlphabetException, ChangeVetoException;
115    
116  /**
117   * Return the symbol at index, counting from 1.
118   *
119   * @param index the offset into this SymbolList
120   * @return  the Symbol at that index
121   * @throws IndexOutOfBoundsException if index is less than 1, or greater than
122   *                                   the length of the symbol list
123   */
124    public Symbol symbolAt(RichSequence seq, int index) throws IndexOutOfBoundsException;
125    
126   /**
127   * Returns a List of symbols.
128   * <p>
129   * This should be an immutable list of symbols or a copy.
130   *
131   * @return  a List of Symbols
132   */
133    public List<Symbol> toList(RichSequence seq);
134    
135  /**
136   * Return a region of this sequence as a String.
137   * <p>
138   * This should use the same rules as seqString.
139   *
140   * @param start  the first symbol to include
141   * @param end the last symbol to include
142   * @return the string representation
143   * @throws IndexOutOfBoundsException if either start or end are not within the
144   *         SymbolList
145   */
146    public String subStr(RichSequence seq, int start, int end) throws IndexOutOfBoundsException;
147    
148  /**
149   * Return a new SymbolList for the symbols start to end inclusive.
150   * <p>
151   * The resulting SymbolList will count from 1 to (end-start + 1) inclusive, and
152   * refer to the symbols start to end of the original sequence.
153   *
154   * @param start the first symbol of the new SymbolList
155   * @param end the last symbol (inclusive) of the new SymbolList
156   */
157    public SymbolList subList(RichSequence seq, int start, int end) throws IndexOutOfBoundsException;
158    
159   /**
160   * Stringify this Sequences.
161   * <p>
162   * It is expected that this will use the symbol's token to render each
163   * symbol. It should be parsable back into a SymbolList using the default
164   * token parser for this alphabet.
165   *
166   * @return  a string representation of the symbol list
167   */
168    public String seqString(RichSequence seq);
169    
170   /**
171   * An Iterator over all Symbols in this SymbolList.
172   * <p>
173   * This is an ordered iterator over the Symbols. It cannot be used
174   * to edit the underlying symbols.
175   *
176   * @return  an iterator
177   */
178    public Iterator<Symbol> iterator(RichSequence seq);
179}