001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojava.bio.symbol;
024
025import java.util.Iterator;
026import java.util.List;
027
028import org.biojava.utils.ChangeType;
029import org.biojava.utils.ChangeVetoException;
030import org.biojava.utils.Changeable;
031
032/**
033 * A sequence of symbols that belong to an alphabet.
034 * <p>
035 * This uses biological coordinates (1 to length).
036 *
037 * @author Matthew Pocock
038 * @author Mark Schreiber
039 * @author Francois Pepin
040 */
041public interface SymbolList extends Changeable {
042  /**
043   * Signals that the SymbolList is being edited. The getChange field of the
044   * event should contain the SymbolList.Edit object describing the change.
045   */
046  public static final ChangeType EDIT = new ChangeType(
047    "the SymbolList has been edited",
048    "org.biojava.bio.symbol.SymbolList",
049    "EDIT"
050  );
051  
052  /**
053   * The alphabet that this SymbolList is over.
054   * <p>
055   * Every symbol within this SymbolList is a member of this alphabet.
056   * <code>alphabet.contains(symbol) == true</code>
057   * for each symbol that is within this sequence.
058   *
059   * @return  the alphabet
060   */
061  Alphabet getAlphabet();
062  
063  /**
064   * The number of symbols in this SymbolList.
065   *
066   * @return  the length
067   */
068  int length();
069
070  /**
071   * Return the symbol at index, counting from 1.
072   *
073   * @param index the offset into this SymbolList
074   * @return  the Symbol at that index
075   * @throws IndexOutOfBoundsException if index is less than 1, or greater than
076   *                                   the length of the symbol list
077   */
078  Symbol symbolAt(int index) throws IndexOutOfBoundsException;
079  
080  /**
081   * Returns a List of symbols.
082   * <p>
083   * This is an immutable list of symbols. Do not edit it.
084   *
085   * @return  a List of Symbols
086   */
087  List<Symbol> toList();
088  
089  /**
090   * An Iterator over all Symbols in this SymbolList.
091   * <p>
092   * This is an ordered iterator over the Symbols. It cannot be used
093   * to edit the underlying symbols.
094   *
095   * @return  an iterator
096   */
097  Iterator<Symbol> iterator();
098  
099  /**
100   * Return a new SymbolList for the symbols start to end inclusive.
101   * <p>
102   * The resulting SymbolList will count from 1 to (end-start + 1) inclusive, and
103   * refer to the symbols start to end of the original sequence.
104   *
105   * @param start the first symbol of the new SymbolList
106   * @param end the last symbol (inclusive) of the new SymbolList
107   */
108  SymbolList subList(int start, int end) throws IndexOutOfBoundsException;
109    
110  /**
111   * Stringify this symbol list.
112   * <p>
113   * It is expected that this will use the symbol's token to render each
114   * symbol. It should be parsable back into a SymbolList using the default
115   * token parser for this alphabet.
116   *
117   * @return  a string representation of the symbol list
118   */
119  String seqString();
120  
121  /**
122   * Return a region of this symbol list as a String.
123   * <p>
124   * This should use the same rules as seqString.
125   *
126   * @param start  the first symbol to include
127   * @param end the last symbol to include
128   * @return the string representation
129   * @throws IndexOutOfBoundsException if either start or end are not within the
130   *         SymbolList
131   */
132  String subStr(int start, int end) throws IndexOutOfBoundsException;
133  
134  /**
135   * Apply an edit to the SymbolList as specified by the edit object.
136   *
137   * <h2>Description</h2>
138   *
139   * <p>
140   * All edits can be broken down into a series of operations that change
141   * contiguous blocks of the sequence. This represent a one of those operations.
142   * </p>
143   *
144   * <p>
145   * When applied, this Edit will replace 'length' number of symbols starting a
146   * position 'pos' by the SymbolList 'replacement'. This allow to do insertions
147   * (length=0), deletions (replacement=SymbolList.EMPTY_LIST) and replacements
148   * (length>=1 and replacement.length()>=1).
149   * </p>
150   *
151   * <p>
152   * The pos and pos+length should always be valid positions on the SymbolList
153   * to:
154   * <ul>
155   * <li>be edited (between 0 and symL.length()+1).</li>
156   * <li>To append to a sequence, pos=symL.length()+1, pos=0.</li>
157   * <li>To insert something at the beginning of the sequence, set pos=1 and
158   * length=0.</li>
159   * </ul>
160   * </p>
161   *
162   * <h2>Examples</h2>
163   *
164   * <code><pre>
165   * SymbolList seq = DNATools.createDNA("atcaaaaacgctagc");
166   * System.out.println(seq.seqString());
167   *
168   * // delete 5 bases from position 4
169   * Edit ed = new Edit(4, 5, SymbolList.EMPTY_LIST);
170   * seq.edit(ed);
171   * System.out.println(seq.seqString());
172   *
173   * // delete one base from the start
174   * ed = new Edit(1, 1, SymbolList.EMPTY_LIST);
175   * seq.edit(ed);
176   *
177   * // delete one base from the end
178   * ed = new Edit(seq.length(), 1, SymbolList.EMPTY_LIST);
179   * seq.edit(ed);
180   * System.out.println(seq.seqString());
181   *
182   * // overwrite 2 bases from position 3 with "tt"
183   * ed = new Edit(3, 2, DNATools.createDNA("tt"));
184   * seq.edit(ed);
185   * System.out.println(seq.seqString());
186   *
187   * // add 6 bases to the start
188   * ed = new Edit(1, 0, DNATools.createDNA("aattgg");
189   * seq.edit(ed);
190   * System.out.println(seq.seqString());
191   *
192   * // add 4 bases to the end
193   * ed = new Edit(seq.length() + 1, 0, DNATools.createDNA("tttt"));
194   * seq.edit(ed);
195   * System.out.println(seq.seqString());
196   *
197   * // full edit
198   * ed = new Edit(3, 2, DNATools.createDNA("aatagaa");
199   * seq.edit(ed);
200   * System.out.println(seq.seqString());
201   * </pre></code>
202   *
203   * @param edit the Edit to perform
204   * @throws IndexOutOfBoundsException if the edit does not lie within the
205   *         SymbolList
206   * @throws IllegalAlphabetException if the SymbolList to insert has an
207   *         incompatible alphabet
208   * @throws ChangeVetoException  if either the SymboList does not support the
209   *         edit, or if the change was vetoed
210   */
211  void edit(Edit edit)
212  throws IndexOutOfBoundsException, IllegalAlphabetException,
213  ChangeVetoException;
214  
215  /**
216   * A useful object that represents an empty symbol list, to avoid returning
217   * null.
218   *
219   */
220  static final SymbolList EMPTY_LIST = new EmptySymbolList();
221
222}