001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022 023package org.biojava.bio.symbol; 024 025import java.util.Iterator; 026import java.util.List; 027 028import org.biojava.utils.ChangeType; 029import org.biojava.utils.ChangeVetoException; 030import org.biojava.utils.Changeable; 031 032/** 033 * A sequence of symbols that belong to an alphabet. 034 * <p> 035 * This uses biological coordinates (1 to length). 036 * 037 * @author Matthew Pocock 038 * @author Mark Schreiber 039 * @author Francois Pepin 040 */ 041public interface SymbolList extends Changeable { 042 /** 043 * Signals that the SymbolList is being edited. The getChange field of the 044 * event should contain the SymbolList.Edit object describing the change. 045 */ 046 public static final ChangeType EDIT = new ChangeType( 047 "the SymbolList has been edited", 048 "org.biojava.bio.symbol.SymbolList", 049 "EDIT" 050 ); 051 052 /** 053 * The alphabet that this SymbolList is over. 054 * <p> 055 * Every symbol within this SymbolList is a member of this alphabet. 056 * <code>alphabet.contains(symbol) == true</code> 057 * for each symbol that is within this sequence. 058 * 059 * @return the alphabet 060 */ 061 Alphabet getAlphabet(); 062 063 /** 064 * The number of symbols in this SymbolList. 065 * 066 * @return the length 067 */ 068 int length(); 069 070 /** 071 * Return the symbol at index, counting from 1. 072 * 073 * @param index the offset into this SymbolList 074 * @return the Symbol at that index 075 * @throws IndexOutOfBoundsException if index is less than 1, or greater than 076 * the length of the symbol list 077 */ 078 Symbol symbolAt(int index) throws IndexOutOfBoundsException; 079 080 /** 081 * Returns a List of symbols. 082 * <p> 083 * This is an immutable list of symbols. Do not edit it. 084 * 085 * @return a List of Symbols 086 */ 087 List<Symbol> toList(); 088 089 /** 090 * An Iterator over all Symbols in this SymbolList. 091 * <p> 092 * This is an ordered iterator over the Symbols. It cannot be used 093 * to edit the underlying symbols. 094 * 095 * @return an iterator 096 */ 097 Iterator<Symbol> iterator(); 098 099 /** 100 * Return a new SymbolList for the symbols start to end inclusive. 101 * <p> 102 * The resulting SymbolList will count from 1 to (end-start + 1) inclusive, and 103 * refer to the symbols start to end of the original sequence. 104 * 105 * @param start the first symbol of the new SymbolList 106 * @param end the last symbol (inclusive) of the new SymbolList 107 */ 108 SymbolList subList(int start, int end) throws IndexOutOfBoundsException; 109 110 /** 111 * Stringify this symbol list. 112 * <p> 113 * It is expected that this will use the symbol's token to render each 114 * symbol. It should be parsable back into a SymbolList using the default 115 * token parser for this alphabet. 116 * 117 * @return a string representation of the symbol list 118 */ 119 String seqString(); 120 121 /** 122 * Return a region of this symbol list as a String. 123 * <p> 124 * This should use the same rules as seqString. 125 * 126 * @param start the first symbol to include 127 * @param end the last symbol to include 128 * @return the string representation 129 * @throws IndexOutOfBoundsException if either start or end are not within the 130 * SymbolList 131 */ 132 String subStr(int start, int end) throws IndexOutOfBoundsException; 133 134 /** 135 * Apply an edit to the SymbolList as specified by the edit object. 136 * 137 * <h2>Description</h2> 138 * 139 * <p> 140 * All edits can be broken down into a series of operations that change 141 * contiguous blocks of the sequence. This represent a one of those operations. 142 * </p> 143 * 144 * <p> 145 * When applied, this Edit will replace 'length' number of symbols starting a 146 * position 'pos' by the SymbolList 'replacement'. This allow to do insertions 147 * (length=0), deletions (replacement=SymbolList.EMPTY_LIST) and replacements 148 * (length>=1 and replacement.length()>=1). 149 * </p> 150 * 151 * <p> 152 * The pos and pos+length should always be valid positions on the SymbolList 153 * to: 154 * <ul> 155 * <li>be edited (between 0 and symL.length()+1).</li> 156 * <li>To append to a sequence, pos=symL.length()+1, pos=0.</li> 157 * <li>To insert something at the beginning of the sequence, set pos=1 and 158 * length=0.</li> 159 * </ul> 160 * </p> 161 * 162 * <h2>Examples</h2> 163 * 164 * <code><pre> 165 * SymbolList seq = DNATools.createDNA("atcaaaaacgctagc"); 166 * System.out.println(seq.seqString()); 167 * 168 * // delete 5 bases from position 4 169 * Edit ed = new Edit(4, 5, SymbolList.EMPTY_LIST); 170 * seq.edit(ed); 171 * System.out.println(seq.seqString()); 172 * 173 * // delete one base from the start 174 * ed = new Edit(1, 1, SymbolList.EMPTY_LIST); 175 * seq.edit(ed); 176 * 177 * // delete one base from the end 178 * ed = new Edit(seq.length(), 1, SymbolList.EMPTY_LIST); 179 * seq.edit(ed); 180 * System.out.println(seq.seqString()); 181 * 182 * // overwrite 2 bases from position 3 with "tt" 183 * ed = new Edit(3, 2, DNATools.createDNA("tt")); 184 * seq.edit(ed); 185 * System.out.println(seq.seqString()); 186 * 187 * // add 6 bases to the start 188 * ed = new Edit(1, 0, DNATools.createDNA("aattgg"); 189 * seq.edit(ed); 190 * System.out.println(seq.seqString()); 191 * 192 * // add 4 bases to the end 193 * ed = new Edit(seq.length() + 1, 0, DNATools.createDNA("tttt")); 194 * seq.edit(ed); 195 * System.out.println(seq.seqString()); 196 * 197 * // full edit 198 * ed = new Edit(3, 2, DNATools.createDNA("aatagaa"); 199 * seq.edit(ed); 200 * System.out.println(seq.seqString()); 201 * </pre></code> 202 * 203 * @param edit the Edit to perform 204 * @throws IndexOutOfBoundsException if the edit does not lie within the 205 * SymbolList 206 * @throws IllegalAlphabetException if the SymbolList to insert has an 207 * incompatible alphabet 208 * @throws ChangeVetoException if either the SymboList does not support the 209 * edit, or if the change was vetoed 210 */ 211 void edit(Edit edit) 212 throws IndexOutOfBoundsException, IllegalAlphabetException, 213 ChangeVetoException; 214 215 /** 216 * A useful object that represents an empty symbol list, to avoid returning 217 * null. 218 * 219 */ 220 static final SymbolList EMPTY_LIST = new EmptySymbolList(); 221 222}