001/* 002 * RichSequenceHandler.java 003 * 004 * Created on March 7, 2006, 3:12 PM 005 */ 006 007package org.biojavax.bio.seq; 008 009import java.util.Iterator; 010import java.util.List; 011 012import org.biojava.bio.symbol.Edit; 013import org.biojava.bio.symbol.IllegalAlphabetException; 014import org.biojava.bio.symbol.Symbol; 015import org.biojava.bio.symbol.SymbolList; 016import org.biojava.utils.ChangeVetoException; 017 018/** 019 * An interface for classes that know how to handle subsequence operations. 020 * Implementations may be optimized so that they perform more efficiently in 021 * certain conditions. For example a subsequence operation on a huge BioSQL 022 * backed <code>RichSequence</code> could be optimized so that the operation 023 * is performed more efficiently than dragging the whole sequence to memory and 024 * then doing the operation. 025 * 026 * Implementations of <code>RichSequence</code> should generally delegate 027 * <code>symbolAt(int index)</code>, <code>subStr(int start, int end)</code>, 028 * <code>subList(int start, int end)</code> and subSequence(int start, int end) 029 * to some implementation of this interface. 030 * 031 * @author Mark Schreiber 032 * @author Richard Holland 033 * @since 1.5 034 */ 035public interface RichSequenceHandler { 036 037 /** 038 * Apply an edit to the Sequence as specified by the edit object. 039 * 040 * <h2>Description</h2> 041 * 042 * <p> 043 * All edits can be broken down into a series of operations that change 044 * contiguous blocks of the sequence. This represent a one of those operations. 045 * </p> 046 * 047 * <p> 048 * When applied, this Edit will replace 'length' number of symbols starting a 049 * position 'pos' by the SymbolList 'replacement'. This allow to do insertions 050 * (length=0), deletions (replacement=SymbolList.EMPTY_LIST) and replacements 051 * (length>=1 and replacement.length()>=1). 052 * </p> 053 * 054 * <p> 055 * The pos and pos+length should always be valid positions on the SymbolList 056 * to: 057 * <ul> 058 * <li>be edited (between 0 and symL.length()+1).</li> 059 * <li>To append to a sequence, pos=symL.length()+1, pos=0.</li> 060 * <li>To insert something at the beginning of the sequence, set pos=1 and 061 * length=0.</li> 062 * </ul> 063 * </p> 064 * 065 * <h2>Examples</h2> 066 * 067 * <code><pre> 068 * RichSequence seq = //code to initialize RichSequence 069 * System.out.println(seq.seqString()); 070 * 071 * // delete 5 bases from position 4 072 * Edit ed = new Edit(4, 5, SymbolList.EMPTY_LIST); 073 * seq.edit(ed); 074 * System.out.println(seq.seqString()); 075 * 076 * // delete one base from the start 077 * ed = new Edit(1, 1, SymbolList.EMPTY_LIST); 078 * seq.edit(ed); 079 * 080 * // delete one base from the end 081 * ed = new Edit(seq.length(), 1, SymbolList.EMPTY_LIST); 082 * seq.edit(ed); 083 * System.out.println(seq.seqString()); 084 * 085 * // overwrite 2 bases from position 3 with "tt" 086 * ed = new Edit(3, 2, DNATools.createDNA("tt")); 087 * seq.edit(ed); 088 * System.out.println(seq.seqString()); 089 * 090 * // add 6 bases to the start 091 * ed = new Edit(1, 0, DNATools.createDNA("aattgg"); 092 * seq.edit(ed); 093 * System.out.println(seq.seqString()); 094 * 095 * // add 4 bases to the end 096 * ed = new Edit(seq.length() + 1, 0, DNATools.createDNA("tttt")); 097 * seq.edit(ed); 098 * System.out.println(seq.seqString()); 099 * 100 * // full edit 101 * ed = new Edit(3, 2, DNATools.createDNA("aatagaa"); 102 * seq.edit(ed); 103 * System.out.println(seq.seqString()); 104 * </pre></code> 105 * 106 * @param edit the Edit to perform 107 * @throws IndexOutOfBoundsException if the edit does not lie within the 108 * SymbolList 109 * @throws IllegalAlphabetException if the SymbolList to insert has an 110 * incompatible alphabet 111 * @throws ChangeVetoException if either the SymboList does not support the 112 * edit, or if the change was vetoed 113 */ 114 public void edit(RichSequence seq, Edit edit) throws IndexOutOfBoundsException, IllegalAlphabetException, ChangeVetoException; 115 116 /** 117 * Return the symbol at index, counting from 1. 118 * 119 * @param index the offset into this SymbolList 120 * @return the Symbol at that index 121 * @throws IndexOutOfBoundsException if index is less than 1, or greater than 122 * the length of the symbol list 123 */ 124 public Symbol symbolAt(RichSequence seq, int index) throws IndexOutOfBoundsException; 125 126 /** 127 * Returns a List of symbols. 128 * <p> 129 * This should be an immutable list of symbols or a copy. 130 * 131 * @return a List of Symbols 132 */ 133 public List<Symbol> toList(RichSequence seq); 134 135 /** 136 * Return a region of this sequence as a String. 137 * <p> 138 * This should use the same rules as seqString. 139 * 140 * @param start the first symbol to include 141 * @param end the last symbol to include 142 * @return the string representation 143 * @throws IndexOutOfBoundsException if either start or end are not within the 144 * SymbolList 145 */ 146 public String subStr(RichSequence seq, int start, int end) throws IndexOutOfBoundsException; 147 148 /** 149 * Return a new SymbolList for the symbols start to end inclusive. 150 * <p> 151 * The resulting SymbolList will count from 1 to (end-start + 1) inclusive, and 152 * refer to the symbols start to end of the original sequence. 153 * 154 * @param start the first symbol of the new SymbolList 155 * @param end the last symbol (inclusive) of the new SymbolList 156 */ 157 public SymbolList subList(RichSequence seq, int start, int end) throws IndexOutOfBoundsException; 158 159 /** 160 * Stringify this Sequences. 161 * <p> 162 * It is expected that this will use the symbol's token to render each 163 * symbol. It should be parsable back into a SymbolList using the default 164 * token parser for this alphabet. 165 * 166 * @return a string representation of the symbol list 167 */ 168 public String seqString(RichSequence seq); 169 170 /** 171 * An Iterator over all Symbols in this SymbolList. 172 * <p> 173 * This is an ordered iterator over the Symbols. It cannot be used 174 * to edit the underlying symbols. 175 * 176 * @return an iterator 177 */ 178 public Iterator<Symbol> iterator(RichSequence seq); 179}