001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.seq; 023 024import java.util.Iterator; 025 026import org.biojava.bio.Annotation; 027import org.biojava.bio.BioException; 028import org.biojava.bio.SmallAnnotation; 029import org.biojava.bio.seq.impl.DummySequence; 030import org.biojava.bio.seq.impl.RevCompSequence; 031import org.biojava.bio.seq.impl.SimpleGappedSequence; 032import org.biojava.bio.seq.impl.SimpleSequence; 033import org.biojava.bio.seq.impl.SubSequence; 034import org.biojava.bio.seq.impl.ViewSequence; 035import org.biojava.bio.symbol.Alphabet; 036import org.biojava.bio.symbol.DummySymbolList; 037import org.biojava.bio.symbol.IllegalAlphabetException; 038import org.biojava.bio.symbol.IllegalSymbolException; 039import org.biojava.bio.symbol.RangeLocation; 040import org.biojava.bio.symbol.Symbol; 041import org.biojava.bio.symbol.SymbolList; 042import org.biojava.utils.ChangeVetoException; 043 044/** 045 * Methods for manipulating sequences. 046 * 047 * @author Matthew Pocock 048 */ 049public final class SequenceTools { 050 private SequenceTools() { 051 } 052 053 public static Sequence createSequence( 054 SymbolList syms, String uri, String name, Annotation ann 055 ) { 056 return new SimpleSequence(syms, uri, name, ann); 057 } 058 059 /** 060 * Extract a sub-sequence from a sequence. 061 * 062 * <p> 063 * The sub-sequence will be indexed from 1 through to (end-start+1). An index 064 * of i in the sub-sequence corresponds to (i+start-1) in the original. 065 * All features from the original sequence will be projected down into this 066 * co-ordinate system. All features overlapping the edges will be given fuzzy 067 * locations. 068 * </p> 069 * 070 * @param seq the sequence to sub-sequence 071 * @param start the first index to include in the sub-sequence 072 * @param end the last index to include in the sub-sequence 073 * @return a view Sequence for this region 074 * @throws IndexOutOfBoundsException if start or end are not in seq, or if 075 * end < start 076 */ 077 public static Sequence subSequence(Sequence seq, int start, int end) 078 throws IndexOutOfBoundsException { 079 return new SubSequence(seq, start, end); 080 } 081 082 /** 083 * Extract a sub-sequence from a sequence. 084 * 085 * <p> 086 * The sub-sequence will be indexed from 1 through to (end-start+1). An index 087 * of i in the sub-sequence corresponds to (i+start-1) in the original. 088 * All features from the original sequence will be projected down into this 089 * co-ordinate system. All features overlapping the edges will be given fuzzy 090 * locations. 091 * </p> 092 * 093 * @param seq the sequence to sub-sequence 094 * @param start the first index to include in the sub-sequence 095 * @param end the last index to include in the sub-sequence 096 * @param name a new name to give to this sub-sequence 097 * @return a view Sequence for this region 098 * @throws IndexOutOfBoundsException if start or end are not in seq, or if 099 * end < start 100 */ 101 public static Sequence subSequence(Sequence seq, int start, int end, String name) 102 throws IndexOutOfBoundsException { 103 return new SubSequence(seq, start, end, name); 104 } 105 106 /** 107 * Extract a sub-sequence from a sequence. 108 * 109 * <p> 110 * The sub-sequence will be indexed from 1 through to (end-start+1). If the 111 * strand is NEGATIVE, all features will be flipped in the same manner as 112 * the reverseComplement method. If it is UNKNOWN or 113 * POSITIVE, then this is identical to the other subSequence methods. 114 * </p> 115 * 116 * @param seq the sequence to sub-sequence 117 * @param start the first index to include in the sub-sequence 118 * @param end the last index to include in the sub-sequence 119 * @param name a new name to give to this sub-sequence 120 * @param strand a StrandedFeature.Strand indicating which strand the 121 * sub-sequence should be on 122 * @return a view Sequence for this region 123 * @throws IndexOutOfBoundsException if start or end are not in seq, or if 124 * end < start 125 */ 126 public static Sequence subSequence( 127 Sequence seq, 128 int start, 129 int end, 130 String name, 131 StrandedFeature.Strand strand 132 ) throws IndexOutOfBoundsException, IllegalAlphabetException { 133 Sequence s = subSequence(seq, start, end, name); 134 if(strand == StrandedFeature.NEGATIVE) { 135 s = reverseComplement(s); 136 } 137 return s; 138 } 139 140 /** 141 * Reverse-complement a sequence, and flip all of its features. 142 * 143 * @param seq the Sequence to reverse-complement 144 * @return the flipped Sequence 145 * @throws IllegalAlphabetException if the symbols in the sequence can not be 146 * complemented 147 */ 148 public static Sequence reverseComplement(Sequence seq) 149 throws IllegalAlphabetException { 150 return new RevCompSequence(seq); 151 } 152 153 /** 154 * Create a new sequence that has all of the data in the original, but allows 155 * new features and top-level annotations to be added independantly. Use this 156 * as a scratch-space. 157 * 158 * @param seq the Sequence to view 159 * @return a new ViewSequence 160 */ 161 public static ViewSequence view(Sequence seq) { 162 return new ViewSequence(seq); 163 } 164 165 /** 166 * Create a new sequence that has all of the data in the original, but allows 167 * new features and top-level annotations to be added independantly. Use this 168 * as a scratch-space. 169 * 170 * @param seq the Sequence to view 171 * @param name a new name for the sequence 172 * @return a new ViewSequence with the new name 173 */ 174 public static ViewSequence view(Sequence seq, String name) { 175 return new ViewSequence(seq, name); 176 } 177 178 /** 179 * Creates a new Sequence with the data of the old but with a different 180 * FeatureRealizer that will be applied to new Features. 181 * 182 * @param seq the Sequence to wrap 183 * @param fr the new FeatureRealizer 184 * @return the new ViewSequence 185 */ 186 public static ViewSequence view(Sequence seq, FeatureRealizer fr){ 187 return new ViewSequence(seq, fr); 188 } 189 190 /** 191 * Create a new gapped sequence for a sequence. 192 * 193 * <p> 194 * The gapped sequence can be used to insert gaps. The features on the 195 * underlying sequence will be projected onto the view taking the gaps into 196 * account. 197 * </p> 198 * 199 * @param seq 200 * @return a GappedSequence view of seq 201 */ 202 public static GappedSequence gappedView(Sequence seq) { 203 return new SimpleGappedSequence(seq); 204 } 205 206 /** 207 * Mask of a sequence. 208 * 209 * <P> 210 * This will return a view of a sequence where everything outside loc is 211 * dropped. This includes all symbols, which become gaps, and all features, 212 * which behave in a similar manner to those produced by subSequence(). 213 * </p> 214 * 215 * @param seq the Sequence to mask 216 * @param loc the region to retain 217 * @return a Sequence viewing just the retained portion of seq 218 * @throws IndexOutOfBoundsException if loc is not totaly within seq 219 * @throws IllegalArgumentException fixme: not sure where this comes from 220 */ 221 public static Sequence maskSequence(Sequence seq, RangeLocation loc) 222 throws IndexOutOfBoundsException, IllegalArgumentException { 223 GappedSequence gSeq = gappedView(subSequence( 224 seq, 225 loc.getMin(), 226 loc.getMax(), 227 seq.getName() + ":" + loc.toString())); 228 gSeq.addGapsInSource(1, loc.getMin()); 229 gSeq.addGapsInSource(seq.length(), gSeq.length() - gSeq.length()); 230 231 return gSeq; 232 } 233 234 /** 235 * Create a new Sequence that has no annotation, no features and a zero-length 236 * symbol list. 237 * 238 * Instantiate this if an API requres a sequence, but you can't be bothered 239 * or are not able to provide full sequence information. 240 * 241 * It is sometimes usefull to create a dummy sequence and then wrap this in 242 * a view. 243 * 244 * @param uri the URI to give the dummy sequence 245 * @param name the name of the dummy sequence 246 * @return a dummy Sequence 247 */ 248 public static Sequence createDummy(String uri, String name) { 249 return new DummySequence(uri, name); 250 } 251 252 /** 253 * Create a new Sequence that contains a single symbol repeated over and over. 254 * 255 * @param alpha the Alphabet this sequence is over 256 * @param length the length of the sequence 257 * @param sym the symbol returned by every call to symbolAt 258 * @param uri the URI of the sequence 259 * @param name the name of the sequence 260 * @return a new sequence of the right length 261 * @throws IllegalSymbolException if sym is not in alpha 262 * 263 * @since 1.4 264 */ 265 public static Sequence createDummy( 266 Alphabet alpha, int length, Symbol sym, 267 String uri, String name) 268 throws IllegalSymbolException 269 { 270 return createSequence(new DummySymbolList(alpha, length, sym), 271 uri, name, new SmallAnnotation()); 272 } 273 274 /** 275 * Add features to a sequence that contain the same information as all 276 * those in a feature holder. 277 * 278 * @param seq the Sequence to add features to 279 * @param fh the features to add 280 * @throws ChangeVetoException if the sequence could not be modified 281 * @throws BioException if there was an error creating the features 282 */ 283 public static void addAllFeatures(Sequence seq, FeatureHolder fh) 284 throws 285 ChangeVetoException, 286 BioException 287 { 288 addFeatures(seq, fh); 289 } 290 291 private static void addFeatures(FeatureHolder toAddTo, FeatureHolder thingsToAdd) 292 throws 293 ChangeVetoException, 294 BioException 295 { 296 for(Iterator i = thingsToAdd.features(); i.hasNext(); ) { 297 Feature f2add = (Feature) i.next(); 298 Feature added = toAddTo.createFeature(f2add.makeTemplate()); 299 addFeatures(added, f2add); 300 } 301 } 302}