001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.seq;
023
024import java.util.Iterator;
025
026import org.biojava.bio.Annotation;
027import org.biojava.bio.BioException;
028import org.biojava.bio.SmallAnnotation;
029import org.biojava.bio.seq.impl.DummySequence;
030import org.biojava.bio.seq.impl.RevCompSequence;
031import org.biojava.bio.seq.impl.SimpleGappedSequence;
032import org.biojava.bio.seq.impl.SimpleSequence;
033import org.biojava.bio.seq.impl.SubSequence;
034import org.biojava.bio.seq.impl.ViewSequence;
035import org.biojava.bio.symbol.Alphabet;
036import org.biojava.bio.symbol.DummySymbolList;
037import org.biojava.bio.symbol.IllegalAlphabetException;
038import org.biojava.bio.symbol.IllegalSymbolException;
039import org.biojava.bio.symbol.RangeLocation;
040import org.biojava.bio.symbol.Symbol;
041import org.biojava.bio.symbol.SymbolList;
042import org.biojava.utils.ChangeVetoException;
043
044/**
045 * Methods for manipulating sequences.
046 *
047 * @author Matthew Pocock
048 */
049public final class SequenceTools {
050  private SequenceTools() {
051  }
052    
053  public static Sequence createSequence(
054    SymbolList syms, String uri, String name, Annotation ann
055  ) {
056    return new SimpleSequence(syms, uri, name, ann);
057  }
058
059  /**
060   * Extract a sub-sequence from a sequence.
061   *
062   * <p>
063   * The sub-sequence will be indexed from 1 through to (end-start+1). An index
064   * of i in the sub-sequence corresponds to (i+start-1) in the original.
065   * All features from the original sequence will be projected down into this
066   * co-ordinate system. All features overlapping the edges will be given fuzzy
067   * locations.
068   * </p>
069   *
070   * @param seq   the sequence to sub-sequence
071   * @param start the first index to include in the sub-sequence
072   * @param end   the last index to include in the sub-sequence
073   * @return a view Sequence for this region
074   * @throws IndexOutOfBoundsException if start or end are not in seq, or if
075   *     end < start
076   */
077  public static Sequence subSequence(Sequence seq, int start, int end)
078  throws IndexOutOfBoundsException {
079    return new SubSequence(seq, start, end);
080  }
081
082  /**
083   * Extract a sub-sequence from a sequence.
084   *
085   * <p>
086   * The sub-sequence will be indexed from 1 through to (end-start+1). An index
087   * of i in the sub-sequence corresponds to (i+start-1) in the original.
088   * All features from the original sequence will be projected down into this
089   * co-ordinate system. All features overlapping the edges will be given fuzzy
090   * locations.
091   * </p>
092   *
093   * @param seq   the sequence to sub-sequence
094   * @param start the first index to include in the sub-sequence
095   * @param end   the last index to include in the sub-sequence
096   * @param name  a new name to give to this sub-sequence
097   * @return a view Sequence for this region
098   * @throws IndexOutOfBoundsException if start or end are not in seq, or if
099   *     end < start
100   */
101  public static Sequence subSequence(Sequence seq, int start, int end, String name)
102  throws IndexOutOfBoundsException {
103    return new SubSequence(seq, start, end, name);
104  }
105
106  /**
107   * Extract a sub-sequence from a sequence.
108   *
109   * <p>
110   * The sub-sequence will be indexed from 1 through to (end-start+1). If the
111   * strand is NEGATIVE, all features will be flipped in the same manner as
112   * the reverseComplement method. If it is UNKNOWN or
113   * POSITIVE, then this is identical to the other subSequence methods.
114   * </p>
115   *
116   * @param seq   the sequence to sub-sequence
117   * @param start the first index to include in the sub-sequence
118   * @param end   the last index to include in the sub-sequence
119   * @param name  a new name to give to this sub-sequence
120   * @param strand a StrandedFeature.Strand indicating which strand the
121   *    sub-sequence should be on
122   * @return a view Sequence for this region
123   * @throws IndexOutOfBoundsException if start or end are not in seq, or if
124   *     end < start
125   */
126  public static Sequence subSequence(
127    Sequence seq,
128    int start,
129    int end,
130    String name,
131    StrandedFeature.Strand strand
132  ) throws IndexOutOfBoundsException, IllegalAlphabetException {
133    Sequence s = subSequence(seq, start, end, name);
134    if(strand == StrandedFeature.NEGATIVE) {
135      s = reverseComplement(s);
136    }
137    return s;
138  }
139
140  /**
141   * Reverse-complement a sequence, and flip all of its features.
142   *
143   * @param seq  the Sequence to reverse-complement
144   * @return  the flipped Sequence
145   * @throws IllegalAlphabetException  if the symbols in the sequence can not be
146   *     complemented
147   */
148  public static Sequence reverseComplement(Sequence seq)
149  throws IllegalAlphabetException {
150    return new RevCompSequence(seq);
151  }
152
153  /**
154   * Create a new sequence that has all of the data in the original, but allows
155   * new features and top-level annotations to be added independantly. Use this
156   * as a scratch-space.
157   *
158   * @param seq  the Sequence to view
159   * @return a new ViewSequence
160   */
161  public static ViewSequence view(Sequence seq) {
162    return new ViewSequence(seq);
163  }
164
165  /**
166   * Create a new sequence that has all of the data in the original, but allows
167   * new features and top-level annotations to be added independantly. Use this
168   * as a scratch-space.
169   *
170   * @param seq  the Sequence to view
171   * @param name a new name for the sequence
172   * @return a new ViewSequence with the new name
173   */
174  public static ViewSequence view(Sequence seq, String name) {
175    return new ViewSequence(seq, name);
176  }
177
178  /**
179   * Creates a new Sequence with the data of the old but with a different
180   * FeatureRealizer that will be applied to new Features.
181   *
182   * @param seq the Sequence to wrap
183   * @param fr the new FeatureRealizer
184   * @return the new ViewSequence
185   */
186  public static ViewSequence view(Sequence seq, FeatureRealizer fr){
187    return new ViewSequence(seq, fr);
188  }
189
190  /**
191   * Create a new gapped sequence for a sequence.
192   *
193   * <p>
194   * The gapped sequence can be used to insert gaps. The features on the
195   * underlying sequence will be projected onto the view taking the gaps into
196   * account.
197   * </p>
198   *
199   * @param seq
200   * @return a GappedSequence view of seq
201   */
202  public static GappedSequence gappedView(Sequence seq) {
203    return new SimpleGappedSequence(seq);
204  }
205
206  /**
207   * Mask of a sequence.
208   *
209   * <P>
210   * This will return a view of a sequence where everything outside loc is
211   * dropped. This includes all symbols, which become gaps, and all features,
212   * which behave in a similar manner to those produced by subSequence().
213   * </p>
214   *
215   * @param seq  the Sequence to mask
216   * @param loc  the region to retain
217   * @return  a Sequence viewing just the retained portion of seq
218   * @throws IndexOutOfBoundsException  if loc is not totaly within seq
219   * @throws IllegalArgumentException  fixme: not sure where this comes from
220   */
221  public static Sequence maskSequence(Sequence seq, RangeLocation loc)
222  throws IndexOutOfBoundsException, IllegalArgumentException {
223    GappedSequence gSeq = gappedView(subSequence(
224            seq,
225            loc.getMin(),
226            loc.getMax(),
227            seq.getName() + ":" + loc.toString()));
228    gSeq.addGapsInSource(1, loc.getMin());
229    gSeq.addGapsInSource(seq.length(), gSeq.length() - gSeq.length());
230
231    return gSeq;
232  }
233
234  /**
235   * Create a new Sequence that has no annotation, no features and a zero-length
236   * symbol list.
237   *
238   * Instantiate this if an API requres a sequence, but you can't be bothered
239   * or are not able to provide full sequence information.
240   * 
241   * It is sometimes usefull to create a dummy sequence and then wrap this in
242   * a view.
243   *
244   * @param uri  the URI to give the dummy sequence
245   * @param name the name of the dummy sequence
246   * @return a dummy Sequence
247   */
248  public static Sequence createDummy(String uri, String name) {
249    return new DummySequence(uri, name);
250  }
251
252  /**
253   * Create a new Sequence that contains a single symbol repeated over and over.
254   *
255   * @param alpha   the Alphabet this sequence is over
256   * @param length  the length of the sequence
257   * @param sym     the symbol returned by every call to symbolAt
258   * @param uri     the URI of the sequence
259   * @param name    the name of the sequence
260   * @return  a new sequence of the right length
261   * @throws IllegalSymbolException if sym is not in alpha
262   *
263   * @since 1.4
264   */
265  public static Sequence createDummy(
266          Alphabet alpha, int length, Symbol sym,
267          String uri, String name)
268          throws IllegalSymbolException
269  {
270    return createSequence(new DummySymbolList(alpha, length, sym),
271                          uri, name, new SmallAnnotation());
272  }
273
274  /**
275   * Add features to a sequence that contain the same information as all
276   * those in a feature holder.
277   *
278   * @param seq  the Sequence to add features to
279   * @param fh  the features to add
280   * @throws ChangeVetoException if the sequence could not be modified
281   * @throws BioException if there was an error creating the features
282   */
283  public static void addAllFeatures(Sequence seq, FeatureHolder fh)
284  throws
285    ChangeVetoException,
286    BioException
287  {
288    addFeatures(seq, fh);
289  }
290
291  private static void addFeatures(FeatureHolder toAddTo, FeatureHolder thingsToAdd)
292  throws
293    ChangeVetoException,
294    BioException
295  {
296    for(Iterator i = thingsToAdd.features(); i.hasNext(); ) {
297      Feature f2add = (Feature) i.next();
298      Feature added = toAddTo.createFeature(f2add.makeTemplate());
299      addFeatures(added, f2add);
300    }
301  }
302}