001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojavax.bio.seq;
023
024import java.io.BufferedInputStream;
025import java.io.BufferedReader;
026import java.io.File;
027import java.io.FileReader;
028import java.io.IOException;
029import java.io.InputStreamReader;
030import java.io.OutputStream;
031import java.util.ArrayList;
032import java.util.Iterator;
033import java.util.List;
034import java.util.Set;
035
036import java.util.TreeSet;
037import org.biojava.bio.BioError;
038import org.biojava.bio.BioException;
039import org.biojava.bio.seq.DNATools;
040import org.biojava.bio.seq.Feature;
041import org.biojava.bio.seq.NucleotideTools;
042import org.biojava.bio.seq.ProteinTools;
043import org.biojava.bio.seq.RNATools;
044import org.biojava.bio.seq.Sequence;
045import org.biojava.bio.seq.SequenceIterator;
046import org.biojava.bio.seq.StrandedFeature;
047import org.biojava.bio.seq.io.SymbolTokenization;
048import org.biojava.bio.symbol.Alphabet;
049import org.biojava.bio.symbol.AlphabetManager;
050import org.biojava.bio.symbol.SimpleSymbolList;
051import org.biojava.bio.symbol.SymbolList;
052import org.biojava.utils.ChangeType;
053import org.biojava.utils.ChangeVetoException;
054import org.biojavax.Namespace;
055import org.biojavax.Note;
056import org.biojavax.RichObjectFactory;
057import org.biojavax.SimpleNamespace;
058import org.biojavax.SimpleNote;
059import org.biojavax.bio.BioEntry;
060import org.biojavax.bio.seq.io.EMBLFormat;
061import org.biojavax.bio.seq.io.EMBLxmlFormat;
062import org.biojavax.bio.seq.io.FastaFormat;
063import org.biojavax.bio.seq.io.FastaHeader;
064import org.biojavax.bio.seq.io.GenbankFormat;
065import org.biojavax.bio.seq.io.HashedFastaIterator;
066import org.biojavax.bio.seq.io.INSDseqFormat;
067import org.biojavax.bio.seq.io.RichSequenceBuilderFactory;
068import org.biojavax.bio.seq.io.RichSequenceFormat;
069import org.biojavax.bio.seq.io.RichStreamReader;
070import org.biojavax.bio.seq.io.RichStreamWriter;
071import org.biojavax.bio.seq.io.UniProtFormat;
072import org.biojavax.bio.seq.io.UniProtXMLFormat;
073import org.biojavax.ontology.ComparableTerm;
074
075/**
 * A rich sequence is a combination of an org.biojavax.bio.BioEntry and a
 * Sequence. It inherits and merges the methods of both. The RichSequence is
 * based on the BioSQL model and provides a richer array of methods to access
 * information than Sequence does. Whenever possible, RichSequence should be
 * used in preference to Sequence.
081 * 
082 * @author Mark Schreiber
083 * @author Richard Holland
084 * @author George Waldon
085 * @since 1.5
086 */
087public interface RichSequence extends BioEntry, Sequence {
088
089        public static final ChangeType SYMLISTVERSION = new ChangeType(
090                        "This sequences's symbollist version has changed",
091                        "org.biojavax.bio.seq.RichSequence", "SYMLISTVERSION");
092
093        public static final ChangeType CIRCULAR = new ChangeType(
094                        "This sequences's circularity has changed",
095                        "org.biojavax.bio.seq.RichSequence", "CIRCULAR");
096
097        /**
098         * The version of the associated symbol list. Note the use of an object for
099         * the value means that it can be nulled.
100         * 
101         * @return the version
102         */
103        public Double getSeqVersion();
104
105        /**
106         * Sets the version of the associated symbol list. Note the use of an object
107         * for the value means that it can be nulled.
108         * 
109         * @param seqVersion
110         *            the version to set.
111         * @throws ChangeVetoException
112         *             if it doesn't want to change.
113         */
114        public void setSeqVersion(Double seqVersion) throws ChangeVetoException;
115
116        /**
117         * The features for this sequence.
118         * 
119         * @return a set of RichFeature objects.
120         */
121        public Set<Feature> getFeatureSet();
122
123        /**
124         * Sets the features of this sequence. Note that it is not checked to see if
125         * the features actually belong to this sequence, you'd best check that
126         * yourself and make changes using feature.setParent() if necessary.
127         * 
128         * @param features
129         *            the features to assign to this sequence, replacing all others.
130         *            Must be a set of RichFeature objects.
131         * @throws ChangeVetoException
132         *             if they could not be assigned.
133         */
134        public void setFeatureSet(Set<Feature> features) throws ChangeVetoException;
135
136        /**
137         * Circularises the <code>Sequence</code>. The circular length can then be
138         * said to be the length of the sequence itself.
139         * 
140         * @param circular
141         *            set to true if you want it to be circular
142         * @throws ChangeVetoException
143         *             if the change is blocked. Some implementations may choose not
144         *             to support circularisation and should throw an exception
145         *             here. Some implementations may only support this method for
146         *             certain Alphabets.
147         */
148        public void setCircular(boolean circular) throws ChangeVetoException;
149
150        /**
151         * Is the sequence circular? Circularity has implications for work with
152         * locations and any coordinate work eg symbolAt(int i). Classes that allow
153         * it should test this method when working with coordinates or locations /
154         * features.
155         * 
156         * @return true if the this is circular else false.
157         */
158        public boolean getCircular();
159
160        /**
161         * A special function that returns the SymbolList that this RichSequence is
162         * based around. This should _not_ be the RichSequence object itself, as
163         * this function is used to perform actions on the symbol list without
164         * referring to the RichSequence object directly.
165         * 
166         * @return the internal SymbolList of the RichSequence, NOT the RichSequence
167         *         object itself.
168         */
169        public SymbolList getInternalSymbolList();
170
171        /**
172         * Stores a number of useful terms used across many sequence formats for
173         * consistency's sake.
174         */
175        public static class Terms {
176                public static String SPECIES_KEY = "SPECIES";
177                public static String STRAIN_KEY = "STRAIN";
178                public static String TISSUE_KEY = "TISSUE";
179                public static String TRANSPOSON_KEY = "TRANSPOSON";
180                public static String PLASMID_KEY = "PLASMID";
181
182                /**
183                 * Holds a reference to the key that must be used to store PubMed
184                 * references.
185                 */
186                public static final String PUBMED_KEY = "PUBMED";
187
188                /**
189                 * Holds a reference to the key that must be used to store Medline
190                 * references.
191                 */
192                public static final String MEDLINE_KEY = "MEDLINE";
193
194                /**
195                 * Holds a reference to the key that must be used to store DOI
196                 * references.
197                 */
198                public static final String DOI_KEY = "DOI";
199
200                /**
201                 * Getter for the secondary/tertiary/additional accession term
202                 * 
203                 * @return A Term that represents the secondary accession tag
204                 */
205                public static ComparableTerm getAdditionalAccessionTerm() {
206                        return RichObjectFactory.getDefaultOntology()
207                                        .getOrCreateTerm("acc");
208                }
209
210                /**
211                 * Getter for the keyword term
212                 * 
213                 * @return a Term that represents the Keyword tag
214                 */
215                public static ComparableTerm getKeywordTerm() {
216                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm("kw");
217                }
218
219                /**
220                 * Getter for the date created term
221                 * 
222                 * @return a Term
223                 */
224                public static ComparableTerm getDateCreatedTerm() {
225                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
226                                        "cdat");
227                }
228
229                /**
230                 * Getter for the date updated term
231                 * 
232                 * @return a Term
233                 */
234                public static ComparableTerm getDateUpdatedTerm() {
235                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
236                                        "udat");
237                }
238
239                /**
240                 * Getter for the date annotated term
241                 * 
242                 * @return a Term
243                 */
244                public static ComparableTerm getDateAnnotatedTerm() {
245                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
246                                        "adat");
247                }
248
249                /**
250                 * Getter for the release created term
251                 * 
252                 * @return a Term
253                 */
254                public static ComparableTerm getRelCreatedTerm() {
255                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
256                                        "crel");
257                }
258
259                /**
260                 * Getter for the release updated term
261                 * 
262                 * @return a Term
263                 */
264                public static ComparableTerm getRelUpdatedTerm() {
265                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
266                                        "urel");
267                }
268
269                /**
270                 * Getter for the release annotated term
271                 * 
272                 * @return a Term
273                 */
274                public static ComparableTerm getRelAnnotatedTerm() {
275                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
276                                        "arel");
277                }
278
279                /**
280                 * getter for the MolType term
281                 * 
282                 * @return a Term that represents the molecule type
283                 */
284                public static ComparableTerm getMolTypeTerm() {
285                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
286                                        "moltype");
287                }
288
289                /**
290                 * Getter for the Strand term; legal values are "single", "double", and
291                 * "mixed".
292                 * 
293                 * @return a Term that represents the Strand tag
294                 */
295                public static ComparableTerm getStrandedTerm() {
296                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
297                                        "stranded");
298                }
299
300                /**
301                 * Getter for the Organelle term
302                 * 
303                 * @return a Term that represents the Organelle tag
304                 */
305                public static ComparableTerm getOrganelleTerm() {
306                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
307                                        "organelle");
308                }
309
310                /**
311                 * Getter for the GeneName term
312                 * 
313                 * @return The GeneName Term
314                 */
315                public static ComparableTerm getGeneNameTerm() {
316                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
317                                        "gene_name");
318                }
319
320                /**
321                 * Getter for the GeneSynonym term
322                 * 
323                 * @return The GeneSynonym Term
324                 */
325                public static ComparableTerm getGeneSynonymTerm() {
326                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
327                                        "gene_synonym");
328                }
329
330                /**
331                 * Getter for the OrderedLocusName term
332                 * 
333                 * @return The OrderedLocusName Term
334                 */
335                public static ComparableTerm getOrderedLocusNameTerm() {
336                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
337                                        "gene_ordloc");
338                }
339
340                /**
341                 * Getter for the ORFName term
342                 * 
343                 * @return The ORFName Term
344                 */
345                public static ComparableTerm getORFNameTerm() {
346                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
347                                        "gene_orf");
348                }
349
350                /**
351                 * Getter for the Strain term
352                 * 
353                 * @return The Strain Term
354                 */
355                public static ComparableTerm getStrainTerm() {
356                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
357                                        "strain");
358                }
359
360                /**
361                 * Getter for the Species term
362                 * 
363                 * @return The Species Term
364                 */
365                public static ComparableTerm getSpeciesTerm() {
366                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
367                                        "species");
368                }
369
370                /**
371                 * Getter for the Tissue term
372                 * 
373                 * @return The Tissue Term
374                 */
375                public static ComparableTerm getTissueTerm() {
376                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
377                                        "tissue");
378                }
379
380                /**
381                 * Getter for the Transposon term
382                 * 
383                 * @return The Transposon Term
384                 */
385                public static ComparableTerm getTransposonTerm() {
386                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
387                                        "transposon");
388                }
389
390                /**
391                 * Getter for the Plasmid term
392                 * 
393                 * @return The plasmid Term
394                 */
395                public static ComparableTerm getPlasmidTerm() {
396                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
397                                        "plasmid");
398                }
399
400                /**
401                 * Getter for the DataClass term
402                 * 
403                 * @return The DataClass Term
404                 */
405                public static ComparableTerm getDataClassTerm() {
406                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
407                                        "dataclass");
408                }
409
410                /**
411                 * Getter for the FTId term
412                 * 
413                 * @return The FTId Term
414                 */
415                public static ComparableTerm getFTIdTerm() {
416                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
417                                        "feature_id");
418                }
419
420                /**
421                 * Getter for the FeatureDesc term
422                 * 
423                 * @return The FeatureDesc Term
424                 */
425                public static ComparableTerm getFeatureDescTerm() {
426                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
427                                        "feature_desc");
428                }
429
430                /**
431                 * Getter for the copyright term
432                 * 
433                 * @return The copyright Term
434                 */
435                public static ComparableTerm getCopyrightTerm() {
436                        return RichObjectFactory.getDefaultOntology().getOrCreateTerm(
437                                        "copyright");
438                }
439        }
440
441        /**
442         * Some useful tools for working with RichSequence objects.
443         * 
444         * @since 1.5
445         */
446        public static class Tools {
447
448                // because we are static we don't want any instances
449                private Tools() {
450                }
451
452                /**
453                 * Create a new RichSequence in the default namespace.
454                 * 
455                 * @param name
456                 *            The name for the sequence. Will also be used for the
457                 *            accession.
458                 * @param seqString
459                 *            The sequence string
460                 * @param alpha
461                 *            The <CODE>Alphabet</CODE> for the sequence
462                 * @throws org.biojava.bio.BioException
463                 *             If the symbols in <CODE>seqString</CODE> are not valid in
464                 *             <CODE>alpha</CODE>
465                 * @return A new <CODE>RichSequence</CODE>. All versions are 1 or 1.0
466                 */
467                public static RichSequence createRichSequence(String name,
468                                String seqString, Alphabet alpha) throws BioException {
469                        SymbolList syms = new SimpleSymbolList(alpha
470                                        .getTokenization("token"), seqString);
471                        return createRichSequence(name, syms);
472                }
473
474                /**
475                 * Create a new RichSequence in the specified namespace.
476                 * 
477                 * @return A new <CODE>RichSequence</CODE>. All versions are 1 or 1.0
478                 * @param namespace
479                 *            the namespace to create the sequence in. A singleton
480                 *            <CODE>Namespace</CODE> will be created or retrieved as
481                 *            appropriate.
482                 * @param name
483                 *            The name for the sequence. Will also be used for the
484                 *            accession.
485                 * @param seqString
486                 *            The sequence string
487                 * @param alpha
488                 *            The <CODE>Alphabet</CODE> for the sequence
489                 * @throws org.biojava.bio.BioException
490                 *             If the symbols in <CODE>seqString</CODE> are not valid in
491                 *             <CODE>alpha</CODE>
492                 */
493                public static RichSequence createRichSequence(String namespace,
494                                String name, String seqString, Alphabet alpha)
495                                throws BioException {
496                        SymbolList syms = new SimpleSymbolList(alpha
497                                        .getTokenization("token"), seqString);
498                        Namespace ns = (Namespace) RichObjectFactory.getObject(
499                                        SimpleNamespace.class, new Object[] { namespace });
500                        return createRichSequence(ns, name, syms);
501                }
502
503                /**
504                 * Create a new RichSequence in the specified namespace.
505                 * 
506                 * @return A new <CODE>RichSequence</CODE>. All versions are 1 or 1.0
507                 * @param ns
508                 *            The namespace to create the sequence in.
509                 * @param name
510                 *            The name for the sequence. Will also be used for the
511                 *            accession.
512                 * @param seqString
513                 *            The sequence string
514                 * @param alpha
515                 *            The <CODE>Alphabet</CODE> for the sequence
516                 * @throws org.biojava.bio.BioException
517                 *             If the symbols in <CODE>seqString</CODE> are not valid in
518                 *             <CODE>alpha</CODE>
519                 */
520                public static RichSequence createRichSequence(Namespace ns,
521                                String name, String seqString, Alphabet alpha)
522                                throws BioException {
523                        SymbolList syms = new SimpleSymbolList(alpha
524                                        .getTokenization("token"), seqString);
525                        return createRichSequence(ns, name, syms);
526                }
527
528                /**
529                 * Create a new RichSequence in the default namespace.
530                 * 
531                 * @return A new <CODE>RichSequence</CODE>. All versions are 1 or 1.0
532                 * @param syms
533                 *            The symbols to add to the sequence.
534                 * @param name
535                 *            The name for the sequence. Will also be used for the
536                 *            accession.
537                 */
538                public static RichSequence createRichSequence(String name,
539                                SymbolList syms) {
540                        Namespace ns = RichObjectFactory.getDefaultNamespace();
541                        return createRichSequence(ns, name, syms);
542                }
543
544                /**
545                 * Create a new RichSequence in the specified namespace.
546                 * 
547                 * @return A new <CODE>RichSequence</CODE>. All versions are 1 or 1.0
548                 * @param ns
549                 *            the namespace to create the sequence in.
550                 * @param syms
551                 *            The symbols to add to the sequence.
552                 * @param name
553                 *            The name for the sequence. Will also be used for the
554                 *            accession.
555                 */
556                public static RichSequence createRichSequence(Namespace ns,
557                                String name, SymbolList syms) {
558                        return new SimpleRichSequence(ns, name, name, 1, syms, new Double(
559                                        1.0));
560                }
561
562                /**
563                 * Boldly attempts to convert a <CODE>Sequence</CODE> into a
564                 * <CODE>RichSequence</CODE>. <CODE>Sequence</CODE>s will be assigned to
565                 * the default namespace. The accession will be assumed to be the name
566                 * of the old sequence. The version of the sequence will be set to 0 and
567                 * the seqversion set to 0.0. <CODE>Feature</CODE>s are converted to
568                 * <CODE>RichFeature</CODE>s. The old <CODE>Annotation</CODE> bundle is
569                 * converted to a <CODE>RichAnnotation</CODE>
570                 * 
571                 * @param s
572                 *            The <CODE>Sequence</CODE> to enrich
573                 * @throws ChangeVetoException
574                 *             if <CODE>s</CODE> is locked or the conversion fails.
575                 * @return a new <CODE>RichSequence</CODE>
576                 */
577                public static RichSequence enrich(Sequence s)
578                                throws ChangeVetoException {
579                        if (s instanceof RichSequence)
580                                return (RichSequence) s;
581                        String name = s.getName();
582                        RichSequence rs = new SimpleRichSequence(RichObjectFactory
583                                        .getDefaultNamespace(),
584                                        name == null ? "UnknownName" : name,
585                                        name == null ? "UnknownAccession" : name, 0, s, new Double(
586                                                        0.0));
587                        // Transfer features
588                        for (Iterator i = s.features(); i.hasNext();) {
589                                Feature f = (Feature) i.next();
590                                try {
591                                        rs.createFeature(f.makeTemplate());
592                                } catch (BioException e) {
593                                        throw new ChangeVetoException("They hates us!", e);
594                                }
595                        }
596                        // Transfer annotations
597                        for (Iterator<Object> i = s.getAnnotation().keys().iterator(); i.hasNext();) {
598                                Object key = i.next();
599                                Object value = s.getAnnotation().getProperty(key);
600                                rs.getAnnotation().setProperty(key, value);
601                        }
602                        return rs;
603                }
604
605        /**
606         * <p>
607         * Creates a new sequence from a subregion of another sequence. The
608         * sequence is not a view. The sequence can be given a new Namespace,
609         * Accession, Name, Identifier etc. or you can copy over the old values.
610         * For unique identification in databases we recommend you change at
611         * least the name and identifier.
612         * </p>
613         * <p>
614         * The new sequence will retain all features that are fully contained by
615         * the new subsequence, the note set (annotation), Taxon, and
616         * description, modified to reflect the subsequence as follows:
617         *
618         * <pre>
619         * seq.setDescription(&quot;subsequence (&quot; + from + &quot;:&quot; + to + &quot;) of &quot;
620         *              + s.getDescription());
621         * </pre>
622         *
623         * No other properties are copied.
624         *
625         * @param newVersion
626         *            the new version number
627         * @param seqVersion
628         *            the new sequence version
629         * @param s
630         *            the original <code>RichSequence</code>.
631         * @param from
632         *            the 1st subsequence coordinate (inclusive)
633         * @param to
634         *            the last subsequence coordinate (inclusive)
635         * @param newNamespace
636         *            the new <code>Namespace</code>
637         * @param newName
638         *            the new name
639         * @param newAccession
640         *            the new accession number
641         * @param newIdentifier
642         *            the new identifier
643         * @throws java.lang.IndexOutOfBoundsException
644         *             if <CODE>from</CODE> or <CODE>to</CODE> lie outside of
645         *             the bounds of <CODE>s</CODE>.
646         * @return A new <CODE>RichSequence</CODE>
647         */
648        public static RichSequence subSequence(RichSequence s, int from,
649                int to, Namespace newNamespace, String newName,
650                String newAccession, String newIdentifier, int newVersion,
651                Double seqVersion) throws IndexOutOfBoundsException {
652            SymbolList symList = s.subList(from, to);
653            SimpleRichSequence seq = new SimpleRichSequence(newNamespace,
654                    newName, newAccession, newVersion, symList, seqVersion);
655            RichLocation subLoc = new SimpleRichLocation(new SimplePosition(
656                    from), new SimplePosition(to), 0);
657            RichLocation subLocComplement = new SimpleRichLocation(
658                    new SimplePosition(from), new SimplePosition(to), 0,
659                    RichLocation.Strand.NEGATIVE_STRAND);
660            try {
661                // copy features if appropriate
662                for (Iterator<Feature> i = s.features(); i.hasNext();) {
663                    RichFeature f = (RichFeature) i.next();
664
665                    if (f.getStrand().equals(StrandedFeature.POSITIVE)) {
666                        if (subLoc.contains(f.getLocation())) {
667                            RichFeature.Template templ = (RichFeature.Template) f.makeTemplate();
668
669                            // change the location
670                            Position min = new SimplePosition(templ.location.getMin()
671                                    - from + 1);
672
673                            // System.out.println("getMin " +
674                            // templ.location.getMin());
675
676                            Position max = new SimplePosition(templ.location.getMax()
677                                    - from + 1);
678
679                            // System.out.println("getMax " +
680                            // templ.location.getMax());
681
682                            templ.location = new SimpleRichLocation(min, max, 0);
683                            seq.createFeature(templ);
684                        }
685                    } else {
686                        if (subLocComplement.contains(f.getLocation())) {
687                            RichFeature.Template templ = (RichFeature.Template) f.makeTemplate();
688
689                            // change the location
690                            Position min = new SimplePosition(templ.location.getMin()
691                                    - from + 1);
692
693                            // System.out.println("getMin " +
694                            // templ.location.getMin());
695
696                            Position max = new SimplePosition(templ.location.getMax()
697                                    - from + 1);
698
699                            // System.out.println("getMax " +
700                            // templ.location.getMax());
701
702                            templ.location = new SimpleRichLocation(min, max,
703                                    0, RichLocation.Strand.NEGATIVE_STRAND);
704                            seq.createFeature(templ);
705                        }
706                    }
707
708                }
709
710                // clone Notes
711                if (s.getNoteSet() != null) {
712                    Set<Note> notes = s.getNoteSet();
713                    Iterator<Note> it = notes.iterator();
714                    Set ns = new TreeSet();
715                    while (it.hasNext()) {
716                        Note note = it.next();
717                        ns.add(new SimpleNote(
718                                note.getTerm(),
719                                note.getValue(),
720                                note.getRank()));
721                    }
722                    seq.setNoteSet(ns);
723                }
724
725                // copy other cruft
726                if (s.getTaxon() != null) {
727                    seq.setTaxon(s.getTaxon());
728                }
729                if (s.getDescription() != null) {
730                    seq.setDescription("subsequence (" + from + ":" + to
731                            + ") of " + s.getDescription());
732                }
733                if (s.getDivision() != null) {
734                    seq.setDivision(s.getDivision());
735                }
736            } catch (ChangeVetoException ex) {
737                throw new BioError(ex); // something is rotten in Denmark!
738            } catch (BioException ex) {
739                throw new BioError(ex); // something is rotten in Denmark!
740            }
741            return seq;
742        }
743    }
744
745        /**
746         * A set of convenience methods for handling common file formats.
747         * 
748         * @author Mark Schreiber
749         * @author Richard Holland
750         * @since 1.5
751         */
752        public final class IOTools {
753
                // Default builder factory, used by the overloads in this class that do
                // not take an explicit seqFactory argument.
                private static RichSequenceBuilderFactory factory = RichSequenceBuilderFactory.FACTORY;
755
                // Private, empty constructor: the helpers in this class are static, so
                // it can't be instantiated from outside.
                private IOTools() {
                }
759
760                /**
761                 * Register a new format with IOTools for auto-guessing.
762                 * 
763                 * @param formatClass
764                 *            the <code>RichSequenceFormat</code> object to register.
765                 */
766                public static void registerFormat(Class formatClass) {
767                        Object o;
768                        try {
769                                o = formatClass.newInstance();
770                        } catch (Exception e) {
771                                throw new BioError(e);
772                        }
773                        if (!(o instanceof RichSequenceFormat))
774                                throw new BioError("Class " + formatClass
775                                                + " is not an implementation of RichSequenceFormat!");
776                        formatClasses.add(formatClass);
777                }
778
                // Format classes added through registerFormat(Class), kept in the order
                // they were added; readStream and readFile try them in this order when
                // guessing the format of their input.
                private static List<Class> formatClasses = new ArrayList<Class>();
781
782                /**
783                 * Guess which format a stream is then attempt to read it.
784                 * 
785                 * @param stream
786                 *            the <code>BufferedInputStream</code> to attempt to read.
787                 * @param seqFactory
788                 *            a factory used to build a <code>RichSequence</code>
789                 * @param ns
790                 *            a <code>Namespace</code> to load the sequences into. Null
791                 *            implies that it should use the namespace specified in the
792                 *            file. If no namespace is specified in the file, then
793                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
794                 *            used.
795                 * @return a <code>RichSequenceIterator</code> over each sequence in the
796                 *         file
797                 * @throws IOException
798                 *             in case the stream is unrecognisable or problems occur in
799                 *             reading it.
800                 */
801                public static RichSequenceIterator readStream(
802                                BufferedInputStream stream,
803                                RichSequenceBuilderFactory seqFactory, Namespace ns)
804                                throws IOException {
805                        for (Iterator<Class> i = formatClasses.iterator(); i.hasNext();) {
806                                Class formatClass = i.next();
807                                RichSequenceFormat format;
808                                try {
809                                        format = (RichSequenceFormat) formatClass.newInstance();
810                                } catch (Exception e) {
811                                        throw new BioError(e);
812                                }
813                                if (format.canRead(stream)) {
814                                        SymbolTokenization sTok = format
815                                                        .guessSymbolTokenization(stream);
816                                        BufferedReader br = new BufferedReader(
817                                                        new InputStreamReader(stream));
818                                        return new RichStreamReader(br, format, sTok, seqFactory,
819                                                        ns);
820                                }
821                        }
822                        throw new IOException("Could not recognise format of stream.");
823                }
824
825                /**
826                 * Guess which format a stream is then attempt to read it.
827                 * 
828                 * @return a <code>RichSequenceIterator</code> over each sequence in the
829                 *         file
830                 * @param stream
831                 *            the <code>BufferedInputStream</code> to attempt to read.
832                 * @param ns
833                 *            a <code>Namespace</code> to load the sequences into. Null
834                 *            implies that it should use the namespace specified in the
835                 *            file. If no namespace is specified in the file, then
836                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
837                 *            used.
838                 * @throws java.io.IOException
839                 *             If the file cannot be read.
840                 */
841                public static RichSequenceIterator readStream(
842                                BufferedInputStream stream, Namespace ns) throws IOException {
843                        return readStream(stream, factory, ns);
844                }
845
846                /**
847                 * Guess which format a file is then attempt to read it.
848                 * 
849                 * @param file
850                 *            the <code>File</code> to attempt to read.
851                 * @param seqFactory
852                 *            a factory used to build a <code>RichSequence</code>
853                 * @param ns
854                 *            a <code>Namespace</code> to load the sequences into. Null
855                 *            implies that it should use the namespace specified in the
856                 *            file. If no namespace is specified in the file, then
857                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
858                 *            used.
859                 * @return a <code>RichSequenceIterator</code> over each sequence in the
860                 *         file
861                 * @throws IOException
862                 *             in case the file is unrecognisable or problems occur in
863                 *             reading it.
864                 */
865                public static RichSequenceIterator readFile(File file,
866                                RichSequenceBuilderFactory seqFactory, Namespace ns)
867                                throws IOException {
868                        for (Iterator<Class> i = formatClasses.iterator(); i.hasNext();) {
869                                Class formatClass = i.next();
870                                RichSequenceFormat format;
871                                try {
872                                        format = (RichSequenceFormat) formatClass.newInstance();
873                                } catch (Exception e) {
874                                        throw new BioError(e);
875                                }
876                                if (format.canRead(file)) {
877                                        SymbolTokenization sTok = format
878                                                        .guessSymbolTokenization(file);
879                                        BufferedReader br = new BufferedReader(new FileReader(file));
880                                        return new RichStreamReader(br, format, sTok, seqFactory,
881                                                        ns);
882                                }
883                        }
884                        throw new IOException("Could not recognise format of file: "
885                                        + file.getName());
886                }
887
888                /**
889                 * Guess which format a file is then attempt to read it.
890                 * 
891                 * @return a <code>RichSequenceIterator</code> over each sequence in the
892                 *         file
893                 * @param file
894                 *            the <code>File</code> to attempt to read.
895                 * @param ns
896                 *            a <code>Namespace</code> to load the sequences into. Null
897                 *            implies that it should use the namespace specified in the
898                 *            file. If no namespace is specified in the file, then
899                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
900                 *            used.
901                 * @throws java.io.IOException
902                 *             If the file cannot be read.
903                 */
904                public static RichSequenceIterator readFile(File file, Namespace ns)
905                                throws IOException {
906                        return readFile(file, factory, ns);
907                }
908
909                /**
910                 * Read a fasta file.
911                 * 
912                 * @param br
913                 *            the <code>BufferedReader<code> to read data from
914                 * @param sTok
915                 *            a <code>SymbolTokenization</code> that understands the
916                 *            sequences
917                 * @param ns
918                 *            a <code>Namespace</code> to load the sequences into. Null
919                 *            implies that it should use the namespace specified in the
920                 *            file. If no namespace is specified in the file, then
921                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
922                 *            used.
923                 * @return a <code>RichSequenceIterator</code> over each sequence in the
924                 *         fasta file
925                 */
926                public static RichSequenceIterator readFasta(BufferedReader br,
927                                SymbolTokenization sTok, Namespace ns) {
928                        return new RichStreamReader(br, new FastaFormat(), sTok, factory,
929                                        ns);
930                }
931
932                /**
933                 * Read a fasta file building a custom type of <code>RichSequence</code>
934                 * . For example, use <code>RichSequenceBuilderFactory.FACTORY</code> to
935                 * emulate <code>readFasta(BufferedReader, SymbolTokenization)</code>
936                 * and <code>RichSequenceBuilderFactory.PACKED</code> to force all
937                 * symbols to be encoded using bit-packing.
938                 * 
939                 * @param br
940                 *            the <code>BufferedReader</code> to read data from
941                 * @param sTok
942                 *            a <code>SymbolTokenization</code> that understands the
943                 *            sequences
944                 * @param seqFactory
945                 *            a factory used to build a <code>RichSequence</code>
946                 * @param ns
947                 *            a <code>Namespace</code> to load the sequences into. Null
948                 *            implies that it should use the namespace specified in the
949                 *            file. If no namespace is specified in the file, then
950                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
951                 *            used.
952                 * @return a <code>RichSequenceIterator</code> over each sequence in the
953                 *         fasta file
954                 */
955                public static RichSequenceIterator readFasta(BufferedReader br,
956                                SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory,
957                                Namespace ns) {
958                        return new RichStreamReader(br, new FastaFormat(), sTok,
959                                        seqFactory, ns);
960                }
961
962                /**
963                 * Iterate over the sequences in an FASTA-format stream of DNA
964                 * sequences.
965                 * 
966                 * @param br
967                 *            the <code>BufferedReader</code> to read data from
968                 * @param ns
969                 *            a <code>Namespace</code> to load the sequences into. Null
970                 *            implies that it should use the namespace specified in the
971                 *            file. If no namespace is specified in the file, then
972                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
973                 *            used.
974                 * @return a <code>RichSequenceIterator</code> over each sequence in the
975                 *         fasta file
976                 * @see #readHashedFastaDNA(BufferedInputStream, Namespace) for a
977                 *      speeded up version that can access sequences from memory.
978                 */
979                public static RichSequenceIterator readFastaDNA(BufferedReader br,
980                                Namespace ns) {
981                        return new RichStreamReader(br, new FastaFormat(), getDNAParser(),
982                                        factory, ns);
983                }
984
985                /**
986                 * Iterate over the sequences in an FASTA-format stream of DNA
987                 * sequences. In contrast to readFastaDNA, this provides a speeded up
988                 * implementation where all sequences are accessed from memory.
989                 * 
990                 * @param is
991                 *            the <code>BufferedInputStream</code> to read data from
992                 * @param ns
993                 *            a <code>Namespace</code> to load the sequences into. Null
994                 *            implies that it should use the namespace specified in the
995                 *            file. If no namespace is specified in the file, then
996                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
997                 *            used.
998                 * @return a <code>RichSequenceIterator</code> over each sequence in the
999                 *         fasta file
1000                 * @throws BioException
1001                 *             if somethings goes wrong while reading the file.
1002                 * @see #readFastaDNA
1003                 */
1004                public static RichSequenceIterator readHashedFastaDNA(
1005                                BufferedInputStream is, Namespace ns) throws BioException {
1006
1007                        Alphabet alpha = AlphabetManager.alphabetForName("DNA");
1008                        return new HashedFastaIterator(is, alpha, ns);
1009
1010                }
1011
1012                /**
1013                 * Iterate over the sequences in an FASTA-format stream of RNA
1014                 * sequences.
1015                 * 
1016                 * @param br
1017                 *            the <code>BufferedReader</code> to read data from
1018                 * @param ns
1019                 *            a <code>Namespace</code> to load the sequences into. Null
1020                 *            implies that it should use the namespace specified in the
1021                 *            file. If no namespace is specified in the file, then
1022                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1023                 *            used.
1024                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1025                 *         fasta file
1026                 */
1027                public static RichSequenceIterator readFastaRNA(BufferedReader br,
1028                                Namespace ns) {
1029                        return new RichStreamReader(br, new FastaFormat(), getRNAParser(),
1030                                        factory, ns);
1031                }
1032
1033                /**
1034                 * Iterate over the sequences in an FASTA-format stream of Protein
1035                 * sequences.
1036                 * 
1037                 * @param br
1038                 *            the <code>BufferedReader</code> to read data from
1039                 * @param ns
1040                 *            a <code>Namespace</code> to load the sequences into. Null
1041                 *            implies that it should use the namespace specified in the
1042                 *            file. If no namespace is specified in the file, then
1043                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1044                 *            used.
1045                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1046                 *         fasta file
1047                 */
1048                public static RichSequenceIterator readFastaProtein(BufferedReader br,
1049                                Namespace ns) {
1050                        return new RichStreamReader(br, new FastaFormat(),
1051                                        getProteinParser(), factory, ns);
1052                }
1053
1054                /**
1055                 * Read a GenBank file using a custom type of SymbolList. For example,
1056                 * use RichSequenceBuilderFactory.FACTORY to emulate
1057                 * readFasta(BufferedReader, SymbolTokenization) and
1058                 * RichSequenceBuilderFactory.PACKED to force all symbols to be encoded
1059                 * using bit-packing.
1060                 * 
1061                 * @param br
1062                 *            the <code>BufferedReader</code> to read data from
1063                 * @param sTok
1064                 *            a <code>SymbolTokenization</code> that understands the
1065                 *            sequences
1066                 * @param seqFactory
1067                 *            a factory used to build a <code>SymbolList</code>
1068                 * @param ns
1069                 *            a <code>Namespace</code> to load the sequences into. Null
1070                 *            implies that it should use the namespace specified in the
1071                 *            file. If no namespace is specified in the file, then
1072                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1073                 *            used.
1074                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1075                 *         fasta file
1076                 */
1077                public static RichSequenceIterator readGenbank(BufferedReader br,
1078                                SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory,
1079                                Namespace ns) {
1080                        return new RichStreamReader(br, new GenbankFormat(), sTok,
1081                                        seqFactory, ns);
1082                }
1083
1084                /**
1085                 * Iterate over the sequences in an GenBank-format stream of DNA
1086                 * sequences.
1087                 * 
1088                 * @param br
1089                 *            the <code>BufferedReader</code> to read data from
1090                 * @param ns
1091                 *            a <code>Namespace</code> to load the sequences into. Null
1092                 *            implies that it should use the namespace specified in the
1093                 *            file. If no namespace is specified in the file, then
1094                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1095                 *            used.
1096                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1097                 *         fasta file
1098                 */
1099                public static RichSequenceIterator readGenbankDNA(BufferedReader br,
1100                                Namespace ns) {
1101                        return new RichStreamReader(br, new GenbankFormat(),
1102                                        getDNAParser(), factory, ns);
1103                }
1104
1105                /**
1106                 * Iterate over the sequences in an GenBank-format stream of RNA
1107                 * sequences.
1108                 * 
1109                 * @param br
1110                 *            the <code>BufferedReader</code> to read data from
1111                 * @param ns
1112                 *            a <code>Namespace</code> to load the sequences into. Null
1113                 *            implies that it should use the namespace specified in the
1114                 *            file. If no namespace is specified in the file, then
1115                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1116                 *            used.
1117                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1118                 *         fasta file
1119                 */
1120                public static RichSequenceIterator readGenbankRNA(BufferedReader br,
1121                                Namespace ns) {
1122                        return new RichStreamReader(br, new GenbankFormat(),
1123                                        getRNAParser(), factory, ns);
1124                }
1125
1126                /**
1127                 * Iterate over the sequences in an GenBank-format stream of Protein
1128                 * sequences.
1129                 * 
1130                 * @param br
1131                 *            the <code>BufferedReader</code> to read data from
1132                 * @param ns
1133                 *            a <code>Namespace</code> to load the sequences into. Null
1134                 *            implies that it should use the namespace specified in the
1135                 *            file. If no namespace is specified in the file, then
1136                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1137                 *            used.
1138                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1139                 *         fasta file
1140                 */
1141                public static RichSequenceIterator readGenbankProtein(
1142                                BufferedReader br, Namespace ns) {
1143                        return new RichStreamReader(br, new GenbankFormat(),
1144                                        getProteinParser(), factory, ns);
1145                }
1146
1147                /**
1148                 * Read a INSDseq file using a custom type of SymbolList. For example,
1149                 * use RichSequenceBuilderFactory.FACTORY to emulate
1150                 * readFasta(BufferedReader, SymbolTokenization) and
1151                 * RichSequenceBuilderFactory.PACKED to force all symbols to be encoded
1152                 * using bit-packing.
1153                 * 
1154                 * @param br
1155                 *            the <code>BufferedReader</code> to read data from
1156                 * @param sTok
1157                 *            a <code>SymbolTokenization</code> that understands the
1158                 *            sequences
1159                 * @param seqFactory
1160                 *            a factory used to build a <code>SymbolList</code>
1161                 * @param ns
1162                 *            a <code>Namespace</code> to load the sequences into. Null
1163                 *            implies that it should use the namespace specified in the
1164                 *            file. If no namespace is specified in the file, then
1165                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1166                 *            used.
1167                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1168                 *         fasta file
1169                 */
1170                public static RichSequenceIterator readINSDseq(BufferedReader br,
1171                                SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory,
1172                                Namespace ns) {
1173                        return new RichStreamReader(br, new INSDseqFormat(), sTok,
1174                                        seqFactory, ns);
1175                }
1176
1177                /**
1178                 * Iterate over the sequences in an INSDseq-format stream of DNA
1179                 * sequences.
1180                 * 
1181                 * @param br
1182                 *            the <code>BufferedReader</code> to read data from
1183                 * @param ns
1184                 *            a <code>Namespace</code> to load the sequences into. Null
1185                 *            implies that it should use the namespace specified in the
1186                 *            file. If no namespace is specified in the file, then
1187                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1188                 *            used.
1189                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1190                 *         fasta file
1191                 */
1192                public static RichSequenceIterator readINSDseqDNA(BufferedReader br,
1193                                Namespace ns) {
1194                        return new RichStreamReader(br, new INSDseqFormat(),
1195                                        getDNAParser(), factory, ns);
1196                }
1197
1198                /**
1199                 * Iterate over the sequences in an INSDseq-format stream of RNA
1200                 * sequences.
1201                 * 
1202                 * @param br
1203                 *            the <code>BufferedReader</code> to read data from
1204                 * @param ns
1205                 *            a <code>Namespace</code> to load the sequences into. Null
1206                 *            implies that it should use the namespace specified in the
1207                 *            file. If no namespace is specified in the file, then
1208                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1209                 *            used.
1210                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1211                 *         fasta file
1212                 */
1213                public static RichSequenceIterator readINSDseqRNA(BufferedReader br,
1214                                Namespace ns) {
1215                        return new RichStreamReader(br, new INSDseqFormat(),
1216                                        getRNAParser(), factory, ns);
1217                }
1218
1219                /**
1220                 * Iterate over the sequences in an INSDseq-format stream of Protein
1221                 * sequences.
1222                 * 
1223                 * @param br
1224                 *            the <code>BufferedReader</code> to read data from
1225                 * @param ns
1226                 *            a <code>Namespace</code> to load the sequences into. Null
1227                 *            implies that it should use the namespace specified in the
1228                 *            file. If no namespace is specified in the file, then
1229                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1230                 *            used.
1231                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1232                 *         fasta file
1233                 */
1234                public static RichSequenceIterator readINSDseqProtein(
1235                                BufferedReader br, Namespace ns) {
1236                        return new RichStreamReader(br, new INSDseqFormat(),
1237                                        getProteinParser(), factory, ns);
1238                }
1239
1240                /**
1241                 * Read a EMBLxml file using a custom type of SymbolList. For example,
1242                 * use RichSequenceBuilderFactory.FACTORY to emulate
1243                 * readFasta(BufferedReader, SymbolTokenization) and
1244                 * RichSequenceBuilderFactory.PACKED to force all symbols to be encoded
1245                 * using bit-packing.
1246                 * 
1247                 * @param br
1248                 *            the <code>BufferedReader</code> to read data from
1249                 * @param sTok
1250                 *            a <code>SymbolTokenization</code> that understands the
1251                 *            sequences
1252                 * @param seqFactory
1253                 *            a factory used to build a <code>SymbolList</code>
1254                 * @param ns
1255                 *            a <code>Namespace</code> to load the sequences into. Null
1256                 *            implies that it should use the namespace specified in the
1257                 *            file. If no namespace is specified in the file, then
1258                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1259                 *            used.
1260                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1261                 *         fasta file
1262                 */
1263                public static RichSequenceIterator readEMBLxml(BufferedReader br,
1264                                SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory,
1265                                Namespace ns) {
1266                        return new RichStreamReader(br, new EMBLxmlFormat(), sTok,
1267                                        seqFactory, ns);
1268                }
1269
1270                /**
1271                 * Iterate over the sequences in an EMBLxml-format stream of DNA
1272                 * sequences.
1273                 * 
1274                 * @param br
1275                 *            the <code>BufferedReader</code> to read data from
1276                 * @param ns
1277                 *            a <code>Namespace</code> to load the sequences into. Null
1278                 *            implies that it should use the namespace specified in the
1279                 *            file. If no namespace is specified in the file, then
1280                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1281                 *            used.
1282                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1283                 *         fasta file
1284                 */
1285                public static RichSequenceIterator readEMBLxmlDNA(BufferedReader br,
1286                                Namespace ns) {
1287                        return new RichStreamReader(br, new EMBLxmlFormat(),
1288                                        getDNAParser(), factory, ns);
1289                }
1290
1291                /**
1292                 * Iterate over the sequences in an EMBLxml-format stream of RNA
1293                 * sequences.
1294                 * 
1295                 * @param br
1296                 *            the <code>BufferedReader</code> to read data from
1297                 * @param ns
1298                 *            a <code>Namespace</code> to load the sequences into. Null
1299                 *            implies that it should use the namespace specified in the
1300                 *            file. If no namespace is specified in the file, then
1301                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1302                 *            used.
1303                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1304                 *         fasta file
1305                 */
1306                public static RichSequenceIterator readEMBLxmlRNA(BufferedReader br,
1307                                Namespace ns) {
1308                        return new RichStreamReader(br, new EMBLxmlFormat(),
1309                                        getRNAParser(), factory, ns);
1310                }
1311
1312                /**
1313                 * Iterate over the sequences in an EMBLxml-format stream of Protein
1314                 * sequences.
1315                 * 
1316                 * @param br
1317                 *            the <code>BufferedReader</code> to read data from
1318                 * @param ns
1319                 *            a <code>Namespace</code> to load the sequences into. Null
1320                 *            implies that it should use the namespace specified in the
1321                 *            file. If no namespace is specified in the file, then
1322                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1323                 *            used.
1324                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1325                 *         fasta file
1326                 */
1327                public static RichSequenceIterator readEMBLxmlProtein(
1328                                BufferedReader br, Namespace ns) {
1329                        return new RichStreamReader(br, new EMBLxmlFormat(),
1330                                        getProteinParser(), factory, ns);
1331                }
1332
1333                /**
1334                 * Read a EMBL file using a custom type of SymbolList. For example, use
1335                 * RichSequenceBuilderFactory.FACTORY to emulate
1336                 * readFasta(BufferedReader, SymbolTokenization) and
1337                 * RichSequenceBuilderFactory.PACKED to force all symbols to be encoded
1338                 * using bit-packing.
1339                 * 
1340                 * @param br
1341                 *            the <code>BufferedReader</code> to read data from
1342                 * @param sTok
1343                 *            a <code>SymbolTokenization</code> that understands the
1344                 *            sequences
1345                 * @param seqFactory
1346                 *            a factory used to build a <code>SymbolList</code>
1347                 * @param ns
1348                 *            a <code>Namespace</code> to load the sequences into. Null
1349                 *            implies that it should use the namespace specified in the
1350                 *            file. If no namespace is specified in the file, then
1351                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1352                 *            used.
1353                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1354                 *         fasta file
1355                 */
1356                public static RichSequenceIterator readEMBL(BufferedReader br,
1357                                SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory,
1358                                Namespace ns) {
1359                        return new RichStreamReader(br, new EMBLFormat(), sTok, seqFactory,
1360                                        ns);
1361                }
1362
1363                /**
1364                 * Iterate over the sequences in an EMBL-format stream of DNA sequences.
1365                 * 
1366                 * @param br
1367                 *            the <code>BufferedReader</code> to read data from
1368                 * @param ns
1369                 *            a <code>Namespace</code> to load the sequences into. Null
1370                 *            implies that it should use the namespace specified in the
1371                 *            file. If no namespace is specified in the file, then
1372                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1373                 *            used.
1374                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1375                 *         fasta file
1376                 */
1377                public static RichSequenceIterator readEMBLDNA(BufferedReader br,
1378                                Namespace ns) {
1379                        return new RichStreamReader(br, new EMBLFormat(), getDNAParser(),
1380                                        factory, ns);
1381                }
1382
1383                /**
1384                 * Iterate over the sequences in an EMBL-format stream of RNA sequences.
1385                 * 
1386                 * @param br
1387                 *            the <code>BufferedReader</code> to read data from
1388                 * @param ns
1389                 *            a <code>Namespace</code> to load the sequences into. Null
1390                 *            implies that it should use the namespace specified in the
1391                 *            file. If no namespace is specified in the file, then
1392                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1393                 *            used.
1394                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1395                 *         fasta file
1396                 */
1397                public static RichSequenceIterator readEMBLRNA(BufferedReader br,
1398                                Namespace ns) {
1399                        return new RichStreamReader(br, new EMBLFormat(), getRNAParser(),
1400                                        factory, ns);
1401                }
1402
1403                /**
1404                 * Iterate over the sequences in an EMBL-format stream of Protein
1405                 * sequences.
1406                 * 
1407                 * @param br
1408                 *            the <code>BufferedReader</code> to read data from
1409                 * @param ns
1410                 *            a <code>Namespace</code> to load the sequences into. Null
1411                 *            implies that it should use the namespace specified in the
1412                 *            file. If no namespace is specified in the file, then
1413                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1414                 *            used.
1415                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1416                 *         fasta file
1417                 */
1418                public static RichSequenceIterator readEMBLProtein(BufferedReader br,
1419                                Namespace ns) {
1420                        return new RichStreamReader(br, new EMBLFormat(),
1421                                        getProteinParser(), factory, ns);
1422                }
1423
1424                /**
1425                 * Read a UniProt file using a custom type of SymbolList. For example,
1426                 * use RichSequenceBuilderFactory.FACTORY to emulate
1427                 * readFasta(BufferedReader, SymbolTokenization) and
1428                 * RichSequenceBuilderFactory.PACKED to force all symbols to be encoded
1429                 * using bit-packing.
1430                 * 
1431                 * @param br
1432                 *            the <code>BufferedReader</code> to read data from
1433                 * @param sTok
1434                 *            a <code>SymbolTokenization</code> that understands the
1435                 *            sequences
1436                 * @param seqFactory
1437                 *            a factory used to build a <code>SymbolList</code>
1438                 * @param ns
1439                 *            a <code>Namespace</code> to load the sequences into. Null
1440                 *            implies that it should use the namespace specified in the
1441                 *            file. If no namespace is specified in the file, then
1442                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1443                 *            used.
1444                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1445                 *         fasta file
1446                 */
1447                public static RichSequenceIterator readUniProt(BufferedReader br,
1448                                SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory,
1449                                Namespace ns) {
1450                        return new RichStreamReader(br, new UniProtFormat(), sTok,
1451                                        seqFactory, ns);
1452                }
1453
1454                /**
1455                 * Iterate over the sequences in an UniProt-format stream of RNA
1456                 * sequences.
1457                 * 
1458                 * @param br
1459                 *            the <code>BufferedReader</code> to read data from
1460                 * @param ns
1461                 *            a <code>Namespace</code> to load the sequences into. Null
1462                 *            implies that it should use the namespace specified in the
1463                 *            file. If no namespace is specified in the file, then
1464                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1465                 *            used.
1466                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1467                 *         fasta file
1468                 */
1469                public static RichSequenceIterator readUniProt(BufferedReader br,
1470                                Namespace ns) {
1471                        return new RichStreamReader(br, new UniProtFormat(),
1472                                        getProteinParser(), factory, ns);
1473                }
1474
1475                /**
1476                 * Read a UniProt XML file using a custom type of SymbolList. For
1477                 * example, use RichSequenceBuilderFactory.FACTORY to emulate
1478                 * readFasta(BufferedReader, SymbolTokenization) and
1479                 * RichSequenceBuilderFactory.PACKED to force all symbols to be encoded
1480                 * using bit-packing.
1481                 * 
1482                 * @param br
1483                 *            the <code>BufferedReader</code> to read data from
1484                 * @param sTok
1485                 *            a <code>SymbolTokenization</code> that understands the
1486                 *            sequences
1487                 * @param seqFactory
1488                 *            a factory used to build a <code>SymbolList</code>
1489                 * @param ns
1490                 *            a <code>Namespace</code> to load the sequences into. Null
1491                 *            implies that it should use the namespace specified in the
1492                 *            file. If no namespace is specified in the file, then
1493                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1494                 *            used.
1495                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1496                 *         fasta file
1497                 */
1498                public static RichSequenceIterator readUniProtXML(BufferedReader br,
1499                                SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory,
1500                                Namespace ns) {
1501                        return new RichStreamReader(br, new UniProtXMLFormat(), sTok,
1502                                        seqFactory, ns);
1503                }
1504
1505                /**
1506                 * Iterate over the sequences in an UniProt XML-format stream of RNA
1507                 * sequences.
1508                 * 
1509                 * @param br
1510                 *            the <code>BufferedReader</code> to read data from
1511                 * @param ns
1512                 *            a <code>Namespace</code> to load the sequences into. Null
1513                 *            implies that it should use the namespace specified in the
1514                 *            file. If no namespace is specified in the file, then
1515                 *            <code>RichObjectFactory.getDefaultNamespace()</code> is
1516                 *            used.
1517                 * @return a <code>RichSequenceIterator</code> over each sequence in the
1518                 *         fasta file
1519                 */
1520                public static RichSequenceIterator readUniProtXML(BufferedReader br,
1521                                Namespace ns) {
1522                        return new RichStreamReader(br, new UniProtXMLFormat(),
1523                                        getProteinParser(), factory, ns);
1524                }
1525
1526                /**
1527                 * Writes <CODE>Sequence</CODE>s from a <code>SequenceIterator</code> to
1528                 * an <code>OutputStream </code>in Fasta Format. This makes for a useful
1529                 * format filter where a <code>StreamReader</code> can be sent to the
1530                 * <code>RichStreamWriter</code> after formatting.
1531                 * 
1532                 * @param os
1533                 *            The stream to write fasta formatted data to
1534                 * @param in
1535                 *            The source of input <CODE>RichSequence</CODE>s
1536                 * @param ns
1537                 *            a <code>Namespace</code> to write the
1538                 *            <CODE>RichSequence</CODE>s to. <CODE>Null</CODE> implies
1539                 *            that it should use the namespace specified in the
1540                 *            individual sequence.
1541                 * @param header
1542                 *            the FastaHeader
1543                 * @throws java.io.IOException
1544                 *             if there is an IO problem
1545                 */
1546                public static void writeFasta(OutputStream os, SequenceIterator in,
1547                                Namespace ns, FastaHeader header) throws IOException {
1548                        FastaFormat fastaFormat = new FastaFormat();
1549                        if (header != null) {
1550                                fastaFormat.setHeader(header);
1551                        }
1552                        RichStreamWriter sw = new RichStreamWriter(os, fastaFormat);
1553                        sw.writeStream(in, ns);
1554                }
1555
1556                /**
1557                 * Writes <CODE>Sequence</CODE>s from a <code>SequenceIterator</code> to
1558                 * an <code>OutputStream </code>in Fasta Format. This makes for a useful
1559                 * format filter where a <code>StreamReader</code> can be sent to the
1560                 * <code>RichStreamWriter</code> after formatting.
1561                 * 
1562                 * @param os
1563                 *            The stream to write fasta formatted data to
1564                 * @param in
1565                 *            The source of input <CODE>RichSequence</CODE>s
1566                 * @param ns
1567                 *            a <code>Namespace</code> to write the
1568                 *            <CODE>RichSequence</CODE>s to. <CODE>Null</CODE> implies
1569                 *            that it should use the namespace specified in the
1570                 *            individual sequence.
1571                 * @throws java.io.IOException
1572                 *             if there is an IO problem
1573                 */
1574                public static void writeFasta(OutputStream os, SequenceIterator in,
1575                                Namespace ns) throws IOException {
1576                        writeFasta(os, in, ns, null);
1577                }
1578
1579                /**
1580                 * Writes a single <code>Sequence</code> to an <code>OutputStream</code>
1581                 * in Fasta format.
1582                 * 
1583                 * @param os
1584                 *            the <code>OutputStream</code>.
1585                 * @param seq
1586                 *            the <code>Sequence</code>.
1587                 * @param ns
1588                 *            a <code>Namespace</code> to write the sequences to. Null
1589                 *            implies that it should use the namespace specified in the
1590                 *            individual sequence.
1591                 * @throws java.io.IOException
1592                 *             if there is an IO problem
1593                 */
1594                public static void writeFasta(OutputStream os, Sequence seq,
1595                                Namespace ns) throws IOException {
1596                        writeFasta(os, new SingleRichSeqIterator(seq), ns, null);
1597                }
1598
1599                /**
1600                 * Writes a single <code>Sequence</code> to an <code>OutputStream</code>
1601                 * in Fasta format.
1602                 * 
1603                 * @param os
1604                 *            the <code>OutputStream</code>.
1605                 * @param seq
1606                 *            the <code>Sequence</code>.
1607                 * @param ns
1608                 *            a <code>Namespace</code> to write the sequences to. Null
1609                 *            implies that it should use the namespace specified in the
1610                 *            individual sequence.
1611                 * @param header
1612                 *            a <code>FastaHeader</code> that controls the fields in the
1613                 *            header.
1614                 * @throws java.io.IOException
1615                 *             if there is an IO problem
1616                 */
1617                public static void writeFasta(OutputStream os, Sequence seq,
1618                                Namespace ns, FastaHeader header) throws IOException {
1619                        writeFasta(os, new SingleRichSeqIterator(seq), ns, header);
1620                }
1621
1622                /**
1623                 * Writes sequences from a <code>SequenceIterator</code> to an
1624                 * <code>OutputStream </code>in GenBank Format. This makes for a useful
1625                 * format filter where a <code>StreamReader</code> can be sent to the
1626                 * <code>RichStreamWriter</code> after formatting.
1627                 * 
1628                 * @param os
1629                 *            The stream to write fasta formatted data to
1630                 * @param in
1631                 *            The source of input Sequences
1632                 * @param ns
1633                 *            a <code>Namespace</code> to write the sequences to. Null
1634                 *            implies that it should use the namespace specified in the
1635                 *            individual sequence.
1636                 * @throws java.io.IOException
1637                 *             if there is an IO problem
1638                 */
1639                public static void writeGenbank(OutputStream os, SequenceIterator in,
1640                                Namespace ns) throws IOException {
1641                        RichStreamWriter sw = new RichStreamWriter(os, new GenbankFormat());
1642                        sw.writeStream(in, ns);
1643                }
1644
1645                /**
1646                 * Writes a single <code>Sequence</code> to an <code>OutputStream</code>
1647                 * in GenBank format.
1648                 * 
1649                 * @param os
1650                 *            the <code>OutputStream</code>.
1651                 * @param seq
1652                 *            the <code>Sequence</code>.
1653                 * @param ns
1654                 *            a <code>Namespace</code> to write the sequences to. Null
1655                 *            implies that it should use the namespace specified in the
1656                 *            individual sequence.
1657                 * @throws java.io.IOException
1658                 *             if there is an IO problem
1659                 */
1660                public static void writeGenbank(OutputStream os, Sequence seq,
1661                                Namespace ns) throws IOException {
1662                        writeGenbank(os, new SingleRichSeqIterator(seq), ns);
1663                }
1664
1665                /**
1666                 * Writes sequences from a <code>SequenceIterator</code> to an
1667                 * <code>OutputStream </code>in INSDseq Format. This makes for a useful
1668                 * format filter where a <code>StreamReader</code> can be sent to the
1669                 * <code>RichStreamWriter</code> after formatting.
1670                 * 
1671                 * @param os
1672                 *            The stream to write fasta formatted data to
1673                 * @param in
1674                 *            The source of input Sequences
1675                 * @param ns
1676                 *            a <code>Namespace</code> to write the sequences to. Null
1677                 *            implies that it should use the namespace specified in the
1678                 *            individual sequence.
1679                 * @throws java.io.IOException
1680                 *             if there is an IO problem
1681                 */
1682                public static void writeINSDseq(OutputStream os, SequenceIterator in,
1683                                Namespace ns) throws IOException {
1684                        RichStreamWriter sw = new RichStreamWriter(os, new INSDseqFormat());
1685                        sw.writeStream(in, ns);
1686                }
1687
1688                /**
1689                 * Writes a single <code>Sequence</code> to an <code>OutputStream</code>
1690                 * in INSDseq format.
1691                 * 
1692                 * @param os
1693                 *            the <code>OutputStream</code>.
1694                 * @param seq
1695                 *            the <code>Sequence</code>.
1696                 * @param ns
1697                 *            a <code>Namespace</code> to write the sequences to. Null
1698                 *            implies that it should use the namespace specified in the
1699                 *            individual sequence.
1700                 * @throws java.io.IOException
1701                 *             if there is an IO problem
1702                 */
1703                public static void writeINSDseq(OutputStream os, Sequence seq,
1704                                Namespace ns) throws IOException {
1705                        writeINSDseq(os, new SingleRichSeqIterator(seq), ns);
1706                }
1707
1708                /**
1709                 * Writes sequences from a <code>SequenceIterator</code> to an
1710                 * <code>OutputStream </code>in EMBLxml Format. This makes for a useful
1711                 * format filter where a <code>StreamReader</code> can be sent to the
1712                 * <code>RichStreamWriter</code> after formatting.
1713                 * 
1714                 * @param os
1715                 *            The stream to write fasta formatted data to
1716                 * @param in
1717                 *            The source of input Sequences
1718                 * @param ns
1719                 *            a <code>Namespace</code> to write the sequences to. Null
1720                 *            implies that it should use the namespace specified in the
1721                 *            individual sequence.
1722                 * @throws java.io.IOException
1723                 *             if there is an IO problem
1724                 */
1725                public static void writeEMBLxml(OutputStream os, SequenceIterator in,
1726                                Namespace ns) throws IOException {
1727                        RichStreamWriter sw = new RichStreamWriter(os, new EMBLxmlFormat());
1728                        sw.writeStream(in, ns);
1729                }
1730
1731                /**
1732                 * Writes a single <code>Sequence</code> to an <code>OutputStream</code>
1733                 * in EMBLxml format.
1734                 * 
1735                 * @param os
1736                 *            the <code>OutputStream</code>.
1737                 * @param seq
1738                 *            the <code>Sequence</code>.
1739                 * @param ns
1740                 *            a <code>Namespace</code> to write the sequences to. Null
1741                 *            implies that it should use the namespace specified in the
1742                 *            individual sequence.
1743                 * @throws java.io.IOException
1744                 *             if there is an IO problem
1745                 */
1746                public static void writeEMBLxml(OutputStream os, Sequence seq,
1747                                Namespace ns) throws IOException {
1748                        writeEMBLxml(os, new SingleRichSeqIterator(seq), ns);
1749                }
1750
1751                /**
1752                 * Writes sequences from a <code>SequenceIterator</code> to an
1753                 * <code>OutputStream </code>in EMBL Format. This makes for a useful
1754                 * format filter where a <code>StreamReader</code> can be sent to the
1755                 * <code>RichStreamWriter</code> after formatting.
1756                 * 
1757                 * @param os
1758                 *            The stream to write fasta formatted data to
1759                 * @param in
1760                 *            The source of input Sequences
1761                 * @param ns
1762                 *            a <code>Namespace</code> to write the sequences to. Null
1763                 *            implies that it should use the namespace specified in the
1764                 *            individual sequence.
1765                 * @throws java.io.IOException
1766                 *             if there is an IO problem
1767                 */
1768                public static void writeEMBL(OutputStream os, SequenceIterator in,
1769                                Namespace ns) throws IOException {
1770                        RichStreamWriter sw = new RichStreamWriter(os, new EMBLFormat());
1771                        sw.writeStream(in, ns);
1772                }
1773
1774                /**
1775                 * Writes a single <code>Sequence</code> to an <code>OutputStream</code>
1776                 * in EMBL format.
1777                 * 
1778                 * @param os
1779                 *            the <code>OutputStream</code>.
1780                 * @param seq
1781                 *            the <code>Sequence</code>.
1782                 * @param ns
1783                 *            a <code>Namespace</code> to write the sequences to. Null
1784                 *            implies that it should use the namespace specified in the
1785                 *            individual sequence.
1786                 * @throws java.io.IOException
1787                 *             if there is an IO problem
1788                 */
1789                public static void writeEMBL(OutputStream os, Sequence seq, Namespace ns)
1790                                throws IOException {
1791                        writeEMBL(os, new SingleRichSeqIterator(seq), ns);
1792                }
1793
1794                /**
1795                 * Writes sequences from a <code>SequenceIterator</code> to an
1796                 * <code>OutputStream </code>in UniProt Format. This makes for a useful
1797                 * format filter where a <code>StreamReader</code> can be sent to the
1798                 * <code>RichStreamWriter</code> after formatting.
1799                 * 
1800                 * @param os
1801                 *            The stream to write fasta formatted data to
1802                 * @param in
1803                 *            The source of input Sequences
1804                 * @param ns
1805                 *            a <code>Namespace</code> to write the sequences to. Null
1806                 *            implies that it should use the namespace specified in the
1807                 *            individual sequence.
1808                 * @throws java.io.IOException
1809                 *             if there is an IO problem
1810                 */
1811                public static void writeUniProt(OutputStream os, SequenceIterator in,
1812                                Namespace ns) throws IOException {
1813                        RichStreamWriter sw = new RichStreamWriter(os, new UniProtFormat());
1814                        sw.writeStream(in, ns);
1815                }
1816
1817                /**
1818                 * Writes a single <code>Sequence</code> to an <code>OutputStream</code>
1819                 * in UniProt format.
1820                 * 
1821                 * @param os
1822                 *            the <code>OutputStream</code>.
1823                 * @param seq
1824                 *            the <code>Sequence</code>.
1825                 * @param ns
1826                 *            a <code>Namespace</code> to write the sequences to. Null
1827                 *            implies that it should use the namespace specified in the
1828                 *            individual sequence.
1829                 * @throws java.io.IOException
1830                 *             if there is an IO problem
1831                 */
1832                public static void writeUniProt(OutputStream os, Sequence seq,
1833                                Namespace ns) throws IOException {
1834                        writeUniProt(os, new SingleRichSeqIterator(seq), ns);
1835                }
1836
1837                /**
1838                 * Writes sequences from a <code>SequenceIterator</code> to an
1839                 * <code>OutputStream </code>in UniProt XML Format. This makes for a
1840                 * useful format filter where a <code>StreamReader</code> can be sent to
1841                 * the <code>RichStreamWriter</code> after formatting.
1842                 * 
1843                 * @param os
1844                 *            The stream to write fasta formatted data to
1845                 * @param in
1846                 *            The source of input Sequences
1847                 * @param ns
1848                 *            a <code>Namespace</code> to write the sequences to. Null
1849                 *            implies that it should use the namespace specified in the
1850                 *            individual sequence.
1851                 * @throws java.io.IOException
1852                 *             if there is an IO problem
1853                 */
1854                public static void writeUniProtXML(OutputStream os,
1855                                SequenceIterator in, Namespace ns) throws IOException {
1856                        RichStreamWriter sw = new RichStreamWriter(os,
1857                                        new UniProtXMLFormat());
1858                        sw.writeStream(in, ns);
1859                }
1860
1861                /**
1862                 * Writes a single <code>Sequence</code> to an <code>OutputStream</code>
1863                 * in UniProt XML format.
1864                 * 
1865                 * @param os
1866                 *            the <code>OutputStream</code>.
1867                 * @param seq
1868                 *            the <code>Sequence</code>.
1869                 * @param ns
1870                 *            a <code>Namespace</code> to write the sequences to. Null
1871                 *            implies that it should use the namespace specified in the
1872                 *            individual sequence.
1873                 * @throws java.io.IOException
1874                 *             if there is an IO problem
1875                 */
1876                public static void writeUniProtXML(OutputStream os, Sequence seq,
1877                                Namespace ns) throws IOException {
1878                        writeUniProtXML(os, new SingleRichSeqIterator(seq), ns);
1879                }
1880
1881                /**
1882                 * Creates a DNA symbol tokenizer.
1883                 * 
1884                 * @return a <code>SymbolTokenization</code> for parsing DNA.
1885                 */
1886                public static SymbolTokenization getDNAParser() {
1887                        try {
1888                                return DNATools.getDNA().getTokenization("token");
1889                        } catch (BioException ex) {
1890                                throw new BioError("Assertion failing:"
1891                                                + " Couldn't get DNA token parser", ex);
1892                        }
1893                }
1894
1895                /**
1896                 * Creates a RNA symbol tokenizer.
1897                 * 
1898                 * @return a <code>SymbolTokenization</code> for parsing RNA.
1899                 */
1900                public static SymbolTokenization getRNAParser() {
1901                        try {
1902                                return RNATools.getRNA().getTokenization("token");
1903                        } catch (BioException ex) {
1904                                throw new BioError("Assertion failing:"
1905                                                + " Couldn't get RNA token parser", ex);
1906                        }
1907                }
1908
1909                /**
1910                 * Creates a nucleotide symbol tokenizer.
1911                 * 
1912                 * @return a <code>SymbolTokenization</code> for parsing nucleotides.
1913                 */
1914                public static SymbolTokenization getNucleotideParser() {
1915                        try {
1916                                return NucleotideTools.getNucleotide().getTokenization("token");
1917                        } catch (BioException ex) {
1918                                throw new BioError("Assertion failing:"
1919                                                + " Couldn't get nucleotide token parser", ex);
1920                        }
1921                }
1922
1923                /**
1924                 * Creates a protein symbol tokenizer.
1925                 * 
1926                 * @return a <code>SymbolTokenization</code> for parsing protein.
1927                 */
1928                public static SymbolTokenization getProteinParser() {
1929                        try {
1930                                return ProteinTools.getTAlphabet().getTokenization("token");
1931                        } catch (BioException ex) {
1932                                throw new BioError("Assertion failing:"
1933                                                + " Couldn't get PROTEIN token parser", ex);
1934                        }
1935                }
1936
1937                /**
1938                 * Used to iterate over a single rich sequence
1939                 */
1940                public static final class SingleRichSeqIterator implements
1941                                RichSequenceIterator {
1942
1943                        private RichSequence seq;
1944
1945                        /**
1946                         * Creates an iterator over a single sequence.
1947                         * 
1948                         * @param seq
1949                         *            the sequence to iterate over.
1950                         */
1951                        public SingleRichSeqIterator(Sequence seq) {
1952                                try {
1953                                        if (seq instanceof RichSequence)
1954                                                this.seq = (RichSequence) seq;
1955                                        else
1956                                                this.seq = RichSequence.Tools.enrich(seq);
1957                                } catch (ChangeVetoException e) {
1958                                        throw new RuntimeException("Unable to enrich sequence", e);
1959                                }
1960                        }
1961
1962                        /**
1963                         * {@inheritDoc}
1964                         * 
1965                         * @return true if another <CODE>RichSequence</CODE> is available
1966                         */
1967                        public boolean hasNext() {
1968                                return seq != null;
1969                        }
1970
1971                        /**
1972                         * {@inheritDoc}
1973                         * 
1974                         * @return a <CODE>RichSequence</CODE>
1975                         */
1976                        public Sequence nextSequence() {
1977                                return this.nextRichSequence();
1978                        }
1979
1980                        /**
1981                         * {@inheritDoc}
1982                         * 
1983                         * @return a <CODE>RichSequence</CODE>
1984                         */
1985                        public BioEntry nextBioEntry() {
1986                                return this.nextRichSequence();
1987                        }
1988
1989                        /**
1990                         * {@inheritDoc}
1991                         * 
1992                         * @return a <CODE>RichSequence</CODE>
1993                         */
1994                        public RichSequence nextRichSequence() {
1995                                RichSequence seq = this.seq;
1996                                this.seq = null;
1997                                return seq;
1998                        }
1999                }
2000        }
2001}