001/*
002 *                  BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 * 
020 * Created on Jun 24, 2008
021 * 
022 */
023
024package org.biojavax.bio.seq.io;
025
026import java.io.BufferedInputStream;
027import java.util.NoSuchElementException;
028
029import org.biojava.bio.Annotation;
030import org.biojava.bio.BioException;
031import org.biojava.bio.seq.Sequence;
032import org.biojava.bio.seq.SequenceIterator;
033import org.biojava.bio.seq.Feature.Template;
034import org.biojava.bio.seq.db.SequenceDB;
035import org.biojava.bio.seq.io.ParseException;
036import org.biojava.bio.seq.io.SeqIOTools;
037import org.biojava.bio.symbol.Alphabet;
038import org.biojava.bio.symbol.IllegalAlphabetException;
039import org.biojava.bio.symbol.Symbol;
040import org.biojava.bio.symbol.SymbolList;
041import org.biojavax.Namespace;
042import org.biojavax.RankedCrossRef;
043import org.biojavax.RankedDocRef;
044import org.biojavax.bio.BioEntry;
045import org.biojavax.bio.BioEntryRelationship;
046import org.biojavax.bio.seq.RichFeature;
047import org.biojavax.bio.seq.RichSequence;
048import org.biojavax.bio.seq.RichSequenceIterator;
049import org.biojavax.bio.seq.SimpleRichSequence;
050import org.biojavax.bio.taxa.NCBITaxon;
051
052
053/** Iterates over a Fasta file that is kept in memory for optimized access.
054 * @since 1.7
055 * @author Andreas Prlic
056 *
057 */
058public class HashedFastaIterator implements RichSequenceIterator{
059
060        Alphabet alpha;
061        Namespace ns;
062        SequenceDB db ;
063        FastaFormat format;
064        SequenceIterator iterator;
065        MyRichSeqIOListener listener;
066        
067        public HashedFastaIterator(BufferedInputStream is, Alphabet alpha,Namespace ns) throws BioException{
068
069//              get a SequenceDB of all sequences in the file
070                db = SeqIOTools.readFasta(is, alpha);
071                iterator = db.sequenceIterator();
072                this.ns = ns;
073                format = new FastaFormat();
074                listener = new MyRichSeqIOListener();
075                this.alpha = alpha;
076                
077        }
078
079        
080        public RichSequence nextRichSequence() throws NoSuchElementException, BioException {
081                listener.startSequence();
082                
083                Sequence s = iterator.nextSequence();
084
085                Annotation a = s.getAnnotation();
086
087                if ( a.containsProperty("description_line")){
088                        //process the description line...
089                        try {
090                                format.processHeader(">"+a.getProperty("description_line"), listener, ns);
091                        } catch (Exception e){
092                                throw new BioException(e);
093                        }
094                }
095                listener.setSymbolList(s);
096
097                listener.endSequence();
098                
099                return listener.getCurrentSequence();
100        }
101
102        public boolean hasNext() {
103                return iterator.hasNext();              
104        }
105
106        public BioEntry nextBioEntry() throws NoSuchElementException, BioException {
107                return this.nextRichSequence();
108        }
109
110        public Sequence nextSequence() throws NoSuchElementException, BioException {
111
112                return  iterator.nextSequence();
113        }
114}
115
116/** a RichSeqIOListener plus more...
117 * 
118 * @author Andreas Prlic
119 *
120 */
121class MyRichSeqIOListener implements RichSeqIOListener{
122
123        SimpleRichSequence currentSequence;
124        Namespace ns;
125        String ac;
126        String name;
127        int version;
128        Double sversion;
129        SymbolList symbolList;
130        
131        public MyRichSeqIOListener(){
132                currentSequence = null;
133        }
134
135        public SimpleRichSequence getCurrentSequence(){
136                return currentSequence;
137        }
138        
139        public RichFeature getCurrentFeature() throws ParseException {
140                // TODO Auto-generated method stub
141                return null;
142        }
143
144        
145        public SymbolList getSymbolList() {
146                return symbolList;
147        }
148
149        public void setSymbolList(SymbolList symbolList) {
150                this.symbolList = symbolList;
151        }
152
153        public void setAccession(String accession) throws ParseException {
154                ac = accession;
155
156        }
157
158        public void setCircular(boolean circular) throws ParseException {
159                // TODO Auto-generated method stub
160
161        }
162
163        public void setComment(String comment) throws ParseException {
164                // TODO Auto-generated method stub
165
166        }
167
168        public void setDescription(String description) throws ParseException {
169                // TODO Auto-generated method stub
170
171        }
172
173        public void setDivision(String division) throws ParseException {
174                // TODO Auto-generated method stub
175
176        }
177
178        public void setIdentifier(String identifier) throws ParseException {
179                // TODO Auto-generated method stub
180
181        }
182
183        public void setNamespace(Namespace namespace) throws ParseException {
184                ns = namespace;
185        }
186
187        public void setRankedCrossRef(RankedCrossRef crossRef) throws ParseException {
188                // TODO Auto-generated method stub
189
190        }
191
192        public void setRankedDocRef(RankedDocRef ref) throws ParseException {
193                // TODO Auto-generated method stub
194
195        }
196
197        public void setRelationship(BioEntryRelationship relationship) throws ParseException {
198                // TODO Auto-generated method stub
199
200        }
201
202        public void setSeqVersion(String version) throws ParseException {
203                try {
204                        sversion = Double.parseDouble(version);
205                } catch (Exception e){
206                        throw new ParseException(e.getMessage());
207                }
208
209        }
210
211        public void setTaxon(NCBITaxon taxon) throws ParseException {
212                // TODO Auto-generated method stub
213
214        }
215
216        public void setURI(String uri) throws ParseException {
217                // TODO Auto-generated method stub
218
219        }
220
221        public void setVersion(int version) throws ParseException {
222                this.version = version;
223
224        }
225
226        public void addFeatureProperty(Object key, Object value) throws ParseException {
227                // TODO Auto-generated method stub
228
229        }
230
231        public void addSequenceProperty(Object key, Object value) throws ParseException {
232                // TODO Auto-generated method stub
233
234        }
235
236        public void addSymbols(Alphabet alpha, Symbol[] syms, int start, int length) throws IllegalAlphabetException {
237                // TODO Auto-generated method stub
238
239        }
240
241        public void endFeature() throws ParseException {
242                // TODO Auto-generated method stub
243
244        }
245
246        public void endSequence() throws ParseException {
247                
248                currentSequence = new SimpleRichSequence(
249                                ns, 
250                                name, 
251                                ac, 
252                                version, 
253                                symbolList, 
254                                sversion);
255                
256
257        }
258
259        public void setName(String name) throws ParseException {
260                this.name = name;
261        }
262
263        public void startFeature(Template templ) throws ParseException {
264                // TODO Auto-generated method stub
265
266        }
267
268        public void startSequence() throws ParseException {
269                currentSequence = null;
270
271        }
272}