/*
 * BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence. This should
 * be distributed with the code. If you do not have a copy,
 * see:
 *
 * http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors. These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 * http://www.biojava.org/
 *
 * Created on Jun 24, 2008
 *
 */

package org.biojavax.bio.seq.io;

import java.io.BufferedInputStream;
import java.util.NoSuchElementException;

import org.biojava.bio.Annotation;
import org.biojava.bio.BioException;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.SequenceIterator;
import org.biojava.bio.seq.Feature.Template;
import org.biojava.bio.seq.db.SequenceDB;
import org.biojava.bio.seq.io.ParseException;
import org.biojava.bio.seq.io.SeqIOTools;
import org.biojava.bio.symbol.Alphabet;
import org.biojava.bio.symbol.IllegalAlphabetException;
import org.biojava.bio.symbol.Symbol;
import org.biojava.bio.symbol.SymbolList;
import org.biojavax.Namespace;
import org.biojavax.RankedCrossRef;
import org.biojavax.RankedDocRef;
import org.biojavax.bio.BioEntry;
import org.biojavax.bio.BioEntryRelationship;
import org.biojavax.bio.seq.RichFeature;
import org.biojavax.bio.seq.RichSequence;
import org.biojavax.bio.seq.RichSequenceIterator;
import org.biojavax.bio.seq.SimpleRichSequence;
import org.biojavax.bio.taxa.NCBITaxon;


/** Iterates over a Fasta file that is kept in memory for optimized access.
054 * @since 1.7 055 * @author Andreas Prlic 056 * 057 */ 058public class HashedFastaIterator implements RichSequenceIterator{ 059 060 Alphabet alpha; 061 Namespace ns; 062 SequenceDB db ; 063 FastaFormat format; 064 SequenceIterator iterator; 065 MyRichSeqIOListener listener; 066 067 public HashedFastaIterator(BufferedInputStream is, Alphabet alpha,Namespace ns) throws BioException{ 068 069// get a SequenceDB of all sequences in the file 070 db = SeqIOTools.readFasta(is, alpha); 071 iterator = db.sequenceIterator(); 072 this.ns = ns; 073 format = new FastaFormat(); 074 listener = new MyRichSeqIOListener(); 075 this.alpha = alpha; 076 077 } 078 079 080 public RichSequence nextRichSequence() throws NoSuchElementException, BioException { 081 listener.startSequence(); 082 083 Sequence s = iterator.nextSequence(); 084 085 Annotation a = s.getAnnotation(); 086 087 if ( a.containsProperty("description_line")){ 088 //process the description line... 089 try { 090 format.processHeader(">"+a.getProperty("description_line"), listener, ns); 091 } catch (Exception e){ 092 throw new BioException(e); 093 } 094 } 095 listener.setSymbolList(s); 096 097 listener.endSequence(); 098 099 return listener.getCurrentSequence(); 100 } 101 102 public boolean hasNext() { 103 return iterator.hasNext(); 104 } 105 106 public BioEntry nextBioEntry() throws NoSuchElementException, BioException { 107 return this.nextRichSequence(); 108 } 109 110 public Sequence nextSequence() throws NoSuchElementException, BioException { 111 112 return iterator.nextSequence(); 113 } 114} 115 116/** a RichSeqIOListener plus more... 
117 * 118 * @author Andreas Prlic 119 * 120 */ 121class MyRichSeqIOListener implements RichSeqIOListener{ 122 123 SimpleRichSequence currentSequence; 124 Namespace ns; 125 String ac; 126 String name; 127 int version; 128 Double sversion; 129 SymbolList symbolList; 130 131 public MyRichSeqIOListener(){ 132 currentSequence = null; 133 } 134 135 public SimpleRichSequence getCurrentSequence(){ 136 return currentSequence; 137 } 138 139 public RichFeature getCurrentFeature() throws ParseException { 140 // TODO Auto-generated method stub 141 return null; 142 } 143 144 145 public SymbolList getSymbolList() { 146 return symbolList; 147 } 148 149 public void setSymbolList(SymbolList symbolList) { 150 this.symbolList = symbolList; 151 } 152 153 public void setAccession(String accession) throws ParseException { 154 ac = accession; 155 156 } 157 158 public void setCircular(boolean circular) throws ParseException { 159 // TODO Auto-generated method stub 160 161 } 162 163 public void setComment(String comment) throws ParseException { 164 // TODO Auto-generated method stub 165 166 } 167 168 public void setDescription(String description) throws ParseException { 169 // TODO Auto-generated method stub 170 171 } 172 173 public void setDivision(String division) throws ParseException { 174 // TODO Auto-generated method stub 175 176 } 177 178 public void setIdentifier(String identifier) throws ParseException { 179 // TODO Auto-generated method stub 180 181 } 182 183 public void setNamespace(Namespace namespace) throws ParseException { 184 ns = namespace; 185 } 186 187 public void setRankedCrossRef(RankedCrossRef crossRef) throws ParseException { 188 // TODO Auto-generated method stub 189 190 } 191 192 public void setRankedDocRef(RankedDocRef ref) throws ParseException { 193 // TODO Auto-generated method stub 194 195 } 196 197 public void setRelationship(BioEntryRelationship relationship) throws ParseException { 198 // TODO Auto-generated method stub 199 200 } 201 202 public void 
setSeqVersion(String version) throws ParseException { 203 try { 204 sversion = Double.parseDouble(version); 205 } catch (Exception e){ 206 throw new ParseException(e.getMessage()); 207 } 208 209 } 210 211 public void setTaxon(NCBITaxon taxon) throws ParseException { 212 // TODO Auto-generated method stub 213 214 } 215 216 public void setURI(String uri) throws ParseException { 217 // TODO Auto-generated method stub 218 219 } 220 221 public void setVersion(int version) throws ParseException { 222 this.version = version; 223 224 } 225 226 public void addFeatureProperty(Object key, Object value) throws ParseException { 227 // TODO Auto-generated method stub 228 229 } 230 231 public void addSequenceProperty(Object key, Object value) throws ParseException { 232 // TODO Auto-generated method stub 233 234 } 235 236 public void addSymbols(Alphabet alpha, Symbol[] syms, int start, int length) throws IllegalAlphabetException { 237 // TODO Auto-generated method stub 238 239 } 240 241 public void endFeature() throws ParseException { 242 // TODO Auto-generated method stub 243 244 } 245 246 public void endSequence() throws ParseException { 247 248 currentSequence = new SimpleRichSequence( 249 ns, 250 name, 251 ac, 252 version, 253 symbolList, 254 sversion); 255 256 257 } 258 259 public void setName(String name) throws ParseException { 260 this.name = name; 261 } 262 263 public void startFeature(Template templ) throws ParseException { 264 // TODO Auto-generated method stub 265 266 } 267 268 public void startSequence() throws ParseException { 269 currentSequence = null; 270 271 } 272}