001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.program.ssaha; 023 024import java.io.BufferedReader; 025import java.io.File; 026import java.io.FileReader; 027import java.io.IOException; 028import java.util.Collections; 029import java.util.Iterator; 030import java.util.List; 031 032import org.biojava.bio.BioException; 033import org.biojava.bio.seq.Sequence; 034import org.biojava.bio.seq.SequenceIterator; 035import org.biojava.bio.seq.db.SequenceDB; 036import org.biojava.bio.seq.io.SeqIOListener; 037import org.biojava.bio.seq.io.SequenceFormat; 038import org.biojava.bio.seq.io.SymbolTokenization; 039import org.biojava.bio.symbol.Symbol; 040 041public interface SequenceStreamer { 042 public boolean hasNext(); 043 public void streamNext(SeqIOListener listener) throws IOException, BioException; 044 public void reset() throws BioException; 045 046 public static class SequenceDBStreamer implements SequenceStreamer { 047 private SequenceDB seqDB; 048 private SequenceIterator si; 049 050 public SequenceDBStreamer(SequenceDB seqDB) { 051 this.seqDB = seqDB; 052 this.si = seqDB.sequenceIterator(); 053 } 054 055 public boolean hasNext() { 056 return si.hasNext(); 057 } 058 059 public void reset() { 060 si = seqDB.sequenceIterator(); 061 } 062 063 public void streamNext(SeqIOListener listener) 064 throws BioException 065 { 066 Sequence seq = si.nextSequence(); 067 System.err.println("Streaming " + seq.getName()); 068 069 listener.startSequence(); 070 listener.setName(seq.getName()); 071 listener.setURI(seq.getURN()); 072 Symbol[] syms = new Symbol[4096]; 073 int pos = 1; 074 int spos = 0; 075 while (pos <= seq.length()) { 076 syms[spos++] = seq.symbolAt(pos++); 077 if (spos == syms.length || pos > seq.length()) { 078 listener.addSymbols(seq.getAlphabet(), syms, 0, spos); 079 spos = 0; 080 } 081 } 082 listener.endSequence(); 083 } 084 } 085 086 public static class FileStreamer implements SequenceStreamer { 087 private final List fileList; 088 private final SequenceFormat format; 089 private final SymbolTokenization toke; 090 private Iterator fileIterator; 091 private BufferedReader currentStream = null; 092 093 public FileStreamer(SequenceFormat format, SymbolTokenization toke, List files) { 094 this.format = format; 095 this.fileList = files; 096 this.toke = toke; 097 fileIterator = fileList.iterator(); 098 } 099 100 public FileStreamer(SequenceFormat format, SymbolTokenization toke, File f) { 101 this(format, toke, Collections.singletonList(f)); 102 } 103 104 public void reset() { 105 currentStream = null; 106 fileIterator = fileList.iterator(); 107 } 108 109 public boolean hasNext() { 110 return (currentStream != null || fileIterator.hasNext()); 111 } 112 113 public void streamNext(SeqIOListener listener) 114 throws BioException, IOException 115 { 116 if (currentStream == null) { 117 currentStream = new BufferedReader(new FileReader((File) fileIterator.next())); 118 } 119 boolean more = format.readSequence(currentStream, toke, listener); 120 if (!more) { 121 currentStream.close(); 122 currentStream = null; 123 } 124 } 125 } 126}