001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 *
022 * @author Richard Holland
023 * @author Scooter Willis
024 *
025 */
026package org.biojava.nbio.core.sequence.loader;
027
028import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
029import org.biojava.nbio.core.sequence.AccessionID;
030import org.biojava.nbio.core.sequence.Strand;
031import org.biojava.nbio.core.sequence.io.template.SequenceParserInterface;
032import org.biojava.nbio.core.sequence.storage.SequenceAsStringHelper;
033import org.biojava.nbio.core.sequence.template.*;
034
035import java.io.BufferedReader;
036import java.io.File;
037import java.io.FileReader;
038import java.io.IOException;
039import java.util.ArrayList;
040import java.util.Iterator;
041import java.util.List;
042
043/**
044 * This class represents the storage container of a sequence stored in a fasta file where
045 * the initial parsing of the file we store the offset and length of the sequence. When a call
046 * is made to any method that needs sequence data then the file will be opened and the sequence
047 * loaded. This class could be improved by using the hints or a some algorithm that indicates
048 * the sequence data once loaded should stay loaded. Could keep track of the last time sequence
049 * data was loaded and then after X amount of time clear the contents to free up memory.
050 *
051 *
052 * @author Scooter Willis <willishf at gmail dot com>
053 * @param <C>
054 */
055public class SequenceFileProxyLoader<C extends Compound> implements ProxySequenceReader<C> {
056
057        SequenceParserInterface sequenceParser;
058        private CompoundSet<C> compoundSet;
059        private List<C> parsedCompounds = new ArrayList<C>();
060        File file;
061        long sequenceStartIndex = -1;
062        int sequenceLength = -1;
063        //private boolean initialized = false;
064
065        /**
066         *
067         * @param file The file where the sequence will be found
068         * @param sequenceParser The parser to use to load the sequence
069         * @param sequenceStartIndex The file offset to the start of the sequence
070         * @param sequenceLength The length of the sequence
071         * @param compoundSet
072         * @throws IOException if problems occur while reading the file
073         * @throws CompoundNotFoundException if a compound in the sequence can't be found in the given compoundSet
074         */
075        public SequenceFileProxyLoader(File file, SequenceParserInterface sequenceParser, long sequenceStartIndex, int sequenceLength, CompoundSet<C> compoundSet)
076                        throws IOException, CompoundNotFoundException {
077                this.sequenceParser = sequenceParser;
078                this.file = file;
079                this.sequenceStartIndex = sequenceStartIndex;
080                this.sequenceLength = sequenceLength;
081                setCompoundSet(compoundSet);
082
083                init();
084        }
085
086        /**
087         *
088         * @param compoundSet
089         */
090        @Override
091        public void setCompoundSet(CompoundSet<C> compoundSet) {
092                this.compoundSet = compoundSet;
093        }
094
095        /**
096         *  Load the sequence
097         * @return
098         */
099        private boolean init() throws IOException, CompoundNotFoundException {
100
101                BufferedReader br = new BufferedReader(new FileReader(file));
102                br.skip(sequenceStartIndex);
103                String sequence = sequenceParser.getSequence(br, sequenceLength);
104                setContents(sequence);
105                br.close(); // close file to prevent too many being open
106
107                return true;
108        }
109
110        /**
111         *
112         * @param sequence
113         */
114        @Override
115        public void setContents(String sequence) throws CompoundNotFoundException {
116                // Horrendously inefficient - pretty much the way the old BJ did things.
117                // TODO Should be optimised.
118                this.parsedCompounds.clear();
119                for (int i = 0; i < sequence.length();) {
120                        String compoundStr = null;
121                        C compound = null;
122                        for (int compoundStrLength = 1; compound == null && compoundStrLength <= compoundSet.getMaxSingleCompoundStringLength(); compoundStrLength++) {
123                                compoundStr = sequence.substring(i, i + compoundStrLength);
124                                compound = compoundSet.getCompoundForString(compoundStr);
125                        }
126                        if (compound == null) {
127                                throw new CompoundNotFoundException("Compound "+compoundStr+" not found");
128                        } else {
129                                i += compoundStr.length();
130                        }
131                        this.parsedCompounds.add(compound);
132                }
133
134        }
135
136        /**
137         *
138         * @return
139         */
140        @Override
141        public int getLength() {
142                return sequenceLength;
143        }
144
145        /**
146         *
147         * @param position
148         * @return
149         */
150        @Override
151        public C getCompoundAt(int position) {
152
153                return this.parsedCompounds.get(position - 1);
154        }
155
156        /**
157         *
158         * @param compound
159         * @return
160         */
161        @Override
162        public int getIndexOf(C compound) {
163
164                return this.parsedCompounds.indexOf(compound) + 1;
165        }
166
167        /**
168         *
169         * @param compound
170         * @return
171         */
172        @Override
173        public int getLastIndexOf(C compound) {
174
175                return this.parsedCompounds.lastIndexOf(compound) + 1;
176        }
177
178        /**
179         *
180         * @return
181         */
182        @Override
183        public String toString() {
184
185                return getSequenceAsString();
186        }
187
188        /**
189         *
190         * @return
191         */
192        @Override
193        public String getSequenceAsString() {
194                return getSequenceAsString(1, getLength(), Strand.POSITIVE);
195        }
196
197        /**
198         *
199         * @param bioBegin
200         * @param bioEnd
201         * @param strand
202         * @return
203         */
204        public String getSequenceAsString(Integer bioBegin, Integer bioEnd, Strand strand) {
205
206                SequenceAsStringHelper<C> sequenceAsStringHelper = new SequenceAsStringHelper<C>();
207                return sequenceAsStringHelper.getSequenceAsString(this.parsedCompounds, compoundSet, bioBegin, bioEnd, strand);
208        }
209
210        /**
211         *
212         * @return
213         */
214        @Override
215        public List<C> getAsList() {
216
217                return this.parsedCompounds;
218
219        }
220
221        /**
222         *
223         * @param bioBegin
224         * @param bioEnd
225         * @return
226         */
227        @Override
228        public SequenceView<C> getSubSequence(final Integer bioBegin, final Integer bioEnd) {
229
230                return new SequenceProxyView<C>(SequenceFileProxyLoader.this, bioBegin, bioEnd);
231        }
232
233        /**
234         *
235         * @return
236         */
237        @Override
238        public Iterator<C> iterator() {
239
240                return this.parsedCompounds.iterator();
241        }
242
243        /**
244         *
245         * @return
246         */
247        @Override
248        public CompoundSet<C> getCompoundSet() {
249                return compoundSet;
250        }
251
252        /**
253         *
254         * @return
255         */
256        @Override
257        public AccessionID getAccession() {
258                throw new UnsupportedOperationException("Not supported yet.");
259        }
260
261        /**
262         *
263         * @param compounds
264         * @return
265         */
266        @Override
267        public int countCompounds(C... compounds) {
268                return SequenceMixin.countCompounds(this, compounds);
269        }
270
271        /**
272         *
273         * @return
274         */
275        @Override
276        public SequenceView<C> getInverse() {
277                return SequenceMixin.inverse(this);
278        }
279}