001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 *
022 * @author Richard Holland
023 * @author Scooter Willis
024 *
025 */
026package org.biojava.nbio.core.sequence.loader;
027
028import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
029import org.biojava.nbio.core.sequence.AccessionID;
030import org.biojava.nbio.core.sequence.Strand;
031import org.biojava.nbio.core.sequence.io.template.SequenceParserInterface;
032import org.biojava.nbio.core.sequence.storage.SequenceAsStringHelper;
033import org.biojava.nbio.core.sequence.template.*;
034import org.biojava.nbio.core.util.Equals;
035
036import java.io.BufferedReader;
037import java.io.File;
038import java.io.FileReader;
039import java.io.IOException;
040import java.util.ArrayList;
041import java.util.Iterator;
042import java.util.List;
043
044/**
045 * This class represents the storage container of a sequence stored in a fasta file where
046 * the initial parsing of the file we store the offset and length of the sequence. When a call
047 * is made to any method that needs sequence data then the file will be opened and the sequence
048 * loaded. This class could be improved by using the hints or a some algorithm that indicates
049 * the sequence data once loaded should stay loaded. Could keep track of the last time sequence
050 * data was loaded and then after X amount of time clear the contents to free up memory.
051 *
052 *
053 * @author Scooter Willis <willishf at gmail dot com>
054 * @param <C>
055 */
056public class SequenceFileProxyLoader<C extends Compound> implements ProxySequenceReader<C> {
057
058        SequenceParserInterface sequenceParser;
059        private CompoundSet<C> compoundSet;
060        private List<C> parsedCompounds = new ArrayList<C>();
061        File file;
062        long sequenceStartIndex = -1;
063        int sequenceLength = -1;
064        //private boolean initialized = false;
065
066        /**
067         *
068         * @param file The file where the sequence will be found
069         * @param sequenceParser The parser to use to load the sequence
070         * @param sequenceStartIndex The file offset to the start of the sequence
071         * @param sequenceLength The length of the sequence
072         * @param compoundSet
073         * @throws IOException if problems occur while reading the file
074         * @throws CompoundNotFoundException if a compound in the sequence can't be found in the given compoundSet
075         */
076        public SequenceFileProxyLoader(File file, SequenceParserInterface sequenceParser, long sequenceStartIndex, int sequenceLength, CompoundSet<C> compoundSet)
077                        throws IOException, CompoundNotFoundException {
078                this.sequenceParser = sequenceParser;
079                this.file = file;
080                this.sequenceStartIndex = sequenceStartIndex;
081                this.sequenceLength = sequenceLength;
082                setCompoundSet(compoundSet);
083
084                init();
085        }
086
087        /**
088         *
089         * @param compoundSet
090         */
091        @Override
092        public void setCompoundSet(CompoundSet<C> compoundSet) {
093                this.compoundSet = compoundSet;
094        }
095
096        /**
097         *  Load the sequence
098         * @return
099         */
100        private boolean init() throws IOException, CompoundNotFoundException {
101
102                try (BufferedReader br = new BufferedReader(new FileReader(file))) {
103                        br.skip(sequenceStartIndex);
104                        String sequence = sequenceParser.getSequence(br, sequenceLength);
105                        setContents(sequence);
106                }
107
108                return true;
109        }
110
111        /**
112         *
113         * @param sequence
114         */
115        @Override
116        public void setContents(String sequence) throws CompoundNotFoundException {
117                // Horrendously inefficient - pretty much the way the old BJ did things.
118                // TODO Should be optimised.
119                this.parsedCompounds.clear();
120                for (int i = 0; i < sequence.length();) {
121                        String compoundStr = null;
122                        C compound = null;
123                        for (int compoundStrLength = 1; compound == null && compoundStrLength <= compoundSet.getMaxSingleCompoundStringLength(); compoundStrLength++) {
124                                compoundStr = sequence.substring(i, i + compoundStrLength);
125                                compound = compoundSet.getCompoundForString(compoundStr);
126                        }
127                        if (compound == null) {
128                                throw new CompoundNotFoundException("Compound "+compoundStr+" not found");
129                        } else {
130                                i += compoundStr.length();
131                        }
132                        this.parsedCompounds.add(compound);
133                }
134
135        }
136
137        /**
138         *
139         * @return
140         */
141        @Override
142        public int getLength() {
143                return sequenceLength;
144        }
145
146        /**
147         *
148         * @param position
149         * @return
150         */
151        @Override
152        public C getCompoundAt(int position) {
153
154                return this.parsedCompounds.get(position - 1);
155        }
156
157        /**
158         *
159         * @param compound
160         * @return
161         */
162        @Override
163        public int getIndexOf(C compound) {
164
165                return this.parsedCompounds.indexOf(compound) + 1;
166        }
167
168        /**
169         *
170         * @param compound
171         * @return
172         */
173        @Override
174        public int getLastIndexOf(C compound) {
175
176                return this.parsedCompounds.lastIndexOf(compound) + 1;
177        }
178
179        /**
180         *
181         * @return
182         */
183        @Override
184        public String toString() {
185
186                return getSequenceAsString();
187        }
188
189        /**
190         *
191         * @return
192         */
193        @Override
194        public String getSequenceAsString() {
195                return getSequenceAsString(1, getLength(), Strand.POSITIVE);
196        }
197
198        /**
199         *
200         * @param bioBegin
201         * @param bioEnd
202         * @param strand
203         * @return
204         */
205        public String getSequenceAsString(Integer bioBegin, Integer bioEnd, Strand strand) {
206
207                SequenceAsStringHelper<C> sequenceAsStringHelper = new SequenceAsStringHelper<C>();
208                return sequenceAsStringHelper.getSequenceAsString(this.parsedCompounds, compoundSet, bioBegin, bioEnd, strand);
209        }
210
211        /**
212         *
213         * @return
214         */
215        @Override
216        public List<C> getAsList() {
217
218                return this.parsedCompounds;
219
220        }
221
222        @Override
223        public boolean equals(Object o) {
224
225                if(! Equals.classEqual(this, o)) {
226                        return false;
227                }
228
229                Sequence<C> other = (Sequence<C>)o;
230                if ( other.getCompoundSet() != getCompoundSet())
231                        return false;
232
233                List<C> rawCompounds = getAsList();
234                List<C> otherCompounds = other.getAsList();
235
236                if ( rawCompounds.size() != otherCompounds.size())
237                        return false;
238
239                for (int i = 0 ; i < rawCompounds.size() ; i++){
240                        Compound myCompound = rawCompounds.get(i);
241                        Compound otherCompound = otherCompounds.get(i);
242                        if ( ! myCompound.equalsIgnoreCase(otherCompound))
243                                return false;
244                }
245                return true;
246        }
247
248        @Override
249        public int hashCode(){
250                String s = getSequenceAsString();
251                return s.hashCode();
252        }
253
254        /**
255         *
256         * @param bioBegin
257         * @param bioEnd
258         * @return
259         */
260        @Override
261        public SequenceView<C> getSubSequence(final Integer bioBegin, final Integer bioEnd) {
262
263                return new SequenceProxyView<C>(SequenceFileProxyLoader.this, bioBegin, bioEnd);
264        }
265
266        /**
267         *
268         * @return
269         */
270        @Override
271        public Iterator<C> iterator() {
272
273                return this.parsedCompounds.iterator();
274        }
275
276        /**
277         *
278         * @return
279         */
280        @Override
281        public CompoundSet<C> getCompoundSet() {
282                return compoundSet;
283        }
284
285        /**
286         *
287         * @return
288         */
289        @Override
290        public AccessionID getAccession() {
291                throw new UnsupportedOperationException("Not supported yet.");
292        }
293
294        /**
295         *
296         * @param compounds
297         * @return
298         */
299        @Override
300        public int countCompounds(C... compounds) {
301                return SequenceMixin.countCompounds(this, compounds);
302        }
303
304        /**
305         *
306         * @return
307         */
308        @Override
309        public SequenceView<C> getInverse() {
310                return SequenceMixin.inverse(this);
311        }
312}