001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.sequence.storage;
022
023import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
024import org.biojava.nbio.core.sequence.AccessionID;
025import org.biojava.nbio.core.sequence.template.*;
026
027import java.util.*;
028
029/**
030 * This reader actually proxies onto multiple types of sequence in order
031 * to allow a number of sequence objects to act as if they are one sequence.
032 * The code takes in any number of sequences, records the minimum and maximum
033 * bounds each sequence covers with respect to 1 position indexing and then
034 * binary searches these when a position is requested. Because of this
035 * 0 length Sequences are excluded during construction.
036 *
037 * Performance is not as good as if you are using a flat sequence however the
038 * speed of lookup is more than adaquate for most situations. Using the iterator
039 * gives the best performance as this does not rely on the binary search
040 * mechanism instead iterating through each sequence in turn.
041 *
042 * @author ayates
043 * @param <C> Tyoe of compound to hold
044 */
045public class JoiningSequenceReader<C extends Compound> implements ProxySequenceReader<C> {
046
047        /**
048         * Internal implementation flag and controls how we look for the right
049         * sub-sequence
050         */
051        private static final boolean BINARY_SEARCH = true;
052        private final List<Sequence<C>> sequences;
053        private final CompoundSet<C> compoundSet;
054        private int[] maxSequenceIndex;
055        private int[] minSequenceIndex;
056
057        /**
058         * Allows creation of the store from Vargs Sequence<C> objects. CompoundSet
059         * defaults to the first element of the array (assuming there are elements
060         * available during construction otherwise we will throw an illegal
061         * state exception).
062         */
063        public JoiningSequenceReader(Sequence<C>... sequences) {
064                this(Arrays.asList(sequences));
065        }
066
067        /**
068         * Allows creation of the store from List<Sequence<C>>. CompoundSet
069         * defaults to the first element of the List (assuming there are elements
070         * available during construction otherwise we will throw an illegal
071         * state exception).
072         */
073        public JoiningSequenceReader(List<Sequence<C>> sequences) {
074                this.sequences = grepSequences(sequences);
075                this.compoundSet = grepCompoundSet();
076        }
077
078        public JoiningSequenceReader(CompoundSet<C> compoundSet, Sequence<C>... sequences) {
079                this(compoundSet, Arrays.asList(sequences));
080        }
081
082        public JoiningSequenceReader(CompoundSet<C> compoundSet, List<Sequence<C>> sequences) {
083                this.sequences = grepSequences(sequences);
084                this.compoundSet = compoundSet;
085        }
086
087        private List<Sequence<C>> grepSequences(List<Sequence<C>> sequences) {
088                List<Sequence<C>> seqs = new ArrayList<Sequence<C>>();
089                for (Sequence<C> s : sequences) {
090                        if (s.getLength() != 0) {
091                                seqs.add(s);
092                        }
093                }
094                return seqs;
095        }
096
097        private CompoundSet<C> grepCompoundSet() {
098                if (sequences.isEmpty()) {
099                        throw new IllegalStateException("Cannot get a CompoundSet because we have no sequences. Set during construction");
100                }
101                return sequences.get(0).getCompoundSet();
102        }
103
104
105        @Override
106        public C getCompoundAt(int position) {
107                int sequenceIndex = getSequenceIndex(position);
108                Sequence<C> sequence = sequences.get(sequenceIndex);
109                int indexInSequence = (position - getMinSequenceIndex()[sequenceIndex]) + 1;
110                return sequence.getCompoundAt(indexInSequence);
111        }
112
113
114        @Override
115        public CompoundSet<C> getCompoundSet() {
116                return compoundSet;
117        }
118
119
120        @Override
121        public int getLength() {
122                int[] maxSeqIndex = getMaxSequenceIndex();
123                if (maxSeqIndex.length == 0) {
124                        return 0;
125                }
126                return maxSeqIndex[maxSeqIndex.length - 1];
127        }
128
129        /**
130         * Returns which Sequence holds the position queried for
131         */
132        private int getSequenceIndex(int position) {
133                if (BINARY_SEARCH) {
134                        return binarySearch(position);
135                } else {
136                        return linearSearch(position);
137                }
138        }
139
140        private int[] getMinSequenceIndex() {
141                if (minSequenceIndex == null) {
142                        initSeqIndexes();
143                }
144                return minSequenceIndex;
145        }
146
147        private int[] getMaxSequenceIndex() {
148                if (maxSequenceIndex == null) {
149                        initSeqIndexes();
150                }
151                return maxSequenceIndex;
152        }
153
154        private void initSeqIndexes() {
155                minSequenceIndex = new int[sequences.size()];
156                maxSequenceIndex = new int[sequences.size()];
157                int currentMaxIndex = 0;
158                int currentMinIndex = 1;
159                int lastLength = 0;
160                for (int i = 0; i < sequences.size(); i++) {
161                        currentMinIndex += lastLength;
162                        currentMaxIndex += sequences.get(i).getLength();
163                        minSequenceIndex[i] = currentMinIndex;
164                        maxSequenceIndex[i] = currentMaxIndex;
165                        lastLength = sequences.get(i).getLength();
166                }
167        }
168
169        /**
170         * Scans through the sequence index arrays in linear time. Not the best
171         * performance but easier to code
172         */
173        private int linearSearch(int position) {
174                int[] minSeqIndex = getMinSequenceIndex();
175                int[] maxSeqIndex = getMaxSequenceIndex();
176                int length = minSeqIndex.length;
177                for (int i = 0; i < length; i++) {
178                        if (position >= minSeqIndex[i] && position <= maxSeqIndex[i]) {
179                                return i;
180                        }
181                }
182                throw new IndexOutOfBoundsException("Given position " + position + " does not map into this Sequence");
183        }
184
185        /**
186         * Scans through the sequence index arrays using binary search
187         */
188        private int binarySearch(int position) {
189                int[] minSeqIndex = getMinSequenceIndex();
190                int[] maxSeqIndex = getMaxSequenceIndex();
191
192                int low = 0;
193                int high = minSeqIndex.length - 1;
194                while (low <= high) {
195                        //Go to the mid point in the array
196                        int mid = (low + high) >>> 1;
197
198                        //Get the max position represented by this Sequence
199                        int midMinPosition = minSeqIndex[mid];
200                        int midMaxPosition = maxSeqIndex[mid];
201
202                        //if current position is greater than the current bounds then
203                        //increase search space
204                        if (midMinPosition < position && midMaxPosition < position) {
205                                low = mid + 1;
206                        } //if current position is less than current bounds then decrease
207                        //search space
208                        else if (midMinPosition > position && midMaxPosition > position) {
209                                high = mid - 1;
210                        } else {
211                                return mid;
212                        }
213                }
214                throw new IndexOutOfBoundsException("Given position " + position + " does not map into this Sequence");
215        }
216
217        /**
218         * Iterator implementation which attempts to move through the 2D structure
219         * attempting to skip onto the next sequence as & when it is asked to
220         */
221
222        @Override
223        public Iterator<C> iterator() {
224                final List<Sequence<C>> localSequences = sequences;
225                return new Iterator<C>() {
226
227                        private Iterator<C> currentSequenceIterator = null;
228                        private int currentPosition = 0;
229
230
231                        @Override
232                        public boolean hasNext() {
233                                //If the current iterator is null then see if the Sequences object has anything
234                                if (currentSequenceIterator == null) {
235                                        return !localSequences.isEmpty();
236                                }
237
238                                //See if we had any compounds
239                                boolean hasNext = currentSequenceIterator.hasNext();
240                                if (!hasNext) {
241                                        hasNext = currentPosition < sequences.size();
242                                }
243                                return hasNext;
244                        }
245
246
247                        @Override
248                        public C next() {
249                                if (currentSequenceIterator == null) {
250                                        if (localSequences.isEmpty()) {
251                                                throw new NoSuchElementException("No sequences to iterate over; make sure you call hasNext() before next()");
252                                        }
253                                        currentSequenceIterator = localSequences.get(currentPosition).iterator();
254                                        currentPosition++;
255                                }
256                                if (!currentSequenceIterator.hasNext()) {
257                                        currentSequenceIterator = localSequences.get(currentPosition).iterator();
258                                        currentPosition++;
259                                }
260                                return currentSequenceIterator.next();
261                        }
262
263
264                        @Override
265                        public void remove() throws UnsupportedOperationException {
266                                throw new UnsupportedOperationException("Cannot remove from this Sequence");
267                        }
268                };
269        }
270
271
272        @Override
273        public void setCompoundSet(CompoundSet<C> compoundSet) {
274                throw new UnsupportedOperationException();
275        }
276
277
278        @Override
279        public void setContents(String sequence) throws CompoundNotFoundException {
280                throw new UnsupportedOperationException();
281        }
282
283
284        @Override
285        public int countCompounds(C... compounds) {
286                return SequenceMixin.countCompounds(this, compounds);
287        }
288
289
290        @Override
291        public AccessionID getAccession() throws UnsupportedOperationException {
292                throw new UnsupportedOperationException();
293        }
294
295
296        @Override
297        public List<C> getAsList() {
298                return SequenceMixin.toList(this);
299        }
300
301
302        @Override
303        public int getIndexOf(C compound) {
304                return SequenceMixin.indexOf(this, compound);
305        }
306
307
308        @Override
309        public int getLastIndexOf(C compound) {
310                return SequenceMixin.lastIndexOf(this, compound);
311        }
312
313
314        @Override
315        public String getSequenceAsString() {
316                return SequenceMixin.toStringBuilder(this).toString();
317        }
318
319
320        @Override
321        public SequenceView<C> getSubSequence(Integer start, Integer end) {
322                return SequenceMixin.createSubSequence(this, start, end);
323        }
324
325        @Override
326        public SequenceView<C> getInverse() {
327                return SequenceMixin.inverse(this);
328        }
329}