001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.sequence.storage;
022
023import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
024import org.biojava.nbio.core.sequence.AccessionID;
025import org.biojava.nbio.core.sequence.template.*;
026import org.biojava.nbio.core.util.Equals;
027
028import java.util.*;
029
030/**
031 * This reader actually proxies onto multiple types of sequence in order
032 * to allow a number of sequence objects to act as if they are one sequence.
033 * The code takes in any number of sequences, records the minimum and maximum
034 * bounds each sequence covers with respect to 1 position indexing and then
035 * binary searches these when a position is requested. Because of this
036 * 0 length Sequences are excluded during construction.
037 *
038 * Performance is not as good as if you are using a flat sequence however the
039 * speed of lookup is more than adaquate for most situations. Using the iterator
040 * gives the best performance as this does not rely on the binary search
041 * mechanism instead iterating through each sequence in turn.
042 *
043 * @author ayates
044 * @param <C> Tyoe of compound to hold
045 */
046public class JoiningSequenceReader<C extends Compound> implements ProxySequenceReader<C> {
047
048        /**
049         * Internal implementation flag and controls how we look for the right
050         * sub-sequence
051         */
052        private static final boolean BINARY_SEARCH = true;
053        private final List<Sequence<C>> sequences;
054        private final CompoundSet<C> compoundSet;
055        private int[] maxSequenceIndex;
056        private int[] minSequenceIndex;
057
058        /**
059         * Allows creation of the store from Vargs Sequence<C> objects. CompoundSet
060         * defaults to the first element of the array (assuming there are elements
061         * available during construction otherwise we will throw an illegal
062         * state exception).
063         */
064        public JoiningSequenceReader(Sequence<C>... sequences) {
065                this(Arrays.asList(sequences));
066        }
067
068        /**
069         * Allows creation of the store from List<Sequence<C>>. CompoundSet
070         * defaults to the first element of the List (assuming there are elements
071         * available during construction otherwise we will throw an illegal
072         * state exception).
073         */
074        public JoiningSequenceReader(List<Sequence<C>> sequences) {
075                this.sequences = grepSequences(sequences);
076                this.compoundSet = grepCompoundSet();
077        }
078
079        public JoiningSequenceReader(CompoundSet<C> compoundSet, Sequence<C>... sequences) {
080                this(compoundSet, Arrays.asList(sequences));
081        }
082
083        public JoiningSequenceReader(CompoundSet<C> compoundSet, List<Sequence<C>> sequences) {
084                this.sequences = grepSequences(sequences);
085                this.compoundSet = compoundSet;
086        }
087
088        private List<Sequence<C>> grepSequences(List<Sequence<C>> sequences) {
089                List<Sequence<C>> seqs = new ArrayList<Sequence<C>>();
090                for (Sequence<C> s : sequences) {
091                        if (s.getLength() != 0) {
092                                seqs.add(s);
093                        }
094                }
095                return seqs;
096        }
097
098        private CompoundSet<C> grepCompoundSet() {
099                if (sequences.isEmpty()) {
100                        throw new IllegalStateException("Cannot get a CompoundSet because we have no sequences. Set during construction");
101                }
102                return sequences.get(0).getCompoundSet();
103        }
104
105
106        @Override
107        public C getCompoundAt(int position) {
108                int sequenceIndex = getSequenceIndex(position);
109                Sequence<C> sequence = sequences.get(sequenceIndex);
110                int indexInSequence = (position - getMinSequenceIndex()[sequenceIndex]) + 1;
111                return sequence.getCompoundAt(indexInSequence);
112        }
113
114
115        @Override
116        public CompoundSet<C> getCompoundSet() {
117                return compoundSet;
118        }
119
120
121        @Override
122        public int getLength() {
123                int[] maxSeqIndex = getMaxSequenceIndex();
124                if (maxSeqIndex.length == 0) {
125                        return 0;
126                }
127                return maxSeqIndex[maxSeqIndex.length - 1];
128        }
129
130        /**
131         * Returns which Sequence holds the position queried for
132         */
133        private int getSequenceIndex(int position) {
134                if (BINARY_SEARCH) {
135                        return binarySearch(position);
136                } else {
137                        return linearSearch(position);
138                }
139        }
140
141        private int[] getMinSequenceIndex() {
142                if (minSequenceIndex == null) {
143                        initSeqIndexes();
144                }
145                return minSequenceIndex;
146        }
147
148        private int[] getMaxSequenceIndex() {
149                if (maxSequenceIndex == null) {
150                        initSeqIndexes();
151                }
152                return maxSequenceIndex;
153        }
154
155        private void initSeqIndexes() {
156                minSequenceIndex = new int[sequences.size()];
157                maxSequenceIndex = new int[sequences.size()];
158                int currentMaxIndex = 0;
159                int currentMinIndex = 1;
160                int lastLength = 0;
161                for (int i = 0; i < sequences.size(); i++) {
162                        currentMinIndex += lastLength;
163                        currentMaxIndex += sequences.get(i).getLength();
164                        minSequenceIndex[i] = currentMinIndex;
165                        maxSequenceIndex[i] = currentMaxIndex;
166                        lastLength = sequences.get(i).getLength();
167                }
168        }
169
170        /**
171         * Scans through the sequence index arrays in linear time. Not the best
172         * performance but easier to code
173         */
174        private int linearSearch(int position) {
175                int[] minSeqIndex = getMinSequenceIndex();
176                int[] maxSeqIndex = getMaxSequenceIndex();
177                int length = minSeqIndex.length;
178                for (int i = 0; i < length; i++) {
179                        if (position >= minSeqIndex[i] && position <= maxSeqIndex[i]) {
180                                return i;
181                        }
182                }
183                throw new IndexOutOfBoundsException("Given position " + position + " does not map into this Sequence");
184        }
185
186        /**
187         * Scans through the sequence index arrays using binary search
188         */
189        private int binarySearch(int position) {
190                int[] minSeqIndex = getMinSequenceIndex();
191                int[] maxSeqIndex = getMaxSequenceIndex();
192
193                int low = 0;
194                int high = minSeqIndex.length - 1;
195                while (low <= high) {
196                        //Go to the mid point in the array
197                        int mid = (low + high) >>> 1;
198
199                        //Get the max position represented by this Sequence
200                        int midMinPosition = minSeqIndex[mid];
201                        int midMaxPosition = maxSeqIndex[mid];
202
203                        //if current position is greater than the current bounds then
204                        //increase search space
205                        if (midMinPosition < position && midMaxPosition < position) {
206                                low = mid + 1;
207                        } //if current position is less than current bounds then decrease
208                        //search space
209                        else if (midMinPosition > position && midMaxPosition > position) {
210                                high = mid - 1;
211                        } else {
212                                return mid;
213                        }
214                }
215                throw new IndexOutOfBoundsException("Given position " + position + " does not map into this Sequence");
216        }
217
218        /**
219         * Iterator implementation which attempts to move through the 2D structure
220         * attempting to skip onto the next sequence as & when it is asked to
221         */
222
223        @Override
224        public Iterator<C> iterator() {
225                final List<Sequence<C>> localSequences = sequences;
226                return new Iterator<C>() {
227
228                        private Iterator<C> currentSequenceIterator = null;
229                        private int currentPosition = 0;
230
231
232                        @Override
233                        public boolean hasNext() {
234                                //If the current iterator is null then see if the Sequences object has anything
235                                if (currentSequenceIterator == null) {
236                                        return !localSequences.isEmpty();
237                                }
238
239                                //See if we had any compounds
240                                boolean hasNext = currentSequenceIterator.hasNext();
241                                if (!hasNext) {
242                                        hasNext = currentPosition < sequences.size();
243                                }
244                                return hasNext;
245                        }
246
247
248                        @Override
249                        public C next() {
250                                if (currentSequenceIterator == null) {
251                                        if (localSequences.isEmpty()) {
252                                                throw new NoSuchElementException("No sequences to iterate over; make sure you call hasNext() before next()");
253                                        }
254                                        currentSequenceIterator = localSequences.get(currentPosition).iterator();
255                                        currentPosition++;
256                                }
257                                if (!currentSequenceIterator.hasNext()) {
258                                        currentSequenceIterator = localSequences.get(currentPosition).iterator();
259                                        currentPosition++;
260                                }
261                                return currentSequenceIterator.next();
262                        }
263
264
265                        @Override
266                        public void remove() throws UnsupportedOperationException {
267                                throw new UnsupportedOperationException("Cannot remove from this Sequence");
268                        }
269                };
270        }
271
272
273        @Override
274        public void setCompoundSet(CompoundSet<C> compoundSet) {
275                throw new UnsupportedOperationException();
276        }
277
278
279        @Override
280        public void setContents(String sequence) throws CompoundNotFoundException {
281                throw new UnsupportedOperationException();
282        }
283
284
285        @Override
286        public int countCompounds(C... compounds) {
287                return SequenceMixin.countCompounds(this, compounds);
288        }
289
290
291        @Override
292        public AccessionID getAccession() throws UnsupportedOperationException {
293                throw new UnsupportedOperationException();
294        }
295
296
297        @Override
298        public List<C> getAsList() {
299                return SequenceMixin.toList(this);
300        }
301
302        @Override
303        public boolean equals(Object o) {
304
305                if(! Equals.classEqual(this, o)) {
306                        return false;
307                }
308
309                Sequence<C> other = (Sequence<C>)o;
310                if ( other.getCompoundSet() != getCompoundSet())
311                        return false;
312
313                List<C> rawCompounds = getAsList();
314                List<C> otherCompounds = other.getAsList();
315
316                if ( rawCompounds.size() != otherCompounds.size())
317                        return false;
318
319                for (int i = 0 ; i < rawCompounds.size() ; i++){
320                        Compound myCompound = rawCompounds.get(i);
321                        Compound otherCompound = otherCompounds.get(i);
322                        if ( ! myCompound.equalsIgnoreCase(otherCompound))
323                                return false;
324                }
325                return true;
326        }
327
328        @Override
329        public int hashCode(){
330                String s = getSequenceAsString();
331                return s.hashCode();
332        }
333
334        @Override
335        public int getIndexOf(C compound) {
336                return SequenceMixin.indexOf(this, compound);
337        }
338
339
340        @Override
341        public int getLastIndexOf(C compound) {
342                return SequenceMixin.lastIndexOf(this, compound);
343        }
344
345
346        @Override
347        public String getSequenceAsString() {
348                return SequenceMixin.toStringBuilder(this).toString();
349        }
350
351
352        @Override
353        public SequenceView<C> getSubSequence(Integer start, Integer end) {
354                return SequenceMixin.createSubSequence(this, start, end);
355        }
356
357        @Override
358        public SequenceView<C> getInverse() {
359                return SequenceMixin.inverse(this);
360        }
361}