001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.sequence.views;
022
023import org.biojava.nbio.core.sequence.template.Compound;
024import org.biojava.nbio.core.sequence.template.Sequence;
025import org.biojava.nbio.core.sequence.template.SequenceView;
026
027import java.util.Iterator;
028import java.util.List;
029import java.util.NoSuchElementException;
030
031/**
032 * A sliding window view of a sequence which does not implement any
033 * interfaces like {@link Sequence} because they do not fit how this works.
034 * For each index requested we return a SequenceView or List of compounds back.
035 *
036 * If you perform a view on a Sequence whose length is not a multiple of the
037 * window the final window will be omitted i.e. if we have the sequence AGCGG
038 * and a window of 3 then you will only see AGC since GG exceeds the calculated
039 * length of this sequence.
040 *
041 * Because this does not implement a Sequence interface we do not recommend
042 * passing this class around. If you need to represent a windowed sequence
043 * as a real Sequence then translate it into a new Compound
044 *
045 * @author ayates
046 *
047 * @param <C> The type of compound we return from a window
048 */
049public class WindowedSequence<C extends Compound> implements Iterable<SequenceView<C>> {
050
051        private final Sequence<C> sequence;
052        private final int windowSize;
053
054        public WindowedSequence(Sequence<C> sequence, int windowSize) {
055                this.sequence = sequence;
056                this.windowSize = windowSize;
057        }
058
059        /**
060         * Access the current window size
061         */
062        public int getWindowSize() {
063                return windowSize;
064        }
065
066        /**
067         * Access the sequence which backs this window
068         */
069        public Sequence<C> getBackingSequence() {
070                return sequence;
071        }
072
073        /**
074         * Calculates start index according to the equation start = ( (index-1) -
075         * windowSize) +1
076         */
077        protected int toStartIndex(int index) {
078                return ((index - 1) * getWindowSize()) + 1;
079        }
080
081        /**
082         * Returns the size of the windowed sequence which is the length by the
083         * window size. Trailing Compounds are omitted.
084         */
085        public int getLength() {
086                return getBackingSequence().getLength() / getWindowSize();
087        }
088
089        /**
090         * For a given position into the windowed view this will return those
091         * compounds we can see in the window. i.e. in the sequence AGGCCT requesting
092         * index 1 returns AGG and requesting index 2 return CCT.
093         *
094         * @param index Windowed index position
095         * @return The List of compounds
096         */
097        public List<C> getCompounds(int index) {
098                return get(index).getAsList();
099        }
100
101        /**
102         * Returns the window specified at the given index in offsets i.e. asking
103         * for position 2 in a moving window sequence of size 3 will get you
104         * the window starting at position 4.
105         */
106        public SequenceView<C> get(int index) {
107                int start = toStartIndex(index);
108                int end  = index + (getWindowSize() - 1);
109                return getBackingSequence().getSubSequence(start, end);
110        }
111
112        /**
113         * Returns an iterator which will return the windows in a sequence in
114         * sequential order.
115         */
116        @Override
117        public Iterator<SequenceView<C>> iterator() {
118                return new WindowedSequenceIterator<C>(this);
119        }
120
121        /**
122         * Iterator of all List of compounds available in a windowed sequence.
123         */
124        private static class WindowedSequenceIterator<C extends Compound> implements Iterator<SequenceView<C>> {
125
126                private final int end;
127                private final int window;
128                private final int offset;
129                private int currentIndex = 1;
130                private final Sequence<C> seq;
131
132                public WindowedSequenceIterator(WindowedSequence<C> sequence) {
133                        this.window = sequence.getWindowSize();
134                        this.offset = window - 1;
135                        this.seq = sequence.getBackingSequence();
136                        this.end = seq.getLength();
137                }
138
139                @Override
140                public boolean hasNext() {
141                        return (currentIndex+offset) <= end;
142                }
143
144                @Override
145                public SequenceView<C> next() {
146            if(!hasNext()){
147                throw new NoSuchElementException();
148            }
149            SequenceView<C> v = seq.getSubSequence(currentIndex, currentIndex + offset);
150                        currentIndex = currentIndex + window;
151                        return v;
152                }
153
154                @Override
155                public void remove() {
156                        throw new UnsupportedOperationException("Cannot remove from a Windowed view");
157                }
158        }
159}