001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.sequence.views;
022
023import org.biojava.nbio.core.sequence.template.Compound;
024import org.biojava.nbio.core.sequence.template.Sequence;
025import org.biojava.nbio.core.sequence.template.SequenceView;
026
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
029
030/**
031 * A sliding window view of a sequence which does not implement any
032 * interfaces like {@link Sequence} because they do not fit how this works.
033 * For each index requested we return a SequenceView or List of compounds back.
034 *
035 * If you perform a view on a Sequence whose length is not a multiple of the
036 * window the final window will be omitted i.e. if we have the sequence AGCGG
037 * and a window of 3 then you will only see AGC since GG exceeds the calculated
038 * length of this sequence.
039 *
040 * Because this does not implement a Sequence interface we do not recommend
041 * passing this class around. If you need to represent a windowed sequence
042 * as a real Sequence then translate it into a new Compound
043 *
044 * @author ayates
045 *
046 * @param <C> The type of compound we return from a window
047 */
048public class WindowedSequence<C extends Compound> implements Iterable<SequenceView<C>> {
049
050        private final Sequence<C> sequence;
051        private final int windowSize;
052
053        public WindowedSequence(Sequence<C> sequence, int windowSize) {
054                this.sequence = sequence;
055                this.windowSize = windowSize;
056        }
057
058        /**
059         * Access the current window size
060         */
061        public int getWindowSize() {
062                return windowSize;
063        }
064
065        /**
066         * Access the sequence which backs this window
067         */
068        public Sequence<C> getBackingSequence() {
069                return sequence;
070        }
071
072        /**
073         * Calculates start index according to the equation start = ( (index-1) -
074         * windowSize) +1
075         */
076        protected int toStartIndex(int index) {
077                return ((index - 1) * getWindowSize()) + 1;
078        }
079
080        /**
081         * Returns the size of the windowed sequence which is the length by the
082         * window size. Trailing Compounds are omitted.
083         */
084        public int getLength() {
085                return getBackingSequence().getLength() / getWindowSize();
086        }
087
088        /**
089         * For a given position into the windowed view this will return those
090         * compounds we can see in the window. i.e. in the sequence AGGCCT requesting
091         * index 1 returns AGG and requesting index 2 return CCT.
092         *
093         * @param index Windowed index position
094         * @return The List of compounds
095         */
096        public List<C> getCompounds(int index) {
097                return get(index).getAsList();
098        }
099
100        /**
101         * Returns the window specified at the given index in offsets i.e. asking
102         * for position 2 in a moving window sequence of size 3 will get you
103         * the window starting at position 4.
104         */
105        public SequenceView<C> get(int index) {
106                int start = toStartIndex(index);
107                int end  = index + (getWindowSize() - 1);
108                return getBackingSequence().getSubSequence(start, end);
109        }
110
111        /**
112         * Returns an iterator which will return the windows in a sequence in
113         * sequential order.
114         */
115        @Override
116        public Iterator<SequenceView<C>> iterator() {
117                return new WindowedSequenceIterator<C>(this);
118        }
119
120        /**
121         * Iterator of all List of compounds available in a windowed sequence.
122         */
123        private static class WindowedSequenceIterator<C extends Compound> implements Iterator<SequenceView<C>> {
124
125                private final int end;
126                private final int window;
127                private final int offset;
128                private int currentIndex = 1;
129                private final Sequence<C> seq;
130
131                public WindowedSequenceIterator(WindowedSequence<C> sequence) {
132                        this.window = sequence.getWindowSize();
133                        this.offset = window - 1;
134                        this.seq = sequence.getBackingSequence();
135                        this.end = seq.getLength();
136                }
137
138                @Override
139                public boolean hasNext() {
140                        return (currentIndex+offset) <= end;
141                }
142
143                @Override
144                public SequenceView<C> next() {
145                        SequenceView<C> v = seq.getSubSequence(currentIndex, currentIndex + offset);
146                        currentIndex = currentIndex + window;
147                        return v;
148                }
149
150                @Override
151                public void remove() {
152                        throw new UnsupportedOperationException("Cannot remove from a Windowed view");
153                }
154        }
155}