/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on June 7, 2010
 * Author: Mark Chapman
 */
023
024package org.biojava.nbio.core.alignment.template;
025
026import org.biojava.nbio.core.sequence.location.template.Location;
027import org.biojava.nbio.core.sequence.location.template.Point;
028import org.biojava.nbio.core.sequence.template.Compound;
029import org.biojava.nbio.core.sequence.template.Sequence;
030
031/**
032 * Defines a data structure for a {@link Sequence} within an alignment.
033 *
034 * @author Mark Chapman
035 * @author Paolo Pavan
036 * @param <C> each element of the {@link Sequence} is a {@link Compound} of type C
037 * @param <S> the sequence type
038 */
039public interface AlignedSequence<S extends Sequence<C>, C extends Compound> extends Sequence<C> {
040
041        /**
042         * Defines an alignment step in order to pass alignment information from an Aligner to a constructor.
043         */
044        enum Step { COMPOUND, GAP }
045
046        /**
047         * Nullifies cached arrays/objects.
048         */
049        void clearCache();
050
051        /** Returns the alignment.
052         *
053         * @return the alignment
054         */
055        int[] getAlignmentFromSequence();
056
057        /** Returns the sequence positions at each alignment index
058         *
059         * @return array of the sequence positions
060         */
061        int[] getSequenceFromAlignment();
062
063        /**
064         * Returns the column index within an alignment corresponding to the given index in the original {@link Sequence}.
065         * Both indices are 1-indexed and inclusive.
066         *
067         * @param sequenceIndex index in the original {@link Sequence}
068         * @return column index within an alignment
069         * @throws IndexOutOfBoundsException if sequenceIndex &lt; 1 or sequenceIndex >
070         *         {@link #getOriginalSequence()}.{@link #getLength()}
071         */
072        int getAlignmentIndexAt(int sequenceIndex);
073
074        /**
075         * Returns the {@link Point} within an alignment of the last element of the original {@link Sequence}.
076         *
077         * @return position within an alignment of final original {@link Sequence} element
078         */
079        Point getEnd();
080
081        /**
082         * Returns the {@link Location} of the original {@link Sequence} within an alignment.  This provides access to
083         * additional substructure beyond start and end points.
084         *
085         * @return location within an alignment
086         */
087        Location getLocationInAlignment();
088
089        /**
090         * Returns number of gaps (gap openings) in the sequence. This could be determined from the {@link Location} information or from
091         * gap {@link Compound}s, which may not necessarily result in the same number.
092         *
093         * @return number of gaps in the sequence
094         */
095        int getNumGaps();
096
097        /**
098         * Returns number of gap positions (gap openings and extensions) in the sequence. This could be determined from the {@link Location}
099         * information or from gap {@link Compound}s, which may not necessarily result in the same number.
100         *
101         * @return number of gap positions in the sequence
102         */
103        int getNumGapPositions();
104
105        /**
106         * Returns the coverage, as a fraction between 0 and 1, of this {@link AlignedSequence} with respect to the original sequence.
107         * This is equivalent to ({@link #getLength()} - {@link #getNumGapPositions()}) / getOriginalSequence().getLength().
108         *
109         * @return coverage of the original sequence by the aligned sequence
110         */
111        double getCoverage();
112
113        /**
114         * Returns the original {@link Sequence} before alignment.
115         *
116         * @return the original sequence
117         */
118        S getOriginalSequence();
119
120        /**
121         * Returns the maximum number of elements contributed to a column of an alignment by this {@link Sequence}.  If
122         * this {@link Sequence} is circular, this number is >= 1.  If not, this overlap count is definitely 1.
123         *
124         * @return the most elements contributed to any alignment column
125         */
126        int getOverlapCount();
127
128        /**
129         * Returns the index in the original {@link Sequence} corresponding to the given index within an alignment.  Both
130         * indices are 1-indexed and inclusive.
131         *
132         * @param alignmentIndex column index within an alignment
133         * @return index in the original {@link Sequence}
134         * @throws IndexOutOfBoundsException if alignmentIndex &lt; 1 or alignmentIndex &gt; {@link #getLength()}
135         */
136        int getSequenceIndexAt(int alignmentIndex);
137
138        /**
139         * Returns the {@link Point} within an alignment of the first element of the original {@link Sequence}.
140         *
141         * @return position within an alignment of first original {@link Sequence} element
142         */
143        Point getStart();
144
145        /**
146         * Returns true if this {@link Sequence} wraps around from the last alignment column back to the first.  This makes
147         * overlap possible, but does not require an overlap count > 1.
148         *
149         * @return true for circular alignment elements
150         */
151        boolean isCircular();
152
153        /**
154         * Returns true if this {@link Sequence} has a gap at a particular alignment column.
155         *
156         * @param alignmentIndex column index within an alignment
157         * @return true if this column has a gap
158         * @throws IndexOutOfBoundsException if alignmentIndex &lt; 1 or alignmentIndex &gt; {@link #getLength()}
159         */
160        boolean isGap(int alignmentIndex);
161
162}