001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on June 7, 2010
021 * Author: Mark Chapman
022 */
023
024package org.biojava.nbio.core.alignment.template;
025
026import org.biojava.nbio.core.sequence.location.template.Location;
027import org.biojava.nbio.core.sequence.location.template.Point;
028import org.biojava.nbio.core.sequence.template.Compound;
029import org.biojava.nbio.core.sequence.template.Sequence;
030
031/**
032 * Defines a data structure for a {@link Sequence} within an alignment.
033 * @author Mark Chapman
034 * @author Paolo Pavan
035 * @param <S> the type of the original {@link Sequence}, as returned by {@link #getOriginalSequence()}
036 * @param <C> each element of the {@link Sequence} is a {@link Compound} of type C
037 */
038public interface AlignedSequence<S extends Sequence<C>, C extends Compound> extends Sequence<C> {
039
040        /**
041         * Defines an alignment step ({@code COMPOUND} or {@code GAP}) in order to pass alignment information from an {@link Aligner} to a constructor.
042         */
043        enum Step { COMPOUND, GAP }
044
045        /**
046         * Nullifies cached arrays/objects.
047         */
048        void clearCache();
049
050        /** Returns the alignment as an array.  NOTE(review): presumably the alignment column for each position of the
051         * original {@link Sequence}, i.e. the counterpart of {@link #getSequenceFromAlignment()} - confirm against implementations.
052         * @return the alignment
053         */
054        int[] getAlignmentFromSequence();
055
056        /** Returns the sequence positions at each alignment index.
057         *
058         * @return array of the sequence positions
059         */
060        int[] getSequenceFromAlignment();
061
062        /**
063         * Returns the column index within an alignment corresponding to the given index in the original {@link Sequence}.
064         * Both indices are 1-indexed and inclusive.
065         *
066         * @param sequenceIndex index in the original {@link Sequence}
067         * @return column index within an alignment
068         * @throws IndexOutOfBoundsException if {@code sequenceIndex < 1} or if sequenceIndex is greater than
069         *         {@link #getOriginalSequence()}.{@link #getLength()}
070         */
071        int getAlignmentIndexAt(int sequenceIndex);
072
073        /**
074         * Returns the {@link Point} within an alignment of the last element of the original {@link Sequence}.
075         *
076         * @return position within an alignment of final original {@link Sequence} element
077         */
078        Point getEnd();
079
080        /**
081         * Returns the {@link Location} of the original {@link Sequence} within an alignment.  This provides access to
082         * additional substructure beyond start and end points.
083         *
084         * @return location within an alignment
085         */
086        Location getLocationInAlignment();
087
088        /**
089         * Returns number of gaps in the sequence.  This could be determined from the {@link Location} information or from
090         * gap {@link Compound}s, which may not necessarily result in the same number.
091         * Compare {@link #getNumGapPositions()}, which is documented as counting gap positions (openings and extensions).
092         * @return number of gaps in the sequence
093         */
094        int getNumGaps();
095
096        /**
097         * Returns the original {@link Sequence} before alignment.
098         *
099         * @return the original sequence
100         */
101        S getOriginalSequence();
102
103        /**
104         * Returns the maximum number of elements contributed to a column of an alignment by this {@link Sequence}.  If
105         * this {@link Sequence} is circular, this number is {@code >= 1}.  If not, this overlap count is definitely 1.
106         *
107         * @return the most elements contributed to any alignment column
108         */
109        int getOverlapCount();
110
111        /**
112         * Returns the index in the original {@link Sequence} corresponding to the given index within an alignment.  Both
113         * indices are 1-indexed and inclusive.
114         *
115         * @param alignmentIndex column index within an alignment
116         * @return index in the original {@link Sequence}
117         * @throws IndexOutOfBoundsException if {@code alignmentIndex < 1} or if alignmentIndex is greater than {@link #getLength()}
118         */
119        int getSequenceIndexAt(int alignmentIndex);
120
121        /**
122         * Returns the {@link Point} within an alignment of the first element of the original {@link Sequence}.
123         *
124         * @return position within an alignment of first original {@link Sequence} element
125         */
126        Point getStart();
127
128        /**
129         * Returns true if this {@link Sequence} wraps around from the last alignment column back to the first.  This makes
130         * overlap possible, but does not require an overlap count {@code > 1} (see {@link #getOverlapCount()}).
131         *
132         * @return true for circular alignment elements
133         */
134        boolean isCircular();
135
136        /**
137         * Returns true if this {@link Sequence} has a gap at a particular alignment column.
138         *
139         * @param alignmentIndex column index within an alignment
140         * @return true if this column has a gap
141         * @throws IndexOutOfBoundsException if {@code alignmentIndex < 1} or if alignmentIndex is greater than {@link #getLength()}
142         */
143        boolean isGap(int alignmentIndex);
144
145        /**
146         * Returns number of gap positions (gap openings and extensions) in the sequence. This could be determined from the {@link Location}
147         * information or from gap {@link Compound}s, which may not necessarily result in the same number.
148         * Compare {@link #getNumGaps()}, which is documented as the number of gaps rather than of gap positions.
149         * @return number of gap positions in the sequence
150         */
151        int getNumGapPositions();
152
153        /**
154         * Returns the coverage, as a fraction between 0 and 1, of this {@link AlignedSequence} with respect to the original sequence.
155         * This is equivalent to ({@link #getLength()} - {@link #getNumGapPositions()}) / {@link #getOriginalSequence()}.getLength().
156         *
157         * @return coverage of the original sequence by the aligned sequence
158         */
159        double getCoverage();
160
161}