/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on June 7, 2010
 * Author: Mark Chapman
 */

package org.biojava.nbio.core.alignment.template;

import org.biojava.nbio.core.sequence.location.template.Location;
import org.biojava.nbio.core.sequence.location.template.Point;
import org.biojava.nbio.core.sequence.template.Compound;
import org.biojava.nbio.core.sequence.template.Sequence;

031/**
032 * Defines a data structure for a {@link Sequence} within an alignment.
033 *
034 * @author Mark Chapman
035 * @author Paolo Pavan
036 * @param <C> each element of the {@link Sequence} is a {@link Compound} of type C
037 */
038public interface AlignedSequence<S extends Sequence<C>, C extends Compound> extends Sequence<C> {
039
040        /**
041         * Defines an alignment step in order to pass alignment information from an {@link Aligner} to a constructor.
042         */
043        enum Step { COMPOUND, GAP }
044
045        /**
046         * Nullifies cached arrays/objects.
047         */
048        void clearCache();
049
050        /** Returns the alignment.
051         *
052         * @return the alignment
053         */
054        int[] getAlignmentFromSequence();
055
056        /** Returns the sequence positions at each alignment index
057         *
058         * @return array of the sequence positions
059         */
060        int[] getSequenceFromAlignment();
061
062        /**
063         * Returns the column index within an alignment corresponding to the given index in the original {@link Sequence}.
064         * Both indices are 1-indexed and inclusive.
065         *
066         * @param sequenceIndex index in the original {@link Sequence}
067         * @return column index within an alignment
068         * @throws IndexOutOfBoundsException if sequenceIndex < 1 or sequenceIndex >
069         *         {@link #getOriginalSequence()}.{@link #getLength()}
070         */
071        int getAlignmentIndexAt(int sequenceIndex);
072
073        /**
074         * Returns the {@link Point} within an alignment of the last element of the original {@link Sequence}.
075         *
076         * @return position within an alignment of final original {@link Sequence} element
077         */
078        Point getEnd();
079
080        /**
081         * Returns the {@link Location} of the original {@link Sequence} within an alignment.  This provides access to
082         * additional substructure beyond start and end points.
083         *
084         * @return location within an alignment
085         */
086        Location getLocationInAlignment();
087
088        /**
089         * Returns number of gaps (gap openings) in the sequence. This could be determined from the {@link Location} information or from
090         * gap {@link Compound}s, which may not necessarily result in the same number.
091         *
092         * @return number of gaps in the sequence
093         */
094        int getNumGaps();
095        
096        /**
097         * Returns number of gap positions (gap openings and extensions) in the sequence. This could be determined from the {@link Location} 
098         * information or from gap {@link Compound}s, which may not necessarily result in the same number.
099         *
100         * @return number of gap positions in the sequence
101         */
102        int getNumGapPositions();
103        
104        /**
105         * Returns the coverage, as a fraction between 0 and 1, of this {@link AlignedSequence} with respect to the original sequence. 
106         * This is equivalent to ({@link #getLength()} - {@link #getNumGapPositions()}) / getOriginalSequence().getLength().
107         *
108         * @return coverage of the original sequence by the aligned sequence
109         */
110        double getCoverage();
111
112        /**
113         * Returns the original {@link Sequence} before alignment.
114         *
115         * @return the original sequence
116         */
117        S getOriginalSequence();
118
119        /**
120         * Returns the maximum number of elements contributed to a column of an alignment by this {@link Sequence}.  If
121         * this {@link Sequence} is circular, this number is >= 1.  If not, this overlap count is definitely 1.
122         *
123         * @return the most elements contributed to any alignment column
124         */
125        int getOverlapCount();
126
127        /**
128         * Returns the index in the original {@link Sequence} corresponding to the given index within an alignment.  Both
129         * indices are 1-indexed and inclusive.
130         *
131         * @param alignmentIndex column index within an alignment
132         * @return index in the original {@link Sequence}
133         * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()}
134         */
135        int getSequenceIndexAt(int alignmentIndex);
136
137        /**
138         * Returns the {@link Point} within an alignment of the first element of the original {@link Sequence}.
139         *
140         * @return position within an alignment of first original {@link Sequence} element
141         */
142        Point getStart();
143
144        /**
145         * Returns true if this {@link Sequence} wraps around from the last alignment column back to the first.  This makes
146         * overlap possible, but does not require an overlap count > 1.
147         *
148         * @return true for circular alignment elements
149         */
150        boolean isCircular();
151
152        /**
153         * Returns true if this {@link Sequence} has a gap at a particular alignment column.
154         *
155         * @param alignmentIndex column index within an alignment
156         * @return true if this column has a gap
157         * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()}
158         */
159        boolean isGap(int alignmentIndex);
160
161}