001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on June 7, 2010
021 * Author: Mark Chapman
022 */
023
024package org.biojava.nbio.core.alignment.template;
025
026import org.biojava.nbio.core.sequence.template.Compound;
027import org.biojava.nbio.core.sequence.template.Sequence;
028
029/**
030 * Defines a data structure for the results of pairwise sequence alignment: a {@link Profile} holding a query and a target {@link AlignedSequence}.
031 *
032 * @author Mark Chapman
033 * @author Paolo Pavan
034 * @param <S> each element of the alignment {@link Profile} is of type S
035 * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
036 */
037public interface SequencePair<S extends Sequence<C>, C extends Compound> extends Profile<S, C> {
038
039        /**
040         * Returns the {@link Compound} of the query sequence found at the given alignment column.
041         *
042         * @param alignmentIndex column index in alignment, starting at 1
043         * @return the query sequence element
044         * @throws IndexOutOfBoundsException if alignmentIndex is less than 1 or greater than {@link #getLength()}
045         */
046        C getCompoundInQueryAt(int alignmentIndex);
047
048        /**
049         * Returns the {@link Compound} of the target sequence found at the given alignment column.
050         *
051         * @param alignmentIndex column index in alignment, starting at 1
052         * @return the target sequence element
053         * @throws IndexOutOfBoundsException if alignmentIndex is less than 1 or greater than {@link #getLength()}
054         */
055        C getCompoundInTargetAt(int alignmentIndex);
056
057        /**
058         * Maps an alignment column to the corresponding index in the query sequence.
059         *
060         * @param alignmentIndex column index in alignment, starting at 1
061         * @return index in query {@link Sequence}
062         * @throws IndexOutOfBoundsException if alignmentIndex is less than 1 or greater than {@link #getLength()}
063         */
064        int getIndexInQueryAt(int alignmentIndex);
065
066        /**
067         * Maps an index in the target sequence to the corresponding index in the query sequence.
068         *
069         * @param targetIndex index in target {@link Sequence}, starting at 1
070         * @return index in query {@link Sequence}
071         * @throws IndexOutOfBoundsException if targetIndex is less than 1 or greater than {@link #getTarget()}.getLength()
072         */
073        int getIndexInQueryForTargetAt(int targetIndex);
074
075        /**
076         * Maps an alignment column to the corresponding index in the target sequence.
077         *
078         * @param alignmentIndex column index in alignment, starting at 1
079         * @return index in target {@link Sequence}
080         * @throws IndexOutOfBoundsException if alignmentIndex is less than 1 or greater than {@link #getLength()}
081         */
082        int getIndexInTargetAt(int alignmentIndex);
083
084        /**
085         * Maps an index in the query sequence to the corresponding index in the target sequence.
086         *
087         * @param queryIndex index in query {@link Sequence}, starting at 1
088         * @return index in target {@link Sequence}
089         * @throws IndexOutOfBoundsException if queryIndex is less than 1 or greater than {@link #getQuery()}.getLength()
090         */
091        int getIndexInTargetForQueryAt(int queryIndex);
092
093        /**
094         * Returns the number of indices for which the query and target sequences both have an identical {@link Compound}.
095         *
096         * @return the number of identical indices
097         */
098        int getNumIdenticals();
099        
100        /**
101         * Returns the percentage of identity between the two sequences in the alignment, expressed as a fraction between 0 and 1 (not as a value between 0 and 100).
102         *
103         * @param countGaps
104         *              if {@code true}, gap positions are counted as mismatches, i.e. the identity is normalized by the alignment length;
105         *              if {@code false}, gap positions are not counted, i.e. the identity is normalized by the number of aligned residue pairs.
106         *              See May (2004). "Percent sequence identity: the need to be explicit."
107         * @return the percentage of sequence identity as a fraction in [0,1]
108         */
109        double getPercentageOfIdentity(boolean countGaps);
110
111        /**
112         * Returns the number of indices for which the query and target sequences both have a similar {@link Compound}; what counts as similar is not defined by this interface.
113         *
114         * @return the number of similar indices
115         */
116        int getNumSimilars();
117
118        /**
119         * Returns the first {@link AlignedSequence} of the pair, i.e. the query.
120         *
121         * @return the first {@link AlignedSequence} of the pair
122         */
123        AlignedSequence<S, C> getQuery();
124
125        /**
126         * Returns the second {@link AlignedSequence} of the pair, i.e. the target.
127         *
128         * @return the second {@link AlignedSequence} of the pair
129         */
130        AlignedSequence<S, C> getTarget();
131
132}