/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on June 7, 2010
 * Author: Mark Chapman
 */

024package org.biojava.nbio.core.alignment.template;
025
026import org.biojava.nbio.core.sequence.location.template.Location;
027import org.biojava.nbio.core.sequence.template.Compound;
028import org.biojava.nbio.core.sequence.template.CompoundSet;
029import org.biojava.nbio.core.sequence.template.Sequence;
030
031import java.util.List;
032
033/**
034 * Defines a data structure for the results of sequence alignment.  Every {@link List} returned is unmodifiable unless
035 * the class implements the subinterface {@link MutableProfile}.
036 *
037 * @author Mark Chapman
038 * @author Paolo Pavan
039 * @param <S> each element of the alignment {@link Profile} is of type S
040 * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
041 */
042public interface Profile<S extends Sequence<C>, C extends Compound> extends Iterable<AlignedSequence<S, C>> {
043
044        /**
045         * List of output formats.
046         */
047        enum StringFormat {
048                ALN,
049                CLUSTALW,
050                FASTA,
051                GCG,
052                MSF,
053                PDBWEB
054        }
055
056        /**
057         * Returns {@link AlignedSequence} at given index.
058         *
059         * @param listIndex index of sequence in profile
060         * @return desired sequence
061         * @throws IndexOutOfBoundsException if listIndex < 1 or listIndex > number of sequences
062         */
063        AlignedSequence<S, C> getAlignedSequence(int listIndex);
064
065        /**
066         * Searches for the given {@link Sequence} within this alignment profile.  Returns the corresponding
067         * {@link AlignedSequence}.
068         *
069         * @param sequence an original {@link Sequence}
070         * @return the corresponding {@link AlignedSequence}
071         */
072        AlignedSequence<S, C> getAlignedSequence(S sequence);
073
074        /**
075         * Returns a {@link List} containing the individual {@link AlignedSequence}s of this alignment.
076         *
077         * @return list of aligned sequences
078         */
079        List<AlignedSequence<S, C>> getAlignedSequences();
080
081        /**
082         * Returns a {@link List} containing some of the individual {@link AlignedSequence}s of this alignment.
083         *
084         * @param listIndices indices of sequences in profile
085         * @return list of aligned sequences
086         */
087        List<AlignedSequence<S, C>> getAlignedSequences(int... listIndices);
088
089        /**
090         * Returns a {@link List} containing some of the individual {@link AlignedSequence}s of this alignment.
091         *
092         * @param sequences original {@link Sequence}s
093         * @return list of aligned sequences
094         */
095        List<AlignedSequence<S, C>> getAlignedSequences(S... sequences);
096
097        /**
098         * Returns the {@link Compound} at row of given sequence and column of alignment index.  If the given sequence has
099         * overlap, this will return the {@link Compound} from the top row of the sequence.
100         *
101         * @param listIndex index of sequence in profile
102         * @param alignmentIndex column index within an alignment
103         * @return the sequence element
104         * @throws IndexOutOfBoundsException if listIndex < 1, listIndex > number of sequences, alignmentIndex < 1, or
105         *     alignmentIndex > {@link #getLength()}
106         */
107        C getCompoundAt(int listIndex, int alignmentIndex);
108
109        /**
110         * Returns the {@link Compound} at row of given sequence and column of alignment index.  If the given sequence has
111         * overlap, this will return the {@link Compound} from the top row of the sequence.
112         *
113         * @param sequence either an {@link AlignedSequence} or an original {@link Sequence}
114         * @param alignmentIndex column index within an alignment
115         * @return the sequence element
116         * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()}
117         */
118        C getCompoundAt(S sequence, int alignmentIndex);
119
120        /**
121         * Returns the number of each {@link Compound} in the given column for all compounds in {@link CompoundSet}.
122         *
123         * @param alignmentIndex column index within an alignment
124         * @return list of counts
125         * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()}
126         */
127        int[] getCompoundCountsAt(int alignmentIndex);
128
129        /**
130         * Returns the number of each {@link Compound} in the given column only for compounds in the given list.
131         *
132         * @param alignmentIndex column index within an alignment
133         * @param compounds list of compounds to count
134         * @return corresponding list of counts
135         * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()}
136         */
137        int[] getCompoundCountsAt(int alignmentIndex, List<C> compounds);
138
139        /**
140         * Returns the {@link Compound} elements of the original {@link Sequence}s at the given column.
141         *
142         * @param alignmentIndex column index within an alignment
143         * @return the sequence elements
144         * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()}
145         */
146        List<C> getCompoundsAt(int alignmentIndex);
147
148        /**
149         * Returns {@link CompoundSet} of all {@link AlignedSequence}s
150         *
151         * @return set of {@link Compound}s in contained sequences
152         */
153        CompoundSet<C> getCompoundSet();
154
155        /**
156         * Returns the fraction of each {@link Compound} in the given column for all compounds in {@link CompoundSet}.
157         *
158         * @param alignmentIndex column index within an alignment
159         * @return list of fractional weights
160         * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()}
161         */
162        float[] getCompoundWeightsAt(int alignmentIndex);
163
164        /**
165         * Returns the fraction of each {@link Compound} in the given column only for compounds in the given list.
166         *
167         * @param alignmentIndex column index within an alignment
168         * @param compounds list of compounds to count
169         * @return corresponding list of fractional weights
170         * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()}
171         */
172        float[] getCompoundWeightsAt(int alignmentIndex, List<C> compounds);
173
174        /**
175         * Returns the indices in the original {@link Sequence}s corresponding to the given column.  All indices are
176         * 1-indexed and inclusive.
177         *
178         * @param alignmentIndex column index within an alignment
179         * @return the sequence indices
180         * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()}
181         */
182        int[] getIndicesAt(int alignmentIndex);
183
184        /**
185         * Searches for the given {@link Compound} within this alignment profile.  Returns column index nearest to the
186         * start of the alignment profile, or -1 if not found.
187         *
188         * @param compound search element
189         * @return index of column containing search element nearest to the start of the alignment profile
190         */
191        int getIndexOf(C compound);
192
193        /**
194         * Searches for the given {@link Compound} within this alignment profile.  Returns column index nearest to the end
195         * of the alignment profile, or -1 if not found.
196         *
197         * @param compound search element
198         * @return index of column containing search element nearest to the end of the alignment profile
199         */
200        int getLastIndexOf(C compound);
201
202        /**
203         * Returns the number of columns in the alignment profile.
204         *
205         * @return the number of columns
206         */
207        int getLength();
208
209        /**
210         * Returns a {@link List} containing the original {@link Sequence}s used for alignment.
211         *
212         * @return list of original sequences
213         */
214        List<S> getOriginalSequences();
215
216        /**
217         * Returns the number of rows in this profile.  If any {@link AlignedSequence}s are circular and overlap within the
218         * alignment, the returned size will be greater than the number of sequences, otherwise the numbers will be equal.
219         *
220         * @return number of rows
221         */
222        int getSize();
223
224        /**
225         * Returns a {@link ProfileView} windowed to contain only the given {@link Location}.  This only includes the
226         * {@link AlignedSequence}s which overlap the location.
227         *
228         * @param location portion of profile to view
229         * @return a windowed view of the profile
230         * @throws IllegalArgumentException if location is invalid
231         */
232        ProfileView<S, C> getSubProfile(Location location);
233
234        /**
235         * Returns true if any {@link AlignedSequence} has a gap at the given index.
236         *
237         * @param alignmentIndex column index within an alignment
238         * @return true if any {@link AlignedSequence} has a gap at the given index
239         * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()}
240         */
241        boolean hasGap(int alignmentIndex);
242
243        /**
244         * Returns true if any {@link AlignedSequence} is circular.  If so, sequences may simply wrap around from the end
245         * to the start of the alignment or they may contribute multiple overlapping lines to the profile.
246         *
247         * @return true if any {@link AlignedSequence} is circular
248         */
249        boolean isCircular();
250
251        /**
252         * Returns a simple view of the alignment profile.  This shows each sequence on a separate line (or multiple lines,
253         * if circular) and nothing more.  This should result in {@link #getSize()} lines with {@link #getLength()}
254         * {@link Compound}s per line.
255         *
256         * @return a simple view of the alignment profile
257         */
258        @Override
259        String toString();
260
261        /**
262         * Returns a formatted view of the alignment profile.  This shows the start and end indices of the profile and each
263         * sequence for each group of lines of the given width.  Each line may also be labeled.
264         *
265         * @param width limit on the line length
266         * @return a formatted view of the alignment profile
267         */
268        String toString(int width);
269
270        /**
271         * Returns a formatted view of the alignment profile.  Details depend on the format given.
272         *
273         * @param format output format
274         * @return a formatted view of the alignment profile
275         */
276        String toString(StringFormat format);
277
278}