/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on June 7, 2010
 * Author: Mark Chapman
 */

package org.biojava.nbio.core.alignment.template;

import org.biojava.nbio.core.sequence.location.template.Location;
import org.biojava.nbio.core.sequence.template.Compound;
import org.biojava.nbio.core.sequence.template.CompoundSet;
import org.biojava.nbio.core.sequence.template.Sequence;

import java.util.List;

/**
 * Defines a data structure for the results of sequence alignment.  Every {@link List} returned is unmodifiable unless
 * the class implements the subinterface {@link MutableProfile}.
 * <p>
 * List and column indices accepted by the methods of this interface start at 1, as reflected in the documented
 * {@link IndexOutOfBoundsException} conditions.
 *
 * @author Mark Chapman
 * @author Paolo Pavan
 * @param <S> each element of the alignment {@link Profile} is of type S
 * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
 */
public interface Profile<S extends Sequence<C>, C extends Compound> extends Iterable<AlignedSequence<S, C>> {

    /**
     * List of output formats.
     */
    enum StringFormat {
        ALN,
        CLUSTALW,
        FASTA,
        GCG,
        MSF,
        PDBWEB
    }

    /**
     * Returns {@link AlignedSequence} at given index.
     *
     * @param listIndex index of sequence in profile
     * @return desired sequence
     * @throws IndexOutOfBoundsException if {@code listIndex < 1} or {@code listIndex} is greater than the number of
     *     sequences
     */
    AlignedSequence<S, C> getAlignedSequence(int listIndex);

    /**
     * Searches for the given {@link Sequence} within this alignment profile.  Returns the corresponding
     * {@link AlignedSequence}.
     *
     * @param sequence an original {@link Sequence}
     * @return the corresponding {@link AlignedSequence}
     */
    AlignedSequence<S, C> getAlignedSequence(S sequence);

    /**
     * Returns a {@link List} containing the individual {@link AlignedSequence}s of this alignment.
     *
     * @return list of aligned sequences
     */
    List<AlignedSequence<S, C>> getAlignedSequences();

    /**
     * Returns a {@link List} containing some of the individual {@link AlignedSequence}s of this alignment.
     *
     * @param listIndices indices of sequences in profile
     * @return list of aligned sequences
     */
    List<AlignedSequence<S, C>> getAlignedSequences(int... listIndices);

    /**
     * Returns a {@link List} containing some of the individual {@link AlignedSequence}s of this alignment.
     *
     * @param sequences original {@link Sequence}s
     * @return list of aligned sequences
     */
    List<AlignedSequence<S, C>> getAlignedSequences(S... sequences);

    /**
     * Returns the {@link Compound} at row of given sequence and column of alignment index.  If the given sequence has
     * overlap, this will return the {@link Compound} from the top row of the sequence.
     *
     * @param listIndex index of sequence in profile
     * @param alignmentIndex column index within an alignment
     * @return the sequence element
     * @throws IndexOutOfBoundsException if {@code listIndex < 1}, {@code listIndex} is greater than the number of
     *     sequences, {@code alignmentIndex < 1}, or {@code alignmentIndex} is greater than {@link #getLength()}
     */
    C getCompoundAt(int listIndex, int alignmentIndex);

    /**
     * Returns the {@link Compound} at row of given sequence and column of alignment index.  If the given sequence has
     * overlap, this will return the {@link Compound} from the top row of the sequence.
     *
     * @param sequence either an {@link AlignedSequence} or an original {@link Sequence}
     * @param alignmentIndex column index within an alignment
     * @return the sequence element
     * @throws IndexOutOfBoundsException if {@code alignmentIndex < 1} or {@code alignmentIndex} is greater than
     *     {@link #getLength()}
     */
    C getCompoundAt(S sequence, int alignmentIndex);

    /**
     * Returns the number of each {@link Compound} in the given column for all compounds in {@link CompoundSet}.
     *
     * @param alignmentIndex column index within an alignment
     * @return list of counts
     * @throws IndexOutOfBoundsException if {@code alignmentIndex < 1} or {@code alignmentIndex} is greater than
     *     {@link #getLength()}
     */
    int[] getCompoundCountsAt(int alignmentIndex);

    /**
     * Returns the number of each {@link Compound} in the given column only for compounds in the given list.
     *
     * @param alignmentIndex column index within an alignment
     * @param compounds list of compounds to count
     * @return corresponding list of counts
     * @throws IndexOutOfBoundsException if {@code alignmentIndex < 1} or {@code alignmentIndex} is greater than
     *     {@link #getLength()}
     */
    int[] getCompoundCountsAt(int alignmentIndex, List<C> compounds);

    /**
     * Returns the {@link Compound} elements of the original {@link Sequence}s at the given column.
     *
     * @param alignmentIndex column index within an alignment
     * @return the sequence elements
     * @throws IndexOutOfBoundsException if {@code alignmentIndex < 1} or {@code alignmentIndex} is greater than
     *     {@link #getLength()}
     */
    List<C> getCompoundsAt(int alignmentIndex);

    /**
     * Returns {@link CompoundSet} of all {@link AlignedSequence}s.
     *
     * @return set of {@link Compound}s in contained sequences
     */
    CompoundSet<C> getCompoundSet();

    /**
     * Returns the fraction of each {@link Compound} in the given column for all compounds in {@link CompoundSet}.
     *
     * @param alignmentIndex column index within an alignment
     * @return list of fractional weights
     * @throws IndexOutOfBoundsException if {@code alignmentIndex < 1} or {@code alignmentIndex} is greater than
     *     {@link #getLength()}
     */
    float[] getCompoundWeightsAt(int alignmentIndex);

    /**
     * Returns the fraction of each {@link Compound} in the given column only for compounds in the given list.
     *
     * @param alignmentIndex column index within an alignment
     * @param compounds list of compounds to count
     * @return corresponding list of fractional weights
     * @throws IndexOutOfBoundsException if {@code alignmentIndex < 1} or {@code alignmentIndex} is greater than
     *     {@link #getLength()}
     */
    float[] getCompoundWeightsAt(int alignmentIndex, List<C> compounds);

    /**
     * Returns the indices in the original {@link Sequence}s corresponding to the given column.  All indices are
     * 1-indexed and inclusive.
     *
     * @param alignmentIndex column index within an alignment
     * @return the sequence indices
     * @throws IndexOutOfBoundsException if {@code alignmentIndex < 1} or {@code alignmentIndex} is greater than
     *     {@link #getLength()}
     */
    int[] getIndicesAt(int alignmentIndex);

    /**
     * Searches for the given {@link Compound} within this alignment profile.  Returns column index nearest to the
     * start of the alignment profile, or -1 if not found.
     *
     * @param compound search element
     * @return index of column containing search element nearest to the start of the alignment profile
     */
    int getIndexOf(C compound);

    /**
     * Searches for the given {@link Compound} within this alignment profile.  Returns column index nearest to the end
     * of the alignment profile, or -1 if not found.
     *
     * @param compound search element
     * @return index of column containing search element nearest to the end of the alignment profile
     */
    int getLastIndexOf(C compound);

    /**
     * Returns the number of columns in the alignment profile.
     *
     * @return the number of columns
     */
    int getLength();

    /**
     * Returns a {@link List} containing the original {@link Sequence}s used for alignment.
     *
     * @return list of original sequences
     */
    List<S> getOriginalSequences();

    /**
     * Returns the number of rows in this profile.  If any {@link AlignedSequence}s are circular and overlap within the
     * alignment, the returned size will be greater than the number of sequences, otherwise the numbers will be equal.
     *
     * @return number of rows
     */
    int getSize();

    /**
     * Returns a {@link ProfileView} windowed to contain only the given {@link Location}.  This only includes the
     * {@link AlignedSequence}s which overlap the location.
     *
     * @param location portion of profile to view
     * @return a windowed view of the profile
     * @throws IllegalArgumentException if location is invalid
     */
    ProfileView<S, C> getSubProfile(Location location);

    /**
     * Returns true if any {@link AlignedSequence} has a gap at the given index.
     *
     * @param alignmentIndex column index within an alignment
     * @return true if any {@link AlignedSequence} has a gap at the given index
     * @throws IndexOutOfBoundsException if {@code alignmentIndex < 1} or {@code alignmentIndex} is greater than
     *     {@link #getLength()}
     */
    boolean hasGap(int alignmentIndex);

    /**
     * Returns true if any {@link AlignedSequence} is circular.  If so, sequences may simply wrap around from the end
     * to the start of the alignment or they may contribute multiple overlapping lines to the profile.
     *
     * @return true if any {@link AlignedSequence} is circular
     */
    boolean isCircular();

    /**
     * Returns a simple view of the alignment profile.  This shows each sequence on a separate line (or multiple lines,
     * if circular) and nothing more.  This should result in {@link #getSize()} lines with {@link #getLength()}
     * {@link Compound}s per line.
     *
     * @return a simple view of the alignment profile
     */
    @Override
    String toString();

    /**
     * Returns a formatted view of the alignment profile.  This shows the start and end indices of the profile and each
     * sequence for each group of lines of the given width.  Each line may also be labeled.
     *
     * @param width limit on the line length
     * @return a formatted view of the alignment profile
     */
    String toString(int width);

    /**
     * Returns a formatted view of the alignment profile.  Details depend on the format given.
     *
     * @param format output format
     * @return a formatted view of the alignment profile
     */
    String toString(StringFormat format);

}