001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on June 7, 2010 021 * Author: Mark Chapman 022 */ 023 024package org.biojava.nbio.core.alignment.template; 025 026import org.biojava.nbio.core.sequence.location.template.Location; 027import org.biojava.nbio.core.sequence.location.template.Point; 028import org.biojava.nbio.core.sequence.template.Compound; 029import org.biojava.nbio.core.sequence.template.Sequence; 030 031/** 032 * Defines a data structure for a {@link Sequence} within an alignment. 033 * 034 * @author Mark Chapman 035 * @author Paolo Pavan 036 * @param <C> each element of the {@link Sequence} is a {@link Compound} of type C 037 * @param <S> the sequence type 038 */ 039public interface AlignedSequence<S extends Sequence<C>, C extends Compound> extends Sequence<C> { 040 041 /** 042 * Defines an alignment step in order to pass alignment information from an Aligner to a constructor. 043 */ 044 enum Step { COMPOUND, GAP } 045 046 /** 047 * Nullifies cached arrays/objects. 048 */ 049 void clearCache(); 050 051 /** Returns the alignment. 052 * 053 * @return the alignment 054 */ 055 int[] getAlignmentFromSequence(); 056 057 /** Returns the sequence positions at each alignment index 058 * 059 * @return array of the sequence positions 060 */ 061 int[] getSequenceFromAlignment(); 062 063 /** 064 * Returns the column index within an alignment corresponding to the given index in the original {@link Sequence}. 065 * Both indices are 1-indexed and inclusive. 066 * 067 * @param sequenceIndex index in the original {@link Sequence} 068 * @return column index within an alignment 069 * @throws IndexOutOfBoundsException if sequenceIndex < 1 or sequenceIndex > 070 * {@link #getOriginalSequence()}.{@link #getLength()} 071 */ 072 int getAlignmentIndexAt(int sequenceIndex); 073 074 /** 075 * Returns the {@link Point} within an alignment of the last element of the original {@link Sequence}. 076 * 077 * @return position within an alignment of final original {@link Sequence} element 078 */ 079 Point getEnd(); 080 081 /** 082 * Returns the {@link Location} of the original {@link Sequence} within an alignment. This provides access to 083 * additional substructure beyond start and end points. 084 * 085 * @return location within an alignment 086 */ 087 Location getLocationInAlignment(); 088 089 /** 090 * Returns number of gaps (gap openings) in the sequence. This could be determined from the {@link Location} information or from 091 * gap {@link Compound}s, which may not necessarily result in the same number. 092 * 093 * @return number of gaps in the sequence 094 */ 095 int getNumGaps(); 096 097 /** 098 * Returns number of gap positions (gap openings and extensions) in the sequence. This could be determined from the {@link Location} 099 * information or from gap {@link Compound}s, which may not necessarily result in the same number. 100 * 101 * @return number of gap positions in the sequence 102 */ 103 int getNumGapPositions(); 104 105 /** 106 * Returns the coverage, as a fraction between 0 and 1, of this {@link AlignedSequence} with respect to the original sequence. 107 * This is equivalent to ({@link #getLength()} - {@link #getNumGapPositions()}) / getOriginalSequence().getLength(). 108 * 109 * @return coverage of the original sequence by the aligned sequence 110 */ 111 double getCoverage(); 112 113 /** 114 * Returns the original {@link Sequence} before alignment. 115 * 116 * @return the original sequence 117 */ 118 S getOriginalSequence(); 119 120 /** 121 * Returns the maximum number of elements contributed to a column of an alignment by this {@link Sequence}. If 122 * this {@link Sequence} is circular, this number is >= 1. If not, this overlap count is definitely 1. 123 * 124 * @return the most elements contributed to any alignment column 125 */ 126 int getOverlapCount(); 127 128 /** 129 * Returns the index in the original {@link Sequence} corresponding to the given index within an alignment. Both 130 * indices are 1-indexed and inclusive. 131 * 132 * @param alignmentIndex column index within an alignment 133 * @return index in the original {@link Sequence} 134 * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()} 135 */ 136 int getSequenceIndexAt(int alignmentIndex); 137 138 /** 139 * Returns the {@link Point} within an alignment of the first element of the original {@link Sequence}. 140 * 141 * @return position within an alignment of first original {@link Sequence} element 142 */ 143 Point getStart(); 144 145 /** 146 * Returns true if this {@link Sequence} wraps around from the last alignment column back to the first. This makes 147 * overlap possible, but does not require an overlap count > 1. 148 * 149 * @return true for circular alignment elements 150 */ 151 boolean isCircular(); 152 153 /** 154 * Returns true if this {@link Sequence} has a gap at a particular alignment column. 155 * 156 * @param alignmentIndex column index within an alignment 157 * @return true if this column has a gap 158 * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()} 159 */ 160 boolean isGap(int alignmentIndex); 161 162}