001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on June 7, 2010 021 * Author: Mark Chapman 022 */ 023 024package org.biojava.nbio.core.alignment.template; 025 026import org.biojava.nbio.core.sequence.location.template.Location; 027import org.biojava.nbio.core.sequence.location.template.Point; 028import org.biojava.nbio.core.sequence.template.Compound; 029import org.biojava.nbio.core.sequence.template.Sequence; 030 031/** 032 * Defines a data structure for a {@link Sequence} within an alignment. 033 * 034 * @author Mark Chapman 035 * @author Paolo Pavan 036 * @param <C> each element of the {@link Sequence} is a {@link Compound} of type C 037 */ 038public interface AlignedSequence<S extends Sequence<C>, C extends Compound> extends Sequence<C> { 039 040 /** 041 * Defines an alignment step in order to pass alignment information from an {@link Aligner} to a constructor. 042 */ 043 enum Step { COMPOUND, GAP } 044 045 /** 046 * Nullifies cached arrays/objects. 047 */ 048 void clearCache(); 049 050 /** Returns the alignment. 051 * 052 * @return the alignment 053 */ 054 int[] getAlignmentFromSequence(); 055 056 /** Returns the sequence positions at each alignment index 057 * 058 * @return array of the sequence positions 059 */ 060 int[] getSequenceFromAlignment(); 061 062 /** 063 * Returns the column index within an alignment corresponding to the given index in the original {@link Sequence}. 064 * Both indices are 1-indexed and inclusive. 065 * 066 * @param sequenceIndex index in the original {@link Sequence} 067 * @return column index within an alignment 068 * @throws IndexOutOfBoundsException if sequenceIndex < 1 or sequenceIndex > 069 * {@link #getOriginalSequence()}.{@link #getLength()} 070 */ 071 int getAlignmentIndexAt(int sequenceIndex); 072 073 /** 074 * Returns the {@link Point} within an alignment of the last element of the original {@link Sequence}. 075 * 076 * @return position within an alignment of final original {@link Sequence} element 077 */ 078 Point getEnd(); 079 080 /** 081 * Returns the {@link Location} of the original {@link Sequence} within an alignment. This provides access to 082 * additional substructure beyond start and end points. 083 * 084 * @return location within an alignment 085 */ 086 Location getLocationInAlignment(); 087 088 /** 089 * Returns number of gaps (gap openings) in the sequence. This could be determined from the {@link Location} information or from 090 * gap {@link Compound}s, which may not necessarily result in the same number. 091 * 092 * @return number of gaps in the sequence 093 */ 094 int getNumGaps(); 095 096 /** 097 * Returns number of gap positions (gap openings and extensions) in the sequence. This could be determined from the {@link Location} 098 * information or from gap {@link Compound}s, which may not necessarily result in the same number. 099 * 100 * @return number of gap positions in the sequence 101 */ 102 int getNumGapPositions(); 103 104 /** 105 * Returns the coverage, as a fraction between 0 and 1, of this {@link AlignedSequence} with respect to the original sequence. 106 * This is equivalent to ({@link #getLength()} - {@link #getNumGapPositions()}) / getOriginalSequence().getLength(). 107 * 108 * @return coverage of the original sequence by the aligned sequence 109 */ 110 double getCoverage(); 111 112 /** 113 * Returns the original {@link Sequence} before alignment. 114 * 115 * @return the original sequence 116 */ 117 S getOriginalSequence(); 118 119 /** 120 * Returns the maximum number of elements contributed to a column of an alignment by this {@link Sequence}. If 121 * this {@link Sequence} is circular, this number is >= 1. If not, this overlap count is definitely 1. 122 * 123 * @return the most elements contributed to any alignment column 124 */ 125 int getOverlapCount(); 126 127 /** 128 * Returns the index in the original {@link Sequence} corresponding to the given index within an alignment. Both 129 * indices are 1-indexed and inclusive. 130 * 131 * @param alignmentIndex column index within an alignment 132 * @return index in the original {@link Sequence} 133 * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()} 134 */ 135 int getSequenceIndexAt(int alignmentIndex); 136 137 /** 138 * Returns the {@link Point} within an alignment of the first element of the original {@link Sequence}. 139 * 140 * @return position within an alignment of first original {@link Sequence} element 141 */ 142 Point getStart(); 143 144 /** 145 * Returns true if this {@link Sequence} wraps around from the last alignment column back to the first. This makes 146 * overlap possible, but does not require an overlap count > 1. 147 * 148 * @return true for circular alignment elements 149 */ 150 boolean isCircular(); 151 152 /** 153 * Returns true if this {@link Sequence} has a gap at a particular alignment column. 154 * 155 * @param alignmentIndex column index within an alignment 156 * @return true if this column has a gap 157 * @throws IndexOutOfBoundsException if alignmentIndex < 1 or alignmentIndex > {@link #getLength()} 158 */ 159 boolean isGap(int alignmentIndex); 160 161}