001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on June 14, 2010 021 * Author: Mark Chapman 022 */ 023 024package org.biojava.nbio.core.alignment; 025 026import org.biojava.nbio.core.alignment.template.AlignedSequence; 027import org.biojava.nbio.core.alignment.template.AlignedSequence.Step; 028import org.biojava.nbio.core.alignment.template.Profile; 029import org.biojava.nbio.core.alignment.template.SequencePair; 030import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 031import org.biojava.nbio.core.sequence.template.Compound; 032import org.biojava.nbio.core.sequence.template.Sequence; 033 034import java.util.List; 035 036/** 037 * Implements a data structure for the results of pairwise sequence alignment. 038 * 039 * @author Mark Chapman 040 * @author Paolo Pavan 041 * @param <S> 042 * each element of the alignment {@link Profile} is of type S 043 * @param <C> 044 * each element of an {@link AlignedSequence} is a {@link Compound} 045 * of type C 046 */ 047public class SimpleSequencePair<S extends Sequence<C>, C extends Compound> 048 extends SimpleProfile<S, C> implements SequencePair<S, C> { 049 050 private static final long serialVersionUID = 1L; 051 052 private int identicals = -1, similars = -1; 053 054 /** 055 * Creates a pair profile for the given already aligned sequences. 056 * 057 * @param query 058 * the first sequence of the pair 059 * @param target 060 * the second sequence of the pair 061 * @throws IllegalArgumentException 062 * if sequences differ in size 063 */ 064 public SimpleSequencePair(AlignedSequence<S, C> query, 065 AlignedSequence<S, C> target) { 066 super(query, target); 067 } 068 069 /** 070 * Creates a pair profile for the given sequences with a global alignment. 071 * 072 * @param query 073 * the first sequence of the pair 074 * @param target 075 * the second sequence of the pair 076 * @param sx 077 * lists whether the query sequence aligns a {@link Compound} or 078 * gap at each index of the alignment 079 * @param sy 080 * lists whether the target sequence aligns a {@link Compound} or 081 * gap at each index of the alignment 082 * @throws IllegalArgumentException 083 * if alignments differ in size or given sequences do not fit in 084 * alignments 085 */ 086 public SimpleSequencePair(S query, S target, List<Step> sx, List<Step> sy) { 087 this(query, target, sx, 0, 0, sy, 0, 0); 088 } 089 090 /** 091 * Creates a pair profile for the given sequences with a local alignment. 092 * 093 * @param query 094 * the first sequence of the pair 095 * @param target 096 * the second sequence of the pair 097 * @param sx 098 * lists whether the query sequence aligns a {@link Compound} or 099 * gap at each index of the alignment 100 * @param xb 101 * number of {@link Compound}s skipped in the query sequence 102 * before the aligned region 103 * @param xa 104 * number of {@link Compound}s skipped in the query sequence 105 * after the aligned region 106 * @param sy 107 * lists whether the target sequence aligns a {@link Compound} or 108 * gap at each index of the alignment 109 * @param yb 110 * number of {@link Compound}s skipped in the target sequence 111 * before the aligned region 112 * @param ya 113 * number of {@link Compound}s skipped in the target sequence 114 * after the aligned region 115 * @throws IllegalArgumentException 116 * if alignments differ in size or given sequences do not fit in 117 * alignments 118 */ 119 public SimpleSequencePair(S query, S target, List<Step> sx, int xb, int xa, 120 List<Step> sy, int yb, int ya) { 121 super(query, target, sx, xb, xa, sy, yb, ya); 122 } 123 124 @Override 125 public C getCompoundInQueryAt(int alignmentIndex) { 126 return getAlignedSequence(1).getCompoundAt(alignmentIndex); 127 } 128 129 @Override 130 public C getCompoundInTargetAt(int alignmentIndex) { 131 return getAlignedSequence(2).getCompoundAt(alignmentIndex); 132 } 133 134 @Override 135 public int getIndexInQueryAt(int alignmentIndex) { 136 return getAlignedSequence(1).getSequenceIndexAt(alignmentIndex); 137 } 138 139 @Override 140 public int getIndexInQueryForTargetAt(int targetIndex) { 141 return getAlignedSequence(1).getSequenceIndexAt( 142 getAlignedSequence(2).getAlignmentIndexAt(targetIndex)); 143 } 144 145 @Override 146 public int getIndexInTargetAt(int alignmentIndex) { 147 return getAlignedSequence(2).getSequenceIndexAt(alignmentIndex); 148 } 149 150 @Override 151 public int getIndexInTargetForQueryAt(int queryIndex) { 152 return getAlignedSequence(2).getSequenceIndexAt( 153 getAlignedSequence(1).getAlignmentIndexAt(queryIndex)); 154 } 155 156 @Override 157 public int getNumIdenticals() { 158 if (identicals == -1) { 159 identicals = 0; 160 for (int i = 1; i <= getLength(); i++) { 161 if (getCompoundInQueryAt(i).equalsIgnoreCase( 162 getCompoundInTargetAt(i))) { 163 identicals++; 164 } 165 } 166 getQuery().clearCache(); 167 getTarget().clearCache(); 168 } 169 return identicals; 170 } 171 172 @Override 173 public int getNumSimilars() { 174 if (similars == -1) { 175 similars = 0; 176 for (int i = 1; i <= getLength(); i++) { 177 178 C c1 = getCompoundInQueryAt(i); 179 C c2 = getCompoundInTargetAt(i); 180 181 if (c1 instanceof AminoAcidCompound 182 && c2 instanceof AminoAcidCompound) { 183 short value = matrix.getValue((AminoAcidCompound) c1, 184 (AminoAcidCompound) c2); 185 if (value > 0) 186 similars++; 187 } else { 188 189 if (getCompoundSet().compoundsEquivalent(c1, c2)) { 190 similars++; 191 } 192 } 193 } 194 getQuery().clearCache(); 195 getTarget().clearCache(); 196 } 197 return similars; 198 } 199 200 @Override 201 public AlignedSequence<S, C> getQuery() { 202 return getAlignedSequence(1); 203 } 204 205 @Override 206 public AlignedSequence<S, C> getTarget() { 207 return getAlignedSequence(2); 208 } 209 210 /** 211 * Returns the percentage of identity between the two sequences in the alignment as a fraction between 0 and 1. 212 * 213 * @param countGaps 214 * If true, gap positions are counted as mismatches, i.e., the percentage is normalized by the alignment length. 215 * If false, gap positions are not counted, i.e. the percentage is normalized by the number of aligned residue pairs. 216 * See May (2004). "Percent sequence identity: the need to be explicit." 217 * @return the percentage of sequence identity as a fraction in [0,1] 218 */ 219 @Override 220 public double getPercentageOfIdentity(boolean countGaps) { 221 double seqid = getNumIdenticals(); 222 double length = getLength(); 223 if (!countGaps) { 224 length = length - getAlignedSequence(1).getNumGapPositions() 225 - getAlignedSequence(2).getNumGapPositions(); 226 } 227 return seqid / length; 228 } 229 230}