001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on July 2, 2010 021 * Author: Mark Chapman 022 */ 023 024package org.biojava.nbio.alignment.template; 025 026import org.biojava.nbio.core.alignment.template.Profile; 027import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 028import org.biojava.nbio.alignment.routines.AlignerHelper.Anchor; 029import org.biojava.nbio.alignment.routines.AlignerHelper.Last; 030import org.biojava.nbio.alignment.routines.AlignerHelper.Subproblem; 031import org.biojava.nbio.core.alignment.template.AlignedSequence.Step; 032import org.biojava.nbio.core.sequence.template.Compound; 033import org.biojava.nbio.core.sequence.template.CompoundSet; 034import org.biojava.nbio.core.sequence.template.Sequence; 035 036import java.util.ArrayList; 037import java.util.Arrays; 038import java.util.List; 039 040import static org.biojava.nbio.alignment.routines.AlignerHelper.setScoreVector; 041import static org.biojava.nbio.alignment.routines.AlignerHelper.setSteps; 042 043/** 044 * Implements common code for an {@link Aligner} which builds a score matrix during computation. 045 * 046 * @author Mark Chapman 047 * @author Daniel Cameron 048 * @param <S> each element of the alignment {@link Profile} is of type S 049 * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C 050 */ 051public abstract class AbstractMatrixAligner<S extends Sequence<C>, C extends Compound> extends AbstractScorer 052 implements MatrixAligner<S, C> { 053 054 // input fields 055 protected GapPenalty gapPenalty; 056 private SubstitutionMatrix<C> subMatrix; 057 private boolean local, storingScoreMatrix; 058 protected List<Anchor> anchors = new ArrayList<Anchor>(); 059 protected int cutsPerSection; 060 061 // output fields 062 protected Profile<S, C> profile; 063 /** 064 * Start position of the aligned sequence in the query and target respectively 065 */ 066 protected int[] xyStart; 067 /** 068 * End position of the aligned sequence in the query and target respectively 069 */ 070 protected int[] xyMax; 071 protected int max, min, score; 072 /** 073 * Dynamic programming score matrix 074 * The first dimension has the length of the first (query) sequence + 1 075 * The second has the length of the second (target) sequence + 1 076 * The third has length 1 for linear gap penalty and 3 for affine/constant gap 077 * (one each for match/substitution, deletion, insertion) 078 */ 079 protected int[][][] scores; 080 /** 081 * Friendly name of each copy of the scoring matrix. 082 * The number of elements must match the number of elements in third dimension of @see scores 083 */ 084 private String[] types; 085 protected long time = -1; 086 087 /** 088 * Before running an alignment, data must be sent in via calls to {@link #setGapPenalty(GapPenalty)} and 089 * {@link #setSubstitutionMatrix(SubstitutionMatrix)}. 090 */ 091 protected AbstractMatrixAligner() { 092 } 093 094 /** 095 * Prepares for an alignment. 096 * 097 * @param gapPenalty the gap penalties used during alignment 098 * @param subMatrix the set of substitution scores used during alignment 099 */ 100 protected AbstractMatrixAligner(GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) { 101 this(gapPenalty, subMatrix, false); 102 } 103 104 /** 105 * Prepares for an alignment. 106 * 107 * @param gapPenalty the gap penalties used during alignment 108 * @param subMatrix the set of substitution scores used during alignment 109 * @param local if true, find a region of similarity rather than aligning every compound 110 */ 111 protected AbstractMatrixAligner(GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix, boolean local) { 112 this.gapPenalty = gapPenalty; 113 this.subMatrix = subMatrix; 114 this.local = local; 115 reset(); 116 } 117 118 /** 119 * Returns the gap penalties. 120 * 121 * @return the gap penalties used during alignment 122 */ 123 public GapPenalty getGapPenalty() { 124 return gapPenalty; 125 } 126 127 /** 128 * Returns the substitution matrix. 129 * 130 * @return the set of substitution scores used during alignment 131 */ 132 public SubstitutionMatrix<C> getSubstitutionMatrix() { 133 return subMatrix; 134 } 135 136 /** 137 * Returns whether alignment finds a region of similarity rather than aligning every compound. 138 * 139 * @return true if alignment finds a region of similarity rather than aligning every compound 140 */ 141 public boolean isLocal() { 142 return local; 143 } 144 145 /** 146 * Returns choice to cache the score matrix or to save memory by deleting score matrix after alignment. 147 * 148 * @return choice to cache the score matrix 149 */ 150 public boolean isStoringScoreMatrix() { 151 return storingScoreMatrix; 152 } 153 154 /** 155 * Sets the gap penalties. 156 * 157 * @param gapPenalty the gap penalties used during alignment 158 */ 159 public void setGapPenalty(GapPenalty gapPenalty) { 160 this.gapPenalty = gapPenalty; 161 reset(); 162 } 163 164 /** 165 * Sets the substitution matrix. 166 * 167 * @param subMatrix the set of substitution scores used during alignment 168 */ 169 public void setSubstitutionMatrix(SubstitutionMatrix<C> subMatrix) { 170 this.subMatrix = subMatrix; 171 reset(); 172 } 173 174 /** 175 * Sets choice to cache the score matrix or to save memory by deleting score matrix after alignment. 176 * 177 * @param storingScoreMatrix choice to cache the score matrix 178 */ 179 public void setStoringScoreMatrix(boolean storingScoreMatrix) { 180 this.storingScoreMatrix = storingScoreMatrix; 181 if (!storingScoreMatrix) { 182 scores = null; 183 } 184 } 185 186 // methods for MatrixAligner 187 188 @Override 189 public int[][][] getScoreMatrix() { 190 boolean tempStoringScoreMatrix = storingScoreMatrix; 191 if (scores == null) { 192 storingScoreMatrix = true; 193 align(); 194 if (scores == null) { 195 return null; 196 } 197 } 198 int[][][] copy = scores; 199 if (tempStoringScoreMatrix) { 200 copy = new int[scores.length][scores[0].length][]; 201 for (int i = 0; i < copy.length; i++) { 202 for (int j = 0; j < copy[0].length; j++) { 203 copy[i][j] = Arrays.copyOf(scores[i][j], scores[i][j].length); 204 } 205 } 206 } 207 setStoringScoreMatrix(tempStoringScoreMatrix); 208 return copy; 209 } 210 211 @Override 212 public String getScoreMatrixAsString() { 213 int[][][] scores = getScoreMatrix(); 214 return scoreMatrixToString(scores); 215 } 216 private String scoreMatrixToString(int[][][] scores) { 217 StringBuilder s = new StringBuilder(); 218 CompoundSet<C> compoundSet = getCompoundSet(); 219 int lengthCompound = compoundSet.getMaxSingleCompoundStringLength(), lengthRest = 220 Math.max(Math.max(Integer.toString(min).length(), Integer.toString(max).length()), lengthCompound) + 1; 221 String padCompound = "%" + Integer.toString(lengthCompound) + "s", 222 padRest = "%" + Integer.toString(lengthRest); 223 List<C> query = getCompoundsOfQuery(), target = getCompoundsOfTarget(); 224 for (int type = 0; type < scores[0][0].length; type++) { 225 if (type > 0) { 226 s.append(String.format("%n")); 227 } 228 if (types[type] != null) { 229 s.append(String.format("%s%n", types[type])); 230 } 231 s.append(String.format(padCompound, "")); 232 s.append(String.format(padRest + "s", "")); 233 for (C col : target) { 234 s.append(String.format(padRest + "s", compoundSet.getStringForCompound(col))); 235 } 236 s.append(String.format("%n")); 237 for (int x = 0; x < scores.length; x++) { 238 s.append(String.format(padCompound, (x == 0) ? "" : 239 compoundSet.getStringForCompound(query.get(x - 1)))); 240 for (int y = 0; y < scores[0].length; y++) { 241 s.append(scores[x][y][type] >= min ? String.format(padRest + "d", scores[x][y][type]) : 242 String.format(padRest + "s", "-\u221E")); 243 } 244 s.append(String.format("%n")); 245 } 246 } 247 return s.toString(); 248 } 249 250 // methods for Aligner 251 @Override 252 public long getComputationTime() { 253 if (profile == null) { 254 align(); 255 } 256 return time; 257 } 258 259 @Override 260 public Profile<S, C> getProfile() { 261 if (profile == null) { 262 align(); 263 } 264 return profile; 265 } 266 267 // methods for Scorer 268 269 @Override 270 public double getMaxScore() { 271 if (profile == null) { 272 align(); 273 } 274 return max; 275 } 276 277 @Override 278 public double getMinScore() { 279 if (profile == null) { 280 align(); 281 } 282 return min; 283 } 284 285 @Override 286 public double getScore() { 287 if (profile == null) { 288 align(); 289 } 290 return score; 291 } 292 293 // helper methods 294 295 /** 296 * Performs alignment 297 */ 298 protected void align() { 299 if (!isReady()) { 300 return; 301 } 302 303 long timeStart = System.nanoTime(); 304 305 int[] dim = getScoreMatrixDimensions(); 306 if (storingScoreMatrix) { 307 scores = new int[dim[0]][dim[1]][dim[2]]; 308 } else { 309 scores = new int[dim[0]][][]; 310 scores[0] = new int[dim[1]][dim[2]]; 311 scores[1] = new int[dim[1]][dim[2]]; 312 } 313 boolean linear = (gapPenalty.getType() == GapPenalty.Type.LINEAR); 314 Last[][][] traceback = new Last[dim[0]][][]; 315 List<Step> sx = new ArrayList<Step>(), sy = new ArrayList<Step>(); 316 317 if (!local) { 318 xyMax = new int[] { dim[0] - 1, dim[1] - 1 }; 319 xyStart = new int[] { 0, 0 }; 320 score = 0; 321 List<Subproblem> problems = Subproblem.getSubproblems(anchors, xyMax[0], xyMax[1]); 322 assert problems.size() == anchors.size() + 1; 323 for (int i = 0; i < problems.size(); i++) { 324 Subproblem subproblem = problems.get(i); 325 for (int x = subproblem.getQueryStartIndex(); x <= subproblem.getQueryEndIndex(); x++) { 326 327 traceback[x] = 328 329 linear ? 330 setScoreVector(x, subproblem, gapPenalty.getExtensionPenalty(), getSubstitutionScoreVector(x, subproblem), storingScoreMatrix, scores) : 331 332 setScoreVector(x, subproblem, gapPenalty.getOpenPenalty(), gapPenalty.getExtensionPenalty(), getSubstitutionScoreVector(x, subproblem), storingScoreMatrix, scores); 333 } 334 } 335 setSteps(traceback, scores, sx, sy); 336 score = Integer.MIN_VALUE; 337 int[] finalScore = scores[xyMax[0]][xyMax[1]]; 338 for (int z = 0; z < finalScore.length; z++) { 339 score = Math.max(score, finalScore[z]); 340 } 341 } else { 342 for (int x = 0; x < dim[0]; x++) { 343 344 traceback[x] = 345 346 linear ? 347 setScoreVector(x, gapPenalty.getExtensionPenalty(), getSubstitutionScoreVector(x), storingScoreMatrix, scores, xyMax, score) : 348 349 setScoreVector(x, gapPenalty.getOpenPenalty(), gapPenalty.getExtensionPenalty(), getSubstitutionScoreVector(x), storingScoreMatrix, scores, xyMax, score); 350 351 if (xyMax[0] == x) { 352 score = scores[x][xyMax[1]][0]; 353 } 354 } 355 xyStart = local ? setSteps(traceback, xyMax, sx, sy) : setSteps(traceback, scores, sx, sy); 356 } 357 358 setProfile(sx, sy); 359 360 if (!storingScoreMatrix) { 361 scores = null; 362 } 363 364 time = System.nanoTime() - timeStart; 365 } 366 367 /** 368 * Returns score for the alignment of the query column to all target columns 369 * @param queryColumn 370 * @return 371 */ 372 protected int[] getSubstitutionScoreVector(int queryColumn) { 373 return getSubstitutionScoreVector(queryColumn, new Subproblem(0, 0, scores.length - 1, scores[0].length - 1)); 374 } 375 376 /** 377 * Returns score for the alignment of the query column to all target columns 378 * @param queryColumn 379 * @param subproblem 380 * @return 381 */ 382 protected int[] getSubstitutionScoreVector(int queryColumn, Subproblem subproblem) { 383 int[] subs = new int[subproblem.getTargetEndIndex() + 1]; 384 if (queryColumn > 0) { 385 for (int y = Math.max(1, subproblem.getTargetStartIndex()); y <= subproblem.getTargetEndIndex(); y++) { 386 subs[y] = getSubstitutionScore(queryColumn, y); 387 } 388 } 389 return subs; 390 } 391 392 /** 393 * Resets output fields; should be overridden to set max and min 394 */ 395 protected void reset() { 396 xyMax = new int[] {0, 0}; 397 xyStart = new int[] {0, 0}; 398 scores = null; 399 types = (gapPenalty == null || gapPenalty.getType() == GapPenalty.Type.LINEAR) ? new String[] { null } : 400 new String[] { "Substitution", "Deletion", "Insertion" }; 401 time = -1; 402 profile = null; 403 } 404 // abstract methods 405 406 // returns compound set of sequences 407 protected abstract CompoundSet<C> getCompoundSet(); 408 409 // returns compounds in query sequence/profile 410 protected abstract List<C> getCompoundsOfQuery(); 411 412 // returns compounds in target sequence/profile 413 protected abstract List<C> getCompoundsOfTarget(); 414 415 // returns the 3 score matrix dimensions 416 protected abstract int[] getScoreMatrixDimensions(); 417 418 // returns score for the alignment of two columns 419 protected abstract int getSubstitutionScore(int queryColumn, int targetColumn); 420 421 // prepares for alignment; returns true if everything is set to run the alignment 422 protected abstract boolean isReady(); 423 424 // sets profile following the given alignment path 425 protected abstract void setProfile(List<Step> sx, List<Step> sy); 426 427}