/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on June 7, 2010
 * Author: Mark Chapman
 */

package org.biojava.nbio.alignment;

import org.biojava.nbio.core.alignment.template.ProfilePair;
import org.biojava.nbio.core.alignment.template.SequencePair;
import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
import org.biojava.nbio.core.alignment.template.Profile;
import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.alignment.template.*;
import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
import org.biojava.nbio.core.sequence.template.Compound;
import org.biojava.nbio.core.sequence.template.CompoundSet;
import org.biojava.nbio.core.sequence.template.Sequence;
import org.biojava.nbio.core.util.ConcurrencyTools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;

/**
 * Static utility to easily run alignment routines.  To exit cleanly after running any parallel method that mentions
 * use of the {@link ConcurrencyTools} utility, {@link ConcurrencyTools#shutdown()} or
 * {@link ConcurrencyTools#shutdownAndAwaitTermination()} must be called.
 *
 * @author Mark Chapman
 */
public class Alignments {

    private static final Logger logger = LoggerFactory.getLogger(Alignments.class);

    /**
     * List of implemented sequence pair in a profile scoring routines.
     */
    public enum PairInProfileScorerType {
        IDENTITIES,   // similar to MUSCLE
        SIMILARITIES
    }

    /**
     * List of implemented pairwise sequence alignment routines.
     */
    public enum PairwiseSequenceAlignerType {
        GLOBAL,              // Needleman-Wunsch/Gotoh
        GLOBAL_LINEAR_SPACE, // Guan-Uberbacher
        LOCAL,               // Smith-Waterman/Gotoh
        LOCAL_LINEAR_SPACE   // Smith-Waterman/Gotoh with smart traceback at each maximum
    }

    /**
     * List of implemented pairwise sequence scoring routines.
     */
    public enum PairwiseSequenceScorerType {
        GLOBAL,
        GLOBAL_IDENTITIES,   // similar to CLUSTALW and CLUSTALW2
        GLOBAL_SIMILARITIES,
        LOCAL,
        LOCAL_IDENTITIES,
        LOCAL_SIMILARITIES,
        KMERS,               // similar to CLUSTAL and MUSCLE
        WU_MANBER            // similar to KALIGN
    }

    /**
     * List of implemented profile-profile alignment routines.
     */
    public enum ProfileProfileAlignerType {
        GLOBAL,              // similar to MUSCLE and KALIGN
        GLOBAL_LINEAR_SPACE, // similar to CLUSTALW and CLUSTALW2
        GLOBAL_CONSENSUS,    // similar to CLUSTAL
        LOCAL,
        LOCAL_LINEAR_SPACE,
        LOCAL_CONSENSUS
    }

    /**
     * List of implemented profile refinement routines.
     */
    public enum RefinerType {
        PARTITION_SINGLE,     // similar to CLUSTALW2
        PARTITION_SINGLE_ALL, // similar to CLUSTALW2
        PARTITION_TREE,       // similar to MUSCLE
        PARTITION_TREE_ALL,
        RESCORE_IDENTITIES,   // similar to MUSCLE
        RESCORE_SIMILARITIES
    }

    // prevents instantiation
    private Alignments() { }

    // public factory methods

    /**
     * Factory method which computes a sequence alignment for all {@link Sequence} pairs in the given {@link List}.
     * This method runs the alignments in parallel by submitting all of the alignments to the shared thread pool of the
     * {@link ConcurrencyTools} utility.
     *
     * @param <S> each {@link Sequence} of an alignment pair is of type S
     * @param <C> each element of a sequence is a {@link Compound} of type C
     * @param sequences the {@link List} of {@link Sequence}s to align
     * @param type chosen type from list of pairwise sequence alignment routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return list of sequence alignment pairs
     */
    public static <S extends Sequence<C>, C extends Compound> List<SequencePair<S, C>> getAllPairsAlignments(
            List<S> sequences, PairwiseSequenceAlignerType type, GapPenalty gapPenalty,
            SubstitutionMatrix<C> subMatrix) {
        return runPairwiseAligners(getAllPairsAligners(sequences, type, gapPenalty, subMatrix));
    }

    /**
     * Factory method which computes a multiple sequence alignment for the given {@link List} of {@link Sequence}s.
     * <p>
     * Defaults: {@link PairwiseSequenceScorerType#GLOBAL_IDENTITIES} scoring, a new {@link SimpleGapPenalty},
     * {@link ProfileProfileAlignerType#GLOBAL} profile alignment, and a substitution matrix chosen from the compound
     * set of the first sequence (BLOSUM62 for the amino acid set, nuc4_4 for the DNA and ambiguity DNA sets).  Each
     * entry of {@code settings} that is a {@link PairwiseSequenceScorerType}, {@link GapPenalty},
     * {@link SubstitutionMatrix} or {@link ProfileProfileAlignerType} replaces the corresponding default; entries of
     * any other type are ignored, and a later entry of the same kind overrides an earlier one.
     *
     * @param <S> each {@link Sequence} of the {@link List} is of type S
     * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
     * @param sequences the {@link List} of {@link Sequence}s to align; the first element is read to determine the
     *     compound set, so the list must not be empty
     * @param settings optional settings that adjust the alignment
     * @return multiple sequence alignment {@link Profile}
     * @throws IllegalArgumentException if a supplied {@link SubstitutionMatrix} reports a compound set that is not
     *     the same instance as the compound set of the first sequence
     */
    public static <S extends Sequence<C>, C extends Compound> Profile<S, C> getMultipleSequenceAlignment(
            List<S> sequences, Object... settings) { // TODO convert other factories to this parameter style?
        CompoundSet<C> cs = sequences.get(0).getCompoundSet();
        PairwiseSequenceScorerType ps = PairwiseSequenceScorerType.GLOBAL_IDENTITIES;
        GapPenalty gapPenalty = new SimpleGapPenalty();
        ProfileProfileAlignerType pa = ProfileProfileAlignerType.GLOBAL;

        // NOTE(review): subMatrix stays null when the compound set is none of the three recognised below and no
        // matrix is passed in settings; it is then handed to the scorers/aligners as null.
        SubstitutionMatrix<C> subMatrix = null;
        if (cs == AminoAcidCompoundSet.getAminoAcidCompoundSet()) {
            @SuppressWarnings("unchecked") // compound types must be equal since compound sets are equal
            SubstitutionMatrix<C> temp = (SubstitutionMatrix<C>) SubstitutionMatrixHelper.getBlosum62();
            subMatrix = temp;
        } else if (cs == DNACompoundSet.getDNACompoundSet() || cs == AmbiguityDNACompoundSet.getDNACompoundSet()) {
            @SuppressWarnings("unchecked") // compound types must be equal since compound sets are equal
            SubstitutionMatrix<C> temp = (SubstitutionMatrix<C>) SubstitutionMatrixHelper.getNuc4_4();
            subMatrix = temp;
        }

        // apply caller overrides, dispatching on the runtime type of each setting
        for (Object o : settings) {
            if (o instanceof PairwiseSequenceScorerType) {
                ps = (PairwiseSequenceScorerType) o;
            } else if (o instanceof GapPenalty) {
                gapPenalty = (GapPenalty) o;
            } else if (o instanceof SubstitutionMatrix<?>) {
                if (cs != ((SubstitutionMatrix<?>) o).getCompoundSet()) {
                    throw new IllegalArgumentException(
                            "Compound sets of the sequences and substitution matrix must match.");
                }
                @SuppressWarnings("unchecked") // compound types must be equal since compound sets are equal
                SubstitutionMatrix<C> temp = (SubstitutionMatrix<C>) o;
                subMatrix = temp;
            } else if (o instanceof ProfileProfileAlignerType) {
                pa = (ProfileProfileAlignerType) o;
            }
        }

        // stage 1: pairwise similarity calculation
        List<PairwiseSequenceScorer<S, C>> scorers = getAllPairsScorers(sequences, ps, gapPenalty, subMatrix);
        runPairwiseScorers(scorers);

        // stage 2: hierarchical clustering into a guide tree
        GuideTree<S, C> tree = new GuideTree<>(sequences, scorers);
        scorers = null; // drop the local reference before the (potentially long) progressive alignment stage

        // stage 3: progressive alignment
        Profile<S, C> msa = getProgressiveAlignment(tree, pa, gapPenalty, subMatrix);

        // TODO stage 4: refinement
        return msa;
    }

    /**
     * Factory method which computes a sequence alignment for the given {@link Sequence} pair.
     *
     * @param <S> each {@link Sequence} of the pair is of type S
     * @param <C> each element of a sequence is a {@link Compound} of type C
     * @param query the first {@link Sequence} to align
     * @param target the second {@link Sequence} to align
     * @param type chosen type from list of pairwise sequence alignment routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return sequence alignment pair
     */
    public static <S extends Sequence<C>, C extends Compound> SequencePair<S, C> getPairwiseAlignment(
            S query, S target, PairwiseSequenceAlignerType type, GapPenalty gapPenalty,
            SubstitutionMatrix<C> subMatrix) {
        return getPairwiseAligner(query, target, type, gapPenalty, subMatrix).getPair();
    }

    // lower-level factory and runner methods (some package private, some public)

    /**
     * Factory method which sets up a sequence alignment for all {@link Sequence} pairs in the given {@link List}.
     * Pairs are produced for every index combination {@code i < j}, in row-major order.
     *
     * @param <S> each {@link Sequence} of an alignment pair is of type S
     * @param <C> each element of a sequence is a {@link Compound} of type C
     * @param sequences the {@link List} of {@link Sequence}s to align
     * @param type chosen type from list of pairwise sequence alignment routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return list of pairwise sequence aligners
     */
    static <S extends Sequence<C>, C extends Compound> List<PairwiseSequenceAligner<S, C>> getAllPairsAligners(
            List<S> sequences, PairwiseSequenceAlignerType type, GapPenalty gapPenalty,
            SubstitutionMatrix<C> subMatrix) {
        List<PairwiseSequenceAligner<S, C>> allPairs = new ArrayList<>();
        for (int i = 0; i < sequences.size(); i++) {
            for (int j = i + 1; j < sequences.size(); j++) {
                allPairs.add(getPairwiseAligner(sequences.get(i), sequences.get(j), type, gapPenalty, subMatrix));
            }
        }
        return allPairs;
    }

    /**
     * Factory method which sets up a sequence pair scorer for all {@link Sequence} pairs in the given {@link List}.
     * Pairs are produced for every index combination {@code i < j}, in row-major order.
     *
     * @param <S> each {@link Sequence} of a pair is of type S
     * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
     * @param sequences the {@link List} of {@link Sequence}s to align
     * @param type chosen type from list of pairwise sequence scoring routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return list of sequence pair scorers
     */
    public static <S extends Sequence<C>, C extends Compound> List<PairwiseSequenceScorer<S, C>> getAllPairsScorers(
            List<S> sequences, PairwiseSequenceScorerType type, GapPenalty gapPenalty,
            SubstitutionMatrix<C> subMatrix) {
        List<PairwiseSequenceScorer<S, C>> allPairs = new ArrayList<>();
        for (int i = 0; i < sequences.size(); i++) {
            for (int j = i + 1; j < sequences.size(); j++) {
                allPairs.add(getPairwiseScorer(sequences.get(i), sequences.get(j), type, gapPenalty, subMatrix));
            }
        }
        return allPairs;
    }

    /**
     * Factory method which computes a sequence pair score for all {@link Sequence} pairs in the given {@link List}.
     * This method runs the scorings in parallel by submitting all of the scorings to the shared thread pool of the
     * {@link ConcurrencyTools} utility.
     *
     * @param <S> each {@link Sequence} of a pair is of type S
     * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
     * @param sequences the {@link List} of {@link Sequence}s to align
     * @param type chosen type from list of pairwise sequence scoring routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return list of sequence pair scores
     */
    public static <S extends Sequence<C>, C extends Compound> double[] getAllPairsScores(List<S> sequences,
            PairwiseSequenceScorerType type, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
        return runPairwiseScorers(getAllPairsScorers(sequences, type, gapPenalty, subMatrix));
    }

    /**
     * Factory method which retrieves calculated elements from a list of tasks on the concurrent execution queue.
     * <p>
     * Every future is waited on in list order.  A future whose {@code get()} throws {@link InterruptedException} or
     * {@link ExecutionException} is logged and contributes no element, so the returned list can be shorter than
     * {@code futures}.  If any wait was interrupted, the calling thread's interrupt status is re-asserted once all
     * futures have been visited, so that callers can still observe the interruption.
     *
     * @param <E> each task calculates a value of type E
     * @param futures list of tasks
     * @return calculated elements
     */
    static <E> List<E> getListFromFutures(List<Future<E>> futures) {
        List<E> list = new ArrayList<>(futures.size());
        boolean interrupted = false;
        try {
            for (Future<E> f : futures) {
                // TODO when added to ConcurrencyTools, log completions and exceptions instead of printing stack traces
                try {
                    list.add(f.get());
                } catch (InterruptedException e) {
                    // remember the interruption but keep draining the remaining futures
                    interrupted = true;
                    logger.error("Interrupted Exception: ", e);
                } catch (ExecutionException e) {
                    logger.error("Execution Exception: ", e);
                }
            }
        } finally {
            if (interrupted) {
                // restore the status that catching InterruptedException cleared
                Thread.currentThread().interrupt();
            }
        }
        return list;
    }

    /**
     * Factory method which constructs a pairwise sequence aligner.
     *
     * @param <S> each {@link Sequence} of an alignment pair is of type S
     * @param <C> each element of a sequence is a {@link Compound} of type C
     * @param query the first {@link Sequence} to align
     * @param target the second {@link Sequence} to align
     * @param type chosen type from list of pairwise sequence alignment routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return pairwise sequence aligner
     * @throws IllegalArgumentException if the compound sets of {@code query} and {@code target} are not equal
     * @throws UnsupportedOperationException if {@code type} is one of the linear-space routines
     */
    public static <S extends Sequence<C>, C extends Compound> PairwiseSequenceAligner<S, C> getPairwiseAligner(
            S query, S target, PairwiseSequenceAlignerType type, GapPenalty gapPenalty,
            SubstitutionMatrix<C> subMatrix) {
        if (!query.getCompoundSet().equals(target.getCompoundSet())) {
            throw new IllegalArgumentException("Sequence compound sets must be the same");
        }
        switch (type) {
        default:
        case GLOBAL:
            return new NeedlemanWunsch<>(query, target, gapPenalty, subMatrix);
        case LOCAL:
            return new SmithWaterman<>(query, target, gapPenalty, subMatrix);
        case GLOBAL_LINEAR_SPACE:
        case LOCAL_LINEAR_SPACE:
            // TODO other alignment options (Myers-Miller, Thompson)
            throw unsupported(type, "alignment");
        }
    }

    /**
     * Factory method which computes a similarity score for the given {@link Sequence} pair.
     *
     * @param <S> each {@link Sequence} of the pair is of type S
     * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
     * @param query the first {@link Sequence} to score
     * @param target the second {@link Sequence} to score
     * @param type chosen type from list of pairwise sequence scoring routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return sequence pair score
     */
    static <S extends Sequence<C>, C extends Compound> double getPairwiseScore(S query, S target,
            PairwiseSequenceScorerType type, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
        return getPairwiseScorer(query, target, type, gapPenalty, subMatrix).getScore();
    }

    /**
     * Factory method which constructs a pairwise sequence scorer.  Every supported scorer is backed by a global or
     * local pairwise aligner obtained from
     * {@link #getPairwiseAligner(Sequence, Sequence, PairwiseSequenceAlignerType, GapPenalty, SubstitutionMatrix)};
     * the identity and similarity variants wrap that aligner in a fractional scorer.
     *
     * @param <S> each {@link Sequence} of a pair is of type S
     * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
     * @param query the first {@link Sequence} to score
     * @param target the second {@link Sequence} to score
     * @param type chosen type from list of pairwise sequence scoring routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return sequence pair scorer
     * @throws UnsupportedOperationException if {@code type} is {@code KMERS} or {@code WU_MANBER}
     */
    static <S extends Sequence<C>, C extends Compound> PairwiseSequenceScorer<S, C> getPairwiseScorer(
            S query, S target, PairwiseSequenceScorerType type, GapPenalty gapPenalty,
            SubstitutionMatrix<C> subMatrix) {
        switch (type) {
        default:
        case GLOBAL:
            return getPairwiseAligner(query, target, PairwiseSequenceAlignerType.GLOBAL, gapPenalty, subMatrix);
        case GLOBAL_IDENTITIES:
            return new FractionalIdentityScorer<>(getPairwiseAligner(query, target,
                    PairwiseSequenceAlignerType.GLOBAL, gapPenalty, subMatrix));
        case GLOBAL_SIMILARITIES:
            return new FractionalSimilarityScorer<>(getPairwiseAligner(query, target,
                    PairwiseSequenceAlignerType.GLOBAL, gapPenalty, subMatrix));
        case LOCAL:
            return getPairwiseAligner(query, target, PairwiseSequenceAlignerType.LOCAL, gapPenalty, subMatrix);
        case LOCAL_IDENTITIES:
            return new FractionalIdentityScorer<>(getPairwiseAligner(query, target,
                    PairwiseSequenceAlignerType.LOCAL, gapPenalty, subMatrix));
        case LOCAL_SIMILARITIES:
            return new FractionalSimilarityScorer<>(getPairwiseAligner(query, target,
                    PairwiseSequenceAlignerType.LOCAL, gapPenalty, subMatrix));
        case KMERS:
        case WU_MANBER:
            // TODO other scoring options
            throw unsupported(type, "scoring");
        }
    }

    /**
     * Factory method which constructs a profile-profile aligner from two already computed profiles.
     *
     * @param <S> each {@link Sequence} of an alignment profile is of type S
     * @param <C> each element of a sequence is a {@link Compound} of type C
     * @param profile1 the first {@link Profile} to align
     * @param profile2 the second {@link Profile} to align
     * @param type chosen type from list of profile-profile alignment routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return profile-profile aligner
     * @throws UnsupportedOperationException if {@code type} is any listed routine other than {@code GLOBAL}
     */
    static <S extends Sequence<C>, C extends Compound> ProfileProfileAligner<S, C> getProfileProfileAligner(
            Profile<S, C> profile1, Profile<S, C> profile2, ProfileProfileAlignerType type, GapPenalty gapPenalty,
            SubstitutionMatrix<C> subMatrix) {
        requireSupported(type);
        return new SimpleProfileProfileAligner<>(profile1, profile2, gapPenalty, subMatrix);
    }

    /**
     * Factory method which constructs a profile-profile aligner from two profiles that are both still pending
     * results of earlier alignment tasks.
     *
     * @param <S> each {@link Sequence} of an alignment profile is of type S
     * @param <C> each element of a sequence is a {@link Compound} of type C
     * @param profile1 the pending first {@link Profile} to align
     * @param profile2 the pending second {@link Profile} to align
     * @param type chosen type from list of profile-profile alignment routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return profile-profile aligner
     * @throws UnsupportedOperationException if {@code type} is any listed routine other than {@code GLOBAL}
     */
    static <S extends Sequence<C>, C extends Compound> ProfileProfileAligner<S, C> getProfileProfileAligner(
            Future<ProfilePair<S, C>> profile1, Future<ProfilePair<S, C>> profile2, ProfileProfileAlignerType type,
            GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
        requireSupported(type);
        return new SimpleProfileProfileAligner<>(profile1, profile2, gapPenalty, subMatrix);
    }

    /**
     * Factory method which constructs a profile-profile aligner from a computed first profile and a second profile
     * that is still the pending result of an earlier alignment task.
     *
     * @param <S> each {@link Sequence} of an alignment profile is of type S
     * @param <C> each element of a sequence is a {@link Compound} of type C
     * @param profile1 the first {@link Profile} to align
     * @param profile2 the pending second {@link Profile} to align
     * @param type chosen type from list of profile-profile alignment routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return profile-profile aligner
     * @throws UnsupportedOperationException if {@code type} is any listed routine other than {@code GLOBAL}
     */
    static <S extends Sequence<C>, C extends Compound> ProfileProfileAligner<S, C> getProfileProfileAligner(
            Profile<S, C> profile1, Future<ProfilePair<S, C>> profile2, ProfileProfileAlignerType type,
            GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
        requireSupported(type);
        return new SimpleProfileProfileAligner<>(profile1, profile2, gapPenalty, subMatrix);
    }

    /**
     * Factory method which constructs a profile-profile aligner from a first profile that is still the pending
     * result of an earlier alignment task and a computed second profile.
     *
     * @param <S> each {@link Sequence} of an alignment profile is of type S
     * @param <C> each element of a sequence is a {@link Compound} of type C
     * @param profile1 the pending first {@link Profile} to align
     * @param profile2 the second {@link Profile} to align
     * @param type chosen type from list of profile-profile alignment routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return profile-profile aligner
     * @throws UnsupportedOperationException if {@code type} is any listed routine other than {@code GLOBAL}
     */
    static <S extends Sequence<C>, C extends Compound> ProfileProfileAligner<S, C> getProfileProfileAligner(
            Future<ProfilePair<S, C>> profile1, Profile<S, C> profile2, ProfileProfileAlignerType type,
            GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
        requireSupported(type);
        return new SimpleProfileProfileAligner<>(profile1, profile2, gapPenalty, subMatrix);
    }

    /**
     * Factory method which computes a profile alignment for the given {@link Profile} pair.
     *
     * @param <S> each {@link Sequence} of the {@link Profile} pair is of type S
     * @param <C> each element of a sequence is a {@link Compound} of type C
     * @param profile1 the first {@link Profile} to align
     * @param profile2 the second {@link Profile} to align
     * @param type chosen type from list of profile-profile alignment routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return alignment profile
     */
    static <S extends Sequence<C>, C extends Compound> ProfilePair<S, C> getProfileProfileAlignment(
            Profile<S, C> profile1, Profile<S, C> profile2, ProfileProfileAlignerType type, GapPenalty gapPenalty,
            SubstitutionMatrix<C> subMatrix) {
        return getProfileProfileAligner(profile1, profile2, type, gapPenalty, subMatrix).getPair();
    }

    /**
     * Factory method to run the profile-profile alignments of a progressive multiple sequence alignment concurrently.
     * This method runs the alignments in parallel by submitting all of the alignment tasks to the shared thread pool
     * of the {@link ConcurrencyTools} utility.
     * <p>
     * Failures while waiting for a task ({@link InterruptedException} or {@link ExecutionException}) are logged and
     * leave the affected node without a profile.  If any wait was interrupted, the calling thread's interrupt status
     * is re-asserted after all tasks have been visited.
     *
     * @param <S> each {@link Sequence} of the {@link Profile} pair is of type S
     * @param <C> each element of a sequence is a {@link Compound} of type C
     * @param tree guide tree to follow aligning profiles from leaves to root
     * @param type chosen type from list of profile-profile alignment routines
     * @param gapPenalty the gap penalties used during alignment
     * @param subMatrix the set of substitution scores used during alignment
     * @return multiple sequence alignment
     */
    public static <S extends Sequence<C>, C extends Compound> Profile<S, C> getProgressiveAlignment(GuideTree<S, C> tree,
            ProfileProfileAlignerType type, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {

        // find inner nodes in post-order traversal of tree (each leaf node has a single sequence profile)
        List<GuideTreeNode<S, C>> innerNodes = new ArrayList<>();
        for (GuideTreeNode<S, C> n : tree) {
            if (n.getProfile() == null) {
                innerNodes.add(n);
            }
        }

        // submit alignment tasks to the shared thread pool
        int i = 1, all = innerNodes.size();
        for (GuideTreeNode<S, C> n : innerNodes) {
            Profile<S, C> p1 = n.getChild1().getProfile(), p2 = n.getChild2().getProfile();
            Future<ProfilePair<S, C>> pf1 = n.getChild1().getProfileFuture(), pf2 = n.getChild2().getProfileFuture();

            // for each child use its finished profile when present, otherwise its pending future
            ProfileProfileAligner<S, C> aligner;
            if (p1 != null) {
                aligner = (p2 != null)
                        ? getProfileProfileAligner(p1, p2, type, gapPenalty, subMatrix)
                        : getProfileProfileAligner(p1, pf2, type, gapPenalty, subMatrix);
            } else {
                aligner = (p2 != null)
                        ? getProfileProfileAligner(pf1, p2, type, gapPenalty, subMatrix)
                        : getProfileProfileAligner(pf1, pf2, type, gapPenalty, subMatrix);
            }
            n.setProfileFuture(ConcurrencyTools.submit(new CallableProfileProfileAligner<S, C>(aligner), String.format(
                    "Aligning pair %d of %d", i++, all)));
        }

        // retrieve the alignment results
        boolean interrupted = false;
        try {
            for (GuideTreeNode<S, C> n : innerNodes) {
                // TODO when added to ConcurrencyTools, log completions and exceptions instead of printing stack traces
                try {
                    n.setProfile(n.getProfileFuture().get());
                } catch (InterruptedException e) {
                    // remember the interruption but keep collecting the remaining results
                    interrupted = true;
                    logger.error("Interrupted Exception: ", e);
                } catch (ExecutionException e) {
                    logger.error("Execution Exception: ", e);
                }
            }
        } finally {
            if (interrupted) {
                // restore the status that catching InterruptedException cleared
                Thread.currentThread().interrupt();
            }
        }

        // the alignment profile at the root of the tree is the full multiple sequence alignment
        return tree.getRoot().getProfile();
    }

    /**
     * Factory method to run a list of alignments concurrently.  This method runs the alignments in parallel by
     * submitting all of the alignment tasks to the shared thread pool of the {@link ConcurrencyTools} utility.
     *
     * @param <S> each {@link Sequence} of an alignment pair is of type S
     * @param <C> each element of a sequence is a {@link Compound} of type C
     * @param aligners list of alignments to run
     * @return list of {@link SequencePair} results from running alignments
     */
    static <S extends Sequence<C>, C extends Compound> List<SequencePair<S, C>>
            runPairwiseAligners(List<PairwiseSequenceAligner<S, C>> aligners) {
        int n = 1, all = aligners.size();
        List<Future<SequencePair<S, C>>> futures = new ArrayList<>(all);
        for (PairwiseSequenceAligner<S, C> aligner : aligners) {
            futures.add(ConcurrencyTools.submit(new CallablePairwiseSequenceAligner<S, C>(aligner),
                    String.format("Aligning pair %d of %d", n++, all)));
        }
        return getListFromFutures(futures);
    }

    /**
     * Factory method to run a list of scorers concurrently.  This method runs the scorers in parallel by submitting
     * all of the scoring tasks to the shared thread pool of the {@link ConcurrencyTools} utility.
     * <p>
     * The array holds one entry per task whose result could be retrieved; tasks that failed or whose wait was
     * interrupted contribute no entry, so the array can be shorter than {@code scorers}.
     *
     * @param <S> each {@link Sequence} of an alignment pair is of type S
     * @param <C> each element of a sequence is a {@link Compound} of type C
     * @param scorers list of scorers to run
     * @return list of score results from running scorers
     */
    public static <S extends Sequence<C>, C extends Compound> double[] runPairwiseScorers(
            List<PairwiseSequenceScorer<S, C>> scorers) {
        int n = 1, all = scorers.size();
        List<Future<Double>> futures = new ArrayList<>(all);
        for (PairwiseSequenceScorer<S, C> scorer : scorers) {
            futures.add(ConcurrencyTools.submit(new CallablePairwiseSequenceScorer<S, C>(scorer),
                    String.format("Scoring pair %d of %d", n++, all)));
        }
        List<Double> results = getListFromFutures(futures);
        double[] scores = new double[results.size()];
        for (int i = 0; i < scores.length; i++) {
            scores[i] = results.get(i);
        }
        return scores;
    }

    /**
     * Factory method to run a list of alignments concurrently.  This method runs the alignments in parallel by
     * submitting all of the alignment tasks to the shared thread pool of the {@link ConcurrencyTools} utility.
     *
     * @param <S> each {@link Sequence} of the {@link Profile} pair is of type S
     * @param <C> each element of a sequence is a {@link Compound} of type C
     * @param aligners list of alignments to run
     * @return list of {@link ProfilePair} results from running alignments
     */
    static <S extends Sequence<C>, C extends Compound> List<ProfilePair<S, C>>
            runProfileAligners(List<ProfileProfileAligner<S, C>> aligners) {
        int n = 1, all = aligners.size();
        List<Future<ProfilePair<S, C>>> futures = new ArrayList<>(all);
        for (ProfileProfileAligner<S, C> aligner : aligners) {
            futures.add(ConcurrencyTools.submit(new CallableProfileProfileAligner<S, C>(aligner),
                    String.format("Aligning pair %d of %d", n++, all)));
        }
        return getListFromFutures(futures);
    }

    // private helpers

    /**
     * Builds the exception thrown for a routine that is listed in one of the type enums but not implemented yet.
     *
     * @param type the requested routine
     * @param operation trailing word of the message, e.g. {@code "alignment"} or {@code "scoring"}
     * @return exception to be thrown by the caller
     */
    private static UnsupportedOperationException unsupported(Object type, String operation) {
        return new UnsupportedOperationException(Alignments.class.getSimpleName() + " does not yet support " +
                type + " " + operation);
    }

    /**
     * Rejects profile-profile alignment routines that have no implementation; shared by all
     * {@code getProfileProfileAligner} overloads.
     *
     * @param type chosen type from list of profile-profile alignment routines
     * @throws UnsupportedOperationException if {@code type} is any listed routine other than {@code GLOBAL}
     */
    private static void requireSupported(ProfileProfileAlignerType type) {
        switch (type) {
        case GLOBAL_LINEAR_SPACE:
        case GLOBAL_CONSENSUS:
        case LOCAL:
        case LOCAL_LINEAR_SPACE:
        case LOCAL_CONSENSUS:
            // TODO other alignment options (Myers-Miller, consensus, local)
            throw unsupported(type, "alignment");
        case GLOBAL:
        default:
            break;
        }
    }

}