001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on June 7, 2010
021 * Author: Mark Chapman
022 */
023
024package org.biojava.nbio.alignment;
025
026import org.biojava.nbio.core.alignment.template.ProfilePair;
027import org.biojava.nbio.core.alignment.template.SequencePair;
028import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
029import org.biojava.nbio.core.alignment.template.Profile;
030import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
031import org.biojava.nbio.alignment.template.*;
032import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet;
033import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
034import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
035import org.biojava.nbio.core.sequence.template.Compound;
036import org.biojava.nbio.core.sequence.template.CompoundSet;
037import org.biojava.nbio.core.sequence.template.Sequence;
038import org.biojava.nbio.core.util.ConcurrencyTools;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041
042import java.util.ArrayList;
043import java.util.List;
044import java.util.concurrent.ExecutionException;
045import java.util.concurrent.Future;
046
047/**
048 * Static utility to easily run alignment routines.  To exit cleanly after running any parallel method that mentions
049 * use of the {@link ConcurrencyTools} utility, {@link ConcurrencyTools#shutdown()} or
050 * {@link ConcurrencyTools#shutdownAndAwaitTermination()} must be called.
051 *
052 * @author Mark Chapman
053 */
054public class Alignments {
055
056        private final static Logger logger = LoggerFactory.getLogger(Alignments.class);
057
058        /**
059         * List of implemented sequence pair in a profile scoring routines.
060         */
061        public static enum PairInProfileScorerType {
062                IDENTITIES,  // similar to MUSCLE
063                SIMILARITIES
064        }
065
066        /**
067         * List of implemented pairwise sequence alignment routines.
068         */
069        public static enum PairwiseSequenceAlignerType {
070                GLOBAL,              // Needleman-Wunsch/Gotoh
071                GLOBAL_LINEAR_SPACE, // Guan-Uberbacher
072                LOCAL,               // Smith-Waterman/Gotoh
073                LOCAL_LINEAR_SPACE   // Smith-Waterman/Gotoh with smart traceback at each maximum
074        }
075
076        /**
077         * List of implemented pairwise sequence scoring routines.
078         */
079        public static enum PairwiseSequenceScorerType {
080                GLOBAL,
081                GLOBAL_IDENTITIES,   // similar to CLUSTALW and CLUSTALW2
082                GLOBAL_SIMILARITIES,
083                LOCAL,
084                LOCAL_IDENTITIES,
085                LOCAL_SIMILARITIES,
086                KMERS,               // similar to CLUSTAL and MUSCLE
087                WU_MANBER            // similar to KALIGN
088        }
089
090        /**
091         * List of implemented profile-profile alignment routines.
092         */
093        public static enum ProfileProfileAlignerType {
094                GLOBAL,              // similar to MUSCLE and KALIGN
095                GLOBAL_LINEAR_SPACE, // similar to CLUSTALW and CLUSTALW2
096                GLOBAL_CONSENSUS,    // similar to CLUSTAL
097                LOCAL,
098                LOCAL_LINEAR_SPACE,
099                LOCAL_CONSENSUS
100        }
101
102        /**
103         * List of implemented profile refinement routines.
104         */
105        public static enum RefinerType {
106                PARTITION_SINGLE,     // similar to CLUSTALW2
107                PARTITION_SINGLE_ALL, // similar to CLUSTALW2
108                PARTITION_TREE,       // similar to MUSCLE
109                PARTITION_TREE_ALL,
110                RESCORE_IDENTITIES,   // similar to MUSCLE
111                RESCORE_SIMILARITIES
112        }
113
114        // prevents instantiation
115        private Alignments() { }
116
117        // public factory methods
118
119        /**
120         * Factory method which computes a sequence alignment for all {@link Sequence} pairs in the given {@link List}.
121         * This method runs the alignments in parallel by submitting all of the alignments to the shared thread pool of the
122         * {@link ConcurrencyTools} utility.
123         *
124         * @param <S> each {@link Sequence} of an alignment pair is of type S
125         * @param <C> each element of a sequence is a {@link Compound} of type C
126         * @param sequences the {@link List} of {@link Sequence}s to align
127         * @param type chosen type from list of pairwise sequence alignment routines
128         * @param gapPenalty the gap penalties used during alignment
129         * @param subMatrix the set of substitution scores used during alignment
130         * @return list of sequence alignment pairs
131         */
132        public static <S extends Sequence<C>, C extends Compound> List<SequencePair<S, C>> getAllPairsAlignments(
133                        List<S> sequences, PairwiseSequenceAlignerType type, GapPenalty gapPenalty,
134                        SubstitutionMatrix<C> subMatrix) {
135                return runPairwiseAligners(getAllPairsAligners(sequences, type, gapPenalty, subMatrix));
136        }
137
138        /**
139         * Factory method which computes a multiple sequence alignment for the given {@link List} of {@link Sequence}s.
140         *
141         * @param <S> each {@link Sequence} of the {@link List} is of type S
142         * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
143         * @param sequences the {@link List} of {@link Sequence}s to align
144         * @param settings optional settings that adjust the alignment
145         * @return multiple sequence alignment {@link Profile}
146         */
147        public static <S extends Sequence<C>, C extends Compound> Profile<S, C> getMultipleSequenceAlignment(
148                        List<S> sequences, Object... settings) { // TODO convert other factories to this parameter style?
149                CompoundSet<C> cs = sequences.get(0).getCompoundSet();
150                PairwiseSequenceScorerType ps = PairwiseSequenceScorerType.GLOBAL_IDENTITIES;
151                GapPenalty gapPenalty = new SimpleGapPenalty();
152                SubstitutionMatrix<C> subMatrix = null;
153                if (cs == AminoAcidCompoundSet.getAminoAcidCompoundSet()) {
154                        @SuppressWarnings("unchecked") // compound types must be equal since compound sets are equal
155                        SubstitutionMatrix<C> temp = (SubstitutionMatrix<C>) SubstitutionMatrixHelper.getBlosum62();
156                        subMatrix = temp;
157                } else if (cs == DNACompoundSet.getDNACompoundSet()) {
158                        @SuppressWarnings("unchecked") // compound types must be equal since compound sets are equal
159                        SubstitutionMatrix<C> temp = (SubstitutionMatrix<C>) SubstitutionMatrixHelper.getNuc4_4();
160                        subMatrix = temp;
161
162                } else if (cs == AmbiguityDNACompoundSet.getDNACompoundSet()) {
163                        @SuppressWarnings("unchecked") // compound types must be equal since compound sets are equal
164                        SubstitutionMatrix<C> temp = (SubstitutionMatrix<C>) SubstitutionMatrixHelper.getNuc4_4();
165                        subMatrix = temp;
166
167                }
168                ProfileProfileAlignerType pa = ProfileProfileAlignerType.GLOBAL;
169                for (Object o : settings) {
170                        if (o instanceof PairwiseSequenceScorerType) {
171                                ps = (PairwiseSequenceScorerType) o;
172                        } else if (o instanceof GapPenalty) {
173                                gapPenalty = (GapPenalty) o;
174                        } else if (o instanceof SubstitutionMatrix<?>) {
175                                if (cs != ((SubstitutionMatrix<?>) o).getCompoundSet()) {
176                                        throw new IllegalArgumentException(
177                                                        "Compound sets of the sequences and substitution matrix must match.");
178                                }
179                                @SuppressWarnings("unchecked") // compound types must be equal since compound sets are equal
180                                SubstitutionMatrix<C> temp = (SubstitutionMatrix<C>) o;
181                                subMatrix = temp;
182                        } else if (o instanceof ProfileProfileAlignerType) {
183                                pa = (ProfileProfileAlignerType) o;
184                        }
185                }
186
187                // stage 1: pairwise similarity calculation
188                List<PairwiseSequenceScorer<S, C>> scorers = getAllPairsScorers(sequences, ps, gapPenalty, subMatrix);
189                runPairwiseScorers(scorers);
190
191                // stage 2: hierarchical clustering into a guide tree
192                GuideTree<S, C> tree = new GuideTree<>(sequences, scorers);
193                scorers = null;
194
195                // stage 3: progressive alignment
196                Profile<S, C> msa = getProgressiveAlignment(tree, pa, gapPenalty, subMatrix);
197
198                // TODO stage 4: refinement
199                return msa;
200        }
201
202        /**
203         * Factory method which computes a sequence alignment for the given {@link Sequence} pair.
204         *
205         * @param <S> each {@link Sequence} of the pair is of type S
206         * @param <C> each element of a sequence is a {@link Compound} of type C
207         * @param query the first {@link Sequence}s to align
208         * @param target the second {@link Sequence}s to align
209         * @param type chosen type from list of pairwise sequence alignment routines
210         * @param gapPenalty the gap penalties used during alignment
211         * @param subMatrix the set of substitution scores used during alignment
212         * @return sequence alignment pair
213         */
214        public static <S extends Sequence<C>, C extends Compound> SequencePair<S, C> getPairwiseAlignment(
215                        S query, S target, PairwiseSequenceAlignerType type, GapPenalty gapPenalty,
216                        SubstitutionMatrix<C> subMatrix) {
217                return getPairwiseAligner(query, target, type, gapPenalty, subMatrix).getPair();
218        }
219
220        // default access (package private) factory methods
221
222        /**
223         * Factory method which sets up a sequence alignment for all {@link Sequence} pairs in the given {@link List}.
224         *
225         * @param <S> each {@link Sequence} of an alignment pair is of type S
226         * @param <C> each element of a sequence is a {@link Compound} of type C
227         * @param sequences the {@link List} of {@link Sequence}s to align
228         * @param type chosen type from list of pairwise sequence alignment routines
229         * @param gapPenalty the gap penalties used during alignment
230         * @param subMatrix the set of substitution scores used during alignment
231         * @return list of pairwise sequence aligners
232         */
233        static <S extends Sequence<C>, C extends Compound> List<PairwiseSequenceAligner<S, C>> getAllPairsAligners(
234                        List<S> sequences, PairwiseSequenceAlignerType type, GapPenalty gapPenalty,
235                        SubstitutionMatrix<C> subMatrix) {
236                List<PairwiseSequenceAligner<S, C>> allPairs = new ArrayList<>();
237                for (int i = 0; i < sequences.size(); i++) {
238                        for (int j = i+1; j < sequences.size(); j++) {
239                                allPairs.add(getPairwiseAligner(sequences.get(i), sequences.get(j), type, gapPenalty, subMatrix));
240                        }
241                }
242                return allPairs;
243        }
244
245        /**
246         * Factory method which sets up a sequence pair scorer for all {@link Sequence} pairs in the given {@link List}.
247         *
248         * @param <S> each {@link Sequence} of a pair is of type S
249         * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
250         * @param sequences the {@link List} of {@link Sequence}s to align
251         * @param type chosen type from list of pairwise sequence scoring routines
252         * @param gapPenalty the gap penalties used during alignment
253         * @param subMatrix the set of substitution scores used during alignment
254         * @return list of sequence pair scorers
255         */
256        public static <S extends Sequence<C>, C extends Compound> List<PairwiseSequenceScorer<S, C>> getAllPairsScorers(
257                        List<S> sequences, PairwiseSequenceScorerType type, GapPenalty gapPenalty,
258                        SubstitutionMatrix<C> subMatrix) {
259                List<PairwiseSequenceScorer<S, C>> allPairs = new ArrayList<>();
260                for (int i = 0; i < sequences.size(); i++) {
261                        for (int j = i+1; j < sequences.size(); j++) {
262                                allPairs.add(getPairwiseScorer(sequences.get(i), sequences.get(j), type, gapPenalty, subMatrix));
263                        }
264                }
265                return allPairs;
266        }
267
268        /**
269         * Factory method which computes a sequence pair score for all {@link Sequence} pairs in the given {@link List}.
270         * This method runs the scorings in parallel by submitting all of the scorings to the shared thread pool of the
271         * {@link ConcurrencyTools} utility.
272         *
273         * @param <S> each {@link Sequence} of a pair is of type S
274         * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
275         * @param sequences the {@link List} of {@link Sequence}s to align
276         * @param type chosen type from list of pairwise sequence scoring routines
277         * @param gapPenalty the gap penalties used during alignment
278         * @param subMatrix the set of substitution scores used during alignment
279         * @return list of sequence pair scores
280         */
281        public static <S extends Sequence<C>, C extends Compound> double[] getAllPairsScores( List<S> sequences,
282                        PairwiseSequenceScorerType type, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
283                return runPairwiseScorers(getAllPairsScorers(sequences, type, gapPenalty, subMatrix));
284        }
285
286        /**
287         * Factory method which retrieves calculated elements from a list of tasks on the concurrent execution queue.
288         *
289         * @param <E> each task calculates a value of type E
290         * @param futures list of tasks
291         * @return calculated elements
292         */
293        static <E> List<E> getListFromFutures(List<Future<E>> futures) {
294                List<E> list = new ArrayList<>();
295                for (Future<E> f : futures) {
296                        // TODO when added to ConcurrencyTools, log completions and exceptions instead of printing stack traces
297                        try {
298                                list.add(f.get());
299                        } catch (InterruptedException e) {
300                                logger.error("Interrupted Exception: ", e);
301                        } catch (ExecutionException e) {
302                                logger.error("Execution Exception: ", e);
303                        }
304                }
305                return list;
306        }
307
308        /**
309         * Factory method which constructs a pairwise sequence aligner.
310         *
311         * @param <S> each {@link Sequence} of an alignment pair is of type S
312         * @param <C> each element of a sequence is a {@link Compound} of type C
313         * @param query the first {@link Sequence} to align
314         * @param target the second {@link Sequence} to align
315         * @param type chosen type from list of pairwise sequence alignment routines
316         * @param gapPenalty the gap penalties used during alignment
317         * @param subMatrix the set of substitution scores used during alignment
318         * @return pairwise sequence aligner
319         */
320        public static <S extends Sequence<C>, C extends Compound> PairwiseSequenceAligner<S, C> getPairwiseAligner(
321                        S query, S target, PairwiseSequenceAlignerType type, GapPenalty gapPenalty,
322                        SubstitutionMatrix<C> subMatrix) {
323                if (!query.getCompoundSet().equals(target.getCompoundSet())) {
324                        throw new IllegalArgumentException("Sequence compound sets must be the same");
325                }
326                switch (type) {
327                default:
328                case GLOBAL:
329                        return new NeedlemanWunsch<>(query, target, gapPenalty, subMatrix);
330                case LOCAL:
331                        return new SmithWaterman<>(query, target, gapPenalty, subMatrix);
332                case GLOBAL_LINEAR_SPACE:
333                case LOCAL_LINEAR_SPACE:
334                        // TODO other alignment options (Myers-Miller, Thompson)
335                        throw new UnsupportedOperationException(Alignments.class.getSimpleName() + " does not yet support " +
336                                        type + " alignment");
337                }
338        }
339
340        /**
341         * Factory method which computes a similarity score for the given {@link Sequence} pair.
342         *
343         * @param <S> each {@link Sequence} of the pair is of type S
344         * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
345         * @param query the first {@link Sequence} to score
346         * @param target the second {@link Sequence} to score
347         * @param type chosen type from list of pairwise sequence scoring routines
348         * @param gapPenalty the gap penalties used during alignment
349         * @param subMatrix the set of substitution scores used during alignment
350         * @return sequence pair score
351         */
352        static <S extends Sequence<C>, C extends Compound> double getPairwiseScore(S query, S target,
353                        PairwiseSequenceScorerType type, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
354                return getPairwiseScorer(query, target, type, gapPenalty, subMatrix).getScore();
355        }
356
357        /**
358         * Factory method which constructs a pairwise sequence scorer.
359         *
360         * @param <S> each {@link Sequence} of a pair is of type S
361         * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
362         * @param query the first {@link Sequence} to score
363         * @param target the second {@link Sequence} to score
364         * @param type chosen type from list of pairwise sequence scoring routines
365         * @param gapPenalty the gap penalties used during alignment
366         * @param subMatrix the set of substitution scores used during alignment
367         * @return sequence pair scorer
368         */
369        static <S extends Sequence<C>, C extends Compound> PairwiseSequenceScorer<S, C> getPairwiseScorer(
370                        S query, S target, PairwiseSequenceScorerType type, GapPenalty gapPenalty,
371                        SubstitutionMatrix<C> subMatrix) {
372                switch (type) {
373                default:
374                case GLOBAL:
375                        return getPairwiseAligner(query, target, PairwiseSequenceAlignerType.GLOBAL, gapPenalty, subMatrix);
376                case GLOBAL_IDENTITIES:
377                        return new FractionalIdentityScorer<>(getPairwiseAligner(query, target,
378                                        PairwiseSequenceAlignerType.GLOBAL, gapPenalty, subMatrix));
379                case GLOBAL_SIMILARITIES:
380                        return new FractionalSimilarityScorer<>(getPairwiseAligner(query, target,
381                                        PairwiseSequenceAlignerType.GLOBAL, gapPenalty, subMatrix));
382                case LOCAL:
383                        return getPairwiseAligner(query, target, PairwiseSequenceAlignerType.LOCAL, gapPenalty, subMatrix);
384                case LOCAL_IDENTITIES:
385                        return new FractionalIdentityScorer<>(getPairwiseAligner(query, target,
386                                        PairwiseSequenceAlignerType.LOCAL, gapPenalty, subMatrix));
387                case LOCAL_SIMILARITIES:
388                        return new FractionalSimilarityScorer<>(getPairwiseAligner(query, target,
389                                        PairwiseSequenceAlignerType.LOCAL, gapPenalty, subMatrix));
390                case KMERS:
391                case WU_MANBER:
392                        // TODO other scoring options
393                        throw new UnsupportedOperationException(Alignments.class.getSimpleName() + " does not yet support " +
394                                        type + " scoring");
395                }
396        }
397
398        /**
399         * Factory method which constructs a profile-profile aligner.
400         *
401         * @param <S> each {@link Sequence} of an alignment profile is of type S
402         * @param <C> each element of a sequence is a {@link Compound} of type C
403         * @param profile1 the first {@link Profile} to align
404         * @param profile2 the second {@link Profile} to align
405         * @param type chosen type from list of profile-profile alignment routines
406         * @param gapPenalty the gap penalties used during alignment
407         * @param subMatrix the set of substitution scores used during alignment
408         * @return profile-profile aligner
409         */
410        static <S extends Sequence<C>, C extends Compound> ProfileProfileAligner<S, C> getProfileProfileAligner(
411                        Profile<S, C> profile1, Profile<S, C> profile2, ProfileProfileAlignerType type, GapPenalty gapPenalty,
412                        SubstitutionMatrix<C> subMatrix) {
413                switch (type) {
414                default:
415                case GLOBAL:
416                        return new SimpleProfileProfileAligner<>(profile1, profile2, gapPenalty, subMatrix);
417                case GLOBAL_LINEAR_SPACE:
418                case GLOBAL_CONSENSUS:
419                case LOCAL:
420                case LOCAL_LINEAR_SPACE:
421                case LOCAL_CONSENSUS:
422                        // TODO other alignment options (Myers-Miller, consensus, local)
423                        throw new UnsupportedOperationException(Alignments.class.getSimpleName() + " does not yet support " +
424                                        type + " alignment");
425                }
426        }
427
428        /**
429         * Factory method which constructs a profile-profile aligner.
430         *
431         * @param <S> each {@link Sequence} of an alignment profile is of type S
432         * @param <C> each element of a sequence is a {@link Compound} of type C
433         * @param profile1 the first {@link Profile} to align
434         * @param profile2 the second {@link Profile} to align
435         * @param type chosen type from list of profile-profile alignment routines
436         * @param gapPenalty the gap penalties used during alignment
437         * @param subMatrix the set of substitution scores used during alignment
438         * @return profile-profile aligner
439         */
440        static <S extends Sequence<C>, C extends Compound> ProfileProfileAligner<S, C> getProfileProfileAligner(
441                        Future<ProfilePair<S, C>> profile1, Future<ProfilePair<S, C>> profile2, ProfileProfileAlignerType type,
442                        GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
443                switch (type) {
444                default:
445                case GLOBAL:
446                        return new SimpleProfileProfileAligner<>(profile1, profile2, gapPenalty, subMatrix);
447                case GLOBAL_LINEAR_SPACE:
448                case GLOBAL_CONSENSUS:
449                case LOCAL:
450                case LOCAL_LINEAR_SPACE:
451                case LOCAL_CONSENSUS:
452                        // TODO other alignment options (Myers-Miller, consensus, local)
453                        throw new UnsupportedOperationException(Alignments.class.getSimpleName() + " does not yet support " +
454                                        type + " alignment");
455                }
456        }
457
458        /**
459         * Factory method which constructs a profile-profile aligner.
460         *
461         * @param <S> each {@link Sequence} of an alignment profile is of type S
462         * @param <C> each element of a sequence is a {@link Compound} of type C
463         * @param profile1 the first {@link Profile} to align
464         * @param profile2 the second {@link Profile} to align
465         * @param type chosen type from list of profile-profile alignment routines
466         * @param gapPenalty the gap penalties used during alignment
467         * @param subMatrix the set of substitution scores used during alignment
468         * @return profile-profile aligner
469         */
470        static <S extends Sequence<C>, C extends Compound> ProfileProfileAligner<S, C> getProfileProfileAligner(
471                        Profile<S, C> profile1, Future<ProfilePair<S, C>> profile2, ProfileProfileAlignerType type,
472                        GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
473                switch (type) {
474                default:
475                case GLOBAL:
476                        return new SimpleProfileProfileAligner<>(profile1, profile2, gapPenalty, subMatrix);
477                case GLOBAL_LINEAR_SPACE:
478                case GLOBAL_CONSENSUS:
479                case LOCAL:
480                case LOCAL_LINEAR_SPACE:
481                case LOCAL_CONSENSUS:
482                        // TODO other alignment options (Myers-Miller, consensus, local)
483                        throw new UnsupportedOperationException(Alignments.class.getSimpleName() + " does not yet support " +
484                                        type + " alignment");
485                }
486        }
487
488        /**
489         * Factory method which constructs a profile-profile aligner.
490         *
491         * @param <S> each {@link Sequence} of an alignment profile is of type S
492         * @param <C> each element of a sequence is a {@link Compound} of type C
493         * @param profile1 the first {@link Profile} to align
494         * @param profile2 the second {@link Profile} to align
495         * @param type chosen type from list of profile-profile alignment routines
496         * @param gapPenalty the gap penalties used during alignment
497         * @param subMatrix the set of substitution scores used during alignment
498         * @return profile-profile aligner
499         */
500        static <S extends Sequence<C>, C extends Compound> ProfileProfileAligner<S, C> getProfileProfileAligner(
501                        Future<ProfilePair<S, C>> profile1, Profile<S, C> profile2, ProfileProfileAlignerType type,
502                        GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
503                switch (type) {
504                default:
505                case GLOBAL:
506                        return new SimpleProfileProfileAligner<>(profile1, profile2, gapPenalty, subMatrix);
507                case GLOBAL_LINEAR_SPACE:
508                case GLOBAL_CONSENSUS:
509                case LOCAL:
510                case LOCAL_LINEAR_SPACE:
511                case LOCAL_CONSENSUS:
512                        // TODO other alignment options (Myers-Miller, consensus, local)
513                        throw new UnsupportedOperationException(Alignments.class.getSimpleName() + " does not yet support " +
514                                        type + " alignment");
515                }
516        }
517
518        /**
519         * Factory method which computes a profile alignment for the given {@link Profile} pair.
520         *
521         * @param <S> each {@link Sequence} of the {@link Profile} pair is of type S
522         * @param <C> each element of a sequence is a {@link Compound} of type C
523         * @param profile1 the first {@link Profile} to align
524         * @param profile2 the second {@link Profile} to align
525         * @param type chosen type from list of profile-profile alignment routines
526         * @param gapPenalty the gap penalties used during alignment
527         * @param subMatrix the set of substitution scores used during alignment
528         * @return alignment profile
529         */
530        static <S extends Sequence<C>, C extends Compound> ProfilePair<S, C> getProfileProfileAlignment(
531                        Profile<S, C> profile1, Profile<S, C> profile2, ProfileProfileAlignerType type, GapPenalty gapPenalty,
532                        SubstitutionMatrix<C> subMatrix) {
533                return getProfileProfileAligner(profile1, profile2, type, gapPenalty, subMatrix).getPair();
534        }
535
536        /**
537         * Factory method to run the profile-profile alignments of a progressive multiple sequence alignment concurrently.
538         * This method runs the alignments in parallel by submitting all of the alignment tasks to the shared thread pool
539         * of the {@link ConcurrencyTools} utility.
540         *
541         * @param <S> each {@link Sequence} of the {@link Profile} pair is of type S
542         * @param <C> each element of a sequence is a {@link Compound} of type C
543         * @param tree guide tree to follow aligning profiles from leaves to root
544         * @param type chosen type from list of profile-profile alignment routines
545         * @param gapPenalty the gap penalties used during alignment
546         * @param subMatrix the set of substitution scores used during alignment
547         * @return multiple sequence alignment
548         */
549        public static <S extends Sequence<C>, C extends Compound> Profile<S, C> getProgressiveAlignment(GuideTree<S, C> tree,
550                        ProfileProfileAlignerType type, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
551
552                // find inner nodes in post-order traversal of tree (each leaf node has a single sequence profile)
553                List<GuideTreeNode<S, C>> innerNodes = new ArrayList<>();
554                for (GuideTreeNode<S, C> n : tree) {
555                        if (n.getProfile() == null) {
556                                innerNodes.add(n);
557                        }
558                }
559
560                // submit alignment tasks to the shared thread pool
561                int i = 1, all = innerNodes.size();
562                for (GuideTreeNode<S, C> n : innerNodes) {
563                        Profile<S, C> p1 = n.getChild1().getProfile(), p2 = n.getChild2().getProfile();
564                        Future<ProfilePair<S, C>> pf1 = n.getChild1().getProfileFuture(), pf2 = n.getChild2().getProfileFuture();
565                        ProfileProfileAligner<S, C> aligner =
566                                        (p1 != null) ? ((p2 != null) ? getProfileProfileAligner(p1, p2, type, gapPenalty, subMatrix) :
567                                                        getProfileProfileAligner(p1, pf2, type, gapPenalty, subMatrix)) :
568                                        ((p2 != null) ? getProfileProfileAligner(pf1, p2, type, gapPenalty, subMatrix) :
569                                                        getProfileProfileAligner(pf1, pf2, type, gapPenalty, subMatrix));
570                        n.setProfileFuture(ConcurrencyTools.submit(new CallableProfileProfileAligner<S, C>(aligner), String.format(
571                                        "Aligning pair %d of %d", i++, all)));
572                }
573
574                // retrieve the alignment results
575                for (GuideTreeNode<S, C> n : innerNodes) {
576                        // TODO when added to ConcurrencyTools, log completions and exceptions instead of printing stack traces
577                        try {
578                                n.setProfile(n.getProfileFuture().get());
579                        } catch (InterruptedException e) {
580                                logger.error("Interrupted Exception: ", e);
581                        } catch (ExecutionException e) {
582                                logger.error("Execution Exception: ", e);
583                        }
584                }
585
586                // the alignment profile at the root of the tree is the full multiple sequence alignment
587                return tree.getRoot().getProfile();
588        }
589
590        /**
591         * Factory method to run a list of alignments concurrently.  This method runs the alignments in parallel by
592         * submitting all of the alignment tasks to the shared thread pool of the {@link ConcurrencyTools} utility.
593         *
594         * @param <S> each {@link Sequence} of an alignment pair is of type S
595         * @param <C> each element of a sequence is a {@link Compound} of type C
596         * @param aligners list of alignments to run
597         * @return list of {@link SequencePair} results from running alignments
598         */
599        static <S extends Sequence<C>, C extends Compound> List<SequencePair<S, C>>
600                        runPairwiseAligners(List<PairwiseSequenceAligner<S, C>> aligners) {
601                int n = 1, all = aligners.size();
602                List<Future<SequencePair<S, C>>> futures = new ArrayList<>();
603                for (PairwiseSequenceAligner<S, C> aligner : aligners) {
604                        futures.add(ConcurrencyTools.submit(new CallablePairwiseSequenceAligner<S, C>(aligner),
605                                        String.format("Aligning pair %d of %d", n++, all)));
606                }
607                return getListFromFutures(futures);
608        }
609
610        /**
611         * Factory method to run a list of scorers concurrently.  This method runs the scorers in parallel by submitting
612         * all of the scoring tasks to the shared thread pool of the {@link ConcurrencyTools} utility.
613         *
614         * @param <S> each {@link Sequence} of an alignment pair is of type S
615         * @param <C> each element of a sequence is a {@link Compound} of type C
616         * @param scorers list of scorers to run
617         * @return list of score results from running scorers
618         */
619        public static <S extends Sequence<C>, C extends Compound> double[] runPairwiseScorers(
620                        List<PairwiseSequenceScorer<S, C>> scorers) {
621                int n = 1, all = scorers.size();
622                List<Future<Double>> futures = new ArrayList<>();
623                for (PairwiseSequenceScorer<S, C> scorer : scorers) {
624                        futures.add(ConcurrencyTools.submit(new CallablePairwiseSequenceScorer<S, C>(scorer),
625                                        String.format("Scoring pair %d of %d", n++, all)));
626                }
627                List<Double> results = getListFromFutures(futures);
628                double[] scores = new double[results.size()];
629                for (int i = 0; i < scores.length; i++) {
630                        scores[i] = results.get(i);
631                }
632                return scores;
633        }
634
635        /**
636         * Factory method to run a list of alignments concurrently.  This method runs the alignments in parallel by
637         * submitting all of the alignment tasks to the shared thread pool of the {@link ConcurrencyTools} utility.
638         *
639         * @param <S> each {@link Sequence} of the {@link Profile} pair is of type S
640         * @param <C> each element of a sequence is a {@link Compound} of type C
641         * @param aligners list of alignments to run
642         * @return list of {@link ProfilePair} results from running alignments
643         */
644        static <S extends Sequence<C>, C extends Compound> List<ProfilePair<S, C>>
645                        runProfileAligners(List<ProfileProfileAligner<S, C>> aligners) {
646                int n = 1, all = aligners.size();
647                List<Future<ProfilePair<S, C>>> futures = new ArrayList<>();
648                for (ProfileProfileAligner<S, C> aligner : aligners) {
649                        futures.add(ConcurrencyTools.submit(new CallableProfileProfileAligner<S, C>(aligner),
650                                        String.format("Aligning pair %d of %d", n++, all)));
651                }
652                return getListFromFutures(futures);
653        }
654
655}