001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on August 11, 2010
021 * Author: Mark Chapman
022 */
023
024package org.biojava.nbio.alignment.routines;
025
026import org.biojava.nbio.core.alignment.SimpleSequencePair;
027import org.biojava.nbio.alignment.routines.AlignerHelper.Anchor;
028import org.biojava.nbio.alignment.template.AbstractPairwiseSequenceAligner;
029import org.biojava.nbio.core.alignment.template.AlignedSequence;
030import org.biojava.nbio.core.alignment.template.AlignedSequence.Step;
031import org.biojava.nbio.alignment.template.GapPenalty;
032import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
033import org.biojava.nbio.core.sequence.template.Compound;
034import org.biojava.nbio.core.sequence.template.Sequence;
035
036import java.util.ArrayList;
037import java.util.List;
038
039/**
040 * This algorithm uses a divide-and-conquer approach to find optimal pairwise global sequence alignments (from the
041 * first until the last {@link Compound} of each {@link Sequence}) with the restriction that any alignment produced
042 * will connect the query sequence to the target sequence at the <em>anchors</em>.  This class performs such global
043 * sequence comparisons efficiently by dynamic programming with a space requirement reduced from quadratic (a multiple
044 * of query sequence length times target sequence length) to only linear (a multiple of query sequence length).  The
045 * counterpoint to this reduction in space complexity is a modest (a multiple < 2) increase in time.
046 *
047 * @author Mark Chapman
048 * @author Daniel Cameron
049 * @param <S> each {@link Sequence} of the alignment pair is of type S
050 * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
051 */
052public class AnchoredPairwiseSequenceAligner<S extends Sequence<C>, C extends Compound> extends
053                AbstractPairwiseSequenceAligner<S, C> {
054
055        /**
056         * Before running a pairwise global sequence alignment, data must be sent in via calls to
057         * {@link #setQuery(Sequence)}, {@link #setTarget(Sequence)}, {@link #setGapPenalty(GapPenalty)}, and
058         * {@link #setSubstitutionMatrix(SubstitutionMatrix)}.
059         */
060        public AnchoredPairwiseSequenceAligner() {
061        }
062
063        /**
064         * Prepares for a pairwise global sequence alignment.
065         *
066         * @param query the first {@link Sequence} of the pair to align
067         * @param target the second {@link Sequence} of the pair to align
068         * @param gapPenalty the gap penalties used during alignment
069         * @param subMatrix the set of substitution scores used during alignment
070         * @param cutsPerSection the number of cuts added to each section during each pass
071         */
072        public AnchoredPairwiseSequenceAligner(S query, S target, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
073                this(query, target, gapPenalty, subMatrix, null);
074        }
075
076        /**
077         * Prepares for a pairwise global sequence alignment.
078         *
079         * @param query the first {@link Sequence} of the pair to align
080         * @param target the second {@link Sequence} of the pair to align
081         * @param gapPenalty the gap penalties used during alignment
082         * @param subMatrix the set of substitution scores used during alignment
083         * @param cutsPerSection the number of cuts added to each section during each pass
084         * @param anchors the initial list of anchors
085         */
086        public AnchoredPairwiseSequenceAligner(S query, S target, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix, int[] anchors) {
087                super(query, target, gapPenalty, subMatrix);
088                setAnchors(anchors);
089        }
090
091        /**
092         * Returns the list of anchors.  The populated elements correspond to query compounds with a connection established
093         * to a target compound.
094         *
095         * @return the list of anchors
096         */
097        public int[] getAnchors() {
098                int[] anchor = new int[getScoreMatrixDimensions()[0] - 1];
099                for (int i = 0; i < anchor.length; i++) {
100                        anchor[i] = -1;
101                }
102                for (int i = 0; i < anchors.size(); i++) {
103                        anchor[anchors.get(i).getQueryIndex()] = anchors.get(i).getTargetIndex();
104                }
105                return anchor;
106        }
107
108        /**
109         * Sets the starting list of anchors before running the alignment routine.
110         *
111         * @param anchors list of points that are tied to the given indices in the target
112         */
113        public void setAnchors(int[] anchors) {
114                super.anchors = new ArrayList<Anchor>();
115                if (anchors != null) {
116                        for (int i = 0; i < anchors.length; i++) {
117                                if (anchors[i] >= 0) {
118                                        addAnchor(i, anchors[i]);
119                                }
120                        }
121                }
122        }
123        /**
124         * Adds an additional anchor to the set of anchored compounds
125         * @param queryIndex 0-based index of query sequence compound
126         * @param targetIndex 0-base index of target sequence compound to anchor to
127         */
128        public void addAnchor(int queryIndex, int targetIndex) {
129                anchors.add(new Anchor(queryIndex, targetIndex));
130        }
131
132        // method for AbstractMatrixAligner
133
134        @Override
135        protected void setProfile(List<Step> sx, List<Step> sy) {
136                profile = pair = new SimpleSequencePair<S, C>(getQuery(), getTarget(), sx, sy);
137        }
138
139}