001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on August 11, 2010
021 * Author: Mark Chapman
022 */
023
024package org.biojava.nbio.alignment.routines;
025
026import org.biojava.nbio.core.alignment.SimpleSequencePair;
027import org.biojava.nbio.alignment.routines.AlignerHelper.Anchor;
028import org.biojava.nbio.alignment.template.AbstractPairwiseSequenceAligner;
029import org.biojava.nbio.core.alignment.template.AlignedSequence;
030import org.biojava.nbio.core.alignment.template.AlignedSequence.Step;
031import org.biojava.nbio.alignment.template.GapPenalty;
032import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
033import org.biojava.nbio.core.sequence.template.Compound;
034import org.biojava.nbio.core.sequence.template.Sequence;
035
036import java.util.ArrayList;
037import java.util.List;
038
039/**
040 * This algorithm uses a divide-and-conquer approach to find optimal pairwise global sequence alignments (from the
041 * first until the last {@link Compound} of each {@link Sequence}) with the restriction that any alignment produced
042 * will connect the query sequence to the target sequence at the <em>anchors</em>.  This class performs such global
043 * sequence comparisons efficiently by dynamic programming with a space requirement reduced from quadratic (a multiple
044 * of query sequence length times target sequence length) to only linear (a multiple of query sequence length).  The
045 * counterpoint to this reduction in space complexity is a modest (a multiple &lt; 2) increase in time.
046 *
047 * @author Mark Chapman
048 * @author Daniel Cameron
049 * @param <S> each {@link Sequence} of the alignment pair is of type S
050 * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
051 */
052public class AnchoredPairwiseSequenceAligner<S extends Sequence<C>, C extends Compound> extends
053                AbstractPairwiseSequenceAligner<S, C> {
054
055        /**
056         * Before running a pairwise global sequence alignment, data must be sent in via calls to
057         * {@link #setQuery(Sequence)}, {@link #setTarget(Sequence)}, {@link #setGapPenalty(GapPenalty)}, and
058         * {@link #setSubstitutionMatrix(SubstitutionMatrix)}.
059         */
060        public AnchoredPairwiseSequenceAligner() {
061        }
062
063        /**
064         * Prepares for a pairwise global sequence alignment.
065         *
066         * @param query the first {@link Sequence} of the pair to align
067         * @param target the second {@link Sequence} of the pair to align
068         * @param gapPenalty the gap penalties used during alignment
069         * @param subMatrix the set of substitution scores used during alignment
070         */
071        public AnchoredPairwiseSequenceAligner(S query, S target, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
072                this(query, target, gapPenalty, subMatrix, null);
073        }
074
075        /**
076         * Prepares for a pairwise global sequence alignment.
077         *
078         * @param query the first {@link Sequence} of the pair to align
079         * @param target the second {@link Sequence} of the pair to align
080         * @param gapPenalty the gap penalties used during alignment
081         * @param subMatrix the set of substitution scores used during alignment
082         * @param anchors the initial list of anchors
083         */
084        public AnchoredPairwiseSequenceAligner(S query, S target, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix, int[] anchors) {
085                super(query, target, gapPenalty, subMatrix);
086                setAnchors(anchors);
087        }
088
089        /**
090         * Returns the list of anchors.  The populated elements correspond to query compounds with a connection established
091         * to a target compound.
092         *
093         * @return the list of anchors
094         */
095        public int[] getAnchors() {
096                int[] anchor = new int[getScoreMatrixDimensions()[0] - 1];
097                for (int i = 0; i < anchor.length; i++) {
098                        anchor[i] = -1;
099                }
100                for (int i = 0; i < anchors.size(); i++) {
101                        anchor[anchors.get(i).getQueryIndex()] = anchors.get(i).getTargetIndex();
102                }
103                return anchor;
104        }
105
106        /**
107         * Sets the starting list of anchors before running the alignment routine.
108         *
109         * @param anchors list of points that are tied to the given indices in the target
110         */
111        public void setAnchors(int[] anchors) {
112                super.anchors = new ArrayList<>();
113                if (anchors != null) {
114                        for (int i = 0; i < anchors.length; i++) {
115                                if (anchors[i] >= 0) {
116                                        addAnchor(i, anchors[i]);
117                                }
118                        }
119                }
120        }
121        /**
122         * Adds an additional anchor to the set of anchored compounds
123         * @param queryIndex 0-based index of query sequence compound
124         * @param targetIndex 0-base index of target sequence compound to anchor to
125         */
126        public void addAnchor(int queryIndex, int targetIndex) {
127                anchors.add(new Anchor(queryIndex, targetIndex));
128        }
129
130        // method for AbstractMatrixAligner
131
132        @Override
133        protected void setProfile(List<Step> sx, List<Step> sy) {
134                profile = pair = new SimpleSequencePair<>(getQuery(), getTarget(), sx, sy);
135        }
136
137}