001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on August 11, 2010 021 * Author: Mark Chapman 022 */ 023 024package org.biojava.nbio.alignment.routines; 025 026import org.biojava.nbio.core.alignment.SimpleSequencePair; 027import org.biojava.nbio.alignment.routines.AlignerHelper.Anchor; 028import org.biojava.nbio.alignment.template.AbstractPairwiseSequenceAligner; 029import org.biojava.nbio.core.alignment.template.AlignedSequence; 030import org.biojava.nbio.core.alignment.template.AlignedSequence.Step; 031import org.biojava.nbio.alignment.template.GapPenalty; 032import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 033import org.biojava.nbio.core.sequence.template.Compound; 034import org.biojava.nbio.core.sequence.template.Sequence; 035 036import java.util.ArrayList; 037import java.util.List; 038 039/** 040 * This algorithm uses a divide-and-conquer approach to find optimal pairwise global sequence alignments (from the 041 * first until the last {@link Compound} of each {@link Sequence}) with the restriction that any alignment produced 042 * will connect the query sequence to the target sequence at the <em>anchors</em>. This class performs such global 043 * sequence comparisons efficiently by dynamic programming with a space requirement reduced from quadratic (a multiple 044 * of query sequence length times target sequence length) to only linear (a multiple of query sequence length). The 045 * counterpoint to this reduction in space complexity is a modest (a multiple < 2) increase in time. 046 * 047 * @author Mark Chapman 048 * @author Daniel Cameron 049 * @param <S> each {@link Sequence} of the alignment pair is of type S 050 * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C 051 */ 052public class AnchoredPairwiseSequenceAligner<S extends Sequence<C>, C extends Compound> extends 053 AbstractPairwiseSequenceAligner<S, C> { 054 055 /** 056 * Before running a pairwise global sequence alignment, data must be sent in via calls to 057 * {@link #setQuery(Sequence)}, {@link #setTarget(Sequence)}, {@link #setGapPenalty(GapPenalty)}, and 058 * {@link #setSubstitutionMatrix(SubstitutionMatrix)}. 059 */ 060 public AnchoredPairwiseSequenceAligner() { 061 } 062 063 /** 064 * Prepares for a pairwise global sequence alignment. 065 * 066 * @param query the first {@link Sequence} of the pair to align 067 * @param target the second {@link Sequence} of the pair to align 068 * @param gapPenalty the gap penalties used during alignment 069 * @param subMatrix the set of substitution scores used during alignment 070 */ 071 public AnchoredPairwiseSequenceAligner(S query, S target, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) { 072 this(query, target, gapPenalty, subMatrix, null); 073 } 074 075 /** 076 * Prepares for a pairwise global sequence alignment. 077 * 078 * @param query the first {@link Sequence} of the pair to align 079 * @param target the second {@link Sequence} of the pair to align 080 * @param gapPenalty the gap penalties used during alignment 081 * @param subMatrix the set of substitution scores used during alignment 082 * @param anchors the initial list of anchors 083 */ 084 public AnchoredPairwiseSequenceAligner(S query, S target, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix, int[] anchors) { 085 super(query, target, gapPenalty, subMatrix); 086 setAnchors(anchors); 087 } 088 089 /** 090 * Returns the list of anchors. The populated elements correspond to query compounds with a connection established 091 * to a target compound. 092 * 093 * @return the list of anchors 094 */ 095 public int[] getAnchors() { 096 int[] anchor = new int[getScoreMatrixDimensions()[0] - 1]; 097 for (int i = 0; i < anchor.length; i++) { 098 anchor[i] = -1; 099 } 100 for (int i = 0; i < anchors.size(); i++) { 101 anchor[anchors.get(i).getQueryIndex()] = anchors.get(i).getTargetIndex(); 102 } 103 return anchor; 104 } 105 106 /** 107 * Sets the starting list of anchors before running the alignment routine. 108 * 109 * @param anchors list of points that are tied to the given indices in the target 110 */ 111 public void setAnchors(int[] anchors) { 112 super.anchors = new ArrayList<>(); 113 if (anchors != null) { 114 for (int i = 0; i < anchors.length; i++) { 115 if (anchors[i] >= 0) { 116 addAnchor(i, anchors[i]); 117 } 118 } 119 } 120 } 121 /** 122 * Adds an additional anchor to the set of anchored compounds 123 * @param queryIndex 0-based index of query sequence compound 124 * @param targetIndex 0-base index of target sequence compound to anchor to 125 */ 126 public void addAnchor(int queryIndex, int targetIndex) { 127 anchors.add(new Anchor(queryIndex, targetIndex)); 128 } 129 130 // method for AbstractMatrixAligner 131 132 @Override 133 protected void setProfile(List<Step> sx, List<Step> sy) { 134 profile = pair = new SimpleSequencePair<>(getQuery(), getTarget(), sx, sy); 135 } 136 137}