001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on August 11, 2010 021 * Author: Mark Chapman 022 */ 023 024package org.biojava.nbio.alignment.routines; 025 026import org.biojava.nbio.core.alignment.SimpleSequencePair; 027import org.biojava.nbio.alignment.routines.AlignerHelper.Anchor; 028import org.biojava.nbio.alignment.template.AbstractPairwiseSequenceAligner; 029import org.biojava.nbio.core.alignment.template.AlignedSequence; 030import org.biojava.nbio.core.alignment.template.AlignedSequence.Step; 031import org.biojava.nbio.alignment.template.GapPenalty; 032import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 033import org.biojava.nbio.core.sequence.template.Compound; 034import org.biojava.nbio.core.sequence.template.Sequence; 035 036import java.util.ArrayList; 037import java.util.List; 038 039/** 040 * This algorithm uses a divide-and-conquer approach to find optimal pairwise global sequence alignments (from the 041 * first until the last {@link Compound} of each {@link Sequence}) with the restriction that any alignment produced 042 * will connect the query sequence to the target sequence at the <em>anchors</em>. This class performs such global 043 * sequence comparisons efficiently by dynamic programming with a space requirement reduced from quadratic (a multiple 044 * of query sequence length times target sequence length) to only linear (a multiple of query sequence length). The 045 * counterpoint to this reduction in space complexity is a modest (a multiple < 2) increase in time. 046 * 047 * @author Mark Chapman 048 * @author Daniel Cameron 049 * @param <S> each {@link Sequence} of the alignment pair is of type S 050 * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C 051 */ 052public class AnchoredPairwiseSequenceAligner<S extends Sequence<C>, C extends Compound> extends 053 AbstractPairwiseSequenceAligner<S, C> { 054 055 /** 056 * Before running a pairwise global sequence alignment, data must be sent in via calls to 057 * {@link #setQuery(Sequence)}, {@link #setTarget(Sequence)}, {@link #setGapPenalty(GapPenalty)}, and 058 * {@link #setSubstitutionMatrix(SubstitutionMatrix)}. 059 */ 060 public AnchoredPairwiseSequenceAligner() { 061 } 062 063 /** 064 * Prepares for a pairwise global sequence alignment. 065 * 066 * @param query the first {@link Sequence} of the pair to align 067 * @param target the second {@link Sequence} of the pair to align 068 * @param gapPenalty the gap penalties used during alignment 069 * @param subMatrix the set of substitution scores used during alignment 070 * @param cutsPerSection the number of cuts added to each section during each pass 071 */ 072 public AnchoredPairwiseSequenceAligner(S query, S target, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) { 073 this(query, target, gapPenalty, subMatrix, null); 074 } 075 076 /** 077 * Prepares for a pairwise global sequence alignment. 078 * 079 * @param query the first {@link Sequence} of the pair to align 080 * @param target the second {@link Sequence} of the pair to align 081 * @param gapPenalty the gap penalties used during alignment 082 * @param subMatrix the set of substitution scores used during alignment 083 * @param cutsPerSection the number of cuts added to each section during each pass 084 * @param anchors the initial list of anchors 085 */ 086 public AnchoredPairwiseSequenceAligner(S query, S target, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix, int[] anchors) { 087 super(query, target, gapPenalty, subMatrix); 088 setAnchors(anchors); 089 } 090 091 /** 092 * Returns the list of anchors. The populated elements correspond to query compounds with a connection established 093 * to a target compound. 094 * 095 * @return the list of anchors 096 */ 097 public int[] getAnchors() { 098 int[] anchor = new int[getScoreMatrixDimensions()[0] - 1]; 099 for (int i = 0; i < anchor.length; i++) { 100 anchor[i] = -1; 101 } 102 for (int i = 0; i < anchors.size(); i++) { 103 anchor[anchors.get(i).getQueryIndex()] = anchors.get(i).getTargetIndex(); 104 } 105 return anchor; 106 } 107 108 /** 109 * Sets the starting list of anchors before running the alignment routine. 110 * 111 * @param anchors list of points that are tied to the given indices in the target 112 */ 113 public void setAnchors(int[] anchors) { 114 super.anchors = new ArrayList<Anchor>(); 115 if (anchors != null) { 116 for (int i = 0; i < anchors.length; i++) { 117 if (anchors[i] >= 0) { 118 addAnchor(i, anchors[i]); 119 } 120 } 121 } 122 } 123 /** 124 * Adds an additional anchor to the set of anchored compounds 125 * @param queryIndex 0-based index of query sequence compound 126 * @param targetIndex 0-base index of target sequence compound to anchor to 127 */ 128 public void addAnchor(int queryIndex, int targetIndex) { 129 anchors.add(new Anchor(queryIndex, targetIndex)); 130 } 131 132 // method for AbstractMatrixAligner 133 134 @Override 135 protected void setProfile(List<Step> sx, List<Step> sy) { 136 profile = pair = new SimpleSequencePair<S, C>(getQuery(), getTarget(), sx, sy); 137 } 138 139}