001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on July 2, 2010
021 * Author: Mark Chapman
022 */
023
024package org.biojava.nbio.alignment.template;
025
026import org.biojava.nbio.core.alignment.template.Profile;
027import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
028import org.biojava.nbio.alignment.routines.AlignerHelper.Anchor;
029import org.biojava.nbio.alignment.routines.AlignerHelper.Last;
030import org.biojava.nbio.alignment.routines.AlignerHelper.Subproblem;
031import org.biojava.nbio.core.alignment.template.AlignedSequence.Step;
032import org.biojava.nbio.core.sequence.template.Compound;
033import org.biojava.nbio.core.sequence.template.CompoundSet;
034import org.biojava.nbio.core.sequence.template.Sequence;
035
036import java.util.ArrayList;
037import java.util.Arrays;
038import java.util.List;
039
040import static org.biojava.nbio.alignment.routines.AlignerHelper.setScoreVector;
041import static org.biojava.nbio.alignment.routines.AlignerHelper.setSteps;
042
043/**
044 * Implements common code for an {@link Aligner} which builds a score matrix during computation.
045 *
046 * @author Mark Chapman
047 * @author Daniel Cameron
048 * @param <S> each element of the alignment {@link Profile} is of type S
049 * @param <C> each element of a sequence is a {@link Compound} of type C
050 */
051public abstract class AbstractMatrixAligner<S extends Sequence<C>, C extends Compound> extends AbstractScorer
052                implements MatrixAligner<S, C> {
053
054        // input fields
055        protected GapPenalty gapPenalty;
056        private SubstitutionMatrix<C> subMatrix;
057        private boolean local, storingScoreMatrix;
058        protected List<Anchor> anchors = new ArrayList<>();
059        protected int cutsPerSection;
060
061        // output fields
062        protected Profile<S, C> profile;
063        /**
064         * Start position of the aligned sequence in the query and target respectively
065         */
066        protected int[] xyStart;
067        /**
068         * End position of the aligned sequence in the query and target respectively
069         */
070        protected int[] xyMax;
071        protected int max, min, score;
072        /**
073         * Dynamic programming score matrix
074         * The first dimension has the length of the first (query) sequence + 1
075         * The second has the length of the second (target) sequence + 1
076         * The third has length 1 for linear gap penalty and 3 for affine/constant gap
077         * (one each for match/substitution, deletion, insertion)
078         */
079        protected int[][][] scores;
080        /**
081         * Friendly name of each copy of the scoring matrix.
082         * The number of elements must match the number of elements in third dimension of @see scores
083         */
084        private String[] types;
085        protected long time = -1;
086
087        /**
088         * Before running an alignment, data must be sent in via calls to {@link #setGapPenalty(GapPenalty)} and
089         * {@link #setSubstitutionMatrix(SubstitutionMatrix)}.
090         */
091        protected AbstractMatrixAligner() {
092        }
093
094        /**
095         * Prepares for an alignment.
096         *
097         * @param gapPenalty the gap penalties used during alignment
098         * @param subMatrix the set of substitution scores used during alignment
099         */
100        protected AbstractMatrixAligner(GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
101                this(gapPenalty, subMatrix, false);
102        }
103
104        /**
105         * Prepares for an alignment.
106         *
107         * @param gapPenalty the gap penalties used during alignment
108         * @param subMatrix the set of substitution scores used during alignment
109         * @param local if true, find a region of similarity rather than aligning every compound
110         */
111        protected AbstractMatrixAligner(GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix, boolean local) {
112                this.gapPenalty = gapPenalty;
113                this.subMatrix = subMatrix;
114                this.local = local;
115                reset();
116        }
117
118        /**
119         * Returns the gap penalties.
120         *
121         * @return the gap penalties used during alignment
122         */
123        public GapPenalty getGapPenalty() {
124                return gapPenalty;
125        }
126
127        /**
128         * Returns the substitution matrix.
129         *
130         * @return the set of substitution scores used during alignment
131         */
132        public SubstitutionMatrix<C> getSubstitutionMatrix() {
133                return subMatrix;
134        }
135
136        /**
137         * Returns whether alignment finds a region of similarity rather than aligning every compound.
138         *
139         * @return true if alignment finds a region of similarity rather than aligning every compound
140         */
141        public boolean isLocal() {
142                return local;
143        }
144
145        /**
146         * Returns choice to cache the score matrix or to save memory by deleting score matrix after alignment.
147         *
148         * @return choice to cache the score matrix
149         */
150        public boolean isStoringScoreMatrix() {
151                return storingScoreMatrix;
152        }
153
154        /**
155         * Sets the gap penalties.
156         *
157         * @param gapPenalty the gap penalties used during alignment
158         */
159        public void setGapPenalty(GapPenalty gapPenalty) {
160                this.gapPenalty = gapPenalty;
161                reset();
162        }
163
164        /**
165         * Sets the substitution matrix.
166         *
167         * @param subMatrix the set of substitution scores used during alignment
168         */
169        public void setSubstitutionMatrix(SubstitutionMatrix<C> subMatrix) {
170                this.subMatrix = subMatrix;
171                reset();
172        }
173
174        /**
175         * Sets choice to cache the score matrix or to save memory by deleting score matrix after alignment.
176         *
177         * @param storingScoreMatrix choice to cache the score matrix
178         */
179        public void setStoringScoreMatrix(boolean storingScoreMatrix) {
180                this.storingScoreMatrix = storingScoreMatrix;
181                if (!storingScoreMatrix) {
182                        scores = null;
183                }
184        }
185
186        // methods for MatrixAligner
187
188        @Override
189        public int[][][] getScoreMatrix() {
190                boolean tempStoringScoreMatrix = storingScoreMatrix;
191                if (scores == null) {
192                        storingScoreMatrix = true;
193                        align();
194                        if (scores == null) {
195                                return null;
196                        }
197                }
198                int[][][] copy = scores;
199                if (tempStoringScoreMatrix) {
200                        copy = new int[scores.length][scores[0].length][];
201                        for (int i = 0; i < copy.length; i++) {
202                                for (int j = 0; j < copy[0].length; j++) {
203                                        copy[i][j] = Arrays.copyOf(scores[i][j], scores[i][j].length);
204                                }
205                        }
206                }
207                setStoringScoreMatrix(tempStoringScoreMatrix);
208                return copy;
209        }
210
211        @Override
212        public String getScoreMatrixAsString() {
213                int[][][] scores = getScoreMatrix();
214                return scoreMatrixToString(scores);
215        }
216        private String scoreMatrixToString(int[][][] scores) {
217                StringBuilder s = new StringBuilder();
218                CompoundSet<C> compoundSet = getCompoundSet();
219                int lengthCompound = compoundSet.getMaxSingleCompoundStringLength(), lengthRest =
220                                Math.max(Math.max(Integer.toString(min).length(), Integer.toString(max).length()), lengthCompound) + 1;
221                String padCompound = "%" + Integer.toString(lengthCompound) + "s",
222                                padRest = "%" + Integer.toString(lengthRest);
223                List<C> query = getCompoundsOfQuery(), target = getCompoundsOfTarget();
224                for (int type = 0; type < scores[0][0].length; type++) {
225                        if (type > 0) {
226                                s.append(String.format("%n"));
227                        }
228                        if (types[type] != null) {
229                                s.append(String.format("%s%n", types[type]));
230                        }
231                        s.append(String.format(padCompound, ""));
232                        s.append(String.format(padRest + "s", ""));
233                        for (C col : target) {
234                                s.append(String.format(padRest + "s", compoundSet.getStringForCompound(col)));
235                        }
236                        s.append(String.format("%n"));
237                        for (int x = 0; x < scores.length; x++) {
238                                s.append(String.format(padCompound, (x == 0) ? "" :
239                                        compoundSet.getStringForCompound(query.get(x - 1))));
240                                for (int y = 0; y < scores[0].length; y++) {
241                                        s.append(scores[x][y][type] >= min ? String.format(padRest + "d", scores[x][y][type]) :
242                                                        String.format(padRest + "s", "-\u221E"));
243                                }
244                                s.append(String.format("%n"));
245                        }
246                }
247                return s.toString();
248        }
249
250        // methods for Aligner
251        @Override
252        public long getComputationTime() {
253                if (profile == null) {
254                        align();
255                }
256                return time;
257        }
258
259        @Override
260        public Profile<S, C> getProfile() {
261                if (profile == null) {
262                        align();
263                }
264                return profile;
265        }
266
267        // methods for Scorer
268
269        @Override
270        public double getMaxScore() {
271                if (profile == null) {
272                        align();
273                }
274                return max;
275        }
276
277        @Override
278        public double getMinScore() {
279                if (profile == null) {
280                        align();
281                }
282                return min;
283        }
284
285        @Override
286        public double getScore() {
287                if (profile == null) {
288                        align();
289                }
290                return score;
291        }
292
293        // helper methods
294
295        /**
296         * Performs alignment
297         */
298        protected void align() {
299                if (!isReady()) {
300                        return;
301                }
302
303                long timeStart = System.nanoTime();
304
305                int[] dim = getScoreMatrixDimensions();
306                if (storingScoreMatrix) {
307                        scores = new int[dim[0]][dim[1]][dim[2]];
308                } else {
309                        scores = new int[dim[0]][][];
310                        scores[0] = new int[dim[1]][dim[2]];
311                        scores[1] = new int[dim[1]][dim[2]];
312                }
313                boolean linear = (gapPenalty.getType() == GapPenalty.Type.LINEAR);
314                Last[][][] traceback = new Last[dim[0]][][];
315                List<Step> sx = new ArrayList<>(), sy = new ArrayList<>();
316
317                if (!local) {
318                        xyMax = new int[] { dim[0] - 1, dim[1] - 1 };
319                        xyStart = new int[] { 0, 0 };
320                        score = 0;
321                        List<Subproblem> problems = Subproblem.getSubproblems(anchors, xyMax[0], xyMax[1]);
322                        assert problems.size() == anchors.size() + 1;
323                        for (int i = 0; i < problems.size(); i++) {
324                                Subproblem subproblem = problems.get(i);
325                                for (int x = subproblem.getQueryStartIndex(); x <= subproblem.getQueryEndIndex(); x++) {
326
327                                        traceback[x] =
328
329                                          linear ?
330                                                setScoreVector(x, subproblem, gapPenalty.getExtensionPenalty(), getSubstitutionScoreVector(x, subproblem), storingScoreMatrix, scores) :
331
332                                                setScoreVector(x, subproblem, gapPenalty.getOpenPenalty(), gapPenalty.getExtensionPenalty(), getSubstitutionScoreVector(x, subproblem), storingScoreMatrix, scores);
333                                }
334                        }
335                        setSteps(traceback, scores, sx, sy);
336                        score = Integer.MIN_VALUE;
337                        int[] finalScore = scores[xyMax[0]][xyMax[1]];
338                        for (int z = 0; z < finalScore.length; z++) {
339                                score = Math.max(score, finalScore[z]);
340                        }
341                } else {
342                        for (int x = 0; x < dim[0]; x++) {
343
344                                traceback[x] =
345
346                                  linear ?
347                                        setScoreVector(x, gapPenalty.getExtensionPenalty(), getSubstitutionScoreVector(x), storingScoreMatrix, scores, xyMax, score) :
348
349                                        setScoreVector(x, gapPenalty.getOpenPenalty(), gapPenalty.getExtensionPenalty(), getSubstitutionScoreVector(x), storingScoreMatrix, scores, xyMax, score);
350
351                                if (xyMax[0] == x) {
352                                        score = scores[x][xyMax[1]][0];
353                                }
354                        }
355                        xyStart = local ? setSteps(traceback, xyMax, sx, sy) : setSteps(traceback, scores, sx, sy);
356                }
357
358                setProfile(sx, sy);
359
360                if (!storingScoreMatrix) {
361                        scores = null;
362                }
363
364                time = System.nanoTime() - timeStart;
365        }
366
367        /**
368         * Returns score for the alignment of the query column to all target columns
369         * @param queryColumn
370         * @return
371         */
372        protected int[] getSubstitutionScoreVector(int queryColumn) {
373                return getSubstitutionScoreVector(queryColumn, new Subproblem(0, 0, scores.length - 1, scores[0].length - 1));
374        }
375
376        /**
377         * Returns score for the alignment of the query column to all target columns
378         * @param queryColumn
379         * @param subproblem
380         * @return
381         */
382        protected int[] getSubstitutionScoreVector(int queryColumn, Subproblem subproblem) {
383                int[] subs = new int[subproblem.getTargetEndIndex() + 1];
384                if (queryColumn > 0) {
385                        for (int y = Math.max(1, subproblem.getTargetStartIndex()); y <= subproblem.getTargetEndIndex(); y++) {
386                                subs[y] = getSubstitutionScore(queryColumn, y);
387                        }
388                }
389                return subs;
390        }
391
392        /**
393         * Resets output fields; should be overridden to set max and min
394         */
395        protected void reset() {
396                xyMax = new int[] {0, 0};
397                xyStart = new int[] {0, 0};
398                scores = null;
399                types = (gapPenalty == null || gapPenalty.getType() == GapPenalty.Type.LINEAR) ? new String[] { null } :
400                                new String[] { "Substitution", "Deletion", "Insertion" };
401                time = -1;
402                profile = null;
403        }
404        // abstract methods
405
406        // returns compound set of sequences
407        protected abstract CompoundSet<C> getCompoundSet();
408
409        // returns compounds in query sequence/profile
410        protected abstract List<C> getCompoundsOfQuery();
411
412        // returns compounds in target sequence/profile
413        protected abstract List<C> getCompoundsOfTarget();
414
415        // returns the 3 score matrix dimensions
416        protected abstract int[] getScoreMatrixDimensions();
417
418        // returns score for the alignment of two columns
419        protected abstract int getSubstitutionScore(int queryColumn, int targetColumn);
420
421        // prepares for alignment; returns true if everything is set to run the alignment
422        protected abstract boolean isReady();
423
424        // sets profile following the given alignment path
425        protected abstract void setProfile(List<Step> sx, List<Step> sy);
426
427}