001/* This class is based on the original FATCAT implementation by
002 * <pre>
003 * Yuzhen Ye & Adam Godzik (2003)
004 * Flexible structure alignment by chaining aligned fragment pairs allowing twists.
005 * Bioinformatics vol.19 suppl. 2. ii246-ii255.
006 * https://www.ncbi.nlm.nih.gov/pubmed/14534198
007 * </pre>
008 *
009 * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code.
010 *
011 * This code may be freely distributed and modified under the
012 * terms of the GNU Lesser General Public Licence.  This should
013 * be distributed with the code.  If you do not have a copy,
014 * see:
015 *
016 *      http://www.gnu.org/copyleft/lesser.html
017 *
018 * Copyright for this code is held jointly by the individual
019 * authors.  These should be listed in @author doc comments.
020 *
021 *
022 * Created on Jun 17, 2009
023 * Created by Andreas Prlic - RCSB PDB
024 *
025 */
026
027package org.biojava.nbio.structure.align.fatcat.calc;
028
029public class FCAlignHelper
030{
031
032        private int     M; //length of protein 1
033        private int     N; //length of protein 2
034        private double  g; //gap-create
035        private double  h; //gap-extend
036        private double  m; //g + h
037        private double[][]  sij;
038        private char[][]    trace; //trace-record
039        private char[][]    etrace; //trace-record
040        private char[][]    dtrace; //trace-record
041        private int     B1; //beginning position of protein 1 in alignment
042        private int     B2; //beginning position of protein 2 in alignment
043        private int     E1; //end position of protein 1 in alignment
044        private int     E2; //end position of protein 2 in alignment
045        private double  alignScore;
046        private double  identity;
047        private double  similarity;
048        private int[]   sapp;
049        private int[]     sapp0;
050        private int sappPos;
051        private int     last;
052
053        private char[]    seq1;
054        private char[]    seq2;
055        private char[]    aln1;
056        private char[]    aln2;
057        private char[]    mark;
058
059        /**
060         * do an alignment given the provided matrix sij0
061         *
062         * @param sij0 - the matrix to perform the calculations on.
063         * @param M0
064         * @param N0
065         * @param g0
066         * @param h0
067         */
068        public FCAlignHelper(double[][] sij0, int M0, int N0, double g0, double h0){
069                init(M0, N0, g0, h0);
070                int     i, j;
071                for(i = 0; i < M; i ++) {
072                        for(j = 0; j < N; j ++){
073                                sij[i][j] = sij0[i][j];
074                                //System.out.println(i+"-"+j+":" +sij[i][j]);
075                        }
076                }
077
078                doAlign();
079        }
080
081
082        private void init(int M0, int N0, double g0, double h0)
083        {
084                M = M0;
085                N = N0;
086                g = g0; //gap-create
087                h = h0; //gap-extend
088                m = g + h; //gap-create + gap-extend
089                trace  = new char[M+1][N+1];
090                etrace = new char[M+1][N+1];
091                dtrace = new char[M+1][N+1];
092                B1 = B2 = E1 = E2 = 0;
093                alignScore = 0;
094                last = 0;
095                sapp = new int[M+N];
096                sapp0 = sapp;
097                sappPos = 0;
098                sij = new double[M][N];
099                seq1 = new char[M+1];
100                seq2 = new char[N+1];
101                aln1 = aln2 = mark = null;
102                identity = similarity = 0;
103        }
104
105        //trace-back strategy
106        //affine-gap penalty
107        //local-model
108
109        private void doAlign(){
110
111
112                int     i, j;
113                double  s, e, c, d, wa;
114                double[]  CC = new double[N+1]; //note N + 1
115                double[]  DD = new double[N+1];
116                double  maxs = -100;
117                char    trace_e, trace_d;
118
119                //forward-phase
120                CC[0] = 0;
121                for(j = 1; j <= N; j ++)        {
122                        CC[j] = 0;
123                        DD[j] = -g;
124                } //local-alignment, no terminal penalty
125                for(i = 1; i <= M; i ++)        {
126                        CC[0] = c = s = 0;
127                        e = -g;
128                        for(j = 1; j <= N; j ++)        {
129                                trace_e = 'e';
130                                if ((c =   c   - m) > (e =   e   - h)) {
131                                        e = c;  trace_e = 'E';
132                                }//insertion
133                                trace_d = 'd';
134                                if ((c = CC[j] - m) > (d = DD[j] - h)) {
135                                        d = c;  trace_d = 'D';
136                                }//deletion
137                                //ie   CC[j]==CC[i-1][j]   DD[j]==DD[i-1][j]
138                                wa = sij[i - 1][j - 1]; //note i - 1, j - 1
139                                c = s + wa; //s==CC[i-1][j-1]
140                                trace[i][j] = 's';
141                                if (e > c) {
142                                        c = e;
143                                        trace[i][j] = trace_e;
144                                }
145                                if (d > c) {
146                                        c = d;
147                                        trace[i][j] = trace_d;
148                                }
149                                etrace[i][j] = trace_e;
150                                dtrace[i][j] = trace_d;
151                                s = CC[j]; //important for next replace
152                                CC[j] = c; //CC[i][j]
153                                DD[j] = d; //DD[i][j]
154                                if(c < 0)       {
155                                        CC[j] = 0;
156                                        DD[j] = -g;
157                                        c = 0;
158                                        e = -g;
159                                        trace[i][j] = '0';
160                                } //local-N
161                                if(c > maxs)    {
162                                        E1 = i;
163                                        E2 = j;
164                                        maxs = c;
165                                } //local-C
166                        }
167                }
168                alignScore = maxs;
169                //printf("alignment score %f\n", alignScore);
170
171
172                //trace-back
173                if(trace[E1][E2] != 's')        {
174                        throw new RuntimeException("FCAlignHelper encoutered Exception: Not ending with substitution");
175
176                }
177                //Trace(maxs, E1, E2);
178                trace('s', E1, E2);
179                //printf("B1 %d B2 %d, E1 %d E2 %d\n", B1, B2, E1, E2);
180
181                //check-alignment
182                checkAlign();
183
184
185        }
186
187
188        /**
189         * trace-back, recorded in sapp, wrong method!
190         */
191
192        private void trace(char mod, int i, int j)
193        {
194                if(mod == '0' || i <= 0 || j <= 0)      {
195                        B1 = i + 1;
196                        B2 = j + 1;
197                }
198                if(mod == 's')  {
199                        trace(trace[i - 1][j - 1], i - 1, j - 1);
200                        rep();
201                }
202                else if(mod == 'D')     {
203                        trace(trace[i - 1][j], i - 1, j);
204                        del(1);
205                }
206                else if(mod == 'd')     {
207                        trace(dtrace[i - 1][j], i - 1, j);
208                        del(1);
209                }
210                else if(mod == 'E')     {
211                        trace(trace[i][j - 1], i, j - 1);
212                        ins(1);
213                }
214                else if(mod == 'e')     {
215                        trace(etrace[i][j - 1], i, j - 1);
216                        ins(1);
217                }
218        }
219
220        //-----------------------------------------------------------------------------
221        //record the alignment in sapp
222        //deletion, sapp < 0, sequences in i, gaps in j
223        //-----------------------------------------------------------------------------
224        private void del(int k)
225        {
226                //if(last < 0)    last = sapp[-1] -= (k);
227                //else            last = *sapp++ = -(k);
228
229                if(last < 0)    last = sapp[sappPos-1]   -=  (k);
230                else            last = sapp[(sappPos++)]  = -(k);
231        }
232
233        //Insertion, sapp > 0, gaps in i, sequences in j
234        //-----------------------------------------------------------------------------
235        private void ins(int k)
236        {
237
238                //if(last > 0)    last = sapp[-1] += k;
239                //else            last = *sapp++ = (k);
240                if(last > 0)    last = sapp[sappPos-1]   +=  k;
241                else            last = sapp[(sappPos++)]  = (k);
242        }
243
244        //-----------------------------------------------------------------------------
245        private void rep()
246        {
247
248                // last = *sapp++ = 0;
249                last = sapp[(sappPos++)] = 0;
250        }
251
252        private void checkAlign(){
253
254                if(sapp[0] != 0)        {
255                        System.err.printf("warn: not a local-alignment result, first operation %d%n%n", sapp[0]);
256                }
257                double  sco = checkScore();
258                if(Math.abs(sco - alignScore) > 1e-3)       {
259                        System.err.printf("FCAlignHelper: warn: alignment scores are different %f(check) %f(align)%n%n", sco, alignScore);
260                }
261        }
262
263        /**
264         *  checkscore - return the score of the alignment stored in sapp
265         */
266
267        private double checkScore()
268        {
269                int     i, j, op, s;
270                double  sco;
271
272                sco = 0;
273                op = 0;
274                s = 0;
275
276                i = B1;
277                j = B2;
278                while (i <= E1 && j <= E2) {
279                        op = sapp0[s ++];
280                        if (op == 0)    {
281                                sco += sij[i - 1][j - 1];
282                                //if (debug)
283                                        //System.err.println(String.format("%d-%d %f\n", i - 1, j - 1, sij[i - 1][j - 1]));
284                                i ++;
285                                j ++;
286                        }
287                        else if (op > 0) {
288                                sco -= g+op*h;
289                                j = j+op;
290                        }
291                        else {
292                                sco -= g-op*h;
293                                i = i-op;
294                        }
295                }
296                return(sco);
297        }
298
299        /**
300         * record the aligned pairs in alignList[][0], alignList[][1];
301         * return the number of aligned pairs
302         * @param alignList
303         * @return the number of aligned pairs
304         */
305        public int getAlignPos(int[][] alignList)
306        {
307                int     i = B1;
308                int     j = B2;
309                int     s = 0;
310                int     a = 0;
311                int     op;
312                while(i <= E1 && j <= E2)       {
313                        op = sapp0[s ++];
314
315                        if (op == 0)    {
316                                alignList[0][a] = i - 1; //i - 1
317                                alignList[1][a] = j - 1;
318                                a ++;
319                                i ++;
320                                j ++;
321                        }
322                        else if (op > 0) {
323                                j += op;
324                        }
325                        else {
326                                i -= op;
327                        }
328                }
329                return a;
330        }
331
332}