001/* This class is based on the original FATCAT implementation by
002 * <pre>
003 * Yuzhen Ye & Adam Godzik (2003)
004 * Flexible structure alignment by chaining aligned fragment pairs allowing twists.
005 * Bioinformatics vol.19 suppl. 2. ii246-ii255.
006 * https://www.ncbi.nlm.nih.gov/pubmed/14534198
007 * </pre>
008 *
009 * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code.
010 *
011 * This code may be freely distributed and modified under the
012 * terms of the GNU Lesser General Public Licence.  This should
013 * be distributed with the code.  If you do not have a copy,
014 * see:
015 *
016 *      http://www.gnu.org/copyleft/lesser.html
017 *
018 * Copyright for this code is held jointly by the individual
019 * authors.  These should be listed in @author doc comments.
020 *
021 *
022 * Created on Jun 17, 2009
023 * Created by Andreas Prlic - RCSB PDB
024 *
025 */
026
027package org.biojava.nbio.structure.align.fatcat.calc;
028
029public class FCAlignHelper
030{
031
032        int     M; //length of protein 1
033        int     N; //length of protein 2
034        double  g; //gap-create
035        double  h; //gap-extend
036        double  m; //g + h
037        double[][]  sij;
038        char[][]    trace; //trace-record
039        char[][]    etrace; //trace-record
040        char[][]    dtrace; //trace-record
041        int     B1; //beginning position of protein 1 in alignment
042        int     B2; //beginning position of protein 2 in alignment
043        int     E1; //end position of protein 1 in alignment
044        int     E2; //end position of protein 2 in alignment
045        double  alignScore;
046        double  identity;
047        double  similarity;
048        int[]   sapp;
049        int[]     sapp0;
050        int sappPos;
051        int     last;
052
053        char[]    seq1;
054        char[]    seq2;
055        char[]    aln1;
056        char[]    aln2;
057        char[]    mark;
058
059        /** do an alignment given the provided matrix sij0
060         *
061         * @param sij0 - the matrix to perform the calculations on.
062         * @param M0
063         * @param N0
064         * @param g0
065         * @param h0
066         */
067        public FCAlignHelper(double[][] sij0, int M0, int N0, double g0, double h0){
068                init(M0, N0, g0, h0);
069                int     i, j;
070                for(i = 0; i < M; i ++) {
071                        for(j = 0; j < N; j ++){
072                                sij[i][j] = sij0[i][j];
073                                //System.out.println(i+"-"+j+":" +sij[i][j]);
074                        }
075                }
076
077                doAlign();
078        }
079
080
081        private void init(int M0, int N0, double g0, double h0)
082        {
083                M = M0;
084                N = N0;
085                g = g0; //gap-create
086                h = h0; //gap-extend
087                m = g + h; //gap-create + gap-extend
088                trace  = new char[M+1][N+1];
089                etrace = new char[M+1][N+1];
090                dtrace = new char[M+1][N+1];
091                B1 = B2 = E1 = E2 = 0;
092                alignScore = 0;
093                last = 0;
094                sapp = new int[M+N];
095                sapp0 = sapp;
096                sappPos = 0;
097                sij = new double[M][N];
098                seq1 = new char[M+1];
099                seq2 = new char[N+1];
100                aln1 = aln2 = mark = null;
101                identity = similarity = 0;
102        }
103
104        //trace-back strategy
105        //affine-gap penalty
106        //local-model
107
108        private void doAlign(){
109
110
111                int     i, j;
112                double  s, e, c, d, wa;
113                double[]  CC = new double[N+1]; //note N + 1
114                double[]  DD = new double[N+1];
115                double  maxs = -100;
116                char    trace_e, trace_d;
117
118                //forward-phase
119                CC[0] = 0;
120                for(j = 1; j <= N; j ++)        {
121                        CC[j] = 0;
122                        DD[j] = -g;
123                } //local-alignment, no terminal penalty
124                for(i = 1; i <= M; i ++)        {
125                        CC[0] = c = s = 0;
126                        e = -g;
127                        for(j = 1; j <= N; j ++)        {
128                                trace_e = 'e';
129                                if ((c =   c   - m) > (e =   e   - h)) {
130                                        e = c;  trace_e = 'E';
131                                }//insertion
132                                trace_d = 'd';
133                                if ((c = CC[j] - m) > (d = DD[j] - h)) {
134                                        d = c;  trace_d = 'D';
135                                }//deletion
136                                //ie   CC[j]==CC[i-1][j]   DD[j]==DD[i-1][j]
137                                wa = sij[i - 1][j - 1]; //note i - 1, j - 1
138                                c = s + wa; //s==CC[i-1][j-1]
139                                trace[i][j] = 's';
140                                if (e > c) {
141                                        c = e;
142                                        trace[i][j] = trace_e;
143                                }
144                                if (d > c) {
145                                        c = d;
146                                        trace[i][j] = trace_d;
147                                }
148                                etrace[i][j] = trace_e;
149                                dtrace[i][j] = trace_d;
150                                s = CC[j]; //important for next replace
151                                CC[j] = c; //CC[i][j]
152                                DD[j] = d; //DD[i][j]
153                                if(c < 0)       {
154                                        CC[j] = 0;
155                                        DD[j] = -g;
156                                        c = 0;
157                                        e = -g;
158                                        trace[i][j] = '0';
159                                } //local-N
160                                if(c > maxs)    {
161                                        E1 = i;
162                                        E2 = j;
163                                        maxs = c;
164                                } //local-C
165                        }
166                }
167                alignScore = maxs;
168                //printf("alignment score %f\n", alignScore);
169
170
171                //trace-back
172                if(trace[E1][E2] != 's')        {
173                        throw new RuntimeException("FCAlignHelper encoutered Exception: Not ending with substitution");
174
175                }
176                //Trace(maxs, E1, E2);
177                trace('s', E1, E2);
178                //printf("B1 %d B2 %d, E1 %d E2 %d\n", B1, B2, E1, E2);
179
180                //check-alignment
181                checkAlign();
182
183
184        }
185
186
187        /**
188         * trace-back, recorded in sapp, wrong method!
189         */
190
191        private void trace(char mod, int i, int j)
192        {
193                if(mod == '0' || i <= 0 || j <= 0)      {
194                        B1 = i + 1;
195                        B2 = j + 1;
196                }
197                if(mod == 's')  {
198                        trace(trace[i - 1][j - 1], i - 1, j - 1);
199                        rep();
200                }
201                else if(mod == 'D')     {
202                        trace(trace[i - 1][j], i - 1, j);
203                        del(1);
204                }
205                else if(mod == 'd')     {
206                        trace(dtrace[i - 1][j], i - 1, j);
207                        del(1);
208                }
209                else if(mod == 'E')     {
210                        trace(trace[i][j - 1], i, j - 1);
211                        ins(1);
212                }
213                else if(mod == 'e')     {
214                        trace(etrace[i][j - 1], i, j - 1);
215                        ins(1);
216                }
217        }
218
219        //-----------------------------------------------------------------------------
220        //record the alignment in sapp
221        //deletion, sapp < 0, sequences in i, gaps in j
222        //-----------------------------------------------------------------------------
223        private void del(int k)
224        {
225                //if(last < 0)    last = sapp[-1] -= (k);
226                //else            last = *sapp++ = -(k);
227
228                if(last < 0)    last = sapp[sappPos-1]   -=  (k);
229                else            last = sapp[(sappPos++)]  = -(k);
230        }
231
232        //Insertion, sapp > 0, gaps in i, sequences in j
233        //-----------------------------------------------------------------------------
234        private void ins(int k)
235        {
236
237                //if(last > 0)    last = sapp[-1] += k;
238                //else            last = *sapp++ = (k);
239                if(last > 0)    last = sapp[sappPos-1]   +=  k;
240                else            last = sapp[(sappPos++)]  = (k);
241        }
242
243        //-----------------------------------------------------------------------------
244        private void rep()
245        {
246
247                // last = *sapp++ = 0;
248                last = sapp[(sappPos++)] = 0;
249        }
250
251        private void checkAlign(){
252
253                if(sapp[0] != 0)        {
254                        System.err.println(String.format("warn: not a local-alignment result, first operation %d\n", sapp[0]));
255                }
256                double  sco = checkScore();
257                if(Math.abs(sco - alignScore) > 1e-3)       {
258                        System.err.println(String.format("FCAlignHelper: warn: alignment scores are different %f(check) %f(align)\n", sco, alignScore));
259                }
260        }
261
262        /**
263         *  checkscore - return the score of the alignment stored in sapp
264         */
265
266        private double checkScore()
267        {
268                int     i, j, op, s;
269                double  sco;
270
271                sco = 0;
272                op = 0;
273                s = 0;
274
275                i = B1;
276                j = B2;
277                while (i <= E1 && j <= E2) {
278                        op = sapp0[s ++];
279                        if (op == 0)    {
280                                sco += sij[i - 1][j - 1];
281                                //if (debug)
282                                        //System.err.println(String.format("%d-%d %f\n", i - 1, j - 1, sij[i - 1][j - 1]));
283                                i ++;
284                                j ++;
285                        }
286                        else if (op > 0) {
287                                sco -= g+op*h;
288                                j = j+op;
289                        }
290                        else {
291                                sco -= g-op*h;
292                                i = i-op;
293                        }
294                }
295                return(sco);
296        }
297
298        /**
299         * record the aligned pairs in alignList[][0], alignList[][1];
300         * return the number of aligned pairs
301         * @param alignList
302         * @return the number of aligned pairs
303         */
304        public int getAlignPos(int[][] alignList)
305        {
306                int     i = B1;
307                int     j = B2;
308                int     s = 0;
309                int     a = 0;
310                int     op;
311                while(i <= E1 && j <= E2)       {
312                        op = sapp0[s ++];
313
314                        if (op == 0)    {
315                                alignList[0][a] = i - 1; //i - 1
316                                alignList[1][a] = j - 1;
317                                a ++;
318                                i ++;
319                                j ++;
320                        }
321                        else if (op > 0) {
322                                j += op;
323                        }
324                        else {
325                                i -= op;
326                        }
327                }
328                return a;
329        }
330
331}
332
333