001/* This class is based on the original FATCAT implementation by 002 * <pre> 003 * Yuzhen Ye & Adam Godzik (2003) 004 * Flexible structure alignment by chaining aligned fragment pairs allowing twists. 005 * Bioinformatics vol.19 suppl. 2. ii246-ii255. 006 * https://www.ncbi.nlm.nih.gov/pubmed/14534198 007 * </pre> 008 * 009 * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. 010 * 011 * This code may be freely distributed and modified under the 012 * terms of the GNU Lesser General Public Licence. This should 013 * be distributed with the code. If you do not have a copy, 014 * see: 015 * 016 * http://www.gnu.org/copyleft/lesser.html 017 * 018 * Copyright for this code is held jointly by the individual 019 * authors. These should be listed in @author doc comments. 020 * 021 * 022 * Created on Jun 17, 2009 023 * Created by Andreas Prlic - RCSB PDB 024 * 025 */ 026 027package org.biojava.nbio.structure.align.fatcat.calc; 028 029public class FCAlignHelper 030{ 031 032 int M; //length of protein 1 033 int N; //length of protein 2 034 double g; //gap-create 035 double h; //gap-extend 036 double m; //g + h 037 double[][] sij; 038 char[][] trace; //trace-record 039 char[][] etrace; //trace-record 040 char[][] dtrace; //trace-record 041 int B1; //beginning position of protein 1 in alignment 042 int B2; //beginning position of protein 2 in alignment 043 int E1; //end position of protein 1 in alignment 044 int E2; //end position of protein 2 in alignment 045 double alignScore; 046 double identity; 047 double similarity; 048 int[] sapp; 049 int[] sapp0; 050 int sappPos; 051 int last; 052 053 char[] seq1; 054 char[] seq2; 055 char[] aln1; 056 char[] aln2; 057 char[] mark; 058 059 /** do an alignment given the provided matrix sij0 060 * 061 * @param sij0 - the matrix to perform the calculations on. 062 * @param M0 063 * @param N0 064 * @param g0 065 * @param h0 066 */ 067 public FCAlignHelper(double[][] sij0, int M0, int N0, double g0, double h0){ 068 init(M0, N0, g0, h0); 069 int i, j; 070 for(i = 0; i < M; i ++) { 071 for(j = 0; j < N; j ++){ 072 sij[i][j] = sij0[i][j]; 073 //System.out.println(i+"-"+j+":" +sij[i][j]); 074 } 075 } 076 077 doAlign(); 078 } 079 080 081 private void init(int M0, int N0, double g0, double h0) 082 { 083 M = M0; 084 N = N0; 085 g = g0; //gap-create 086 h = h0; //gap-extend 087 m = g + h; //gap-create + gap-extend 088 trace = new char[M+1][N+1]; 089 etrace = new char[M+1][N+1]; 090 dtrace = new char[M+1][N+1]; 091 B1 = B2 = E1 = E2 = 0; 092 alignScore = 0; 093 last = 0; 094 sapp = new int[M+N]; 095 sapp0 = sapp; 096 sappPos = 0; 097 sij = new double[M][N]; 098 seq1 = new char[M+1]; 099 seq2 = new char[N+1]; 100 aln1 = aln2 = mark = null; 101 identity = similarity = 0; 102 } 103 104 //trace-back strategy 105 //affine-gap penalty 106 //local-model 107 108 private void doAlign(){ 109 110 111 int i, j; 112 double s, e, c, d, wa; 113 double[] CC = new double[N+1]; //note N + 1 114 double[] DD = new double[N+1]; 115 double maxs = -100; 116 char trace_e, trace_d; 117 118 //forward-phase 119 CC[0] = 0; 120 for(j = 1; j <= N; j ++) { 121 CC[j] = 0; 122 DD[j] = -g; 123 } //local-alignment, no terminal penalty 124 for(i = 1; i <= M; i ++) { 125 CC[0] = c = s = 0; 126 e = -g; 127 for(j = 1; j <= N; j ++) { 128 trace_e = 'e'; 129 if ((c = c - m) > (e = e - h)) { 130 e = c; trace_e = 'E'; 131 }//insertion 132 trace_d = 'd'; 133 if ((c = CC[j] - m) > (d = DD[j] - h)) { 134 d = c; trace_d = 'D'; 135 }//deletion 136 //ie CC[j]==CC[i-1][j] DD[j]==DD[i-1][j] 137 wa = sij[i - 1][j - 1]; //note i - 1, j - 1 138 c = s + wa; //s==CC[i-1][j-1] 139 trace[i][j] = 's'; 140 if (e > c) { 141 c = e; 142 trace[i][j] = trace_e; 143 } 144 if (d > c) { 145 c = d; 146 trace[i][j] = trace_d; 147 } 148 etrace[i][j] = trace_e; 149 dtrace[i][j] = trace_d; 150 s = CC[j]; //important for next replace 151 CC[j] = c; //CC[i][j] 152 DD[j] = d; //DD[i][j] 153 if(c < 0) { 154 CC[j] = 0; 155 DD[j] = -g; 156 c = 0; 157 e = -g; 158 trace[i][j] = '0'; 159 } //local-N 160 if(c > maxs) { 161 E1 = i; 162 E2 = j; 163 maxs = c; 164 } //local-C 165 } 166 } 167 alignScore = maxs; 168 //printf("alignment score %f\n", alignScore); 169 170 171 //trace-back 172 if(trace[E1][E2] != 's') { 173 throw new RuntimeException("FCAlignHelper encoutered Exception: Not ending with substitution"); 174 175 } 176 //Trace(maxs, E1, E2); 177 trace('s', E1, E2); 178 //printf("B1 %d B2 %d, E1 %d E2 %d\n", B1, B2, E1, E2); 179 180 //check-alignment 181 checkAlign(); 182 183 184 } 185 186 187 /** 188 * trace-back, recorded in sapp, wrong method! 189 */ 190 191 private void trace(char mod, int i, int j) 192 { 193 if(mod == '0' || i <= 0 || j <= 0) { 194 B1 = i + 1; 195 B2 = j + 1; 196 } 197 if(mod == 's') { 198 trace(trace[i - 1][j - 1], i - 1, j - 1); 199 rep(); 200 } 201 else if(mod == 'D') { 202 trace(trace[i - 1][j], i - 1, j); 203 del(1); 204 } 205 else if(mod == 'd') { 206 trace(dtrace[i - 1][j], i - 1, j); 207 del(1); 208 } 209 else if(mod == 'E') { 210 trace(trace[i][j - 1], i, j - 1); 211 ins(1); 212 } 213 else if(mod == 'e') { 214 trace(etrace[i][j - 1], i, j - 1); 215 ins(1); 216 } 217 } 218 219 //----------------------------------------------------------------------------- 220 //record the alignment in sapp 221 //deletion, sapp < 0, sequences in i, gaps in j 222 //----------------------------------------------------------------------------- 223 private void del(int k) 224 { 225 //if(last < 0) last = sapp[-1] -= (k); 226 //else last = *sapp++ = -(k); 227 228 if(last < 0) last = sapp[sappPos-1] -= (k); 229 else last = sapp[(sappPos++)] = -(k); 230 } 231 232 //Insertion, sapp > 0, gaps in i, sequences in j 233 //----------------------------------------------------------------------------- 234 private void ins(int k) 235 { 236 237 //if(last > 0) last = sapp[-1] += k; 238 //else last = *sapp++ = (k); 239 if(last > 0) last = sapp[sappPos-1] += k; 240 else last = sapp[(sappPos++)] = (k); 241 } 242 243 //----------------------------------------------------------------------------- 244 private void rep() 245 { 246 247 // last = *sapp++ = 0; 248 last = sapp[(sappPos++)] = 0; 249 } 250 251 private void checkAlign(){ 252 253 if(sapp[0] != 0) { 254 System.err.println(String.format("warn: not a local-alignment result, first operation %d\n", sapp[0])); 255 } 256 double sco = checkScore(); 257 if(Math.abs(sco - alignScore) > 1e-3) { 258 System.err.println(String.format("FCAlignHelper: warn: alignment scores are different %f(check) %f(align)\n", sco, alignScore)); 259 } 260 } 261 262 /** 263 * checkscore - return the score of the alignment stored in sapp 264 */ 265 266 private double checkScore() 267 { 268 int i, j, op, s; 269 double sco; 270 271 sco = 0; 272 op = 0; 273 s = 0; 274 275 i = B1; 276 j = B2; 277 while (i <= E1 && j <= E2) { 278 op = sapp0[s ++]; 279 if (op == 0) { 280 sco += sij[i - 1][j - 1]; 281 //if (debug) 282 //System.err.println(String.format("%d-%d %f\n", i - 1, j - 1, sij[i - 1][j - 1])); 283 i ++; 284 j ++; 285 } 286 else if (op > 0) { 287 sco -= g+op*h; 288 j = j+op; 289 } 290 else { 291 sco -= g-op*h; 292 i = i-op; 293 } 294 } 295 return(sco); 296 } 297 298 /** 299 * record the aligned pairs in alignList[][0], alignList[][1]; 300 * return the number of aligned pairs 301 * @param alignList 302 * @return the number of aligned pairs 303 */ 304 public int getAlignPos(int[][] alignList) 305 { 306 int i = B1; 307 int j = B2; 308 int s = 0; 309 int a = 0; 310 int op; 311 while(i <= E1 && j <= E2) { 312 op = sapp0[s ++]; 313 314 if (op == 0) { 315 alignList[0][a] = i - 1; //i - 1 316 alignList[1][a] = j - 1; 317 a ++; 318 i ++; 319 j ++; 320 } 321 else if (op > 0) { 322 j += op; 323 } 324 else { 325 i -= op; 326 } 327 } 328 return a; 329 } 330 331} 332 333