001/* This class is based on the original FATCAT implementation by 002 * <pre> 003 * Yuzhen Ye & Adam Godzik (2003) 004 * Flexible structure alignment by chaining aligned fragment pairs allowing twists. 005 * Bioinformatics vol.19 suppl. 2. ii246-ii255. 006 * https://www.ncbi.nlm.nih.gov/pubmed/14534198 007 * </pre> 008 * 009 * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. 010 * 011 * This code may be freely distributed and modified under the 012 * terms of the GNU Lesser General Public Licence. This should 013 * be distributed with the code. If you do not have a copy, 014 * see: 015 * 016 * http://www.gnu.org/copyleft/lesser.html 017 * 018 * Copyright for this code is held jointly by the individual 019 * authors. These should be listed in @author doc comments. 020 * 021 * 022 * Created on Jun 17, 2009 023 * Created by Andreas Prlic - RCSB PDB 024 * 025 */ 026 027package org.biojava.nbio.structure.align.fatcat.calc; 028 029public class FCAlignHelper 030{ 031 032 private int M; //length of protein 1 033 private int N; //length of protein 2 034 private double g; //gap-create 035 private double h; //gap-extend 036 private double m; //g + h 037 private double[][] sij; 038 private char[][] trace; //trace-record 039 private char[][] etrace; //trace-record 040 private char[][] dtrace; //trace-record 041 private int B1; //beginning position of protein 1 in alignment 042 private int B2; //beginning position of protein 2 in alignment 043 private int E1; //end position of protein 1 in alignment 044 private int E2; //end position of protein 2 in alignment 045 private double alignScore; 046 private double identity; 047 private double similarity; 048 private int[] sapp; 049 private int[] sapp0; 050 private int sappPos; 051 private int last; 052 053 private char[] seq1; 054 private char[] seq2; 055 private char[] aln1; 056 private char[] aln2; 057 private char[] mark; 058 059 /** 060 * do an alignment given the provided matrix sij0 061 * 062 * @param sij0 - the matrix to perform the calculations on. 063 * @param M0 064 * @param N0 065 * @param g0 066 * @param h0 067 */ 068 public FCAlignHelper(double[][] sij0, int M0, int N0, double g0, double h0){ 069 init(M0, N0, g0, h0); 070 int i, j; 071 for(i = 0; i < M; i ++) { 072 for(j = 0; j < N; j ++){ 073 sij[i][j] = sij0[i][j]; 074 //System.out.println(i+"-"+j+":" +sij[i][j]); 075 } 076 } 077 078 doAlign(); 079 } 080 081 082 private void init(int M0, int N0, double g0, double h0) 083 { 084 M = M0; 085 N = N0; 086 g = g0; //gap-create 087 h = h0; //gap-extend 088 m = g + h; //gap-create + gap-extend 089 trace = new char[M+1][N+1]; 090 etrace = new char[M+1][N+1]; 091 dtrace = new char[M+1][N+1]; 092 B1 = B2 = E1 = E2 = 0; 093 alignScore = 0; 094 last = 0; 095 sapp = new int[M+N]; 096 sapp0 = sapp; 097 sappPos = 0; 098 sij = new double[M][N]; 099 seq1 = new char[M+1]; 100 seq2 = new char[N+1]; 101 aln1 = aln2 = mark = null; 102 identity = similarity = 0; 103 } 104 105 //trace-back strategy 106 //affine-gap penalty 107 //local-model 108 109 private void doAlign(){ 110 111 112 int i, j; 113 double s, e, c, d, wa; 114 double[] CC = new double[N+1]; //note N + 1 115 double[] DD = new double[N+1]; 116 double maxs = -100; 117 char trace_e, trace_d; 118 119 //forward-phase 120 CC[0] = 0; 121 for(j = 1; j <= N; j ++) { 122 CC[j] = 0; 123 DD[j] = -g; 124 } //local-alignment, no terminal penalty 125 for(i = 1; i <= M; i ++) { 126 CC[0] = c = s = 0; 127 e = -g; 128 for(j = 1; j <= N; j ++) { 129 trace_e = 'e'; 130 if ((c = c - m) > (e = e - h)) { 131 e = c; trace_e = 'E'; 132 }//insertion 133 trace_d = 'd'; 134 if ((c = CC[j] - m) > (d = DD[j] - h)) { 135 d = c; trace_d = 'D'; 136 }//deletion 137 //ie CC[j]==CC[i-1][j] DD[j]==DD[i-1][j] 138 wa = sij[i - 1][j - 1]; //note i - 1, j - 1 139 c = s + wa; //s==CC[i-1][j-1] 140 trace[i][j] = 's'; 141 if (e > c) { 142 c = e; 143 trace[i][j] = trace_e; 144 } 145 if (d > c) { 146 c = d; 147 trace[i][j] = trace_d; 148 } 149 etrace[i][j] = trace_e; 150 dtrace[i][j] = trace_d; 151 s = CC[j]; //important for next replace 152 CC[j] = c; //CC[i][j] 153 DD[j] = d; //DD[i][j] 154 if(c < 0) { 155 CC[j] = 0; 156 DD[j] = -g; 157 c = 0; 158 e = -g; 159 trace[i][j] = '0'; 160 } //local-N 161 if(c > maxs) { 162 E1 = i; 163 E2 = j; 164 maxs = c; 165 } //local-C 166 } 167 } 168 alignScore = maxs; 169 //printf("alignment score %f\n", alignScore); 170 171 172 //trace-back 173 if(trace[E1][E2] != 's') { 174 throw new RuntimeException("FCAlignHelper encoutered Exception: Not ending with substitution"); 175 176 } 177 //Trace(maxs, E1, E2); 178 trace('s', E1, E2); 179 //printf("B1 %d B2 %d, E1 %d E2 %d\n", B1, B2, E1, E2); 180 181 //check-alignment 182 checkAlign(); 183 184 185 } 186 187 188 /** 189 * trace-back, recorded in sapp, wrong method! 190 */ 191 192 private void trace(char mod, int i, int j) 193 { 194 if(mod == '0' || i <= 0 || j <= 0) { 195 B1 = i + 1; 196 B2 = j + 1; 197 } 198 if(mod == 's') { 199 trace(trace[i - 1][j - 1], i - 1, j - 1); 200 rep(); 201 } 202 else if(mod == 'D') { 203 trace(trace[i - 1][j], i - 1, j); 204 del(1); 205 } 206 else if(mod == 'd') { 207 trace(dtrace[i - 1][j], i - 1, j); 208 del(1); 209 } 210 else if(mod == 'E') { 211 trace(trace[i][j - 1], i, j - 1); 212 ins(1); 213 } 214 else if(mod == 'e') { 215 trace(etrace[i][j - 1], i, j - 1); 216 ins(1); 217 } 218 } 219 220 //----------------------------------------------------------------------------- 221 //record the alignment in sapp 222 //deletion, sapp < 0, sequences in i, gaps in j 223 //----------------------------------------------------------------------------- 224 private void del(int k) 225 { 226 //if(last < 0) last = sapp[-1] -= (k); 227 //else last = *sapp++ = -(k); 228 229 if(last < 0) last = sapp[sappPos-1] -= (k); 230 else last = sapp[(sappPos++)] = -(k); 231 } 232 233 //Insertion, sapp > 0, gaps in i, sequences in j 234 //----------------------------------------------------------------------------- 235 private void ins(int k) 236 { 237 238 //if(last > 0) last = sapp[-1] += k; 239 //else last = *sapp++ = (k); 240 if(last > 0) last = sapp[sappPos-1] += k; 241 else last = sapp[(sappPos++)] = (k); 242 } 243 244 //----------------------------------------------------------------------------- 245 private void rep() 246 { 247 248 // last = *sapp++ = 0; 249 last = sapp[(sappPos++)] = 0; 250 } 251 252 private void checkAlign(){ 253 254 if(sapp[0] != 0) { 255 System.err.printf("warn: not a local-alignment result, first operation %d%n%n", sapp[0]); 256 } 257 double sco = checkScore(); 258 if(Math.abs(sco - alignScore) > 1e-3) { 259 System.err.printf("FCAlignHelper: warn: alignment scores are different %f(check) %f(align)%n%n", sco, alignScore); 260 } 261 } 262 263 /** 264 * checkscore - return the score of the alignment stored in sapp 265 */ 266 267 private double checkScore() 268 { 269 int i, j, op, s; 270 double sco; 271 272 sco = 0; 273 op = 0; 274 s = 0; 275 276 i = B1; 277 j = B2; 278 while (i <= E1 && j <= E2) { 279 op = sapp0[s ++]; 280 if (op == 0) { 281 sco += sij[i - 1][j - 1]; 282 //if (debug) 283 //System.err.println(String.format("%d-%d %f\n", i - 1, j - 1, sij[i - 1][j - 1])); 284 i ++; 285 j ++; 286 } 287 else if (op > 0) { 288 sco -= g+op*h; 289 j = j+op; 290 } 291 else { 292 sco -= g-op*h; 293 i = i-op; 294 } 295 } 296 return(sco); 297 } 298 299 /** 300 * record the aligned pairs in alignList[][0], alignList[][1]; 301 * return the number of aligned pairs 302 * @param alignList 303 * @return the number of aligned pairs 304 */ 305 public int getAlignPos(int[][] alignList) 306 { 307 int i = B1; 308 int j = B2; 309 int s = 0; 310 int a = 0; 311 int op; 312 while(i <= E1 && j <= E2) { 313 op = sapp0[s ++]; 314 315 if (op == 0) { 316 alignList[0][a] = i - 1; //i - 1 317 alignList[1][a] = j - 1; 318 a ++; 319 i ++; 320 j ++; 321 } 322 else if (op > 0) { 323 j += op; 324 } 325 else { 326 i -= op; 327 } 328 } 329 return a; 330 } 331 332}