001/* This class is based on the original FATCAT implementation by 002 * <pre> 003 * Yuzhen Ye & Adam Godzik (2003) 004 * Flexible structure alignment by chaining aligned fragment pairs allowing twists. 005 * Bioinformatics vol.19 suppl. 2. ii246-ii255. 006 * https://www.ncbi.nlm.nih.gov/pubmed/14534198 007 * </pre> 008 * 009 * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. 010 * 011 * This code may be freely distributed and modified under the 012 * terms of the GNU Lesser General Public Licence. This should 013 * be distributed with the code. If you do not have a copy, 014 * see: 015 * 016 * http://www.gnu.org/copyleft/lesser.html 017 * 018 * Copyright for this code is held jointly by the individual 019 * authors. These should be listed in @author doc comments. 020 * 021 * 022 * Created on Jun 17, 2009 023 * Created by Andreas Prlic - RCSB PDB 024 * 025 */ 026 027package org.biojava.nbio.structure.align.fatcat.calc; 028 029 030import org.biojava.nbio.structure.*; 031import org.biojava.nbio.structure.jama.Matrix; 032 033 034 035public class StructureAlignmentOptimizer 036{ 037 038 //private static final boolean showAlig = false; 039 040 int pro1Len; 041 int pro2Len; 042 int maxLen; 043 Atom[] cod1; 044 Atom[] cod2; 045 046 int[][] equSet; 047 int equLen; 048 int equLen0; 049 double[][]sij; 050 051 int maxKeepStep; 052 int keepStep; 053 054 double Dc; //the criteria for structural equivalent residues, eg. 3.0 (CE), 6.0(ProSup) 055 double rmsdCut;//the criteria for stoping optimization 056 double increase; 057 double stopLenPer; 058 double stopRmsdPer; 059 double stopRmsd; 060 061 double gapIni; 062 double gapExt; 063 064 double rmsd; 065 066 private static final boolean debug = FatCatAligner.debug; 067 068 /** 069 * optimize the structural alignment by update the equivalent residues 070 * and then run dynamic programming 071 * input: len1 the length of structure 1; c1: the structure information of 1 072 * len2 the length of structure 2; c2: the structure information of 2 073 * iniLen and iniSet is the length and list of initial equivalent residues 074 */ 075 076 public StructureAlignmentOptimizer(int b1, int end1, Atom[] c1, int b2, int end2, Atom[] c2, 077 int iniLen, int[][] iniSet) throws StructureException{ 078 079 //System.err.println("optimizing range:" + b1 + "-" + end1 + "("+ (end1-b1) + ") b2: " + b2 + "-" + end2+ "("+ (end2-b2) + ") iniLen " + iniLen); 080 //System.out.println("ca1: " + c1.length + " ca2: " + c2.length); 081 082 int len1 = end1-b1; 083 int len2 = end2-b2; 084 085 //System.err.println("len1: " + len1 + " len2:" + len2); 086 087 pro1Len = len1; 088 pro2Len = len2; 089 090 cod1 = new Atom[len1]; 091 cod2 = new Atom[len2]; 092 093 for(int i = 0; i < len1; i ++) { 094 Atom a = c1[i+b1]; 095 //cod1[i] = (Atom)a.clone(); 096 Group parent = (Group)a.getGroup().clone(); 097 //cod1[i].setParent(parent); 098 cod1[i] = parent.getAtom(a.getName()); 099 //cod1[i] = c1[i]; 100 } 101 for(int i = 0; i < len2; i ++) { 102 Atom a = c2[i+b2]; 103 //cod2[i]= (Atom)a.clone(); 104 Group parent = (Group)a.getGroup().clone(); 105 //cod2[i].setParent(parent); 106 cod2[i] = parent.getAtom(a.getName()); 107 //cod2[i] = c2[i]; 108 } 109 110 111 //initial equivalent sets 112 maxLen = (len1 < len2)?len1:len2; 113 114 equSet = new int[2][maxLen]; 115 for(int i = 0; i < iniLen; i ++) { 116 117 equSet[0][i] = iniSet[0][i]; 118 equSet[1][i] = iniSet[1][i]; 119 if(iniSet[0][i] > len1 || iniSet[1][i] > len2) { 120 throw new RuntimeException(String.format("StructureAlignmentOptimizer: focus exceeds the protein 1 or 2 length!")); 121 } 122 } 123 124 equLen = iniLen; 125 equLen0 = equLen; 126 127 setParameters(); 128 129 sij = new double[pro1Len][pro2Len]; 130 131// if (showAlig) 132// showCurrentAlignment(iniLen, equSet, "initial alignment"); 133 134 } 135 136// private void showCurrentAlignment(int len, int[][]set, String title){ 137// BiojavaJmol jmol = new BiojavaJmol(); 138// jmol.setTitle(title); 139// 140// Chain c1 = new ChainImpl(); 141// c1.setName("A"); 142// 143// Chain c2 = new ChainImpl(); 144// c2.setName("B"); 145// for(int i = 0; i < len; i ++) { 146// Atom a1 = cod1[set[0][i]]; 147// Atom a2 = cod2[set[1][i]]; 148// 149// Group g1 = a1.getParent(); 150// Group g2 = a2.getParent(); 151// 152// try { 153// Group n1 = new AminoAcidImpl(); 154// n1.setPDBCode(g1.getPDBCode()); 155// n1.setPDBName(g1.getPDBName()); 156// n1.addAtom(a1); 157// 158// Group n2 = new AminoAcidImpl(); 159// n2.setPDBCode(g2.getPDBCode()); 160// n2.setPDBName(g2.getPDBName()); 161// n2.addAtom(a2); 162// 163// 164// c1.addGroup(n1); 165// c2.addGroup(n2); 166// } catch (Exception e){ 167// // 168// } 169// 170// } 171// 172// Structure s = new StructureImpl(); 173// s.setPDBCode(title); 174// List<Chain> model1 = new ArrayList<Chain>(); 175// model1.add(c1); 176// List<Chain> model2 = new ArrayList<Chain>(); 177// model2.add(c2); 178// s.addModel(model1); 179// s.addModel(model2); 180// s.setNmr(true); 181// jmol.setStructure(s); 182// jmol.evalString("select *; backbone 0.4; wireframe off; spacefill off; " + 183// "select not protein and not solvent; spacefill on;"); 184// jmol.evalString("select */1 ; color red; model 1; "); 185// 186// // now show both models again. 187// jmol.evalString("model 0;"); 188// 189// } 190 191 192 /** run the optimization 193 * 194 * @param maxi maximum nr. of iterations 195 * @throws StructureException 196 */ 197 public void runOptimization(int maxi) throws StructureException{ 198 superimposeBySet(); 199 if ( debug) 200 System.err.println(" initial rmsd " + rmsd); 201 202// if (showAlig) 203// showCurrentAlignment(equLen, equSet, "after initial superimposeBySet Len:" +equLen + " rmsd:" +rmsd); 204 205 maxKeepStep = 4; 206 keepStep = 0; 207 208 optimize(maxi); 209 } 210 211 /** 212 * refer CE, similarity = Dc - dij, Dc is increased by 0.5 each cycle, 213 * optimization continues until either 214 * i)alignment length is less than 95% of alignment length before optimization 215 * ii)rmsd is less than 110% of rmsd at the cycle when condition i) was first satisfied 216 */ 217 private void setParameters() 218 { 219 Dc = 3.0; //Dc = 2.0 220 increase = 0.5; 221 stopLenPer = 0.95; 222 stopRmsdPer = 1.1; 223 stopRmsd = -1.0; 224 rmsdCut = 3.0; 225 gapIni = 5.0; 226 gapExt = 0.5; 227 } 228 229 230 /** 231 * superimpose two structures according to the equivalent residues 232 */ 233 private void superimposeBySet () 234 throws StructureException 235 { 236 237 //extract the coordinations of equivalent residues 238 Atom[] tmp1 = new Atom[equLen]; 239 Atom[] tmp2 = new Atom[equLen]; 240 int i, r1, r2; 241 242 243 for(i = 0; i < equLen; i ++) { 244 r1 = equSet[0][i]; 245 r2 = equSet[1][i]; 246 247 tmp1[i] = cod1[ r1 ]; 248 tmp2[i] = (Atom)cod2[ r2 ].clone(); // have to be cloned! 249 //tmp2[i] = cod2[ r2 ]; 250 251 252 /*try { 253 System.out.println("before superimpos: " + equSet[0][i]+"-"+ equSet[1][i]+ " dist:" + Calc.getDistance(tmp1[i], cod2[equSet[1][i]])); 254 } catch (Exception e){ 255 e.printStackTrace(); 256 }*/ 257 } 258 259 //superimpose the equivalent residues 260 SVDSuperimposer svd = new SVDSuperimposer(tmp1, tmp2); 261 262 Matrix m = svd.getRotation(); 263 Atom t = svd.getTranslation(); 264 265 for (Atom a: tmp2) { 266 267 Calc.rotate(a,m); 268 Calc.shift(a,t); 269 270 } 271 272 // weird, why does it take the RMSD before the rotation? 273 // the rmsd is only for the subset contained in the tmp arrays. 274 rmsd = SVDSuperimposer.getRMS(tmp1,tmp2); 275 276 //System.err.println("rmsd after superimpose by set: " + rmsd); 277 278 //transform structure 2 according to the superimposition of the equivalent residues 279 for (i = 0 ; i< cod2.length; i++) { 280 Atom a = cod2[i]; 281 Calc.rotate(a,m); 282 Calc.shift(a,t); 283 284 } 285 286 287 288// for(i = 0; i < equLen; i ++) { 289// try { 290// System.err.println("after superimpos: " + equSet[0][i]+"-"+ equSet[1][i]+ " dist:" + Calc.getDistance(tmp1[i], cod2[equSet[1][i]])); 291// } catch (Exception e){ 292// e.printStackTrace(); 293// } 294// } 295 296 297 298 } 299 300 301 private void optimize(int maxi) throws StructureException 302 { 303 long optStart = System.currentTimeMillis(); 304 if ( debug) 305 System.out.println("Optimizing up to " + maxi + " iterations.. "); 306 boolean ifstop = true;; 307 int i, alnLen; 308 alnLen = 0; 309 310 int[][] alnList = new int[2][maxLen]; 311 for(i = 0; i < maxi; i ++) { 312 313 //if ( debug){ 314 // System.out.println("optimize iteration: " + i); 315 //} 316 317 calMatrix(); 318 319 FCAlignHelper aln = new FCAlignHelper(sij,pro1Len,pro2Len,gapIni, gapExt); 320 321 //ALIGN0 *aln = new ALIGN0(sij, pro1Len, pro2Len, gapIni, gapExt); 322 alnLen = aln.getAlignPos(alnList); 323 if(alnLen < 3) ifstop = true; //very rare, mark by Y.Y on 5/1/03 324 else ifstop = defineEquPos(alnLen, alnList); 325 326 if(ifstop) break; 327 Dc += increase; 328 329// if (showAlig) 330// if ( i == 0 ) 331// showCurrentAlignment(alnLen, alnList, "optimizing alignment - after " + i + " iterations alnLen:" + alnLen + " rmsd " + rmsd); 332 } 333 334 if (debug){ 335 if(i < maxi) { 336 System.out.println(String.format(" optimize converged at %d iterations\n", i)); 337 } 338 else System.out.println(" optimize stop without convergence\n"); 339 System.out.println("optimization time: " + (System.currentTimeMillis() - optStart) + " ms."); 340 } 341 342// if (showAlig) 343// showCurrentAlignment(alnLen, alnList, "optimizing alignment - after " + i + " iterations alnLen:" + alnLen + " rmsd " + rmsd); 344 } 345 346 //-------------------------------------------------------------------------------------------------------- 347 //the definition of matrix between residues: 348 // Sij = Dc^2 - Dij^2 if Dij <= Dc 349 // 0 else 350 //-------------------------------------------------------------------------------------------------------- 351 private void calMatrix() throws StructureException 352 { 353 int i, j; 354 double dis; 355 for(i = 0; i < pro1Len; i ++) { 356 for(j = 0; j < pro2Len; j ++) { 357 dis = Calc.getDistance(cod1[i],cod2[j]); 358 359 if(dis < Dc) { 360 sij[i][j] = Dc - dis; 361 } 362 else { 363 sij[i][j] = 0; 364 } 365 } 366 } 367 } 368 369 /** 370 * the equivalent residues: residues where Dij <= Dc and i,j is an aligned pair 371 * use the previous superimposing 372 */ 373 private boolean defineEquPos(int alnLen, int[][] alnList) 374 throws StructureException 375 { 376 int i, r1, r2; 377 int equLenOld = equLen; 378 int[][] equSetOld = new int[2][equLenOld]; 379 for(i = 0; i < equLen; i ++) { 380 equSetOld[0][i] = equSet[0][i]; 381 equSetOld[1][i] = equSet[1][i]; 382 } 383 double rmsdOld = rmsd; 384 double dis; 385 equLen = 0; 386 //if (debug) 387 // System.out.println(String.format(" OPT: Dc %f, equLenOld %d, rmsdOld %f, alnLen %d", Dc, equLenOld, rmsdOld, alnLen)); 388 for(i = 0; i < alnLen; i ++) { 389 r1 = alnList[0][i]; 390 r2 = alnList[1][i]; 391 dis = Calc.getDistance(cod1[r1],cod2[r2]); 392 if(dis <= Dc) { 393 //System.out.println(r1 + "-" + r2 + " d:" + dis); 394 equSet[0][equLen] = r1; 395 equSet[1][equLen] = r2; 396 equLen ++; 397 } 398 } 399 400 superimposeBySet(); 401 402 //if (debug) 403 // System.out.println(String.format(" OPT: new equLen %d rmsd %f", equLen, rmsd)); 404 405 boolean ifstop = false; 406 407// if (debug) { 408// System.out.print(" OPT: rmsd diff: " + Math.abs(rmsd - rmsdOld) + " equLens: " + equLenOld + ":"+ equLen); 409// if ( Math.abs(rmsd - rmsdOld) < 1e-10) 410// System.out.println(" NO DIFF!"); 411// else 412// System.out.println(" DIFF!"); 413// } 414 415 if((Math.abs(rmsd - rmsdOld) < 1e-10 ) && (equLenOld == equLen)) keepStep ++; 416 else keepStep = 0; 417 418 if(keepStep > maxKeepStep) { 419 ifstop = true; //converge 420 } //allowing up to maxKeepStep instead of 1 is essential for some special cases 421 else if(stopRmsd < 0) { 422 ifstop = false; //condition 1, continue 423 } 424 else if((rmsd <= stopRmsd * stopRmsdPer) || (rmsd < rmsdCut)) { 425 ifstop = false; //condition 2, continue 426 } //rmsdCut is adopted or not? to be tuned 427 else { 428 ifstop = true; //get worse 429 } 430 431 432 if((stopRmsd <0) && (equLen >= stopLenPer * equLen0)) { 433 //System.err.println("stopRmsd: " + stopRmsd + " setting to rmsd:" + rmsd); 434 stopRmsd = rmsd; //condition 1 435 } 436 437 return ifstop; 438 } 439 440 441 public double optimizeResult(int[] optLen, int optLenpos, int[][] list) 442 { 443 optLen[optLenpos] = equLen; 444 445 for(int i = 0; i < equLen; i ++) { 446 list[0][i] = equSet[0][i]; 447 list[1][i] = equSet[1][i]; 448 } 449 return rmsd; 450 } 451 452}