001/* This class is based on the original FATCAT implementation by 002 * <pre> 003 * Yuzhen Ye & Adam Godzik (2003) 004 * Flexible structure alignment by chaining aligned fragment pairs allowing twists. 005 * Bioinformatics vol.19 suppl. 2. ii246-ii255. 006 * https://www.ncbi.nlm.nih.gov/pubmed/14534198 007 * </pre> 008 * 009 * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. 010 * 011 * This code may be freely distributed and modified under the 012 * terms of the GNU Lesser General Public Licence. This should 013 * be distributed with the code. If you do not have a copy, 014 * see: 015 * 016 * http://www.gnu.org/copyleft/lesser.html 017 * 018 * Copyright for this code is held jointly by the individual 019 * authors. These should be listed in @author doc comments. 020 * 021 * 022 * Created on Jun 17, 2009 023 * Created by Andreas Prlic - RCSB PDB 024 * 025 */ 026 027package org.biojava.nbio.structure.align.fatcat.calc; 028 029 030import javax.vecmath.Matrix4d; 031 032import org.biojava.nbio.structure.*; 033import org.biojava.nbio.structure.geometry.SuperPositions; 034 035 036 037public class StructureAlignmentOptimizer 038{ 039 040 //private static final boolean showAlig = false; 041 042 int pro1Len; 043 int pro2Len; 044 int maxLen; 045 Atom[] cod1; 046 Atom[] cod2; 047 048 int[][] equSet; 049 int equLen; 050 int equLen0; 051 double[][]sij; 052 053 int maxKeepStep; 054 int keepStep; 055 056 double Dc; //the criteria for structural equivalent residues, eg. 3.0 (CE), 6.0(ProSup) 057 double rmsdCut;//the criteria for stoping optimization 058 double increase; 059 double stopLenPer; 060 double stopRmsdPer; 061 double stopRmsd; 062 063 double gapIni; 064 double gapExt; 065 066 double rmsd; 067 068 private static final boolean debug = FatCatAligner.debug; 069 070 /** 071 * optimize the structural alignment by update the equivalent residues 072 * and then run dynamic programming 073 * input: len1 the length of structure 1; c1: the structure information of 1 074 * len2 the length of structure 2; c2: the structure information of 2 075 * iniLen and iniSet is the length and list of initial equivalent residues 076 */ 077 078 public StructureAlignmentOptimizer(int b1, int end1, Atom[] c1, int b2, int end2, Atom[] c2, 079 int iniLen, int[][] iniSet) throws StructureException{ 080 081 //System.err.println("optimizing range:" + b1 + "-" + end1 + "("+ (end1-b1) + ") b2: " + b2 + "-" + end2+ "("+ (end2-b2) + ") iniLen " + iniLen); 082 //System.out.println("ca1: " + c1.length + " ca2: " + c2.length); 083 084 int len1 = end1-b1; 085 int len2 = end2-b2; 086 087 //System.err.println("len1: " + len1 + " len2:" + len2); 088 089 pro1Len = len1; 090 pro2Len = len2; 091 092 cod1 = new Atom[len1]; 093 cod2 = new Atom[len2]; 094 095 for(int i = 0; i < len1; i ++) { 096 Atom a = c1[i+b1]; 097 //cod1[i] = (Atom)a.clone(); 098 Group parent = (Group)a.getGroup().clone(); 099 //cod1[i].setParent(parent); 100 cod1[i] = parent.getAtom(a.getName()); 101 //cod1[i] = c1[i]; 102 } 103 for(int i = 0; i < len2; i ++) { 104 Atom a = c2[i+b2]; 105 //cod2[i]= (Atom)a.clone(); 106 Group parent = (Group)a.getGroup().clone(); 107 //cod2[i].setParent(parent); 108 cod2[i] = parent.getAtom(a.getName()); 109 //cod2[i] = c2[i]; 110 } 111 112 113 //initial equivalent sets 114 maxLen = (len1 < len2)?len1:len2; 115 116 equSet = new int[2][maxLen]; 117 for(int i = 0; i < iniLen; i ++) { 118 119 equSet[0][i] = iniSet[0][i]; 120 equSet[1][i] = iniSet[1][i]; 121 if(iniSet[0][i] > len1 || iniSet[1][i] > len2) { 122 throw new RuntimeException(String.format("StructureAlignmentOptimizer: focus exceeds the protein 1 or 2 length!")); 123 } 124 } 125 126 equLen = iniLen; 127 equLen0 = equLen; 128 129 setParameters(); 130 131 sij = new double[pro1Len][pro2Len]; 132 133// if (showAlig) 134// showCurrentAlignment(iniLen, equSet, "initial alignment"); 135 136 } 137 138// private void showCurrentAlignment(int len, int[][]set, String title){ 139// BiojavaJmol jmol = new BiojavaJmol(); 140// jmol.setTitle(title); 141// 142// Chain c1 = new ChainImpl(); 143// c1.setName("A"); 144// 145// Chain c2 = new ChainImpl(); 146// c2.setName("B"); 147// for(int i = 0; i < len; i ++) { 148// Atom a1 = cod1[set[0][i]]; 149// Atom a2 = cod2[set[1][i]]; 150// 151// Group g1 = a1.getParent(); 152// Group g2 = a2.getParent(); 153// 154// try { 155// Group n1 = new AminoAcidImpl(); 156// n1.setPDBCode(g1.getPDBCode()); 157// n1.setPDBName(g1.getPDBName()); 158// n1.addAtom(a1); 159// 160// Group n2 = new AminoAcidImpl(); 161// n2.setPDBCode(g2.getPDBCode()); 162// n2.setPDBName(g2.getPDBName()); 163// n2.addAtom(a2); 164// 165// 166// c1.addGroup(n1); 167// c2.addGroup(n2); 168// } catch (Exception e){ 169// // 170// } 171// 172// } 173// 174// Structure s = new StructureImpl(); 175// s.setPDBCode(title); 176// List<Chain> model1 = new ArrayList<Chain>(); 177// model1.add(c1); 178// List<Chain> model2 = new ArrayList<Chain>(); 179// model2.add(c2); 180// s.addModel(model1); 181// s.addModel(model2); 182// s.setNmr(true); 183// jmol.setStructure(s); 184// jmol.evalString("select *; backbone 0.4; wireframe off; spacefill off; " + 185// "select not protein and not solvent; spacefill on;"); 186// jmol.evalString("select */1 ; color red; model 1; "); 187// 188// // now show both models again. 189// jmol.evalString("model 0;"); 190// 191// } 192 193 194 /** run the optimization 195 * 196 * @param maxi maximum nr. of iterations 197 * @throws StructureException 198 */ 199 public void runOptimization(int maxi) throws StructureException{ 200 superimposeBySet(); 201 if ( debug) 202 System.err.println(" initial rmsd " + rmsd); 203 204// if (showAlig) 205// showCurrentAlignment(equLen, equSet, "after initial superimposeBySet Len:" +equLen + " rmsd:" +rmsd); 206 207 maxKeepStep = 4; 208 keepStep = 0; 209 210 optimize(maxi); 211 } 212 213 /** 214 * refer CE, similarity = Dc - dij, Dc is increased by 0.5 each cycle, 215 * optimization continues until either 216 * i)alignment length is less than 95% of alignment length before optimization 217 * ii)rmsd is less than 110% of rmsd at the cycle when condition i) was first satisfied 218 */ 219 private void setParameters() 220 { 221 Dc = 3.0; //Dc = 2.0 222 increase = 0.5; 223 stopLenPer = 0.95; 224 stopRmsdPer = 1.1; 225 stopRmsd = -1.0; 226 rmsdCut = 3.0; 227 gapIni = 5.0; 228 gapExt = 0.5; 229 } 230 231 232 /** 233 * superimpose two structures according to the equivalent residues 234 */ 235 private void superimposeBySet () 236 throws StructureException 237 { 238 239 //extract the coordinations of equivalent residues 240 Atom[] tmp1 = new Atom[equLen]; 241 Atom[] tmp2 = new Atom[equLen]; 242 int i, r1, r2; 243 244 245 for(i = 0; i < equLen; i ++) { 246 r1 = equSet[0][i]; 247 r2 = equSet[1][i]; 248 249 tmp1[i] = cod1[ r1 ]; 250 tmp2[i] = (Atom)cod2[ r2 ].clone(); // have to be cloned! 251 //tmp2[i] = cod2[ r2 ]; 252 253 254 /*try { 255 System.out.println("before superimpos: " + equSet[0][i]+"-"+ equSet[1][i]+ " dist:" + Calc.getDistance(tmp1[i], cod2[equSet[1][i]])); 256 } catch (Exception e){ 257 e.printStackTrace(); 258 }*/ 259 } 260 261 //superimpose the equivalent residues 262 Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(tmp1), 263 Calc.atomsToPoints(tmp2)); 264 265 Calc.transform(tmp2, trans); 266 267 // weird, why does it take the RMSD before the rotation? 268 // the rmsd is only for the subset contained in the tmp arrays. 269 rmsd = Calc.rmsd(tmp1,tmp2); 270 271 //System.err.println("rmsd after superimpose by set: " + rmsd); 272 273 //transform structure 2 according to the superimposition of the equivalent residues 274 Calc.transform(cod2, trans); 275 276// for(i = 0; i < equLen; i ++) { 277// try { 278// System.err.println("after superimpos: " + equSet[0][i]+"-"+ equSet[1][i]+ " dist:" + Calc.getDistance(tmp1[i], cod2[equSet[1][i]])); 279// } catch (Exception e){ 280// e.printStackTrace(); 281// } 282// } 283 284 } 285 286 287 private void optimize(int maxi) throws StructureException 288 { 289 long optStart = System.currentTimeMillis(); 290 if ( debug) 291 System.out.println("Optimizing up to " + maxi + " iterations.. "); 292 boolean ifstop = true;; 293 int i, alnLen; 294 alnLen = 0; 295 296 int[][] alnList = new int[2][maxLen]; 297 for(i = 0; i < maxi; i ++) { 298 299 //if ( debug){ 300 // System.out.println("optimize iteration: " + i); 301 //} 302 303 calMatrix(); 304 305 FCAlignHelper aln = new FCAlignHelper(sij,pro1Len,pro2Len,gapIni, gapExt); 306 307 //ALIGN0 *aln = new ALIGN0(sij, pro1Len, pro2Len, gapIni, gapExt); 308 alnLen = aln.getAlignPos(alnList); 309 if(alnLen < 3) ifstop = true; //very rare, mark by Y.Y on 5/1/03 310 else ifstop = defineEquPos(alnLen, alnList); 311 312 if(ifstop) break; 313 Dc += increase; 314 315// if (showAlig) 316// if ( i == 0 ) 317// showCurrentAlignment(alnLen, alnList, "optimizing alignment - after " + i + " iterations alnLen:" + alnLen + " rmsd " + rmsd); 318 } 319 320 if (debug){ 321 if(i < maxi) { 322 System.out.println(String.format(" optimize converged at %d iterations\n", i)); 323 } 324 else System.out.println(" optimize stop without convergence\n"); 325 System.out.println("optimization time: " + (System.currentTimeMillis() - optStart) + " ms."); 326 } 327 328// if (showAlig) 329// showCurrentAlignment(alnLen, alnList, "optimizing alignment - after " + i + " iterations alnLen:" + alnLen + " rmsd " + rmsd); 330 } 331 332 //-------------------------------------------------------------------------------------------------------- 333 //the definition of matrix between residues: 334 // Sij = Dc^2 - Dij^2 if Dij <= Dc 335 // 0 else 336 //-------------------------------------------------------------------------------------------------------- 337 private void calMatrix() throws StructureException 338 { 339 int i, j; 340 double dis; 341 for(i = 0; i < pro1Len; i ++) { 342 for(j = 0; j < pro2Len; j ++) { 343 dis = Calc.getDistance(cod1[i],cod2[j]); 344 345 if(dis < Dc) { 346 sij[i][j] = Dc - dis; 347 } 348 else { 349 sij[i][j] = 0; 350 } 351 } 352 } 353 } 354 355 /** 356 * the equivalent residues: residues where Dij <= Dc and i,j is an aligned pair 357 * use the previous superimposing 358 */ 359 private boolean defineEquPos(int alnLen, int[][] alnList) 360 throws StructureException 361 { 362 int i, r1, r2; 363 int equLenOld = equLen; 364 int[][] equSetOld = new int[2][equLenOld]; 365 for(i = 0; i < equLen; i ++) { 366 equSetOld[0][i] = equSet[0][i]; 367 equSetOld[1][i] = equSet[1][i]; 368 } 369 double rmsdOld = rmsd; 370 double dis; 371 equLen = 0; 372 //if (debug) 373 // System.out.println(String.format(" OPT: Dc %f, equLenOld %d, rmsdOld %f, alnLen %d", Dc, equLenOld, rmsdOld, alnLen)); 374 for(i = 0; i < alnLen; i ++) { 375 r1 = alnList[0][i]; 376 r2 = alnList[1][i]; 377 dis = Calc.getDistance(cod1[r1],cod2[r2]); 378 if(dis <= Dc) { 379 //System.out.println(r1 + "-" + r2 + " d:" + dis); 380 equSet[0][equLen] = r1; 381 equSet[1][equLen] = r2; 382 equLen ++; 383 } 384 } 385 386 superimposeBySet(); 387 388 //if (debug) 389 // System.out.println(String.format(" OPT: new equLen %d rmsd %f", equLen, rmsd)); 390 391 boolean ifstop = false; 392 393// if (debug) { 394// System.out.print(" OPT: rmsd diff: " + Math.abs(rmsd - rmsdOld) + " equLens: " + equLenOld + ":"+ equLen); 395// if ( Math.abs(rmsd - rmsdOld) < 1e-10) 396// System.out.println(" NO DIFF!"); 397// else 398// System.out.println(" DIFF!"); 399// } 400 401 if((Math.abs(rmsd - rmsdOld) < 1e-10 ) && (equLenOld == equLen)) keepStep ++; 402 else keepStep = 0; 403 404 if(keepStep > maxKeepStep) { 405 ifstop = true; //converge 406 } //allowing up to maxKeepStep instead of 1 is essential for some special cases 407 else if(stopRmsd < 0) { 408 ifstop = false; //condition 1, continue 409 } 410 else if((rmsd <= stopRmsd * stopRmsdPer) || (rmsd < rmsdCut)) { 411 ifstop = false; //condition 2, continue 412 } //rmsdCut is adopted or not? to be tuned 413 else { 414 ifstop = true; //get worse 415 } 416 417 418 if((stopRmsd <0) && (equLen >= stopLenPer * equLen0)) { 419 //System.err.println("stopRmsd: " + stopRmsd + " setting to rmsd:" + rmsd); 420 stopRmsd = rmsd; //condition 1 421 } 422 423 return ifstop; 424 } 425 426 427 public double optimizeResult(int[] optLen, int optLenpos, int[][] list) 428 { 429 optLen[optLenpos] = equLen; 430 431 for(int i = 0; i < equLen; i ++) { 432 list[0][i] = equSet[0][i]; 433 list[1][i] = equSet[1][i]; 434 } 435 return rmsd; 436 } 437 438}