001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Sep 25, 2009 021 * Author: Andreas Prlic 022 * 023 */ 024 025package org.biojava.nbio.structure.align.ce; 026 027import org.biojava.nbio.core.alignment.matrices.ScaledSubstitutionMatrix; 028import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 029import org.biojava.nbio.structure.*; 030import org.biojava.nbio.structure.align.model.AFP; 031import org.biojava.nbio.structure.align.model.AFPChain; 032import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay; 033import org.biojava.nbio.structure.jama.Matrix; 034import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 035import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 036 037import java.util.ArrayList; 038import java.util.List; 039 040 041 042/** This is based on the original Combinatorial Extension (CE) source code from 2003 or 2004 (CE version 2.3), 043 * as has been originally developed by I. Shindyalov and P.Bourne (1998). 044 * The original CE paper is available from here: <a href="http://peds.oxfordjournals.org/cgi/content/short/11/9/739">http://peds.oxfordjournals.org/cgi/content/short/11/9/739</a>. 045 * 046 * This class is a pretty much exact 1:1 port from C, where I cared about exact reproduce of the CE results 047 * and not about Java style. 048 * 049 * @author Spencer Bliven 050 * @author Andreas Prlic 051 * 052 053 * 054 */ 055public class CeCalculatorEnhanced { 056 057 protected static final boolean isPrint = true; 058 private static final boolean showAlignmentSteps = true; 059 private static final boolean debug = true; 060 061 int[] f1; 062 int[] f2; 063 double[][]dist1; 064 double[][]dist2; 065 protected double[][]mat; 066 protected int[] bestTrace1; 067 protected int[] bestTrace2; 068 protected int[][] bestTraces1; 069 protected int[][] bestTraces2; 070 protected int nBestTrace; 071 protected int nBestTraces; 072 double d_[] = new double[20]; 073 protected int[] bestTracesN; 074 protected double bestTraceScore; 075 protected int nTrace; 076 protected double[] bestTracesScores; 077 protected int[] trace1; 078 protected int[] trace2; 079 080 protected static final double zThr=-0.1; 081 082 long timeStart ; 083 long timeEnd; 084 private int nAtom; 085 086 // the equivalent positions in the alignment... 087 private int[] align_se1; 088 private int[] align_se2; 089 090 091 private int alignmentPositionOrLength; 092 private int[] bestTraceLen; 093 private Matrix r; 094 private Atom t; 095 protected int nTraces; 096 097 private double z; 098 private int[] traceIndexContainer; 099 100 protected CeParameters params; 101 // SHOULD these fields be PARAMETERS? 102 103 protected static final int nIter = 1; 104 private static final boolean distAll = false; 105 106 List<MatrixListener> matrixListeners; 107 108 public static final boolean GLOBAL_ALIGN1 = false; 109 public static final boolean GLOBAL_ALIGN2 = false; 110 111 public CeCalculatorEnhanced(CeParameters params){ 112 timeStart = System.currentTimeMillis(); 113 dist1= new double[0][0]; 114 dist2= new double[0][0]; 115 this.params = params; 116 matrixListeners = new ArrayList<MatrixListener>(); 117 118 } 119 120 /** 121 * 122 * @param afpChain A new AFPChain, which will be filled in by this function 123 * @param ca1 124 * @param ca2 125 * @return afpChain 126 * @throws StructureException 127 */ 128 public AFPChain extractFragments(AFPChain afpChain, 129 Atom[] ca1, Atom[] ca2) throws StructureException{ 130 131 int nse1 = ca1.length; 132 int nse2 = ca2.length; 133 134 afpChain.setCa1Length(nse1); 135 afpChain.setCa2Length(nse2); 136 137 int traceMaxSize=nse1<nse2?nse1:nse2; 138 139 f1 = new int[nse1]; 140 f2 = new int[nse2]; 141 142 dist1 = initIntraDistmatrix(ca1, nse1); 143 dist2 = initIntraDistmatrix(ca2, nse2); 144 145 146 if ( debug ) 147 System.out.println("parameters: " + params); 148 149 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 150 if ( params.getSeqWeight() < 1) 151 params.setSeqWeight(2); 152 } 153 154 int winSize = params.getWinSize(); 155 156 int winSizeComb1 = (winSize-1)*(winSize-2)/2; 157 158 traceIndexContainer = new int[traceMaxSize]; 159 160 // CE: unused code. distAll is always false and both loops do the same??? 161 // CE v2.3 calls this Weight factors for trace extension 162 if(distAll ) { 163 for(int i=0; i<traceMaxSize; i++) 164 traceIndexContainer[i]=(i+1)*i*winSize*winSize/2+(i+1)*winSizeComb1; 165 } else { 166 for(int i=0; i<traceMaxSize; i++) { 167 traceIndexContainer[i]=(i+1)*i*winSize/2+(i+1)*winSizeComb1; 168 169 170 } 171 } 172 173 // verified: a[] is set correctly. 174 175 mat = initSumOfDistances(nse1, nse2, winSize, winSizeComb1, ca1, ca2); 176 177 178 179 // try { 180 // Matrix m2 = new Matrix(mat).copy(); 181 // JPanel panel = GuiWrapper.getScaleableMatrixPanel(m2); 182 // JFrame frame = new JFrame(); 183 // frame.addWindowListener(new WindowAdapter(){ 184 // public void windowClosing(WindowEvent e){ 185 // JFrame f = (JFrame) e.getSource(); 186 // f.setVisible(false); 187 // f.dispose(); 188 // } 189 // }); 190 // 191 // 192 // frame.getContentPane().add(panel); 193 // 194 // frame.pack(); 195 // frame.setVisible(true); 196 // } catch (Exception e) { 197 // e.printStackTrace(); 198 // } 199 200 201 // Set the distance matrix 202 //afpChain.setDistanceMatrix(new Matrix(mat.clone())); 203 204 205 // 206 // double rmsdThr = params.getRmsdThr(); 207 // StringBuffer buf = new StringBuffer(" "); 208 // for(int i=0; i<nse2; i++) 209 // buf.append(String.format("%c", i%10==0?(i%100)/10+48:32)); 210 // buf.append("\n"); 211 // for(int i=0; i<nse1; i++) { 212 // buf.append(String.format("%c ", i%10==0?(i%100)/10+48:32)); 213 // for(int j=0; j<nse2; j++) 214 // buf.append(String.format("%c", (mat[i][j])<rmsdThr?'+':'X')); 215 // //printf("%c", ((int)*(mat[i]+j)/40)>9?'*':((int)*(mat[i]+j)/40)+48); 216 // buf.append("\n"); 217 // } 218 // buf.append("\n"); 219 // 220 // System.out.println(buf.toString()); 221 // 222 223 224 return afpChain; 225 } 226 227 /** 228 * Evaluates the distance between two atoms 229 * Several scoring functions are implemented and can be changed by calling 230 * {@link CeParameters#setScoringStrategy(Integer) setScoringStrategy()} 231 * on {@link CeParameters parameter} object this CECalculator was created with. 232 * <p> 233 * Scoring Strategies:<dl> 234 * <dt>DEFAULT_SCORING_STRATEGY</dt> 235 * <dd>Strategy of the original CE publication; CA-CA distance</dd> 236 * 237 * <dt>SIDE_CHAIN_SCORING</dt> 238 * <dd>CB-CB distance. This performs better for sheets and helices than CA.</dd> 239 * 240 * <dt>SIDE_CHAIN_ANGLE_SCORING</dt> 241 * <dd>Use the dot product (eg the cosine) of the two CA-CB vectors.</dd> 242 * 243 * <dt>CA_AND_SIDE_CHAIN_ANGLE_SCORING</dt> 244 * <dd>Equivalent to DEFAULT_SCORING_STRATEGY + SIDE_CHAIN_ANGLE_SCORING</dd> 245 * </dl> 246 * 247 * <dt>SEQUENCE_CONSERVATION</dt> 248 * <dd>A mix between the DEFAULT_SCORING_STRATEGY and a scoring function that favors the alignment of sequence conserved positions in the alignment</dd> 249 * </dl> 250 * 251 * 252 * 253 * @param ca1 The CA of the first residue 254 * @param ca2 The CA of the second residue 255 * @return The distance between the two fragments, according to the selected 256 * scoring strategy. Lower distances are better alignments. 257 * @throws StructureException 258 */ 259 private double getDistanceWithSidechain(Atom ca1, Atom ca2) throws StructureException { 260 261 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.CA_SCORING) { 262 263 return Calc.getDistance(ca1,ca2); 264 265 } 266 267 double dist; 268 Group g1 = ca1.getGroup(); 269 Atom cb1 = null; 270 if ( g1.hasAtom(StructureTools.CB_ATOM_NAME)) { 271 cb1 = g1.getAtom(StructureTools.CB_ATOM_NAME); 272 } 273 // 274 Group g2 = ca2.getGroup(); 275 Atom cb2 = null; 276 if ( g2.hasAtom(StructureTools.CB_ATOM_NAME)) { 277 cb2 = g2.getAtom(StructureTools.CB_ATOM_NAME); 278 } 279 280 281 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SIDE_CHAIN_SCORING) { 282 283 284 // here we are using side chain orientation for scoring... 285 286 287 // score type 1 consider side chain distances 288 if ( cb1 != null && cb2 != null) { 289 // CB distance 290 dist = Calc.getDistance(cb1,cb2); 291 //dist = dist / 2.; 292 } else { 293 dist = Calc.getDistance(ca1,ca2); 294 } 295 296 return dist; 297 } 298 299 else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SIDE_CHAIN_ANGLE_SCORING){ 300 301 // score type 2 add angle info 302 303 304 if ( cb1 != null && cb2 != null) { 305 // If the CA were overlaid, what is the distance between the CB? 306 // Recall c^2 = a^2 + b^2 -2ab*cos(theta), so this is a function of angle 307 Atom c1 = Calc.subtract(cb1, ca1); 308 Atom c2 = Calc.subtract(cb2, ca2); 309 Atom newA = Calc.subtract(c2, c1); 310 dist = Calc.amount(newA); 311 } else { 312 //dist += Calc.getDistance(ca1,ca2); 313 dist = 0; 314 } 315 316 return dist; 317 318 } 319 else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.CA_AND_SIDE_CHAIN_ANGLE_SCORING){ 320 321 // score type 3 322 // CA distance + cos(angle) 323 dist = 0; 324 if ( cb1 != null && cb2 != null) { 325 Atom cacb1 = Calc.subtract(cb1, ca1); 326 Atom cacb2 = Calc.subtract(cb2, ca2); 327 Atom newA = Calc.subtract(cacb2, cacb1); 328 //System.out.format("CACB 1: %s\nCACB 2: %s\ndiff: %s\nd: %f\n",cacb1.toString(),cacb2.toString(),newA.toString(),Calc.amount(newA)); 329 dist += Calc.amount(newA); 330 } 331 dist += Calc.getDistance(ca1,ca2); 332 333 return dist; 334 335 } else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 336 if ( cb1 != null && cb2 != null) { 337 // CB distance 338 dist = Calc.getDistance(cb1,cb2); 339 //dist = dist / 2.; 340 } else { 341 dist = Calc.getDistance(ca1,ca2); 342 } 343 return dist; 344 345 346 } 347 else { 348 // unsupported scoring scheme 349 return Calc.getDistance(ca1,ca2); 350 } 351 } 352 353 /** build up intramolecular distance matrix dist1 & dist2 354 * 355 * @param ca 356 * @param nse 357 * @return 358 * @throws StructureException 359 */ 360 private double[][] initIntraDistmatrix(Atom[] ca, int nse) throws StructureException 361 { 362 363 364 double[][] intraDist = new double[nse][nse]; 365 366 // 367 for(int ise1=0; ise1<nse; ise1++) { 368 369 for(int ise2=0; ise2<nse; ise2++) { 370 intraDist[ise1][ise2] = getDistanceWithSidechain(ca[ise1], ca[ise2]); 371 372 } 373 } 374 return intraDist; 375 } 376 377 378 public double[][] initSumOfDistances(int nse1, int nse2, int winSize, int winSizeComb1, Atom[] ca1, Atom[] ca2) { 379 380 double d; 381 382 double[][] mat = new double[nse1][nse2]; 383 384 // init the initial mat[] array. 385 // at this stage mat contains the sum of the distances of fragments of the matrices dist1, dist 386 for(int ise1=0; ise1<nse1; ise1++) { 387 for(int ise2=0; ise2<nse2; ise2++) { 388 389 mat[ise1][ise2]=-1.0; 390 391 if(ise1>nse1-winSize || ise2>nse2-winSize) continue; 392 393 d=0.0; 394 // this sums up over the distances of the fragments 395 for(int is1=0; is1<winSize-2; is1++) 396 for(int is2=is1+2; is2<winSize; is2++) { 397 //System.out.println("pos1 :" + (ise1+is1) + " " + (ise1+is2) + " " + (ise2+is1) + " " + (ise2+is2)); 398 // is this abs or floor? check! 399 d+=Math.abs(dist1[ise1+is1][ise1+is2]-dist2[ise2+is1][ise2+is2]); 400 } 401 mat[ise1][ise2]=d/winSizeComb1; 402 403 //System.out.println("mat ["+ise1+"]["+ise2+"]="+mat[ise1][ise2]); 404 } 405 406 } 407 408 // verified: mat[][] probably ok. 409 410 return mat; 411 } 412 413 414 415 416 417 418 @SuppressWarnings("unused") 419 public void traceFragmentMatrix( AFPChain afpChain, 420 Atom[] ca1, Atom[] ca2) { 421 422 double rmsdThr = params.getRmsdThr(); 423 424 425 double oldBestTraceScore=10000.0; 426 bestTraceScore = 100.0; 427 nBestTrace=0; 428 int nBestTrace0 = 0; 429 int winSize = params.getWinSize(); 430 int winSizeComb1=(winSize-1)*(winSize-2)/2; 431 boolean distAll = false; 432 433 int winSizeComb2=distAll?winSize*winSize:winSize; 434 double rmsdThrJoin = params.getRmsdThrJoin(); 435 436 double z0; 437 438 //double bestTraceZScore=-1.0; 439 440 int nse1 = ca1.length; 441 int nse2 = ca2.length; 442 443 //System.out.println("nse1 :" +nse1 + " nse2: " + nse2); 444 445 int traceMaxSize=nse1<nse2?nse1:nse2; 446 447 bestTrace1 = new int [traceMaxSize]; 448 bestTrace2 = new int [traceMaxSize]; 449 trace1 = new int [traceMaxSize]; 450 trace2 = new int [traceMaxSize]; 451 452 int[] traceIndex = new int [traceMaxSize]; 453 int[] traceIterLevel = new int [traceMaxSize]; 454 455 int ise11; 456 int ise12; 457 int ise21; 458 int ise22; 459 460 int ise1; 461 int ise2; 462 463 int gapMax=params.getMaxGapSize(); 464 465 int iterDepth; 466 if ( gapMax > 0){ 467 iterDepth =gapMax*2+1; 468 } else { 469 iterDepth = traceMaxSize; 470 } 471 double[][] traceScore = new double[traceMaxSize][iterDepth]; 472 473 nTraces =0; 474 long tracesLimit=(long)5e7; 475 double score =-1; 476 double score0 = -1; 477 double score1 = -1 ; 478 double score2 = -1; 479 480 int mse1; 481 int mse2; 482 int jgap; 483 int jdir; 484 int jse1=0; 485 int jse2=0; 486 487 int bestTracesMax=30; 488 bestTraces1 = new int[bestTracesMax][traceMaxSize]; 489 bestTraces2 = new int[bestTracesMax][ traceMaxSize]; 490 bestTracesN=new int [bestTracesMax]; 491 bestTracesScores = new double [bestTracesMax]; 492 for(int it=0; it<bestTracesMax; it++) { 493 bestTracesN[it]=0; 494 bestTracesScores[it]=100; 495 } 496 497 nBestTraces=0; 498 int newBestTrace=0; 499 500 double traceTotalScore=0; 501 double traceScoreMax =0; 502 double userRMSDMax = params.getMaxOptRMSD(); 503 int kse1; 504 int kse2; 505 506 iterLoop: 507 for(int iter=0; iter<nIter; iter++) { 508 509 if(iter>2) { 510 if(oldBestTraceScore<=bestTraceScore) break; 511 } 512 oldBestTraceScore=bestTraceScore; 513 514 if(iter==1) { 515 z0=zStrAlign(winSize, nBestTrace, bestTraceScore, 516 bestTrace1[nBestTrace]+winSize-bestTrace1[0]+ 517 bestTrace2[nBestTrace]+winSize-bestTrace2[0]- 518 nBestTrace*2*winSize); 519 if(z0<zThr) break; 520 nBestTrace0=nBestTrace; 521 nBestTrace=0; 522 bestTraceScore=100.0; 523 524 nTraces=0; 525 } 526 527 528 if(iter==0) { 529 ise11=0; ise12=nse1; 530 ise21=0; ise22=nse2; 531 532 } 533 else { 534 if(iter==1) { 535 ise11=bestTrace1[0]; ise12=bestTrace1[0]+1; 536 ise21=bestTrace2[0]; ise22=bestTrace2[0]+1; 537 } 538 else { 539 ise11=bestTrace1[0]-1; ise12=bestTrace1[0]+2; 540 ise21=bestTrace2[0]-1; ise22=bestTrace2[0]+2; 541 } 542 if(ise11<0) ise11=0; 543 if(ise12>nse1) ise12=nse1; 544 if(ise21<0) ise21=0; 545 if(ise22>nse2) ise22=nse2; 546 } 547 548 //System.out.println("ise1Loop: " + ise11 + " " + ise12 + " " + ise21 + " " + ise22); 549 ise1Loop: 550 for(int ise1_=ise11; ise1_<ise12; ise1_++) { 551 ise2Loop: 552 for(int ise2_=ise21; ise2_<ise22; ise2_++) { 553 554 ise1=ise1_; 555 ise2=ise2_; 556 if(iter>1 && ise1==ise11+1 && ise2==ise21+1) continue ise2Loop; 557 558 //if(ise2==ise21) System.out.println(String.format("(%d, %d)",ise1, nTraces)); 559 560 561 if(iter==0 && (ise1>nse1-winSize*(nBestTrace-1) || 562 ise2>nse2-winSize*(nBestTrace-1))) continue ise2Loop; 563 564 if(mat[ise1][ise2]<0.0) continue ise2Loop; 565 if(mat[ise1][ise2]>rmsdThr) continue ise2Loop; 566 if (mat[ise1][ise2]>userRMSDMax) continue ise2Loop; 567 nTrace=0; 568 trace1[nTrace]=ise1; 569 trace2[nTrace]=ise2; 570 traceIndex[nTrace]=0; 571 traceIterLevel[nTrace]=0; 572 573 score0=mat[ise1][ise2]; 574 575 576 nTrace++; 577 boolean isTraceUp=true; 578 int traceIndex_=0; 579 580 traceLoop: 581 while(nTrace>0) { 582 583 kse1=trace1[nTrace-1]+winSize; 584 kse2=trace2[nTrace-1]+winSize; 585 586 //System.out.println("isTraceUp " + isTraceUp + " " + nTrace + " " + kse1 + " " + kse2); 587 588 while(true) { 589 if(kse1>nse1-winSize-1) break; 590 if(kse2>nse2-winSize-1) break; 591 if(mat[kse1][kse2]>=0.0) break; 592 kse1++; 593 kse2++; 594 } 595 596 597 traceIndex_=-1; 598 599 if(isTraceUp) { 600 601 int nBestExtTrace=nTrace; 602 double bestExtScore=100.0; 603 604 605 // extension of the alignment path 606 // condition 4, 5 607 itLoop: 608 for(int it=0; it<iterDepth; it++) { 609 610 jgap=(it+1)/2; 611 jdir=(it+1)%2; 612 613 if(jdir==0) { 614 mse1=kse1+jgap; 615 mse2=kse2; 616 } 617 else { 618 mse1=kse1; 619 mse2=kse2+jgap; 620 } 621 622 if(mse1>nse1-winSize-1) continue itLoop; 623 if(mse2>nse2-winSize-1) continue itLoop; 624 625 if(mat[mse1][mse2]<0.0) continue itLoop; 626 if(mat[mse1][mse2]>rmsdThr) continue itLoop; 627 if(mat[mse1][mse2]>userRMSDMax) continue itLoop; 628 629 nTraces++; 630 if(nTraces>tracesLimit) { 631 632 return; 633 } 634 635 score=0.0; 636 637 // if(!distAll) { 638 //System.out.println("getting score " + mse1 + " " + mse2 + " " + winSize + " " + jgap + " " + jdir + " " + it + " " + kse1 + " " + kse2); 639 score = getScoreFromDistanceMatrices(mse1,mse2,winSize); 640 //System.out.println("got score: " + score); 641 score1=score/(nTrace*winSize); 642 643 // } else { 644 // // all dist 645 // for(int itrace=0; itrace<nTrace; itrace++) { 646 // for(int is1=0; is1<winSize; is1++) 647 // for(int is2=0; is2<winSize; is2++) 648 // score+=Math.abs(dist1[trace1[itrace]+is1][mse1+is2]- 649 // dist2[trace2[itrace]+is1][mse2+is2]); 650 // } 651 // score1=score/(nTrace*winSize*winSize); 652 // } 653 654 655 //System.out.println("up: " + nTrace + " " + score + " " + score0 + " " + score1 + " " + winSize + " " + traceIndex_ + " " + it + " "); 656 if(score1>rmsdThrJoin) 657 continue itLoop; 658 if(score1>userRMSDMax) 659 continue itLoop; 660 score2=score1; 661 662 // this just got checked, no need to check again.. 663 //if(score2>rmsdThrJoin) 664 // continue itLoop; 665 666 if(nTrace>nBestExtTrace || (nTrace==nBestExtTrace && 667 score2<bestExtScore)) { 668 //System.out.println("setting traceindex to " + it + " " + score2); 669 bestExtScore=score2; 670 nBestExtTrace=nTrace; 671 traceIndex_=it; 672 traceScore[nTrace-1][traceIndex_]=score1; 673 } 674 675 } 676 } 677 678 if(traceIndex_!=-1) { 679 jgap=(traceIndex_+1)/2; 680 jdir=(traceIndex_+1)%2; 681 if(jdir==0) { 682 jse1=kse1+jgap; 683 jse2=kse2; 684 } 685 else { 686 jse1=kse1; 687 jse2=kse2+jgap; 688 } 689 690 if(iter==0){ 691 692 score1=(traceScore[nTrace-1][traceIndex_]*winSizeComb2*nTrace+ 693 mat[jse1][jse2]*winSizeComb1)/(winSizeComb2*nTrace+ 694 winSizeComb1); 695 696 score2 = getScore2(jse1, jse2, traceScore, traceIndex_, traceIndex, winSizeComb1, winSizeComb2, score0, score1); 697 698 if(score2>rmsdThrJoin) 699 traceIndex_=-1; 700 else if ( score2 > userRMSDMax) 701 traceIndex_=-1; 702 else { 703 traceScore[nTrace-1][traceIndex_]=score2; 704 705 traceTotalScore=score2; 706 } 707 708 } 709 else { 710 if(traceScoreMax>rmsdThrJoin && nBestTrace>=nBestTrace0) 711 traceIndex_=-1; 712 traceTotalScore=traceScoreMax; 713 } 714 } 715 716 //System.out.println("middle: " + nTrace + " " + score + " " + score0 + " " + score1 + " " + score2 + " " + traceIndex_); 717 718 if(traceIndex_==-1) { 719 //System.out.println("continue traceLoop " + nTrace); 720 //if(iterLevel==1) break; 721 nTrace--; 722 isTraceUp=false; 723 continue traceLoop; 724 } 725 else { 726 traceIterLevel[nTrace-1]++; 727 trace1[nTrace]=jse1; 728 trace2[nTrace]=jse2; 729 traceIndex[nTrace]=traceIndex_; 730 traceIterLevel[nTrace]=0; 731 nTrace++; 732 isTraceUp=true; 733 734 if(nTrace>nBestTrace || 735 (nTrace==nBestTrace && 736 bestTraceScore>traceTotalScore)) { 737 738 for(int itrace=0; itrace<nTrace; itrace++) { 739 bestTrace1[itrace]=trace1[itrace]; 740 bestTrace2[itrace]=trace2[itrace]; 741 } 742 bestTraceScore=traceTotalScore; 743 nBestTrace=nTrace; 744 } 745 746 if(iter==0) { 747 //System.out.println("doing iter0 " + newBestTrace + " " + traceTotalScore + " " + bestTracesMax); 748 newBestTrace = doIter0(newBestTrace,traceTotalScore, bestTracesMax); 749 750 751 } 752 } 753 } 754 } 755 } 756 757 if ( isPrint) { 758 System.out.println("fragment length: " + params.getWinSize()); 759 System.out.println("ntraces : " + nTraces ); 760 } 761 762 763 764 } 765 766 // try { 767 // Matrix m2 = new Matrix(traceScore).copy(); 768 // JPanel panel = GuiWrapper.getScaleableMatrixPanel(m2); 769 // JFrame frame = new JFrame(); 770 // frame.addWindowListener(new WindowAdapter(){ 771 // public void windowClosing(WindowEvent e){ 772 // JFrame f = (JFrame) e.getSource(); 773 // f.setVisible(false); 774 // f.dispose(); 775 // } 776 // }); 777 // 778 // 779 // frame.getContentPane().add(panel); 780 // 781 // frame.pack(); 782 // frame.setVisible(true); 783 // } catch (Exception e) { 784 // e.printStackTrace(); 785 // } 786 787 788 if ( params.isShowAFPRanges()){ 789 System.out.println("fragment length: " + params.getWinSize()); 790 System.out.println("ntraces : " + nTraces ); 791 792 } 793 794 } 795 796 protected double getScore2(int jse1, int jse2, double[][] traceScore, int traceIndex_,int[] traceIndex,int winSizeComb1, int winSizeComb2, double score0, double score1 ) { 797 798 799 800 /*double score2= 801 ((nTrace>1?traceScore[nTrace-2][traceIndex[nTrace-1]]:score0) 802 *a[nTrace-1]+score1*(a[nTrace]-a[nTrace-1]))/a[nTrace]; 803 */ 804 double val = 0; 805 if ( nTrace>1) 806 val =traceScore[nTrace-2][traceIndex[nTrace-1]]; 807 else 808 val = score0; 809 810 double score2 = (val * traceIndexContainer[nTrace-1]+score1*(traceIndexContainer[nTrace]-traceIndexContainer[nTrace-1]))/traceIndexContainer[nTrace]; 811 812 //System.out.println("check: score0 " + score0 + " score 1 " + score1 + " sc2: " + score2 + " val: " + val + " nTrace:" + nTrace+ " " + traceIndexContainer[nTrace-1] + " " + traceIndexContainer[nTrace-1] + " " + traceIndexContainer[nTrace] ); 813 814 return score2; 815 816 817 } 818 819 protected int doIter0(int newBestTrace, double traceTotalScore, double bestTracesMax) { 820 821 822 // only do the whole method if this criteria is fulfilled... 823 if(nTrace>bestTracesN[newBestTrace] || 824 (nTrace==bestTracesN[newBestTrace] && 825 bestTracesScores[newBestTrace]>traceTotalScore)) { 826 827 828 for(int itrace=0; itrace<nTrace; itrace++) { 829 bestTraces1[newBestTrace][itrace]=trace1[itrace]; 830 bestTraces2[newBestTrace][itrace]=trace2[itrace]; 831 bestTracesN[newBestTrace]=nTrace; 832 bestTracesScores[newBestTrace]=traceTotalScore; 833 //System.out.println("bestTracesScrores ["+newBestTrace+"]=" +traceTotalScore); 834 } 835 836 if(nTrace>nBestTrace) nBestTrace=nTrace; 837 838 if(nBestTraces<bestTracesMax) { 839 nBestTraces++; 840 newBestTrace++; 841 } 842 843 if(nBestTraces==bestTracesMax) { 844 //System.out.println("nBestTraces == bestTracesmax " + nBestTraces); 845 newBestTrace=0; 846 double scoreTmp=bestTracesScores[0]; 847 int nTraceTmp=bestTracesN[0]; 848 for(int ir=1; ir<nBestTraces; ir++) { 849 if(bestTracesN[ir]<nTraceTmp || 850 (bestTracesN[ir]==nTraceTmp && 851 scoreTmp<bestTracesScores[ir])) { 852 nTraceTmp=bestTracesN[ir]; 853 scoreTmp=bestTracesScores[ir]; 854 newBestTrace=ir; 855 //System.out.println("setting new bestTracesScore to " + ir + " " + scoreTmp); 856 } 857 } 858 } 859 } 860 861 //System.out.println("iter0 : " + newBestTrace + " " + bestTracesN[newBestTrace] + " " + traceTotalScore + " " + nTrace); 862 863 864 865 /* 866z=zStrAlign(winSize, nTrace, traceTotalScore, 867trace1[nTrace-1]-trace1[0]+trace2[nTrace-1]-trace2[0]- 8682*(nTrace-1)*winSize); 869if(z>bestTraceZScore) { 870for(int itrace=0; itrace<nTrace; itrace++) { 871bestTrace1[itrace]=trace1[itrace]; 872bestTrace2[itrace]=trace2[itrace]; 873} 874bestTraceZScore=z; 875bestTraceScore=*(traceScore[nTrace-2]+traceIndex_); 876nBestTrace=nTrace; 877} 878 */ 879 return newBestTrace; 880 881 } 882 883 884 protected double getScoreFromDistanceMatrices(int mse1, int mse2,int winSize) { 885 886 double score = 0; 887 // (winSize) "best" dist 888 889 // reduce sign. values to C code.. 6 digits.. 890 891 for(int itrace=0; itrace<nTrace; itrace++) { 892 score+= Math.abs(dist1[trace1[itrace]][mse1]- 893 dist2[trace2[itrace]][mse2]); 894 895 score+= Math.abs(dist1[trace1[itrace]+winSize-1] 896 [mse1+winSize-1]- 897 dist2[trace2[itrace]+winSize-1][mse2+winSize-1]); 898 899 for(int id=1; id<winSize-1; id++) 900 score+= Math.abs(dist1[trace1[itrace]+id][mse1+winSize-1-id]- 901 dist2[trace2[itrace]+id][mse2+winSize-1-id]); 902 903 } 904 905 return score; 906 } 907 908 public void nextStep( AFPChain afpChain, 909 Atom[] ca1, Atom[] ca2) throws StructureException{ 910 911 912 if(nBestTrace>0) { 913 checkBestTraces(afpChain,ca1,ca2); 914 } else { 915 noBestTrace(); 916 } 917 918 convertAfpChain(afpChain, ca1, ca2); 919 AFPAlignmentDisplay.getAlign(afpChain, ca1, ca2); 920 } 921 922 923 924 // this part is modified from the original CeCalculator 925 @SuppressWarnings("unused") 926 private void checkBestTraces( AFPChain afpChain, 927 Atom[] ca1, Atom[] ca2) throws StructureException{ 928 929 z=0.0; 930 931 932 int nGaps; 933 int winSize = params.getWinSize(); 934 int nse1 = ca1.length; 935 int nse2 = ca2.length; 936 int traceMaxSize=nse1<nse2?nse1:nse2; 937 int idir; 938 939 940 align_se1=new int [nse1+nse2]; 941 align_se2=new int [nse1+nse2]; 942 alignmentPositionOrLength = 0; 943 944 // we now support alignment using any particular atoms.. 945 946 Atom[] strBuf1 = new Atom[traceMaxSize]; 947 Atom[] strBuf2 = new Atom[traceMaxSize]; 948 949 double rmsdNew; 950 951 952 953 // removing some loops that are run in orig CE 954 // and which did not do anything 955 if ( debug ){ 956 checkPrintRmsdNew(traceMaxSize, winSize, ca1, ca2); 957 } 958 959 double rmsd=100.0; 960 961 int iBestTrace=0; 962 963 for(int ir=0; ir<nBestTraces; ir++) { 964 if(bestTracesN[ir]!=nBestTrace) continue; 965 966 rmsdNew = getRMSDForBestTrace(ir, strBuf1, strBuf2, bestTracesN,bestTraces1, bestTrace2,winSize,ca1,ca2); 967 if ( isPrint) 968 System.out.println(String.format("%d %d %d %.2f", ir, bestTracesN[ir], nBestTrace, rmsdNew)); 969 970 if(rmsd>rmsdNew) { 971 iBestTrace=ir; 972 rmsd=rmsdNew; 973 //System.out.println(" iBestTrace:" + iBestTrace + " new rmsd = " + rmsd); 974 } 975 } 976 for(int it=0; it<bestTracesN[iBestTrace]; it++) { 977 bestTrace1[it]=bestTraces1[iBestTrace][it]; 978 bestTrace2[it]=bestTraces2[iBestTrace][it]; 979 } 980 981 //System.out.println("iBestTrace: "+iBestTrace+" = bestTracesScores " + bestTracesScores[iBestTrace]); 982 983 nBestTrace=bestTracesN[iBestTrace]; 984 985 bestTraceScore=bestTracesScores[iBestTrace]; 986 987 988 //printf("\nOptimizing gaps...\n"); 989 990 int[] traceLen=new int [traceMaxSize]; 991 bestTraceLen=new int [traceMaxSize]; 992 993 994 int strLen=0; 995 996 int jt; 997 strLen=0; 998 nGaps=0; 999 nTrace=nBestTrace; 1000 1001 for(jt=0; jt<nBestTrace; jt++) { 1002 trace1[jt]=bestTrace1[jt]; 1003 trace2[jt]=bestTrace2[jt]; 1004 traceLen[jt]=winSize; 1005 1006 if(jt<nBestTrace-1) { 1007 nGaps+=bestTrace1[jt+1]-bestTrace1[jt]-winSize+ 1008 bestTrace2[jt+1]-bestTrace2[jt]-winSize; 1009 } 1010 } 1011 nBestTrace=0; 1012 for(int it=0; it<nTrace; ) { 1013 int cSize=traceLen[it]; 1014 for(jt=it+1; jt<nTrace; jt++) { 1015 if(trace1[jt]-trace1[jt-1]-traceLen[jt-1]!=0 || 1016 trace2[jt]-trace2[jt-1]-traceLen[jt-1]!=0) break; 1017 cSize+=traceLen[jt]; 1018 } 1019 bestTrace1[nBestTrace]=trace1[it]; 1020 bestTrace2[nBestTrace]=trace2[it]; 1021 bestTraceLen[nBestTrace]=cSize; 1022 nBestTrace++; 1023 strLen+=cSize; 1024 it=jt; 1025 } 1026 1027 1028 int is=0; 1029 for(jt=0; jt<nBestTrace; jt++) { 1030 for(int i=0; i<bestTraceLen[jt]; i++) { 1031 setStrBuf(strBuf1,is+i,ca1,bestTrace1[jt]+i ); 1032 setStrBuf(strBuf2,is+i,ca2,bestTrace2[jt]+i); 1033 } 1034 is+=bestTraceLen[jt]; 1035 } 1036 //sup_str(strBuf1, strBuf2, strLen, d_); 1037 1038 rmsd=calc_rmsd(strBuf1, strBuf2, strLen,true,showAlignmentSteps); 1039 1040 if ( isPrint) 1041 System.out.println("got first rmsd: " + rmsd); 1042 boolean isCopied=false; 1043 1044 outer_loop: 1045 for(int it=1; it<nBestTrace; it++) { 1046 1047 /* not needed... 1048 int igap; 1049 if(bestTrace1[it]-bestTrace1[it-1]-bestTraceLen[it-1]>0) igap=0; 1050 if(bestTrace2[it]-bestTrace2[it-1]-bestTraceLen[it-1]>0) igap=1; 1051 */ 1052 1053 1054 boolean wasBest=false; 1055 main_loop: 1056 for(idir=-1; idir<=1; idir+=2) { 1057 if(wasBest) break; 1058 1059 inner_loop: 1060 for(int idep=1; idep<=winSize/2; idep++) { 1061 1062 if(!isCopied) 1063 for(jt=0; jt<nBestTrace; jt++) { 1064 trace1[jt]=bestTrace1[jt]; 1065 trace2[jt]=bestTrace2[jt]; 1066 traceLen[jt]=bestTraceLen[jt]; 1067 } 1068 isCopied=false; 1069 1070 traceLen[it-1]+=idir; 1071 traceLen[it]-=idir; 1072 trace1[it]+=idir; 1073 trace2[it]+=idir; 1074 1075 is=0; 1076 for(jt=0; jt<nBestTrace; jt++) { 1077 for(int i=0; i<traceLen[jt]; i++) { 1078 if(ca1[trace1[jt]+i].getX()>1e10 || ca2[trace2[jt]+i].getX()>1e10) 1079 continue main_loop; 1080 strBuf1[is+i]=ca1[trace1[jt]+i]; 1081 strBuf2[is+i]=ca2[trace2[jt]+i]; 1082 } 1083 is+=traceLen[jt]; 1084 } 1085 //sup_str(strBuf1, strBuf2, strLen, d_); 1086 rmsdNew=calc_rmsd(strBuf1, strBuf2, strLen, true, false); 1087 //System.out.println(String.format("step %d %d %d %.2f old: %.2f", it, idir, idep, rmsdNew, rmsd)); 1088 if(rmsdNew<rmsd) { 1089 1090 for(jt=0; jt<nBestTrace; jt++) { 1091 bestTrace1[jt] = trace1[jt]; 1092 bestTrace2[jt] = trace2[jt]; 1093 bestTraceLen[jt]= traceLen[jt]; 1094 } 1095 isCopied=true; 1096 wasBest=true; 1097 rmsd=rmsdNew; 1098 continue inner_loop; 1099 } 1100 // AP 1101 //bad_ca: break; 1102 continue main_loop; 1103 } 1104 } 1105 } 1106 //if ( showAlignmentSteps) 1107 rmsdNew=calc_rmsd(strBuf1, strBuf2, strLen,true, showAlignmentSteps); 1108 if ( isPrint) 1109 System.out.println("rmsdNew: " + rmsdNew + " rmsd " + rmsd); 1110 afpChain.setTotalRmsdIni(rmsdNew); 1111 afpChain.setTotalLenIni(strBuf1.length); 1112 1113 1114 nAtom = strLen; 1115 1116 System.out.println("zStrAlign: " + winSize + " strLen " + strLen + " s/w " + (strLen/winSize) + " " + bestTraceScore + " " + nGaps); 1117 z=zStrAlign(winSize, strLen/winSize, bestTraceScore, nGaps); 1118 1119 if(params.isShowAFPRanges()) { 1120 System.out.println("win size: " + winSize + " strLen/winSize: " + strLen/winSize + " best trace score: " + String.format("%.2f",bestTraceScore) + " nr gaps: " + nGaps + " nr residues: " + nAtom); 1121 1122 System.out.println(String.format("size=%d rmsd=%.2f z=%.1f gaps=%d(%.1f%%) comb=%d", 1123 nAtom, rmsd, z, nGaps, nGaps*100.0/nAtom, 1124 nTraces)); 1125 1126 System.out.println("Best Trace, before optimization"); 1127 for(int k=0; k<nBestTrace; k++) 1128 System.out.println(String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, 1129 bestTraceLen[k])); 1130 1131 } 1132 1133 // start to convert CE internal datastructure to generic AFPChain one... 1134 List<AFP> afpSet = new ArrayList<AFP>(); 1135 for (int afp=0;afp<nBestTrace;afp++){ 1136 // fill in data from nBestTrace into AFP 1137 1138 AFP afpI = new AFP(); 1139 1140 afpI.setFragLen(bestTraceLen[afp]); 1141 afpI.setP1(bestTrace1[afp]+1); 1142 afpI.setP2(bestTrace2[afp]+1); 1143 1144 afpSet.add( afpI); 1145 } 1146 1147 afpChain.setAfpSet(afpSet); 1148 1149 1150 //System.out.println("z:"+z + " zThr" + zThr+ " bestTraceScore " + bestTraceScore + " " + nGaps ); 1151 if(z>=zThr) { 1152 nGaps = optimizeSuperposition(afpChain,nse1, nse2, strLen, rmsd, ca1, ca2,nGaps,strBuf1,strBuf2); 1153 // if(isPrint) { 1154 // /* 1155 // FILE *f=fopen("homologies", "a"); 1156 // fprintf(f, "%s(%d) %s(%d) %3d %4.1f %4.1f %d(%d) ", 1157 // name1, nse1, name2, nse2, nAtom, rmsd, z, 1158 // nGaps, nGaps*100/nAtom); 1159 // for(int k=0; k<nBestTrace; k++) 1160 // fprintf(f, "(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, 1161 // bestTraceLen[k]); 1162 // fprintf(f, "\n"); 1163 // fclose(f); 1164 // */ 1165 // } 1166 } 1167 else { 1168 int lali_x_ = 0; 1169 for(int k=0; k<nBestTrace; k++) { 1170 for(int l=0; l<bestTraceLen[k]; l++) { 1171 align_se1[alignmentPositionOrLength+l]=bestTrace1[k]+l; 1172 align_se2[alignmentPositionOrLength+l]=bestTrace2[k]+l; 1173 } 1174 lali_x_+=bestTraceLen[k]; 1175 if(k<nBestTrace-1) { 1176 if(bestTrace1[k]+bestTraceLen[k]!=bestTrace1[k+1]) 1177 for(int l=bestTrace1[k]+bestTraceLen[k]; l<bestTrace1[k+1]; l++) { 1178 align_se1[alignmentPositionOrLength]=l; 1179 align_se2[alignmentPositionOrLength]=-1; 1180 alignmentPositionOrLength++; 1181 } 1182 if(bestTrace2[k]+bestTraceLen[k]!=bestTrace2[k+1]) 1183 for(int l=bestTrace2[k]+bestTraceLen[k]; l<bestTrace2[k+1]; l++) { 1184 align_se1[alignmentPositionOrLength]=-1; 1185 align_se2[alignmentPositionOrLength]=l; 1186 alignmentPositionOrLength++; 1187 } 1188 } 1189 } 1190 nAtom=lali_x_; 1191 } 1192 1193 timeEnd = System.currentTimeMillis(); 1194 long time_q=(timeEnd-timeStart); 1195 1196 double gapsP = ( nGaps*100.0/nAtom) ; 1197 if(isPrint) { 1198 String msg = String.format("Alignment length = %d Rmsd = %.2fA Z-Score = %.1f Gaps = %d(%.1f%%)",nAtom,rmsd,z,nGaps, gapsP); 1199 System.out.println(msg + " CPU = " + time_q); 1200 } 1201 1202 // if ( params.isShowAFPRanges()){ 1203 1204 // this is actually the final alignment... 1205 System.out.println("Best Trace: (index1,index2,len)"); 1206 for(int k=0; k<nBestTrace; k++) 1207 System.out.println( 1208 String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, bestTraceLen[k])); 1209 1210 1211 1212 // } 1213 1214 afpChain.setCalculationTime(time_q); 1215 afpChain.setGapLen(nGaps); 1216 1217 int[] optLen = new int[]{nAtom}; 1218 afpChain.setOptLen(optLen); 1219 afpChain.setOptLength(nAtom); 1220 afpChain.setAlnLength(alignmentPositionOrLength); 1221 1222 afpChain.setProbability(z); 1223 1224 1225 } 1226 1227 /** set the Atoms for a particular residue position. 1228 * Requires that atom.getParent returns the correct group! 1229 * take care during cloning of atoms. Best to use StructureTools.cloneCaAtoms(); 1230 * 1231 * @param strBuf 1232 * @param i 1233 * @param ca 1234 * @param j 1235 */ 1236 private void setStrBuf(Atom[] strBuf, int i, Atom[] ca, int j) { 1237 // TODO Auto-generated method stub 1238 //TODO 1239 Group parent = ca[j].getGroup(); 1240 int pos = 0; 1241 String atomName = ca[j].getName(); 1242 1243 Atom a = null; 1244 1245 a= parent.getAtom(atomName); 1246 if ( a != null){ 1247 strBuf[i]=a; 1248 } 1249 else { 1250 // probably a GLY and no CB was found... 1251 //e.printStackTrace(); 1252 } 1253 strBuf[i+pos] = a; 1254 pos++; 1255 1256 1257 1258 } 1259 1260 // TODO: consider all requested Atoms? 1261 private double getRMSDForBestTrace(int ir, Atom[] strBuf1, Atom[] strBuf2, 1262 int[] bestTracesN2, int[][] bestTraces12, int[] bestTrace22, 1263 int winSize,Atom[] ca1, Atom[] ca2 ) throws StructureException { 1264 int is=0; 1265 for(int jt=0; jt<bestTracesN[ir]; jt++) { 1266 for(int i=0; i<winSize; i++) { 1267 1268 setStrBuf(strBuf1, is+i, ca1, bestTraces1[ir][jt]+i); 1269 setStrBuf(strBuf2, is+i, ca2, bestTraces2[ir][jt]+i); 1270 } 1271 is+=winSize; 1272 } 1273 //sup_str(strBuf1, strBuf2, bestTracesN[ir]*winSize, d_); 1274 double rmsdNew=calc_rmsd(strBuf1, strBuf2, bestTracesN[ir]*winSize, true, false); 1275 return rmsdNew; 1276 1277 } 1278 1279 1280 1281 1282 1283 1284 /** calc initial RMSD for bestTrace1 in debug only 1285 * 1286 */ 1287 private void checkPrintRmsdNew(int traceMaxSize, int winSize, Atom[] ca1, Atom[] ca2) throws StructureException{ 1288 1289 int is = 0; 1290 // calc initial RMSD for bestTrace1 1291 Atom[] strBuf1 = new Atom[traceMaxSize]; 1292 Atom[] strBuf2 = new Atom[traceMaxSize]; 1293 for(int jt=0; jt<nBestTrace; jt++) { 1294 for(int i=0; i<winSize; i++) { 1295 setStrBuf(strBuf1, is+i, ca1, bestTrace1[jt]+i ); 1296 setStrBuf(strBuf2, is+i, ca2, bestTrace2[jt]+i ); 1297 } 1298 is+=winSize; 1299 } 1300 1301 //sup_str(strBuf1, strBuf2, nBestTrace*winSize, d_); 1302 double rmsdNew=calc_rmsd(strBuf1, strBuf2, nBestTrace*winSize, true, false); 1303 //afpChain.setTotalRmsdIni(rmsdNew); 1304 1305 1306 if ( isPrint){ 1307 System.out.println("rmsdNew after trace: " +rmsdNew); 1308 1309 for(int k=0; k<nBestTrace; k++) 1310 System.out.println(String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1,8)); 1311 } 1312 1313 if ( isPrint){ 1314 System.out.println("best traces: " + nBestTraces); 1315 } 1316 1317 1318 } 1319 1320 1321 1322 1323 1324 1325 private static char getOneLetter(Group g){ 1326 1327 if (g==null) return StructureTools.UNKNOWN_GROUP_LABEL; 1328 1329 return StructureTools.get1LetterCode(g.getPDBName()); 1330 } 1331 1332 1333 1334 private int optimizeSuperposition(AFPChain afpChain, int nse1, int nse2, int strLen, double rmsd, Atom[] ca1, Atom[] ca2,int nGaps, 1335 Atom[] strBuf1, Atom[] strBuf2 ) throws StructureException { 1336 1337 //System.out.println("optimizing Superimposition..."); 1338 1339 //nAtom=strLen; 1340 // optimization on superposition 1341 Atom[] ca3=new Atom[nse2]; 1342 1343 double rmsdLen = 0.0; 1344 1345 // this flag tests if the RMSDLen has been assigned. 1346 // this is to enforce that the alignment ends up 1347 // smaller than 95% of the original alignment. 1348 // +/- 1349 boolean isRmsdLenAssigned=false; 1350 int nAtomPrev=-1; 1351 1352 double oRmsdThr = params.getORmsdThr(); 1353 1354 double distanceIncrement = params.getDistanceIncrement(); 1355 double maxUserRMSD = params.getMaxOptRMSD(); 1356 nAtom=0; 1357 int counter = -1; 1358 1359 int maxNrIterations = params.getMaxNrIterationsForOptimization(); 1360 1361 //double[][] mat = new double[nse1][nse2]; 1362 while((nAtom<strLen*0.95 || 1363 (isRmsdLenAssigned && rmsd<rmsdLen*1.1 && nAtomPrev!=nAtom)) && ( counter< maxNrIterations)) { 1364 1365 counter++; 1366 if ( debug) 1367 System.out.println("nAtom: " + nAtom + " " + nAtomPrev + " " + rmsdLen + " " + isRmsdLenAssigned + " strLen:" + strLen + " nse1,nse2:" + nse1 + " " + nse2); 1368 nAtomPrev=nAtom; 1369 oRmsdThr += distanceIncrement; 1370 1371 rot_mol(ca2, ca3, nse2, r,t); 1372 1373 for(int ise1=0; ise1<nse1; ise1++) { 1374 for(int ise2=0; ise2<nse2; ise2++) { 1375 1376 //mat[ise1][ise2]=-0.001; 1377 1378 // this needs to be a parameter... 1379 1380 1381 double dist = getDistanceWithSidechain(ca1[ise1], ca3[ise2]); 1382 mat[ise1][ise2] = oRmsdThr - dist; 1383 1384 //double distold = Calc.getDistance(ca1[ise1],ca3[ise2]); 1385 //double scoreOld = oRmsdThr - distold ; 1386 //mat[ise1][ise2] = scoreOld; 1387 //mat[ise1][ise2] = oRmsdThr - Calc.getDistance(ca1[ise1],ca3[ise2]); 1388 1389 //if ( counter == 0 && ise1 == ise2) { 1390 1391 // System.out.println("mat[" + ise1 + "][" + ise2 + "] " + mat[ise1][ise2] + " scoreOld:" + scoreOld + " oRmsdThr: " + oRmsdThr +" dist: " + dist + " distold:" + distold ); 1392 // } 1393 1394 1395 } 1396 } 1397 1398 mat = notifyMatrixListener(mat); 1399 1400 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 1401 mat = updateMatrixWithSequenceConservation(mat,ca1,ca2, params); 1402 } 1403 1404 double gapOpen = params.getGapOpen(); 1405 double gapExtension = params.getGapExtension(); 1406 1407 // by default we always do local alignment 1408 double score = dpAlign( nse1, nse2, gapOpen , gapExtension , GLOBAL_ALIGN1, GLOBAL_ALIGN2); 1409 1410 if (debug) { 1411 System.out.println("iter: "+ counter + " score:" + score + " " + " nAtomPrev: " + nAtomPrev + " nAtom:" + nAtom + " oRmsdThr: " + oRmsdThr); 1412 1413 for (int i=0 ; i<alignmentPositionOrLength ; i++){ 1414 if ( align_se2[i] == 172 || align_se2[i] == 173) { 1415 System.out.println("BREAK POINT IS ALIGNED!!!!"); 1416 System.out.println(align_se2[i-1] + " " + align_se2[i] + " " + align_se2[i+1]); 1417 } 1418 } 1419 } 1420 afpChain.setAlignScore(score); 1421 1422 1423 nAtom=0; nGaps=0; 1424 for(int ia=0; ia<alignmentPositionOrLength; ia++) { 1425 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1426 1427 strBuf1[nAtom]=ca1[align_se1[ia]]; 1428 strBuf2[nAtom]=ca2[align_se2[ia]]; 1429 1430 nAtom++; 1431 1432 } 1433 else { 1434 nGaps++; 1435 } 1436 } 1437 1438 for ( int i =0 ; i < strBuf1.length; i++){ 1439 if ( strBuf1[i] == null) 1440 break; 1441 System.out.print(strBuf1[i].getGroup().getChemComp().getOne_letter_code()); 1442 } 1443 System.out.println(); 1444 1445 if(nAtom<4) continue; 1446 1447 //sup_str(strBuf1, strBuf2, nAtom, _d); 1448 // here we don't store the rotation matrix for the user! 1449 rmsd= calc_rmsd(strBuf1, strBuf2, nAtom,false, false); 1450 if ( isPrint ) 1451 System.out.println("iter: " + counter + " nAtom " + nAtom + " rmsd: " + rmsd); 1452 //afpChain.setTotalRmsdOpt(rmsd); 1453 //System.out.println("rmsd: " + rmsd); 1454 1455 if(!(nAtom<strLen*0.95) && (!isRmsdLenAssigned)) { 1456 rmsdLen=rmsd; 1457 isRmsdLenAssigned=true; 1458 } 1459 //System.out.println(String.format("nAtom %d %d rmsd %.1f", nAtom, nAtomPrev, rmsd)); 1460 1461 1462 afpChain.setBlockRmsd(new double[]{rmsd}); 1463 afpChain.setOptRmsd(new double[]{rmsd}); 1464 afpChain.setTotalRmsdOpt(rmsd); 1465 afpChain.setChainRmsd(rmsd); 1466 1467 if ( rmsd >= maxUserRMSD) { 1468 break; 1469 } 1470 1471 } 1472 1473 1474 1475 //System.out.println("done optimizing"); 1476 /* 1477 nAtom=0; nGaps=0; 1478 for(int ia=0; ia<lcmp; ia++) 1479 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1480 if(ca1[align_se1[ia]].X<1e10 && ca2[align_se2[ia]].X<1e10) { 1481 strBuf1[nAtom]=ca1[align_se1[ia]]; 1482 strBuf2[nAtom]=ca2[align_se2[ia]]; 1483 nAtom++; 1484 } 1485 } 1486 else { 1487 nGaps++; 1488 } 1489 1490 sup_str(strBuf1, strBuf2, nAtom, _d); 1491 rmsd=calc_rmsd(strBuf1, strBuf2, nAtom, _d); 1492 */ 1493 nBestTrace=0; 1494 boolean newBestTrace=true; 1495 for(int ia=0; ia<alignmentPositionOrLength; ia++) { 1496 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1497 //System.out.println(" " +align_se1[ia] + " " + align_se2[ia]); 1498 1499 if(newBestTrace) { 1500 bestTrace1[nBestTrace]=align_se1[ia]; 1501 bestTrace2[nBestTrace]=align_se2[ia]; 1502 bestTraceLen[nBestTrace]=0; 1503 newBestTrace=false; 1504 nBestTrace++; 1505 } 1506 bestTraceLen[nBestTrace-1]++; 1507 1508 } 1509 else { 1510 newBestTrace=true; 1511 } 1512 } 1513 1514 return nGaps; 1515 1516 // end of optimization on superposition 1517 1518 } 1519 1520 /** Modifies an alignment matrix by favoring the alignment of similar and identical amino acids and penalizing the alignment of unrelated ones. 1521 * 1522 * @param max alignment matrix 1523 * @param ca1 Atoms for protein 1 1524 * @param ca2 Atoms for Protein 2 1525 * @param params alignment parameters 1526 * @return modified alignment matrix 1527 */ 1528 public static double[][] updateMatrixWithSequenceConservation(double[][] max, Atom[] ca1, Atom[] ca2, CeParameters params) { 1529 Matrix origM = new Matrix(max); 1530 1531 SubstitutionMatrix<AminoAcidCompound> substMatrix = 1532 params.getSubstitutionMatrix(); 1533 1534 int internalScale = 1; 1535 if ( substMatrix instanceof ScaledSubstitutionMatrix) { 1536 ScaledSubstitutionMatrix scaledMatrix = (ScaledSubstitutionMatrix) substMatrix; 1537 internalScale = scaledMatrix.getScale(); 1538 } 1539 1540 1541 AminoAcidCompoundSet set = AminoAcidCompoundSet.getAminoAcidCompoundSet(); 1542 1543 for (int i = 0 ; i < origM.getRowDimension() ; i++){ 1544 for ( int j =0; j < origM.getColumnDimension() ; j ++ ) { 1545 double val = origM.get(i,j); 1546 Atom a1 = ca1[i]; 1547 Atom a2 = ca2[j]; 1548 1549 AminoAcidCompound ac1 = 1550 set.getCompoundForString(a1.getGroup().getChemComp().getOne_letter_code()); 1551 AminoAcidCompound ac2 = 1552 set.getCompoundForString(a2.getGroup().getChemComp().getOne_letter_code()); 1553 1554 1555 if ( ac1 == null || ac2 == null) 1556 continue; 1557 1558 short aaScore = substMatrix.getValue(ac1,ac2); 1559 1560 double weightedScore = (aaScore / internalScale) * params.getSeqWeight(); 1561 1562 1563 val += weightedScore; 1564 origM.set(i,j,val); 1565 1566 } 1567 } 1568 max = origM.getArray(); 1569 1570 //SymmetryTools.showMatrix((Matrix)origM.clone(), "in optimizer " + loopCount ); 1571 //SymmetryTools.showMatrix(origM, "iteration matrix " + loopCount + " after"); 1572 1573 return max; 1574 } 1575 1576 private double[][] notifyMatrixListener(double[][] mat2) { 1577 for (MatrixListener li : matrixListeners) { 1578 mat2 = li.matrixInOptimizer(mat2); 1579 } 1580 return mat2; 1581 } 1582 1583 private boolean[][] notifyBreakFlagListener(boolean[][] brkFlag){ 1584 for (MatrixListener li : matrixListeners) { 1585 brkFlag = li.initializeBreakFlag(brkFlag); 1586 } 1587 return brkFlag; 1588 } 1589 1590 public void addMatrixListener(MatrixListener li){ 1591 matrixListeners.add(li); 1592 } 1593 1594 1595 private double dpAlign(int nSeq1, int nSeq2, double gapI, double gapE, 1596 boolean isGlobal1, boolean isGlobal2) { 1597 1598 1599 // isGlobal1,isGlobal2 are always false... 1600 // i.e. we do local alignments by default 1601 1602 int i, j, iStart, jStart, iMax, jMax, k; 1603 boolean hasGapExtensionPenalty=(gapE!=0.0?true:false); 1604 double sum_ret, sum_brk; 1605 1606 boolean[][] brk_flg=new boolean [nSeq1][nSeq2]; 1607 for(i=0; i<nSeq1; i++) brk_flg[i]=new boolean [nSeq2]; 1608 1609 brk_flg = notifyBreakFlagListener(brk_flg); 1610 1611 // ge = true here... 1612 /* 1613 for(i=0; i<nSeq1; i++) 1614 { 1615 printf("\n"); 1616 for(j=0; j<nSeq2; j++) 1617 { 1618 printf("%4d", (int)(*(mat[i]+j)*10)); 1619 } 1620 } 1621 printf("\n\n\n"); 1622 */ 1623 int[][] tracebackMatrix1 = new int[nSeq1][nSeq2]; 1624 int[][] tracebackMatrix2 = new int[nSeq1][nSeq2]; 1625 1626 // System.out.println("===== " + mat[0][0]); 1627 // for ( i = 39 ; i < 42 ;i ++){ 1628 // //System.out.print(" " + i + " "); 1629 // for ( j = 155 ; j < 157 ; j++) { 1630 // System.out.print(String.format("[%s %s %.2f] ",i,j,mat[i][j])); 1631 // } 1632 // System.out.println(); 1633 // } 1634 // System.out.println("----"); 1635 // 1636 // for ( i = 69 ; i < 72 ;i ++){ 1637 // //System.out.println(" " + i + " "); 1638 // for ( j = 170 ; j < 175 ; j++) { 1639 // System.out.print(String.format("[%s %s %.2f] ",i,j,mat[i][j])); 1640 // } 1641 // System.out.println(); 1642 // } 1643 1644 //double sum = 0; 1645 if(!hasGapExtensionPenalty) 1646 { 1647 for(i=nSeq1-1; i>=0; i--) 1648 for(j=nSeq2-1; j>=0; j--) 1649 { 1650 double sum ; 1651 brk_flg[i][j]=false; 1652 if(j<nSeq2-1 && i<nSeq1-1) 1653 { 1654 sum=mat[i+1][j+1]; 1655 } 1656 else 1657 { 1658 sum=0.0; 1659 if((isGlobal1 && i!=nSeq1-1) || (isGlobal2 && j!=nSeq2-1)) 1660 sum=-gapI; 1661 } 1662 if(j+1<nSeq2) 1663 for(k=i+2; k<nSeq1; k++) 1664 { 1665 if(mat[k][j+1]-gapI>sum) 1666 sum=mat[k][j+1]-gapI; 1667 } 1668 if(i+1<nSeq1) 1669 for(k=j+2; k<nSeq2; k++) 1670 { 1671 if(mat[i+1][k]-gapI>sum) 1672 sum=mat[i+1][k]-gapI; 1673 } 1674 sum+=mat[i][j]; 1675 sum_brk=(isGlobal1?-gapI:0.0)+(isGlobal2?-gapI:0.0); 1676 if(sum<sum_brk) 1677 { 1678 sum=sum_brk; 1679 brk_flg[i][j]=true; 1680 //System.out.println("break at: " + i + " " + j); 1681 } 1682 mat[i][j]=sum; 1683 } 1684 } 1685 else 1686 { 1687 for(i=nSeq1-1; i>=0; i--) 1688 for(j=nSeq2-1; j>=0; j--) 1689 { 1690 double maxSum ; 1691 brk_flg[i][j]=false; 1692 if(j<nSeq2-1 && i<nSeq1-1) 1693 { 1694 // any row/column which is not the last 1695 maxSum=mat[i+1][j+1]; 1696 tracebackMatrix1[i][j] = i+1; 1697 tracebackMatrix2[i][j] = j+1; 1698 } 1699 else 1700 { 1701 // sets the last row and column 1702 maxSum=0.0; 1703 if(isGlobal1 && i!=nSeq1-1) maxSum=-gapI-gapE*(nSeq1-i-1); 1704 if(isGlobal2 && j!=nSeq2-1) maxSum=-gapI-gapE*(nSeq2-j-1); 1705 tracebackMatrix1[i][j] = -1; 1706 tracebackMatrix2[i][j] = -1; 1707 1708 } 1709 1710 // do only for rows/columns which are not the last: 1711 if(j+1<nSeq2) 1712 for(k=i+2; k<nSeq1; k++) { 1713 if(mat[k][j+1]-gapI-gapE*(k-i-1)>maxSum) { 1714 maxSum=mat[k][j+1]-gapI-gapE*(k-i-1); 1715 tracebackMatrix1[i][j] = k; 1716 tracebackMatrix2[i][j] = j+1; 1717 1718 } 1719 } 1720 if(i+1<nSeq1) 1721 for(k=j+2; k<nSeq2; k++) { 1722 if(mat[i+1][k]-gapI-gapE*(k-j-1)>maxSum) { 1723 maxSum=mat[i+1][k]-gapI-gapE*(k-j-1); 1724 tracebackMatrix1[i][j] = i+1; 1725 tracebackMatrix2[i][j] = k; 1726 1727 } 1728 } 1729 1730 maxSum+= mat[i][j]; 1731 1732 1733 sum_brk=(isGlobal1?(-gapI-gapE*(nSeq1-1-i)):0.0)+(isGlobal2?(-gapI-gapE*(nSeq2-1-j)):0.0); 1734 if(maxSum<sum_brk) 1735 { 1736 maxSum=sum_brk; 1737 brk_flg[i][j]=true; 1738 } 1739 mat[i][j]=maxSum; 1740 } 1741 } 1742 1743 1744 1745 1746 iStart=0; jStart=0; alignmentPositionOrLength=0; 1747 // no nc-end penalty - begin 1748 sum_ret=mat[0][0]; 1749 1750 // look for the highest score in mat[i][j] 1751 // TODO: move this up ?? 1752 1753 for(i=0; i<nSeq1; i++) 1754 for(j=0; j<nSeq2; j++) 1755 { 1756 if(i==0 && j==0) continue; 1757 double sum=mat[i][j]; 1758 if(isGlobal1) sum+=-gapI-gapE*i; 1759 if(isGlobal2) sum+=-gapI-gapE*j; 1760 if(sum>sum_ret) 1761 { 1762 sum_ret=sum; 1763 iStart=i; jStart=j; 1764 } 1765 } 1766 1767 1768 // ok we got the maximum score in sum_ret and the start position in iStart, jStart 1769 1770 //System.out.println("start at " + is + " " + js); 1771 //for(k=0; k<is; k++) align1[k]=-1; 1772 //for(k=0; k<js; k++) align2[k]=-1; 1773 // no nc-end penalty - end 1774 int prevGapEnd = -1; 1775 // 1776 for(i=iStart, j=jStart; i<nSeq1 && j<nSeq2; i++, j++) 1777 { 1778 iMax=i; jMax=j; 1779 double localMaxScore=mat[i][j]; 1780 if(!hasGapExtensionPenalty) 1781 { 1782 for(k=i+1; k<nSeq1; k++) 1783 if(mat[k][j]-gapI>localMaxScore) 1784 { 1785 iMax=k; jMax=j; 1786 localMaxScore=mat[k][j]-gapI; 1787 } 1788 1789 for(k=j+1; k<nSeq2; k++) 1790 if(mat[i][k]-gapI>localMaxScore) 1791 { 1792 iMax=i; jMax=k; 1793 localMaxScore=mat[i][k]-gapI; 1794 } 1795 } 1796 else 1797 { 1798 for(k=i+1; k<nSeq1; k++) { 1799 if(mat[k][j]-gapI-gapE*(k-i)>localMaxScore) 1800 { 1801 System.out.println(" gap1 " + alignmentPositionOrLength + " " + k + " " + j + " " + localMaxScore + "<" +(mat[k][j]-gapI-gapE*(k-i))); 1802 iMax=k; jMax=j; 1803 localMaxScore=mat[k][j]-gapI-gapE*(k-i); 1804 } 1805 } 1806 for(k=j+1; k<nSeq2; k++) { 1807 if(mat[i][k]-gapI-gapE*(k-j)>localMaxScore) 1808 { 1809 System.out.println(" gap2 " + alignmentPositionOrLength + " " + k + " " + i + " " + localMaxScore + "<"+ (mat[i][k]-gapI-gapE*(k-j))); 1810 iMax=i; jMax=k; 1811 localMaxScore=mat[i][k]-gapI-gapE*(k-j); 1812 } 1813 } 1814 } 1815 1816 //boolean doubleGap = false; 1817 boolean gapPosition = false; 1818 if ( i != iMax || j != jMax ) { 1819 int l1 = iMax - i; 1820 int l2 = jMax - j ; 1821 System.out.println(String.format("FOUND GAP AT: lcmp:%d l1: %d l2: %d | i:%d iMax: %d j:%d jMax:%d ",alignmentPositionOrLength, l1,l2, i, iMax , j, jMax)); 1822 if ( l1 > 0) { 1823 System.out.println(" -- G1 : " + alignmentPositionOrLength + " ->" + (alignmentPositionOrLength + l1) + " " ); 1824 gapPosition = true; 1825 } 1826 if ( l2 > 0) { 1827 System.out.println(" -- G2 : " + alignmentPositionOrLength + " ->" + (alignmentPositionOrLength + l2) + " "); 1828 gapPosition = true; 1829 } 1830 if ( prevGapEnd == alignmentPositionOrLength -1){ 1831 // double gap! 1832 System.out.println( " !! FOUND DOUBLE GAP AT: "+ alignmentPositionOrLength + " | "+ i+ " " + iMax + " " + j + " " + jMax + " " + String.format("%f", mat[i][j]) + " " + getTraceBack(tracebackMatrix1,tracebackMatrix2,i,j)); 1833 //doubleGap = true; 1834 1835 // if ( i != iMax){ 1836 // int pos = align_se1[ alignmentPositionOrLength -1]; 1837 // align_se1[ alignmentPositionOrLength -1] = -1; 1838 // align_se1[ alignmentPositionOrLength ] = pos; 1839 // align_se2[ alignmentPositionOrLength ] = -1; 1840 // } else { 1841 // int pos = align_se2[ alignmentPositionOrLength -1]; 1842 // align_se2[ alignmentPositionOrLength -1] = -1; 1843 // align_se2[ alignmentPositionOrLength ] = pos; 1844 // align_se1[ alignmentPositionOrLength ] = -1; 1845 // } 1846 // 1847 // alignmentPositionOrLength++; 1848 } 1849 } 1850 1851 1852 1853 //System.out.println(" iMax " + iMax + " jMax " + jMax); 1854 // set the gap positions: 1855 //lcmp:53 i:41 j:173 imax:70 jmax:173 1856 System.out.println(String.format(" lcmp:%d i:%d j:%d imax:%d jmax:%d score: %.2f",alignmentPositionOrLength,i,j, iMax, jMax, mat[iMax][jMax])); 1857 1858 1859 for(k=i; k<iMax; k++, i++) { 1860 align_se1[alignmentPositionOrLength]=k; 1861 align_se2[alignmentPositionOrLength]=-1; 1862 1863 1864 alignmentPositionOrLength++; 1865 } 1866 1867 for(k=j; k<jMax; k++, j++) { 1868 align_se1[alignmentPositionOrLength]=-1; 1869 align_se2[alignmentPositionOrLength]=k; 1870 1871 1872 alignmentPositionOrLength++; 1873 } 1874 1875 1876 align_se1[alignmentPositionOrLength]=iMax; 1877 align_se2[alignmentPositionOrLength]=jMax; 1878 1879 1880 1881 if ( gapPosition) 1882 prevGapEnd = alignmentPositionOrLength; 1883 alignmentPositionOrLength++; 1884 1885 if(brk_flg[i][j]) { 1886 //System.out.println("hit break flag at: " + i + " " + j + " sum " + sum_ret + " lcmp " + alignmentPositionOrLength); 1887 break; 1888 1889 } 1890 } 1891 1892 1893 return sum_ret; 1894 } 1895 1896 1897 1898 1899 private String getTraceBack(int[][] tracebackMatrix1, 1900 int[][] tracebackMatrix2, int i, int j) { 1901 1902 1903 if ( tracebackMatrix1[i][j] == -1 ) 1904 return ""; 1905 if ( tracebackMatrix2[i][j] == -1 ) 1906 return ""; 1907 1908 int t1 = tracebackMatrix1[i][j]; 1909 int t2 = tracebackMatrix2[i][j]; 1910 1911 1912 String s = "[ "+t1+","+t2+"] "; 1913 1914 return s + getTraceBack(tracebackMatrix1, tracebackMatrix2, t1, t2); 1915 1916 1917 } 1918 1919 private void rot_mol(Atom[] caA, Atom[] caB, int nse2, Matrix m , Atom shift) throws StructureException{ 1920 1921 1922 1923 for(int l=0; l<nse2; l++) { 1924 Atom a = caA[l]; 1925 Group g = (Group)a.getGroup().clone(); 1926 //Group g = (Group)a.getParent(); 1927 1928 Calc.rotate( g, m); 1929 Calc.shift( g, shift); 1930 caB[l] = g.getAtom(a.getName()); 1931 } 1932 } 1933 1934 //void rot_mol(XYZ *molA, XYZ *molB, int nAtom, double d_[20] ) { 1935 // double dx, dy, dz; 1936 // for(int l=0; l<nAtom; l++) { 1937 // if(molA[l].X<1e10) { 1938 // dx=molA[l].X; 1939 // dy=molA[l].Y; 1940 // dz=molA[l].Z; 1941 // molB[l].X=dx*d_[0]+dy*d_[1]+dz*d_[2]+d_[9]; 1942 // molB[l].Y=dx*d_[3]+dy*d_[4]+dz*d_[5]+d_[10]; 1943 // molB[l].Z=dx*d_[6]+dy*d_[7]+dz*d_[8]+d_[11]; 1944 // } 1945 // else { 1946 // molB[l]=molA[l]; 1947 // } 1948 // } 1949 // } 1950 // 1951 1952 1953 /** superimpose and get rmsd 1954 * 1955 * @param pro1 1956 * @param pro2 1957 * @param strLen 1958 * @param storeTransform 1959 * @param show 1960 * @return RMSD 1961 * @throws StructureException 1962 */ 1963 public double calc_rmsd(Atom[] pro1, Atom[] pro2, int strLen, boolean storeTransform, boolean show) throws StructureException { 1964 1965 Atom[] cod1 = getAtoms(pro1, strLen,false); 1966 Atom[] cod2 = getAtoms(pro2, strLen,true); 1967 1968 assert(cod1.length == cod2.length); 1969 SVDSuperimposer svd = new SVDSuperimposer(cod1, cod2); 1970 1971 Matrix matrix = svd.getRotation(); 1972 1973 if ( debug){ 1974 matrix.print(3,3); 1975 } 1976 1977 Atom shift = svd.getTranslation(); 1978 1979 if ( storeTransform) { 1980 r=matrix; 1981 t = shift; 1982 } 1983 for (Atom a : cod2){ 1984 1985 Calc.rotate(a.getGroup(), matrix); 1986 Calc.shift(a.getGroup(), shift); 1987 //Calc.rotate(a,r); 1988 //Calc.shift(a,t); 1989 } 1990 // if ( show){ 1991 // StructureAlignmentJmol jmol = new StructureAlignmentJmol(); 1992 // 1993 // jmol.setAtoms(cod1); 1994 // jmol.evalString("select * ; wireframe off; spacefill off; backbone on; color chain; select ligand;color cpk; wireframe 40;spacefill 120; "); 1995 // jmol.setTitle("calCaRmsd - pdb1"); 1996 // 1997 // StructureAlignmentJmol jmol2 = new StructureAlignmentJmol(); 1998 // jmol2.setAtoms(cod2); 1999 // jmol2.evalString("select * ; wireframe off; spacefill off; backbone on; color chain; select ligand;color cpk; wireframe 40;spacefill 120; "); 2000 // jmol2.setTitle("calCaRmsd - pdb2"); 2001 // } 2002 return SVDSuperimposer.getRMS(cod1, cod2); 2003 2004 } 2005 2006 /** 2007 * Copies the first length atoms from the input array 2008 * @param ca The array to copy 2009 * @param length the number of atoms to copy 2010 * @param clone If true, preform a deep copy, cloning the underlying Groups 2011 * @return An array with the first length items of ca, possibly cloning the Atoms. 2012 * @throws StructureException 2013 */ 2014 private Atom[] getAtoms(Atom[] ca, int length, boolean clone) throws StructureException{ 2015 2016 List<Atom> atoms = new ArrayList<Atom>(); 2017 for ( int i = 0 ; i < length ; i++){ 2018 2019 Atom a; 2020 if ( clone ){ 2021 Group g = (Group)ca[i].getGroup().clone(); 2022 a = g.getAtom(ca[i].getName()); 2023 } 2024 else { 2025 a = ca[i]; 2026 } 2027 atoms.add(a); 2028 } 2029 return atoms.toArray(new Atom[atoms.size()]); 2030 } 2031 2032 2033 2034 private void noBestTrace(){ 2035 2036 if(isPrint) { 2037 timeEnd = System.currentTimeMillis(); 2038 long time_q=(timeEnd-timeStart); 2039 2040 String msg = String.format("size=0 time=%d comb=%d\n", (int)(time_q), nTraces); 2041 System.out.println(msg); 2042 } 2043 } 2044 2045 2046 2047 private double zToP(double z) { 2048 int ind=(int)(z/0.1); 2049 if(ind<0) ind=0; 2050 if(ind>149) ind=149; 2051 return(tableZtoP[ind]); 2052 } 2053 /////////////////////////////////////////////////////////////////////////// 2054 private double pToZ(double p) { 2055 int ind=(int)(-Math.log10(p)*3.0); 2056 if(ind<0) ind=0; 2057 if(ind>149) ind=149; 2058 return(tablePtoZ[ind]); 2059 } 2060 /////////////////////////////////////////////////////////////////////////// 2061 private double zByZ(double z1, double z2) { 2062 double p1=zToP(z1); 2063 double p2=zToP(z2); 2064 return(pToZ(p1*p2)); 2065 } 2066 2067 protected double zStrAlign(int winSize, int nTrace, double score, int nGaps) { 2068 double z1=zScore(winSize, nTrace, score); 2069 double z2=zGaps(winSize, nTrace, nGaps); 2070 return(zByZ(z1, z2)); 2071 } 2072 2073 double zScore(int winSize, int nTrace, double score) { 2074 2075 if(winSize==8) { 2076 2077 if(nTrace<1) return(0.0); 2078 2079 double scoreAv_, scoreSd_; 2080 if(nTrace<21) { 2081 scoreAv_=scoreAv8[nTrace-1]; 2082 scoreSd_=scoreSd8[nTrace-1]; 2083 } 2084 else { 2085 scoreAv_=0.209874*nTrace+2.944714; 2086 scoreSd_=0.039487*nTrace+0.675735; 2087 } 2088 if(score>scoreAv_) return(0.0); 2089 return((scoreAv_-score)/scoreSd_); 2090 } 2091 2092 if(winSize==6) { 2093 2094 if(nTrace<1) return(0.0); 2095 2096 double scoreAv_, scoreSd_; 2097 if(nTrace<21) { 2098 scoreAv_=scoreAv6[nTrace-1]; 2099 scoreSd_=scoreSd6[nTrace-1]; 2100 } 2101 else { 2102 scoreAv_=0.198534*nTrace+2.636477; 2103 scoreSd_=0.040922*nTrace+0.715636; 2104 } 2105 if(score>scoreAv_) return(0.0); 2106 return((scoreAv_-score)/scoreSd_); 2107 } 2108 2109 return(0.0); 2110 2111 } 2112 /////////////////////////////////////////////////////////////////////////// 2113 double zGaps(int winSize, int nTrace, int nGaps) { 2114 2115 if(nTrace<2) return(0.0); 2116 double scoreAv_, scoreSd_; 2117 2118 if(winSize==8) { 2119 if(nTrace<21) { 2120 scoreAv_=gapsAv8[nTrace-1]; 2121 scoreSd_=gapsSd8[nTrace-1]; 2122 } 2123 else { 2124 scoreAv_=14.949173*nTrace-14.581193; 2125 scoreSd_=2.045067*nTrace+13.191095; 2126 } 2127 if(nGaps>scoreAv_) return(0.0); 2128 return((scoreAv_-nGaps)/scoreSd_); 2129 } 2130 2131 if(winSize==6) { 2132 if(nTrace<21) { 2133 scoreAv_=gapsAv6[nTrace-1]; 2134 scoreSd_=gapsSd6[nTrace-1]; 2135 } 2136 else { 2137 scoreAv_=13.574490*nTrace-13.977223; 2138 scoreSd_=1.719977*nTrace+19.615014; 2139 } 2140 if(nGaps>scoreAv_) return(0.0); 2141 return((scoreAv_-nGaps)/scoreSd_); 2142 } 2143 2144 return(0.0); 2145 } 2146 2147 private static final double scoreAv8[]={2.54, 2.51, 2.72, 3.01, 3.31, 3.61, 3.90, 4.19, 4.47, 4.74, 2148 4.99, 5.22, 5.46, 5.70, 5.94, 6.13, 6.36, 6.52, 6.68, 6.91}; 2149 private static final double scoreSd8[]={1.33, 0.88, 0.73, 0.71, 0.74, 0.80, 0.86, 0.92, 0.98, 1.04, 2150 1.08, 1.10, 1.15, 1.19, 1.23, 1.25, 1.32, 1.34, 1.36, 1.45}; 2151 private static final double gapsAv8[]={0.00, 11.50, 23.32, 35.95, 49.02, 62.44, 76.28, 90.26, 2152 104.86, 119.97, 134.86, 150.54, 164.86, 179.57, 194.39, 2153 209.38, 224.74, 238.96, 253.72, 270.79}; 2154 private static final double gapsSd8[]={0.00, 9.88, 14.34, 17.99, 21.10, 23.89, 26.55, 29.00, 31.11, 2155 33.10, 35.02, 36.03, 37.19, 38.82, 41.04, 43.35, 45.45, 2156 48.41, 50.87, 52.27}; 2157 private static final double scoreAv6[]={1.98, 1.97, 2.22, 2.54, 2.87, 3.18, 3.48, 3.77, 4.05, 4.31, 2158 4.57, 4.82, 5.03, 5.24, 5.43, 5.64, 5.82, 6.02, 6.21, 6.42}; 2159 private static final double scoreSd6[]={1.15, 0.73, 0.63, 0.64, 0.71, 0.80, 0.87, 0.95, 1.01, 1.07, 2160 1.13, 1.19, 1.22, 1.25, 1.28, 1.32, 1.35, 1.39, 1.45, 1.50}; 2161 private static final double gapsAv6[]={0.00, 10.12, 20.25, 31.29, 42.95, 55.20, 67.53, 80.15, 2162 93.30, 106.47, 120.52, 134.38, 148.59, 162.58, 176.64, 2163 191.23, 204.12, 218.64, 231.82, 243.43}; 2164 private static final double gapsSd6[]={0.00, 9.80, 14.44, 18.14, 21.35, 24.37, 27.00, 29.68, 32.22, 2165 34.37, 36.65, 38.63, 40.31, 42.16, 43.78, 44.98, 47.08, 2166 49.09, 50.78, 52.15}; 2167 2168 2169 private static final double tableZtoP[]={ 2170 1.0, 9.20e-01,8.41e-01,7.64e-01,6.89e-01,6.17e-01,5.49e-01,4.84e-01,4.24e-01,3.68e-01, 2171 3.17e-01,2.71e-01,2.30e-01,1.94e-01,1.62e-01,1.34e-01,1.10e-01,8.91e-02,7.19e-02,5.74e-02, 2172 4.55e-02,3.57e-02,2.78e-02,2.14e-02,1.64e-02,1.24e-02,9.32e-03,6.93e-03,5.11e-03,3.73e-03, 2173 2.70e-03,1.94e-03,1.37e-03,9.67e-04,6.74e-04,4.65e-04,3.18e-04,2.16e-04,1.45e-04,9.62e-05, 2174 6.33e-05,4.13e-05,2.67e-05,1.71e-05,1.08e-05,6.80e-06,4.22e-06,2.60e-06,1.59e-06,9.58e-07, 2175 5.73e-07,3.40e-07,1.99e-07,1.16e-07,6.66e-08,3.80e-08,2.14e-08,1.20e-08,6.63e-09,3.64e-09, 2176 1.97e-09,1.06e-09,5.65e-10,2.98e-10,1.55e-10,8.03e-11,4.11e-11,2.08e-11,1.05e-11,5.20e-12, 2177 2.56e-12,1.25e-12,6.02e-13,2.88e-13,1.36e-13,6.38e-14,2.96e-14,1.36e-14,6.19e-15,2.79e-15, 2178 1.24e-15,5.50e-16,2.40e-16,1.04e-16,4.46e-17,1.90e-17,7.97e-18,3.32e-18,1.37e-18,5.58e-19, 2179 2.26e-19,9.03e-20,3.58e-20,1.40e-20,5.46e-21,2.10e-21,7.99e-22,3.02e-22,1.13e-22,4.16e-23, 2180 1.52e-23,5.52e-24,1.98e-24,7.05e-25,2.48e-25,8.64e-26,2.98e-26,1.02e-26,3.44e-27,1.15e-27, 2181 3.82e-28,1.25e-28,4.08e-29,1.31e-29,4.18e-30,1.32e-30,4.12e-31,1.27e-31,3.90e-32,1.18e-32, 2182 3.55e-33,1.06e-33,3.11e-34,9.06e-35,2.61e-35,7.47e-36,2.11e-36,5.91e-37,1.64e-37,4.50e-38, 2183 1.22e-38,3.29e-39,8.77e-40,2.31e-40,6.05e-41,1.56e-41,4.00e-42,1.02e-42,2.55e-43,6.33e-44, 2184 1.56e-44,3.80e-45,9.16e-46,2.19e-46,5.17e-47,1.21e-47,2.81e-48,6.45e-49,1.46e-49,3.30e-50}; 2185 private static final double tablePtoZ[]={ 2186 0.00,0.73,1.24,1.64,1.99,2.30,2.58,2.83,3.07,3.29, 2187 3.50,3.70,3.89,4.07,4.25,4.42,4.58,4.74,4.89,5.04, 2188 5.19,5.33,5.46,5.60,5.73,5.86,5.99,6.11,6.23,6.35, 2189 6.47,6.58,6.70,6.81,6.92,7.02,7.13,7.24,7.34,7.44, 2190 7.54,7.64,7.74,7.84,7.93,8.03,8.12,8.21,8.30,8.40, 2191 8.49,8.57,8.66,8.75,8.84,8.92,9.01,9.09,9.17,9.25, 2192 9.34,9.42,9.50,9.58,9.66,9.73,9.81,9.89,9.97,10.04, 2193 10.12,10.19,10.27,10.34,10.41,10.49,10.56,10.63,10.70,10.77, 2194 10.84,10.91,10.98,11.05,11.12,11.19,11.26,11.32,11.39,11.46, 2195 11.52,11.59,11.66,11.72,11.79,11.85,11.91,11.98,12.04,12.10, 2196 12.17,12.23,12.29,12.35,12.42,12.48,12.54,12.60,12.66,12.72, 2197 12.78,12.84,12.90,12.96,13.02,13.07,13.13,13.19,13.25,13.31, 2198 13.36,13.42,13.48,13.53,13.59,13.65,13.70,13.76,13.81,13.87, 2199 13.92,13.98,14.03,14.09,14.14,14.19,14.25,14.30,14.35,14.41, 2200 14.46,14.51,14.57,14.62,14.67,14.72,14.77,14.83,14.88,14.93}; 2201 2202 /** copy data from this class into AFPChain container object. 2203 * 2204 * @param afpChain 2205 * @param ca1 2206 * @param ca2 2207 */ 2208 public void convertAfpChain(AFPChain afpChain, Atom[] ca1, Atom[] ca2) { 2209 2210 afpChain.setBlockNum(1); 2211 //afpChain.setAlignScore(z); 2212 Matrix[] m ; 2213 2214 if ( r != null ) { 2215 m = new Matrix[1]; 2216 m[0] = r; 2217 } else { 2218 m = new Matrix[0]; 2219 } 2220 2221 Atom[] as ; 2222 if ( t != null) { 2223 as = new Atom[1]; 2224 as[0] = t; 2225 } else { 2226 as = new Atom[0]; 2227 } 2228 2229 afpChain.setBlockRotationMatrix(m); 2230 afpChain.setBlockShiftVector(as); 2231 2232 int nse1 = ca1.length; 2233 int nse2 = ca2.length; 2234 //System.out.println("dist1 :" + dist1.length + " " + dist2.length); 2235 2236 if ( nse1 > 0 && dist1.length > 0 ) 2237 afpChain.setDisTable1(new Matrix(dist1)); 2238 else 2239 afpChain.setDisTable1 (Matrix.identity(3, 3)); 2240 if ( nse2 > 0 && dist2.length > 0 ) 2241 afpChain.setDisTable2(new Matrix(dist2)); 2242 else 2243 afpChain.setDisTable2(Matrix.identity(3, 3)); 2244 2245 2246 char[] alnseq1 = new char[nse1+nse2+1]; 2247 char[] alnseq2 = new char[nse1+nse2+1] ; 2248 char[] alnsymb = new char[nse1+nse2+1]; 2249 2250 int[][][] optAln = new int[1][2][nAtom]; 2251 afpChain.setOptAln(optAln); 2252 2253 int pos = 0; 2254 int nrIdent = 0; 2255 int nrSim = 0; 2256 for(int ia=0; ia<alignmentPositionOrLength; ia++) { 2257 2258 // no gap 2259 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 2260 //System.out.println("ia " + ia + " pos " + pos + " " + align_se1[ia] + " " + align_se2[ia]); 2261 optAln[0][0][pos] = align_se1[ia]; 2262 optAln[0][1][pos] = align_se2[ia]; 2263 2264 char l1 = getOneLetter(ca1[align_se1[ia]].getGroup()); 2265 char l2 = getOneLetter(ca2[align_se2[ia]].getGroup()); 2266 2267 alnseq1[ia] = Character.toUpperCase(l1); 2268 alnseq2[ia] = Character.toUpperCase(l2); 2269 alnsymb[ia] = ' '; 2270 if ( l1 == l2) { 2271 nrIdent++; 2272 nrSim++; 2273 alnsymb[ia] = '|'; 2274 } else if ( AFPAlignmentDisplay.aaScore(l1, l2) > 0){ 2275 nrSim++; 2276 alnsymb[ia] = ':'; 2277 } 2278 2279 pos++; 2280 2281 } else { 2282 // there is a gap at this position 2283 alnsymb[ia] = ' '; 2284 if (align_se1[ia] == -1 ) { 2285 alnseq1[ia] = '-'; 2286 } else { 2287 char l1 = getOneLetter(ca1[align_se1[ia]].getGroup()); 2288 alnseq1[ia] = Character.toUpperCase(l1); 2289 } 2290 if ( align_se2[ia] == -1 ) { 2291 alnseq2[ia] = '-'; 2292 } else { 2293 char l2 = getOneLetter(ca2[align_se2[ia]].getGroup()); 2294 alnseq2[ia] = Character.toUpperCase(l2); 2295 } 2296 2297 } 2298 } 2299 2300 2301 afpChain.setAlnseq1(alnseq1); 2302 afpChain.setAlnseq2(alnseq2); 2303 afpChain.setAlnsymb(alnsymb); 2304 2305 2306 // CE uses the aligned pairs as reference not the whole alignment including gaps... 2307 if ( pos > 0) { 2308 afpChain.setIdentity(nrIdent*1.0/pos); 2309 afpChain.setSimilarity(nrSim*1.0/pos); 2310 } else { 2311 afpChain.setIdentity(0); 2312 afpChain.setSimilarity(0); 2313 } 2314 2315 //AFPAlignmentDisplay.getAlign( afpChain,ca1,ca2); 2316 2317 } 2318 2319 public int getnAtom() { 2320 return nAtom; 2321 } 2322 2323 2324 2325 public void setnAtom(int nAtom) { 2326 this.nAtom = nAtom; 2327 } 2328 2329 2330 2331 public int getLcmp() { 2332 return alignmentPositionOrLength; 2333 } 2334 2335 2336 2337 public void setLcmp(int lcmp) { 2338 this.alignmentPositionOrLength = lcmp; 2339 } 2340 2341 2342 2343 public int[] getAlign_se1() { 2344 return align_se1; 2345 } 2346 2347 2348 2349 public void setAlign_se1(int[] alignSe1) { 2350 align_se1 = alignSe1; 2351 } 2352 2353 2354 2355 public int[] getAlign_se2() { 2356 return align_se2; 2357 } 2358 2359 2360 2361 public void setAlign_se2(int[] alignSe2) { 2362 align_se2 = alignSe2; 2363 } 2364 2365 /** 2366 * Caution: this matrix is overwriten with very different data at several 2367 * points in the alignment algorithm. After 2368 * {@link #initSumOfDistances(int, int, int, int, Atom[], Atom[]) initSumOfDistances} 2369 * is run, this will hold the distance matrix between AFPs. 2370 * @return mat 2371 */ 2372 public double[][] getMatMatrix() { 2373 return mat; 2374 } 2375 2376 public void setMatMatrix(double[][] matrix){ 2377 mat = matrix; 2378 } 2379 2380 /** 2381 * Gets the rotation matrix from the last call to 2382 * {@link #calc_rmsd(Atom[], Atom[], int, boolean, boolean) calc_rmsd}. 2383 * @return The rotatiokn matrix 2384 */ 2385 public Matrix getRotationMatrix() { 2386 return r; 2387 } 2388 2389 /** 2390 * Gets the shift from the last call to 2391 * {@link #calc_rmsd(Atom[], Atom[], int, boolean, boolean) calc_rmsd}. 2392 * @return The shift 2393 */ 2394 public Atom getShift() { 2395 return t; 2396 } 2397 2398 public double[][] getDist1() { 2399 return dist1; 2400 } 2401 2402 public void setDist1(double[][] dist1) { 2403 this.dist1 = dist1; 2404 } 2405 2406 public double[][] getDist2() { 2407 return dist2; 2408 } 2409 2410 public void setDist2(double[][] dist2) { 2411 this.dist2 = dist2; 2412 } 2413 2414 2415}