001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Sep 25, 2009 021 * Author: Andreas Prlic 022 * 023 */ 024 025package org.biojava.nbio.structure.align.ce; 026 027import org.biojava.nbio.core.alignment.matrices.ScaledSubstitutionMatrix; 028import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 029import org.biojava.nbio.structure.*; 030import org.biojava.nbio.structure.align.model.AFP; 031import org.biojava.nbio.structure.align.model.AFPChain; 032import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay; 033import org.biojava.nbio.structure.geometry.Matrices; 034import org.biojava.nbio.structure.geometry.SuperPositions; 035import org.biojava.nbio.structure.jama.Matrix; 036import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 037import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 038 039import java.util.ArrayList; 040import java.util.List; 041 042import javax.vecmath.Matrix4d; 043 044 045 046/** This is based on the original Combinatorial Extension (CE) source code from 2003 or 2004 (CE version 2.3), 047 * as has been originally developed by I. Shindyalov and P.Bourne (1998). 048 * The original CE paper is available from here: <a href="http://peds.oxfordjournals.org/cgi/content/short/11/9/739">http://peds.oxfordjournals.org/cgi/content/short/11/9/739</a>. 049 * 050 * This class is a pretty much exact 1:1 port from C, where I cared about exact reproduce of the CE results 051 * and not about Java style. 052 * 053 * @author Spencer Bliven 054 * @author Andreas Prlic 055 * 056 057 * 058 */ 059public class CeCalculatorEnhanced { 060 061 protected static final boolean isPrint = true; 062 private static final boolean showAlignmentSteps = true; 063 private static final boolean debug = true; 064 065 int[] f1; 066 int[] f2; 067 double[][]dist1; 068 double[][]dist2; 069 protected double[][]mat; 070 protected int[] bestTrace1; 071 protected int[] bestTrace2; 072 protected int[][] bestTraces1; 073 protected int[][] bestTraces2; 074 protected int nBestTrace; 075 protected int nBestTraces; 076 double[] d_ = new double[20]; 077 protected int[] bestTracesN; 078 protected double bestTraceScore; 079 protected int nTrace; 080 protected double[] bestTracesScores; 081 protected int[] trace1; 082 protected int[] trace2; 083 084 protected static final double zThr=-0.1; 085 086 long timeStart ; 087 long timeEnd; 088 private int nAtom; 089 090 // the equivalent positions in the alignment... 091 private int[] align_se1; 092 private int[] align_se2; 093 094 095 private int alignmentPositionOrLength; 096 private int[] bestTraceLen; 097 private Matrix r; 098 private Atom t; 099 protected int nTraces; 100 101 private double z; 102 private int[] traceIndexContainer; 103 104 protected CeParameters params; 105 // SHOULD these fields be PARAMETERS? 106 107 protected static final int nIter = 1; 108 private static final boolean distAll = false; 109 110 List<MatrixListener> matrixListeners; 111 112 public static final boolean GLOBAL_ALIGN1 = false; 113 public static final boolean GLOBAL_ALIGN2 = false; 114 115 public CeCalculatorEnhanced(CeParameters params){ 116 timeStart = System.currentTimeMillis(); 117 dist1= new double[0][0]; 118 dist2= new double[0][0]; 119 this.params = params; 120 matrixListeners = new ArrayList<MatrixListener>(); 121 122 } 123 124 /** 125 * 126 * @param afpChain A new AFPChain, which will be filled in by this function 127 * @param ca1 128 * @param ca2 129 * @return afpChain 130 * @throws StructureException 131 */ 132 public AFPChain extractFragments(AFPChain afpChain, 133 Atom[] ca1, Atom[] ca2) throws StructureException{ 134 135 int nse1 = ca1.length; 136 int nse2 = ca2.length; 137 138 afpChain.setCa1Length(nse1); 139 afpChain.setCa2Length(nse2); 140 141 int traceMaxSize=nse1<nse2?nse1:nse2; 142 143 f1 = new int[nse1]; 144 f2 = new int[nse2]; 145 146 dist1 = initIntraDistmatrix(ca1, nse1); 147 dist2 = initIntraDistmatrix(ca2, nse2); 148 149 150 if ( debug ) 151 System.out.println("parameters: " + params); 152 153 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 154 if ( params.getSeqWeight() < 1) 155 params.setSeqWeight(2); 156 } 157 158 int winSize = params.getWinSize(); 159 160 int winSizeComb1 = (winSize-1)*(winSize-2)/2; 161 162 traceIndexContainer = new int[traceMaxSize]; 163 164 // CE: unused code. distAll is always false and both loops do the same??? 165 // CE v2.3 calls this Weight factors for trace extension 166 if(distAll ) { 167 for(int i=0; i<traceMaxSize; i++) 168 traceIndexContainer[i]=(i+1)*i*winSize*winSize/2+(i+1)*winSizeComb1; 169 } else { 170 for(int i=0; i<traceMaxSize; i++) { 171 traceIndexContainer[i]=(i+1)*i*winSize/2+(i+1)*winSizeComb1; 172 173 174 } 175 } 176 177 // verified: a[] is set correctly. 178 179 mat = initSumOfDistances(nse1, nse2, winSize, winSizeComb1, ca1, ca2); 180 181 182 183 // try { 184 // Matrix m2 = new Matrix(mat).copy(); 185 // JPanel panel = GuiWrapper.getScaleableMatrixPanel(m2); 186 // JFrame frame = new JFrame(); 187 // frame.addWindowListener(new WindowAdapter(){ 188 // public void windowClosing(WindowEvent e){ 189 // JFrame f = (JFrame) e.getSource(); 190 // f.setVisible(false); 191 // f.dispose(); 192 // } 193 // }); 194 // 195 // 196 // frame.getContentPane().add(panel); 197 // 198 // frame.pack(); 199 // frame.setVisible(true); 200 // } catch (Exception e) { 201 // e.printStackTrace(); 202 // } 203 204 205 // Set the distance matrix 206 //afpChain.setDistanceMatrix(new Matrix(mat.clone())); 207 208 209 // 210 // double rmsdThr = params.getRmsdThr(); 211 // StringBuffer buf = new StringBuffer(" "); 212 // for(int i=0; i<nse2; i++) 213 // buf.append(String.format("%c", i%10==0?(i%100)/10+48:32)); 214 // buf.append("\n"); 215 // for(int i=0; i<nse1; i++) { 216 // buf.append(String.format("%c ", i%10==0?(i%100)/10+48:32)); 217 // for(int j=0; j<nse2; j++) 218 // buf.append(String.format("%c", (mat[i][j])<rmsdThr?'+':'X')); 219 // //printf("%c", ((int)*(mat[i]+j)/40)>9?'*':((int)*(mat[i]+j)/40)+48); 220 // buf.append("\n"); 221 // } 222 // buf.append("\n"); 223 // 224 // System.out.println(buf.toString()); 225 // 226 227 228 return afpChain; 229 } 230 231 /** 232 * Evaluates the distance between two atoms 233 * Several scoring functions are implemented and can be changed by calling 234 * {@link CeParameters#setScoringStrategy(Integer) setScoringStrategy()} 235 * on {@link CeParameters parameter} object this CECalculator was created with. 236 * <p> 237 * Scoring Strategies:<dl> 238 * <dt>DEFAULT_SCORING_STRATEGY</dt> 239 * <dd>Strategy of the original CE publication; CA-CA distance</dd> 240 * 241 * <dt>SIDE_CHAIN_SCORING</dt> 242 * <dd>CB-CB distance. This performs better for sheets and helices than CA.</dd> 243 * 244 * <dt>SIDE_CHAIN_ANGLE_SCORING</dt> 245 * <dd>Use the dot product (eg the cosine) of the two CA-CB vectors.</dd> 246 * 247 * <dt>CA_AND_SIDE_CHAIN_ANGLE_SCORING</dt> 248 * <dd>Equivalent to DEFAULT_SCORING_STRATEGY + SIDE_CHAIN_ANGLE_SCORING</dd> 249 * </dl> 250 * 251 * <dt>SEQUENCE_CONSERVATION</dt> 252 * <dd>A mix between the DEFAULT_SCORING_STRATEGY and a scoring function that favors the alignment of sequence conserved positions in the alignment</dd> 253 * </dl> 254 * 255 * 256 * 257 * @param ca1 The CA of the first residue 258 * @param ca2 The CA of the second residue 259 * @return The distance between the two fragments, according to the selected 260 * scoring strategy. Lower distances are better alignments. 261 * @throws StructureException 262 */ 263 private double getDistanceWithSidechain(Atom ca1, Atom ca2) throws StructureException { 264 265 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.CA_SCORING) { 266 267 return Calc.getDistance(ca1,ca2); 268 269 } 270 271 double dist; 272 Group g1 = ca1.getGroup(); 273 Atom cb1 = null; 274 if ( g1.hasAtom(StructureTools.CB_ATOM_NAME)) { 275 cb1 = g1.getAtom(StructureTools.CB_ATOM_NAME); 276 } 277 // 278 Group g2 = ca2.getGroup(); 279 Atom cb2 = null; 280 if ( g2.hasAtom(StructureTools.CB_ATOM_NAME)) { 281 cb2 = g2.getAtom(StructureTools.CB_ATOM_NAME); 282 } 283 284 285 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SIDE_CHAIN_SCORING) { 286 287 288 // here we are using side chain orientation for scoring... 289 290 291 // score type 1 consider side chain distances 292 if ( cb1 != null && cb2 != null) { 293 // CB distance 294 dist = Calc.getDistance(cb1,cb2); 295 //dist = dist / 2.; 296 } else { 297 dist = Calc.getDistance(ca1,ca2); 298 } 299 300 return dist; 301 } 302 303 else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SIDE_CHAIN_ANGLE_SCORING){ 304 305 // score type 2 add angle info 306 307 308 if ( cb1 != null && cb2 != null) { 309 // If the CA were overlaid, what is the distance between the CB? 310 // Recall c^2 = a^2 + b^2 -2ab*cos(theta), so this is a function of angle 311 Atom c1 = Calc.subtract(cb1, ca1); 312 Atom c2 = Calc.subtract(cb2, ca2); 313 Atom newA = Calc.subtract(c2, c1); 314 dist = Calc.amount(newA); 315 } else { 316 //dist += Calc.getDistance(ca1,ca2); 317 dist = 0; 318 } 319 320 return dist; 321 322 } 323 else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.CA_AND_SIDE_CHAIN_ANGLE_SCORING){ 324 325 // score type 3 326 // CA distance + cos(angle) 327 dist = 0; 328 if ( cb1 != null && cb2 != null) { 329 Atom cacb1 = Calc.subtract(cb1, ca1); 330 Atom cacb2 = Calc.subtract(cb2, ca2); 331 Atom newA = Calc.subtract(cacb2, cacb1); 332 //System.out.format("CACB 1: %s\nCACB 2: %s\ndiff: %s\nd: %f\n",cacb1.toString(),cacb2.toString(),newA.toString(),Calc.amount(newA)); 333 dist += Calc.amount(newA); 334 } 335 dist += Calc.getDistance(ca1,ca2); 336 337 return dist; 338 339 } else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 340 if ( cb1 != null && cb2 != null) { 341 // CB distance 342 dist = Calc.getDistance(cb1,cb2); 343 //dist = dist / 2.; 344 } else { 345 dist = Calc.getDistance(ca1,ca2); 346 } 347 return dist; 348 349 350 } 351 else { 352 // unsupported scoring scheme 353 return Calc.getDistance(ca1,ca2); 354 } 355 } 356 357 /** build up intramolecular distance matrix dist1 & dist2 358 * 359 * @param ca 360 * @param nse 361 * @return 362 * @throws StructureException 363 */ 364 private double[][] initIntraDistmatrix(Atom[] ca, int nse) throws StructureException 365 { 366 367 368 double[][] intraDist = new double[nse][nse]; 369 370 // 371 for(int ise1=0; ise1<nse; ise1++) { 372 373 for(int ise2=0; ise2<nse; ise2++) { 374 intraDist[ise1][ise2] = getDistanceWithSidechain(ca[ise1], ca[ise2]); 375 376 } 377 } 378 return intraDist; 379 } 380 381 382 public double[][] initSumOfDistances(int nse1, int nse2, int winSize, int winSizeComb1, Atom[] ca1, Atom[] ca2) { 383 384 double d; 385 386 double[][] mat = new double[nse1][nse2]; 387 388 // init the initial mat[] array. 389 // at this stage mat contains the sum of the distances of fragments of the matrices dist1, dist 390 for(int ise1=0; ise1<nse1; ise1++) { 391 for(int ise2=0; ise2<nse2; ise2++) { 392 393 mat[ise1][ise2]=-1.0; 394 395 if(ise1>nse1-winSize || ise2>nse2-winSize) continue; 396 397 d=0.0; 398 // this sums up over the distances of the fragments 399 for(int is1=0; is1<winSize-2; is1++) 400 for(int is2=is1+2; is2<winSize; is2++) { 401 //System.out.println("pos1 :" + (ise1+is1) + " " + (ise1+is2) + " " + (ise2+is1) + " " + (ise2+is2)); 402 // is this abs or floor? check! 403 d+=Math.abs(dist1[ise1+is1][ise1+is2]-dist2[ise2+is1][ise2+is2]); 404 } 405 mat[ise1][ise2]=d/winSizeComb1; 406 407 //System.out.println("mat ["+ise1+"]["+ise2+"]="+mat[ise1][ise2]); 408 } 409 410 } 411 412 // verified: mat[][] probably ok. 413 414 return mat; 415 } 416 417 418 419 420 421 422 @SuppressWarnings("unused") 423 public void traceFragmentMatrix( AFPChain afpChain, 424 Atom[] ca1, Atom[] ca2) { 425 426 double rmsdThr = params.getRmsdThr(); 427 428 429 double oldBestTraceScore=10000.0; 430 bestTraceScore = 100.0; 431 nBestTrace=0; 432 int nBestTrace0 = 0; 433 int winSize = params.getWinSize(); 434 int winSizeComb1=(winSize-1)*(winSize-2)/2; 435 boolean distAll = false; 436 437 int winSizeComb2=distAll?winSize*winSize:winSize; 438 double rmsdThrJoin = params.getRmsdThrJoin(); 439 440 double z0; 441 442 //double bestTraceZScore=-1.0; 443 444 int nse1 = ca1.length; 445 int nse2 = ca2.length; 446 447 //System.out.println("nse1 :" +nse1 + " nse2: " + nse2); 448 449 int traceMaxSize=nse1<nse2?nse1:nse2; 450 451 bestTrace1 = new int [traceMaxSize]; 452 bestTrace2 = new int [traceMaxSize]; 453 trace1 = new int [traceMaxSize]; 454 trace2 = new int [traceMaxSize]; 455 456 int[] traceIndex = new int [traceMaxSize]; 457 int[] traceIterLevel = new int [traceMaxSize]; 458 459 int ise11; 460 int ise12; 461 int ise21; 462 int ise22; 463 464 int ise1; 465 int ise2; 466 467 int gapMax=params.getMaxGapSize(); 468 469 int iterDepth; 470 if ( gapMax > 0){ 471 iterDepth =gapMax*2+1; 472 } else { 473 iterDepth = traceMaxSize; 474 } 475 double[][] traceScore = new double[traceMaxSize][iterDepth]; 476 477 nTraces =0; 478 long tracesLimit=(long)5e7; 479 double score =-1; 480 double score0 = -1; 481 double score1 = -1 ; 482 double score2 = -1; 483 484 int mse1; 485 int mse2; 486 int jgap; 487 int jdir; 488 int jse1=0; 489 int jse2=0; 490 491 int bestTracesMax=30; 492 bestTraces1 = new int[bestTracesMax][traceMaxSize]; 493 bestTraces2 = new int[bestTracesMax][ traceMaxSize]; 494 bestTracesN=new int [bestTracesMax]; 495 bestTracesScores = new double [bestTracesMax]; 496 for(int it=0; it<bestTracesMax; it++) { 497 bestTracesN[it]=0; 498 bestTracesScores[it]=100; 499 } 500 501 nBestTraces=0; 502 int newBestTrace=0; 503 504 double traceTotalScore=0; 505 double traceScoreMax =0; 506 double userRMSDMax = params.getMaxOptRMSD(); 507 int kse1; 508 int kse2; 509 510 iterLoop: 511 for(int iter=0; iter<nIter; iter++) { 512 513 if(iter>2) { 514 if(oldBestTraceScore<=bestTraceScore) break; 515 } 516 oldBestTraceScore=bestTraceScore; 517 518 if(iter==1) { 519 z0=zStrAlign(winSize, nBestTrace, bestTraceScore, 520 bestTrace1[nBestTrace]+winSize-bestTrace1[0]+ 521 bestTrace2[nBestTrace]+winSize-bestTrace2[0]- 522 nBestTrace*2*winSize); 523 if(z0<zThr) break; 524 nBestTrace0=nBestTrace; 525 nBestTrace=0; 526 bestTraceScore=100.0; 527 528 nTraces=0; 529 } 530 531 532 if(iter==0) { 533 ise11=0; ise12=nse1; 534 ise21=0; ise22=nse2; 535 536 } 537 else { 538 if(iter==1) { 539 ise11=bestTrace1[0]; ise12=bestTrace1[0]+1; 540 ise21=bestTrace2[0]; ise22=bestTrace2[0]+1; 541 } 542 else { 543 ise11=bestTrace1[0]-1; ise12=bestTrace1[0]+2; 544 ise21=bestTrace2[0]-1; ise22=bestTrace2[0]+2; 545 } 546 if(ise11<0) ise11=0; 547 if(ise12>nse1) ise12=nse1; 548 if(ise21<0) ise21=0; 549 if(ise22>nse2) ise22=nse2; 550 } 551 552 //System.out.println("ise1Loop: " + ise11 + " " + ise12 + " " + ise21 + " " + ise22); 553 ise1Loop: 554 for(int ise1_=ise11; ise1_<ise12; ise1_++) { 555 ise2Loop: 556 for(int ise2_=ise21; ise2_<ise22; ise2_++) { 557 558 ise1=ise1_; 559 ise2=ise2_; 560 if(iter>1 && ise1==ise11+1 && ise2==ise21+1) continue ise2Loop; 561 562 //if(ise2==ise21) System.out.println(String.format("(%d, %d)",ise1, nTraces)); 563 564 565 if(iter==0 && (ise1>nse1-winSize*(nBestTrace-1) || 566 ise2>nse2-winSize*(nBestTrace-1))) continue ise2Loop; 567 568 if(mat[ise1][ise2]<0.0) continue ise2Loop; 569 if(mat[ise1][ise2]>rmsdThr) continue ise2Loop; 570 if (mat[ise1][ise2]>userRMSDMax) continue ise2Loop; 571 nTrace=0; 572 trace1[nTrace]=ise1; 573 trace2[nTrace]=ise2; 574 traceIndex[nTrace]=0; 575 traceIterLevel[nTrace]=0; 576 577 score0=mat[ise1][ise2]; 578 579 580 nTrace++; 581 boolean isTraceUp=true; 582 int traceIndex_=0; 583 584 traceLoop: 585 while(nTrace>0) { 586 587 kse1=trace1[nTrace-1]+winSize; 588 kse2=trace2[nTrace-1]+winSize; 589 590 //System.out.println("isTraceUp " + isTraceUp + " " + nTrace + " " + kse1 + " " + kse2); 591 592 while(true) { 593 if(kse1>nse1-winSize-1) break; 594 if(kse2>nse2-winSize-1) break; 595 if(mat[kse1][kse2]>=0.0) break; 596 kse1++; 597 kse2++; 598 } 599 600 601 traceIndex_=-1; 602 603 if(isTraceUp) { 604 605 int nBestExtTrace=nTrace; 606 double bestExtScore=100.0; 607 608 609 // extension of the alignment path 610 // condition 4, 5 611 itLoop: 612 for(int it=0; it<iterDepth; it++) { 613 614 jgap=(it+1)/2; 615 jdir=(it+1)%2; 616 617 if(jdir==0) { 618 mse1=kse1+jgap; 619 mse2=kse2; 620 } 621 else { 622 mse1=kse1; 623 mse2=kse2+jgap; 624 } 625 626 if(mse1>nse1-winSize-1) continue itLoop; 627 if(mse2>nse2-winSize-1) continue itLoop; 628 629 if(mat[mse1][mse2]<0.0) continue itLoop; 630 if(mat[mse1][mse2]>rmsdThr) continue itLoop; 631 if(mat[mse1][mse2]>userRMSDMax) continue itLoop; 632 633 nTraces++; 634 if(nTraces>tracesLimit) { 635 636 return; 637 } 638 639 score=0.0; 640 641 // if(!distAll) { 642 //System.out.println("getting score " + mse1 + " " + mse2 + " " + winSize + " " + jgap + " " + jdir + " " + it + " " + kse1 + " " + kse2); 643 score = getScoreFromDistanceMatrices(mse1,mse2,winSize); 644 //System.out.println("got score: " + score); 645 score1=score/(nTrace*winSize); 646 647 // } else { 648 // // all dist 649 // for(int itrace=0; itrace<nTrace; itrace++) { 650 // for(int is1=0; is1<winSize; is1++) 651 // for(int is2=0; is2<winSize; is2++) 652 // score+=Math.abs(dist1[trace1[itrace]+is1][mse1+is2]- 653 // dist2[trace2[itrace]+is1][mse2+is2]); 654 // } 655 // score1=score/(nTrace*winSize*winSize); 656 // } 657 658 659 //System.out.println("up: " + nTrace + " " + score + " " + score0 + " " + score1 + " " + winSize + " " + traceIndex_ + " " + it + " "); 660 if(score1>rmsdThrJoin) 661 continue itLoop; 662 if(score1>userRMSDMax) 663 continue itLoop; 664 score2=score1; 665 666 // this just got checked, no need to check again.. 667 //if(score2>rmsdThrJoin) 668 // continue itLoop; 669 670 if(nTrace>nBestExtTrace || (nTrace==nBestExtTrace && 671 score2<bestExtScore)) { 672 //System.out.println("setting traceindex to " + it + " " + score2); 673 bestExtScore=score2; 674 nBestExtTrace=nTrace; 675 traceIndex_=it; 676 traceScore[nTrace-1][traceIndex_]=score1; 677 } 678 679 } 680 } 681 682 if(traceIndex_!=-1) { 683 jgap=(traceIndex_+1)/2; 684 jdir=(traceIndex_+1)%2; 685 if(jdir==0) { 686 jse1=kse1+jgap; 687 jse2=kse2; 688 } 689 else { 690 jse1=kse1; 691 jse2=kse2+jgap; 692 } 693 694 if(iter==0){ 695 696 score1=(traceScore[nTrace-1][traceIndex_]*winSizeComb2*nTrace+ 697 mat[jse1][jse2]*winSizeComb1)/(winSizeComb2*nTrace+ 698 winSizeComb1); 699 700 score2 = getScore2(jse1, jse2, traceScore, traceIndex_, traceIndex, winSizeComb1, winSizeComb2, score0, score1); 701 702 if(score2>rmsdThrJoin) 703 traceIndex_=-1; 704 else if ( score2 > userRMSDMax) 705 traceIndex_=-1; 706 else { 707 traceScore[nTrace-1][traceIndex_]=score2; 708 709 traceTotalScore=score2; 710 } 711 712 } 713 else { 714 if(traceScoreMax>rmsdThrJoin && nBestTrace>=nBestTrace0) 715 traceIndex_=-1; 716 traceTotalScore=traceScoreMax; 717 } 718 } 719 720 //System.out.println("middle: " + nTrace + " " + score + " " + score0 + " " + score1 + " " + score2 + " " + traceIndex_); 721 722 if(traceIndex_==-1) { 723 //System.out.println("continue traceLoop " + nTrace); 724 //if(iterLevel==1) break; 725 nTrace--; 726 isTraceUp=false; 727 continue traceLoop; 728 } 729 else { 730 traceIterLevel[nTrace-1]++; 731 trace1[nTrace]=jse1; 732 trace2[nTrace]=jse2; 733 traceIndex[nTrace]=traceIndex_; 734 traceIterLevel[nTrace]=0; 735 nTrace++; 736 isTraceUp=true; 737 738 if(nTrace>nBestTrace || 739 (nTrace==nBestTrace && 740 bestTraceScore>traceTotalScore)) { 741 742 for(int itrace=0; itrace<nTrace; itrace++) { 743 bestTrace1[itrace]=trace1[itrace]; 744 bestTrace2[itrace]=trace2[itrace]; 745 } 746 bestTraceScore=traceTotalScore; 747 nBestTrace=nTrace; 748 } 749 750 if(iter==0) { 751 //System.out.println("doing iter0 " + newBestTrace + " " + traceTotalScore + " " + bestTracesMax); 752 newBestTrace = doIter0(newBestTrace,traceTotalScore, bestTracesMax); 753 754 755 } 756 } 757 } 758 } 759 } 760 761 if ( isPrint) { 762 System.out.println("fragment length: " + params.getWinSize()); 763 System.out.println("ntraces : " + nTraces ); 764 } 765 766 767 768 } 769 770 // try { 771 // Matrix m2 = new Matrix(traceScore).copy(); 772 // JPanel panel = GuiWrapper.getScaleableMatrixPanel(m2); 773 // JFrame frame = new JFrame(); 774 // frame.addWindowListener(new WindowAdapter(){ 775 // public void windowClosing(WindowEvent e){ 776 // JFrame f = (JFrame) e.getSource(); 777 // f.setVisible(false); 778 // f.dispose(); 779 // } 780 // }); 781 // 782 // 783 // frame.getContentPane().add(panel); 784 // 785 // frame.pack(); 786 // frame.setVisible(true); 787 // } catch (Exception e) { 788 // e.printStackTrace(); 789 // } 790 791 792 if ( params.isShowAFPRanges()){ 793 System.out.println("fragment length: " + params.getWinSize()); 794 System.out.println("ntraces : " + nTraces ); 795 796 } 797 798 } 799 800 protected double getScore2(int jse1, int jse2, double[][] traceScore, int traceIndex_,int[] traceIndex,int winSizeComb1, int winSizeComb2, double score0, double score1 ) { 801 802 803 804 /*double score2= 805 ((nTrace>1?traceScore[nTrace-2][traceIndex[nTrace-1]]:score0) 806 *a[nTrace-1]+score1*(a[nTrace]-a[nTrace-1]))/a[nTrace]; 807 */ 808 double val = 0; 809 if ( nTrace>1) 810 val =traceScore[nTrace-2][traceIndex[nTrace-1]]; 811 else 812 val = score0; 813 814 double score2 = (val * traceIndexContainer[nTrace-1]+score1*(traceIndexContainer[nTrace]-traceIndexContainer[nTrace-1]))/traceIndexContainer[nTrace]; 815 816 //System.out.println("check: score0 " + score0 + " score 1 " + score1 + " sc2: " + score2 + " val: " + val + " nTrace:" + nTrace+ " " + traceIndexContainer[nTrace-1] + " " + traceIndexContainer[nTrace-1] + " " + traceIndexContainer[nTrace] ); 817 818 return score2; 819 820 821 } 822 823 protected int doIter0(int newBestTrace, double traceTotalScore, double bestTracesMax) { 824 825 826 // only do the whole method if this criteria is fulfilled... 827 if(nTrace>bestTracesN[newBestTrace] || 828 (nTrace==bestTracesN[newBestTrace] && 829 bestTracesScores[newBestTrace]>traceTotalScore)) { 830 831 832 for(int itrace=0; itrace<nTrace; itrace++) { 833 bestTraces1[newBestTrace][itrace]=trace1[itrace]; 834 bestTraces2[newBestTrace][itrace]=trace2[itrace]; 835 bestTracesN[newBestTrace]=nTrace; 836 bestTracesScores[newBestTrace]=traceTotalScore; 837 //System.out.println("bestTracesScrores ["+newBestTrace+"]=" +traceTotalScore); 838 } 839 840 if(nTrace>nBestTrace) nBestTrace=nTrace; 841 842 if(nBestTraces<bestTracesMax) { 843 nBestTraces++; 844 newBestTrace++; 845 } 846 847 if(nBestTraces==bestTracesMax) { 848 //System.out.println("nBestTraces == bestTracesmax " + nBestTraces); 849 newBestTrace=0; 850 double scoreTmp=bestTracesScores[0]; 851 int nTraceTmp=bestTracesN[0]; 852 for(int ir=1; ir<nBestTraces; ir++) { 853 if(bestTracesN[ir]<nTraceTmp || 854 (bestTracesN[ir]==nTraceTmp && 855 scoreTmp<bestTracesScores[ir])) { 856 nTraceTmp=bestTracesN[ir]; 857 scoreTmp=bestTracesScores[ir]; 858 newBestTrace=ir; 859 //System.out.println("setting new bestTracesScore to " + ir + " " + scoreTmp); 860 } 861 } 862 } 863 } 864 865 //System.out.println("iter0 : " + newBestTrace + " " + bestTracesN[newBestTrace] + " " + traceTotalScore + " " + nTrace); 866 867 868 869 /* 870z=zStrAlign(winSize, nTrace, traceTotalScore, 871trace1[nTrace-1]-trace1[0]+trace2[nTrace-1]-trace2[0]- 8722*(nTrace-1)*winSize); 873if(z>bestTraceZScore) { 874for(int itrace=0; itrace<nTrace; itrace++) { 875bestTrace1[itrace]=trace1[itrace]; 876bestTrace2[itrace]=trace2[itrace]; 877} 878bestTraceZScore=z; 879bestTraceScore=*(traceScore[nTrace-2]+traceIndex_); 880nBestTrace=nTrace; 881} 882 */ 883 return newBestTrace; 884 885 } 886 887 888 protected double getScoreFromDistanceMatrices(int mse1, int mse2,int winSize) { 889 890 double score = 0; 891 // (winSize) "best" dist 892 893 // reduce sign. values to C code.. 6 digits.. 894 895 for(int itrace=0; itrace<nTrace; itrace++) { 896 score+= Math.abs(dist1[trace1[itrace]][mse1]- 897 dist2[trace2[itrace]][mse2]); 898 899 score+= Math.abs(dist1[trace1[itrace]+winSize-1] 900 [mse1+winSize-1]- 901 dist2[trace2[itrace]+winSize-1][mse2+winSize-1]); 902 903 for(int id=1; id<winSize-1; id++) 904 score+= Math.abs(dist1[trace1[itrace]+id][mse1+winSize-1-id]- 905 dist2[trace2[itrace]+id][mse2+winSize-1-id]); 906 907 } 908 909 return score; 910 } 911 912 public void nextStep( AFPChain afpChain, 913 Atom[] ca1, Atom[] ca2) throws StructureException{ 914 915 916 if(nBestTrace>0) { 917 checkBestTraces(afpChain,ca1,ca2); 918 } else { 919 noBestTrace(); 920 } 921 922 convertAfpChain(afpChain, ca1, ca2); 923 AFPAlignmentDisplay.getAlign(afpChain, ca1, ca2); 924 } 925 926 927 928 // this part is modified from the original CeCalculator 929 @SuppressWarnings("unused") 930 private void checkBestTraces( AFPChain afpChain, 931 Atom[] ca1, Atom[] ca2) throws StructureException{ 932 933 z=0.0; 934 935 936 int nGaps; 937 int winSize = params.getWinSize(); 938 int nse1 = ca1.length; 939 int nse2 = ca2.length; 940 int traceMaxSize=nse1<nse2?nse1:nse2; 941 int idir; 942 943 944 align_se1=new int [nse1+nse2]; 945 align_se2=new int [nse1+nse2]; 946 alignmentPositionOrLength = 0; 947 948 // we now support alignment using any particular atoms.. 949 950 Atom[] strBuf1 = new Atom[traceMaxSize]; 951 Atom[] strBuf2 = new Atom[traceMaxSize]; 952 953 double rmsdNew; 954 955 956 957 // removing some loops that are run in orig CE 958 // and which did not do anything 959 if ( debug ){ 960 checkPrintRmsdNew(traceMaxSize, winSize, ca1, ca2); 961 } 962 963 double rmsd=100.0; 964 965 int iBestTrace=0; 966 967 for(int ir=0; ir<nBestTraces; ir++) { 968 if(bestTracesN[ir]!=nBestTrace) continue; 969 970 rmsdNew = getRMSDForBestTrace(ir, strBuf1, strBuf2, bestTracesN,bestTraces1, bestTrace2,winSize,ca1,ca2); 971 if ( isPrint) 972 System.out.println(String.format("%d %d %d %.2f", ir, bestTracesN[ir], nBestTrace, rmsdNew)); 973 974 if(rmsd>rmsdNew) { 975 iBestTrace=ir; 976 rmsd=rmsdNew; 977 //System.out.println(" iBestTrace:" + iBestTrace + " new rmsd = " + rmsd); 978 } 979 } 980 for(int it=0; it<bestTracesN[iBestTrace]; it++) { 981 bestTrace1[it]=bestTraces1[iBestTrace][it]; 982 bestTrace2[it]=bestTraces2[iBestTrace][it]; 983 } 984 985 //System.out.println("iBestTrace: "+iBestTrace+" = bestTracesScores " + bestTracesScores[iBestTrace]); 986 987 nBestTrace=bestTracesN[iBestTrace]; 988 989 bestTraceScore=bestTracesScores[iBestTrace]; 990 991 992 //printf("\nOptimizing gaps...\n"); 993 994 int[] traceLen=new int [traceMaxSize]; 995 bestTraceLen=new int [traceMaxSize]; 996 997 998 int strLen=0; 999 1000 int jt; 1001 strLen=0; 1002 nGaps=0; 1003 nTrace=nBestTrace; 1004 1005 for(jt=0; jt<nBestTrace; jt++) { 1006 trace1[jt]=bestTrace1[jt]; 1007 trace2[jt]=bestTrace2[jt]; 1008 traceLen[jt]=winSize; 1009 1010 if(jt<nBestTrace-1) { 1011 nGaps+=bestTrace1[jt+1]-bestTrace1[jt]-winSize+ 1012 bestTrace2[jt+1]-bestTrace2[jt]-winSize; 1013 } 1014 } 1015 nBestTrace=0; 1016 for(int it=0; it<nTrace; ) { 1017 int cSize=traceLen[it]; 1018 for(jt=it+1; jt<nTrace; jt++) { 1019 if(trace1[jt]-trace1[jt-1]-traceLen[jt-1]!=0 || 1020 trace2[jt]-trace2[jt-1]-traceLen[jt-1]!=0) break; 1021 cSize+=traceLen[jt]; 1022 } 1023 bestTrace1[nBestTrace]=trace1[it]; 1024 bestTrace2[nBestTrace]=trace2[it]; 1025 bestTraceLen[nBestTrace]=cSize; 1026 nBestTrace++; 1027 strLen+=cSize; 1028 it=jt; 1029 } 1030 1031 1032 int is=0; 1033 for(jt=0; jt<nBestTrace; jt++) { 1034 for(int i=0; i<bestTraceLen[jt]; i++) { 1035 setStrBuf(strBuf1,is+i,ca1,bestTrace1[jt]+i ); 1036 setStrBuf(strBuf2,is+i,ca2,bestTrace2[jt]+i); 1037 } 1038 is+=bestTraceLen[jt]; 1039 } 1040 //sup_str(strBuf1, strBuf2, strLen, d_); 1041 1042 rmsd=calc_rmsd(strBuf1, strBuf2, strLen,true,showAlignmentSteps); 1043 1044 if ( isPrint) 1045 System.out.println("got first rmsd: " + rmsd); 1046 boolean isCopied=false; 1047 1048 outer_loop: 1049 for(int it=1; it<nBestTrace; it++) { 1050 1051 /* not needed... 1052 int igap; 1053 if(bestTrace1[it]-bestTrace1[it-1]-bestTraceLen[it-1]>0) igap=0; 1054 if(bestTrace2[it]-bestTrace2[it-1]-bestTraceLen[it-1]>0) igap=1; 1055 */ 1056 1057 1058 boolean wasBest=false; 1059 main_loop: 1060 for(idir=-1; idir<=1; idir+=2) { 1061 if(wasBest) break; 1062 1063 inner_loop: 1064 for(int idep=1; idep<=winSize/2; idep++) { 1065 1066 if(!isCopied) 1067 for(jt=0; jt<nBestTrace; jt++) { 1068 trace1[jt]=bestTrace1[jt]; 1069 trace2[jt]=bestTrace2[jt]; 1070 traceLen[jt]=bestTraceLen[jt]; 1071 } 1072 isCopied=false; 1073 1074 traceLen[it-1]+=idir; 1075 traceLen[it]-=idir; 1076 trace1[it]+=idir; 1077 trace2[it]+=idir; 1078 1079 is=0; 1080 for(jt=0; jt<nBestTrace; jt++) { 1081 for(int i=0; i<traceLen[jt]; i++) { 1082 if(ca1[trace1[jt]+i].getX()>1e10 || ca2[trace2[jt]+i].getX()>1e10) 1083 continue main_loop; 1084 strBuf1[is+i]=ca1[trace1[jt]+i]; 1085 strBuf2[is+i]=ca2[trace2[jt]+i]; 1086 } 1087 is+=traceLen[jt]; 1088 } 1089 //sup_str(strBuf1, strBuf2, strLen, d_); 1090 rmsdNew=calc_rmsd(strBuf1, strBuf2, strLen, true, false); 1091 //System.out.println(String.format("step %d %d %d %.2f old: %.2f", it, idir, idep, rmsdNew, rmsd)); 1092 if(rmsdNew<rmsd) { 1093 1094 for(jt=0; jt<nBestTrace; jt++) { 1095 bestTrace1[jt] = trace1[jt]; 1096 bestTrace2[jt] = trace2[jt]; 1097 bestTraceLen[jt]= traceLen[jt]; 1098 } 1099 isCopied=true; 1100 wasBest=true; 1101 rmsd=rmsdNew; 1102 continue inner_loop; 1103 } 1104 // AP 1105 //bad_ca: break; 1106 continue main_loop; 1107 } 1108 } 1109 } 1110 //if ( showAlignmentSteps) 1111 rmsdNew=calc_rmsd(strBuf1, strBuf2, strLen,true, showAlignmentSteps); 1112 if ( isPrint) 1113 System.out.println("rmsdNew: " + rmsdNew + " rmsd " + rmsd); 1114 afpChain.setTotalRmsdIni(rmsdNew); 1115 afpChain.setTotalLenIni(strBuf1.length); 1116 1117 1118 nAtom = strLen; 1119 1120 System.out.println("zStrAlign: " + winSize + " strLen " + strLen + " s/w " + (strLen/winSize) + " " + bestTraceScore + " " + nGaps); 1121 z=zStrAlign(winSize, strLen/winSize, bestTraceScore, nGaps); 1122 1123 if(params.isShowAFPRanges()) { 1124 System.out.println("win size: " + winSize + " strLen/winSize: " + strLen/winSize + " best trace score: " + String.format("%.2f",bestTraceScore) + " nr gaps: " + nGaps + " nr residues: " + nAtom); 1125 1126 System.out.println(String.format("size=%d rmsd=%.2f z=%.1f gaps=%d(%.1f%%) comb=%d", 1127 nAtom, rmsd, z, nGaps, nGaps*100.0/nAtom, 1128 nTraces)); 1129 1130 System.out.println("Best Trace, before optimization"); 1131 for(int k=0; k<nBestTrace; k++) 1132 System.out.println(String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, 1133 bestTraceLen[k])); 1134 1135 } 1136 1137 // start to convert CE internal datastructure to generic AFPChain one... 1138 List<AFP> afpSet = new ArrayList<AFP>(); 1139 for (int afp=0;afp<nBestTrace;afp++){ 1140 // fill in data from nBestTrace into AFP 1141 1142 AFP afpI = new AFP(); 1143 1144 afpI.setFragLen(bestTraceLen[afp]); 1145 afpI.setP1(bestTrace1[afp]+1); 1146 afpI.setP2(bestTrace2[afp]+1); 1147 1148 afpSet.add( afpI); 1149 } 1150 1151 afpChain.setAfpSet(afpSet); 1152 1153 1154 //System.out.println("z:"+z + " zThr" + zThr+ " bestTraceScore " + bestTraceScore + " " + nGaps ); 1155 if(z>=zThr) { 1156 nGaps = optimizeSuperposition(afpChain,nse1, nse2, strLen, rmsd, ca1, ca2,nGaps,strBuf1,strBuf2); 1157 // if(isPrint) { 1158 // /* 1159 // FILE *f=fopen("homologies", "a"); 1160 // fprintf(f, "%s(%d) %s(%d) %3d %4.1f %4.1f %d(%d) ", 1161 // name1, nse1, name2, nse2, nAtom, rmsd, z, 1162 // nGaps, nGaps*100/nAtom); 1163 // for(int k=0; k<nBestTrace; k++) 1164 // fprintf(f, "(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, 1165 // bestTraceLen[k]); 1166 // fprintf(f, "\n"); 1167 // fclose(f); 1168 // */ 1169 // } 1170 } 1171 else { 1172 int lali_x_ = 0; 1173 for(int k=0; k<nBestTrace; k++) { 1174 for(int l=0; l<bestTraceLen[k]; l++) { 1175 align_se1[alignmentPositionOrLength+l]=bestTrace1[k]+l; 1176 align_se2[alignmentPositionOrLength+l]=bestTrace2[k]+l; 1177 } 1178 lali_x_+=bestTraceLen[k]; 1179 if(k<nBestTrace-1) { 1180 if(bestTrace1[k]+bestTraceLen[k]!=bestTrace1[k+1]) 1181 for(int l=bestTrace1[k]+bestTraceLen[k]; l<bestTrace1[k+1]; l++) { 1182 align_se1[alignmentPositionOrLength]=l; 1183 align_se2[alignmentPositionOrLength]=-1; 1184 alignmentPositionOrLength++; 1185 } 1186 if(bestTrace2[k]+bestTraceLen[k]!=bestTrace2[k+1]) 1187 for(int l=bestTrace2[k]+bestTraceLen[k]; l<bestTrace2[k+1]; l++) { 1188 align_se1[alignmentPositionOrLength]=-1; 1189 align_se2[alignmentPositionOrLength]=l; 1190 alignmentPositionOrLength++; 1191 } 1192 } 1193 } 1194 nAtom=lali_x_; 1195 } 1196 1197 timeEnd = System.currentTimeMillis(); 1198 long time_q=(timeEnd-timeStart); 1199 1200 double gapsP = ( nGaps*100.0/nAtom) ; 1201 if(isPrint) { 1202 String msg = String.format("Alignment length = %d Rmsd = %.2fA Z-Score = %.1f Gaps = %d(%.1f%%)",nAtom,rmsd,z,nGaps, gapsP); 1203 System.out.println(msg + " CPU = " + time_q); 1204 } 1205 1206 // if ( params.isShowAFPRanges()){ 1207 1208 // this is actually the final alignment... 1209 System.out.println("Best Trace: (index1,index2,len)"); 1210 for(int k=0; k<nBestTrace; k++) 1211 System.out.println( 1212 String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, bestTraceLen[k])); 1213 1214 1215 1216 // } 1217 1218 afpChain.setCalculationTime(time_q); 1219 afpChain.setGapLen(nGaps); 1220 1221 int[] optLen = new int[]{nAtom}; 1222 afpChain.setOptLen(optLen); 1223 afpChain.setOptLength(nAtom); 1224 afpChain.setAlnLength(alignmentPositionOrLength); 1225 1226 afpChain.setProbability(z); 1227 1228 1229 } 1230 1231 /** set the Atoms for a particular residue position. 1232 * Requires that atom.getParent returns the correct group! 1233 * take care during cloning of atoms. Best to use StructureTools.cloneCaAtoms(); 1234 * 1235 * @param strBuf 1236 * @param i 1237 * @param ca 1238 * @param j 1239 */ 1240 private void setStrBuf(Atom[] strBuf, int i, Atom[] ca, int j) { 1241 // TODO Auto-generated method stub 1242 //TODO 1243 Group parent = ca[j].getGroup(); 1244 int pos = 0; 1245 String atomName = ca[j].getName(); 1246 1247 Atom a = null; 1248 1249 a= parent.getAtom(atomName); 1250 if ( a != null){ 1251 strBuf[i]=a; 1252 } 1253 else { 1254 // probably a GLY and no CB was found... 1255 //e.printStackTrace(); 1256 } 1257 strBuf[i+pos] = a; 1258 pos++; 1259 1260 1261 1262 } 1263 1264 // TODO: consider all requested Atoms? 1265 private double getRMSDForBestTrace(int ir, Atom[] strBuf1, Atom[] strBuf2, 1266 int[] bestTracesN2, int[][] bestTraces12, int[] bestTrace22, 1267 int winSize,Atom[] ca1, Atom[] ca2 ) throws StructureException { 1268 int is=0; 1269 for(int jt=0; jt<bestTracesN[ir]; jt++) { 1270 for(int i=0; i<winSize; i++) { 1271 1272 setStrBuf(strBuf1, is+i, ca1, bestTraces1[ir][jt]+i); 1273 setStrBuf(strBuf2, is+i, ca2, bestTraces2[ir][jt]+i); 1274 } 1275 is+=winSize; 1276 } 1277 //sup_str(strBuf1, strBuf2, bestTracesN[ir]*winSize, d_); 1278 double rmsdNew=calc_rmsd(strBuf1, strBuf2, bestTracesN[ir]*winSize, true, false); 1279 return rmsdNew; 1280 1281 } 1282 1283 1284 1285 1286 1287 1288 /** calc initial RMSD for bestTrace1 in debug only 1289 * 1290 */ 1291 private void checkPrintRmsdNew(int traceMaxSize, int winSize, Atom[] ca1, Atom[] ca2) throws StructureException{ 1292 1293 int is = 0; 1294 // calc initial RMSD for bestTrace1 1295 Atom[] strBuf1 = new Atom[traceMaxSize]; 1296 Atom[] strBuf2 = new Atom[traceMaxSize]; 1297 for(int jt=0; jt<nBestTrace; jt++) { 1298 for(int i=0; i<winSize; i++) { 1299 setStrBuf(strBuf1, is+i, ca1, bestTrace1[jt]+i ); 1300 setStrBuf(strBuf2, is+i, ca2, bestTrace2[jt]+i ); 1301 } 1302 is+=winSize; 1303 } 1304 1305 //sup_str(strBuf1, strBuf2, nBestTrace*winSize, d_); 1306 double rmsdNew=calc_rmsd(strBuf1, strBuf2, nBestTrace*winSize, true, false); 1307 //afpChain.setTotalRmsdIni(rmsdNew); 1308 1309 1310 if ( isPrint){ 1311 System.out.println("rmsdNew after trace: " +rmsdNew); 1312 1313 for(int k=0; k<nBestTrace; k++) 1314 System.out.println(String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1,8)); 1315 } 1316 1317 if ( isPrint){ 1318 System.out.println("best traces: " + nBestTraces); 1319 } 1320 1321 1322 } 1323 1324 1325 1326 1327 1328 1329 private static char getOneLetter(Group g){ 1330 1331 if (g==null) return StructureTools.UNKNOWN_GROUP_LABEL; 1332 1333 return StructureTools.get1LetterCode(g.getPDBName()); 1334 } 1335 1336 1337 1338 private int optimizeSuperposition(AFPChain afpChain, int nse1, int nse2, int strLen, double rmsd, Atom[] ca1, Atom[] ca2,int nGaps, 1339 Atom[] strBuf1, Atom[] strBuf2 ) throws StructureException { 1340 1341 //System.out.println("optimizing Superimposition..."); 1342 1343 //nAtom=strLen; 1344 // optimization on superposition 1345 Atom[] ca3=new Atom[nse2]; 1346 1347 double rmsdLen = 0.0; 1348 1349 // this flag tests if the RMSDLen has been assigned. 1350 // this is to enforce that the alignment ends up 1351 // smaller than 95% of the original alignment. 1352 // +/- 1353 boolean isRmsdLenAssigned=false; 1354 int nAtomPrev=-1; 1355 1356 double oRmsdThr = params.getORmsdThr(); 1357 1358 double distanceIncrement = params.getDistanceIncrement(); 1359 double maxUserRMSD = params.getMaxOptRMSD(); 1360 nAtom=0; 1361 int counter = -1; 1362 1363 int maxNrIterations = params.getMaxNrIterationsForOptimization(); 1364 1365 //double[][] mat = new double[nse1][nse2]; 1366 while((nAtom<strLen*0.95 || 1367 (isRmsdLenAssigned && rmsd<rmsdLen*1.1 && nAtomPrev!=nAtom)) && ( counter< maxNrIterations)) { 1368 1369 counter++; 1370 if ( debug) 1371 System.out.println("nAtom: " + nAtom + " " + nAtomPrev + " " + rmsdLen + " " + isRmsdLenAssigned + " strLen:" + strLen + " nse1,nse2:" + nse1 + " " + nse2); 1372 nAtomPrev=nAtom; 1373 oRmsdThr += distanceIncrement; 1374 1375 rot_mol(ca2, ca3, nse2, r,t); 1376 1377 for(int ise1=0; ise1<nse1; ise1++) { 1378 for(int ise2=0; ise2<nse2; ise2++) { 1379 1380 //mat[ise1][ise2]=-0.001; 1381 1382 // this needs to be a parameter... 1383 1384 1385 double dist = getDistanceWithSidechain(ca1[ise1], ca3[ise2]); 1386 mat[ise1][ise2] = oRmsdThr - dist; 1387 1388 //double distold = Calc.getDistance(ca1[ise1],ca3[ise2]); 1389 //double scoreOld = oRmsdThr - distold ; 1390 //mat[ise1][ise2] = scoreOld; 1391 //mat[ise1][ise2] = oRmsdThr - Calc.getDistance(ca1[ise1],ca3[ise2]); 1392 1393 //if ( counter == 0 && ise1 == ise2) { 1394 1395 // System.out.println("mat[" + ise1 + "][" + ise2 + "] " + mat[ise1][ise2] + " scoreOld:" + scoreOld + " oRmsdThr: " + oRmsdThr +" dist: " + dist + " distold:" + distold ); 1396 // } 1397 1398 1399 } 1400 } 1401 1402 mat = notifyMatrixListener(mat); 1403 1404 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 1405 mat = updateMatrixWithSequenceConservation(mat,ca1,ca2, params); 1406 } 1407 1408 double gapOpen = params.getGapOpen(); 1409 double gapExtension = params.getGapExtension(); 1410 1411 // by default we always do local alignment 1412 double score = dpAlign( nse1, nse2, gapOpen , gapExtension , GLOBAL_ALIGN1, GLOBAL_ALIGN2); 1413 1414 if (debug) { 1415 System.out.println("iter: "+ counter + " score:" + score + " " + " nAtomPrev: " + nAtomPrev + " nAtom:" + nAtom + " oRmsdThr: " + oRmsdThr); 1416 1417 for (int i=0 ; i<alignmentPositionOrLength ; i++){ 1418 if ( align_se2[i] == 172 || align_se2[i] == 173) { 1419 System.out.println("BREAK POINT IS ALIGNED!!!!"); 1420 System.out.println(align_se2[i-1] + " " + align_se2[i] + " " + align_se2[i+1]); 1421 } 1422 } 1423 } 1424 afpChain.setAlignScore(score); 1425 1426 1427 nAtom=0; nGaps=0; 1428 for(int ia=0; ia<alignmentPositionOrLength; ia++) { 1429 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1430 1431 strBuf1[nAtom]=ca1[align_se1[ia]]; 1432 strBuf2[nAtom]=ca2[align_se2[ia]]; 1433 1434 nAtom++; 1435 1436 } 1437 else { 1438 nGaps++; 1439 } 1440 } 1441 1442 for ( int i =0 ; i < strBuf1.length; i++){ 1443 if ( strBuf1[i] == null) 1444 break; 1445 System.out.print(strBuf1[i].getGroup().getChemComp().getOne_letter_code()); 1446 } 1447 System.out.println(); 1448 1449 if(nAtom<4) continue; 1450 1451 //sup_str(strBuf1, strBuf2, nAtom, _d); 1452 // here we don't store the rotation matrix for the user! 1453 rmsd= calc_rmsd(strBuf1, strBuf2, nAtom,false, false); 1454 if ( isPrint ) 1455 System.out.println("iter: " + counter + " nAtom " + nAtom + " rmsd: " + rmsd); 1456 //afpChain.setTotalRmsdOpt(rmsd); 1457 //System.out.println("rmsd: " + rmsd); 1458 1459 if(!(nAtom<strLen*0.95) && (!isRmsdLenAssigned)) { 1460 rmsdLen=rmsd; 1461 isRmsdLenAssigned=true; 1462 } 1463 //System.out.println(String.format("nAtom %d %d rmsd %.1f", nAtom, nAtomPrev, rmsd)); 1464 1465 1466 afpChain.setBlockRmsd(new double[]{rmsd}); 1467 afpChain.setOptRmsd(new double[]{rmsd}); 1468 afpChain.setTotalRmsdOpt(rmsd); 1469 afpChain.setChainRmsd(rmsd); 1470 1471 if ( rmsd >= maxUserRMSD) { 1472 break; 1473 } 1474 1475 } 1476 1477 1478 1479 //System.out.println("done optimizing"); 1480 /* 1481 nAtom=0; nGaps=0; 1482 for(int ia=0; ia<lcmp; ia++) 1483 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1484 if(ca1[align_se1[ia]].X<1e10 && ca2[align_se2[ia]].X<1e10) { 1485 strBuf1[nAtom]=ca1[align_se1[ia]]; 1486 strBuf2[nAtom]=ca2[align_se2[ia]]; 1487 nAtom++; 1488 } 1489 } 1490 else { 1491 nGaps++; 1492 } 1493 1494 sup_str(strBuf1, strBuf2, nAtom, _d); 1495 rmsd=calc_rmsd(strBuf1, strBuf2, nAtom, _d); 1496 */ 1497 nBestTrace=0; 1498 boolean newBestTrace=true; 1499 for(int ia=0; ia<alignmentPositionOrLength; ia++) { 1500 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1501 //System.out.println(" " +align_se1[ia] + " " + align_se2[ia]); 1502 1503 if(newBestTrace) { 1504 bestTrace1[nBestTrace]=align_se1[ia]; 1505 bestTrace2[nBestTrace]=align_se2[ia]; 1506 bestTraceLen[nBestTrace]=0; 1507 newBestTrace=false; 1508 nBestTrace++; 1509 } 1510 bestTraceLen[nBestTrace-1]++; 1511 1512 } 1513 else { 1514 newBestTrace=true; 1515 } 1516 } 1517 1518 return nGaps; 1519 1520 // end of optimization on superposition 1521 1522 } 1523 1524 /** Modifies an alignment matrix by favoring the alignment of similar and identical amino acids and penalizing the alignment of unrelated ones. 1525 * 1526 * @param max alignment matrix 1527 * @param ca1 Atoms for protein 1 1528 * @param ca2 Atoms for Protein 2 1529 * @param params alignment parameters 1530 * @return modified alignment matrix 1531 */ 1532 public static double[][] updateMatrixWithSequenceConservation(double[][] max, Atom[] ca1, Atom[] ca2, CeParameters params) { 1533 Matrix origM = new Matrix(max); 1534 1535 SubstitutionMatrix<AminoAcidCompound> substMatrix = 1536 params.getSubstitutionMatrix(); 1537 1538 int internalScale = 1; 1539 if ( substMatrix instanceof ScaledSubstitutionMatrix) { 1540 ScaledSubstitutionMatrix scaledMatrix = (ScaledSubstitutionMatrix) substMatrix; 1541 internalScale = scaledMatrix.getScale(); 1542 } 1543 1544 1545 AminoAcidCompoundSet set = AminoAcidCompoundSet.getAminoAcidCompoundSet(); 1546 1547 for (int i = 0 ; i < origM.getRowDimension() ; i++){ 1548 for ( int j =0; j < origM.getColumnDimension() ; j ++ ) { 1549 double val = origM.get(i,j); 1550 Atom a1 = ca1[i]; 1551 Atom a2 = ca2[j]; 1552 1553 AminoAcidCompound ac1 = 1554 set.getCompoundForString(a1.getGroup().getChemComp().getOne_letter_code()); 1555 AminoAcidCompound ac2 = 1556 set.getCompoundForString(a2.getGroup().getChemComp().getOne_letter_code()); 1557 1558 1559 if ( ac1 == null || ac2 == null) 1560 continue; 1561 1562 short aaScore = substMatrix.getValue(ac1,ac2); 1563 1564 double weightedScore = (aaScore / internalScale) * params.getSeqWeight(); 1565 1566 1567 val += weightedScore; 1568 origM.set(i,j,val); 1569 1570 } 1571 } 1572 max = origM.getArray(); 1573 1574 //SymmetryTools.showMatrix((Matrix)origM.clone(), "in optimizer " + loopCount ); 1575 //SymmetryTools.showMatrix(origM, "iteration matrix " + loopCount + " after"); 1576 1577 return max; 1578 } 1579 1580 private double[][] notifyMatrixListener(double[][] mat2) { 1581 for (MatrixListener li : matrixListeners) { 1582 mat2 = li.matrixInOptimizer(mat2); 1583 } 1584 return mat2; 1585 } 1586 1587 private boolean[][] notifyBreakFlagListener(boolean[][] brkFlag){ 1588 for (MatrixListener li : matrixListeners) { 1589 brkFlag = li.initializeBreakFlag(brkFlag); 1590 } 1591 return brkFlag; 1592 } 1593 1594 public void addMatrixListener(MatrixListener li){ 1595 matrixListeners.add(li); 1596 } 1597 1598 1599 private double dpAlign(int nSeq1, int nSeq2, double gapI, double gapE, 1600 boolean isGlobal1, boolean isGlobal2) { 1601 1602 1603 // isGlobal1,isGlobal2 are always false... 1604 // i.e. we do local alignments by default 1605 1606 int i, j, iStart, jStart, iMax, jMax, k; 1607 boolean hasGapExtensionPenalty=(gapE!=0.0?true:false); 1608 double sum_ret, sum_brk; 1609 1610 boolean[][] brk_flg=new boolean [nSeq1][nSeq2]; 1611 for(i=0; i<nSeq1; i++) brk_flg[i]=new boolean [nSeq2]; 1612 1613 brk_flg = notifyBreakFlagListener(brk_flg); 1614 1615 // ge = true here... 1616 /* 1617 for(i=0; i<nSeq1; i++) 1618 { 1619 printf("\n"); 1620 for(j=0; j<nSeq2; j++) 1621 { 1622 printf("%4d", (int)(*(mat[i]+j)*10)); 1623 } 1624 } 1625 printf("\n\n\n"); 1626 */ 1627 int[][] tracebackMatrix1 = new int[nSeq1][nSeq2]; 1628 int[][] tracebackMatrix2 = new int[nSeq1][nSeq2]; 1629 1630 // System.out.println("===== " + mat[0][0]); 1631 // for ( i = 39 ; i < 42 ;i ++){ 1632 // //System.out.print(" " + i + " "); 1633 // for ( j = 155 ; j < 157 ; j++) { 1634 // System.out.print(String.format("[%s %s %.2f] ",i,j,mat[i][j])); 1635 // } 1636 // System.out.println(); 1637 // } 1638 // System.out.println("----"); 1639 // 1640 // for ( i = 69 ; i < 72 ;i ++){ 1641 // //System.out.println(" " + i + " "); 1642 // for ( j = 170 ; j < 175 ; j++) { 1643 // System.out.print(String.format("[%s %s %.2f] ",i,j,mat[i][j])); 1644 // } 1645 // System.out.println(); 1646 // } 1647 1648 //double sum = 0; 1649 if(!hasGapExtensionPenalty) 1650 { 1651 for(i=nSeq1-1; i>=0; i--) 1652 for(j=nSeq2-1; j>=0; j--) 1653 { 1654 double sum ; 1655 brk_flg[i][j]=false; 1656 if(j<nSeq2-1 && i<nSeq1-1) 1657 { 1658 sum=mat[i+1][j+1]; 1659 } 1660 else 1661 { 1662 sum=0.0; 1663 if((isGlobal1 && i!=nSeq1-1) || (isGlobal2 && j!=nSeq2-1)) 1664 sum=-gapI; 1665 } 1666 if(j+1<nSeq2) 1667 for(k=i+2; k<nSeq1; k++) 1668 { 1669 if(mat[k][j+1]-gapI>sum) 1670 sum=mat[k][j+1]-gapI; 1671 } 1672 if(i+1<nSeq1) 1673 for(k=j+2; k<nSeq2; k++) 1674 { 1675 if(mat[i+1][k]-gapI>sum) 1676 sum=mat[i+1][k]-gapI; 1677 } 1678 sum+=mat[i][j]; 1679 sum_brk=(isGlobal1?-gapI:0.0)+(isGlobal2?-gapI:0.0); 1680 if(sum<sum_brk) 1681 { 1682 sum=sum_brk; 1683 brk_flg[i][j]=true; 1684 //System.out.println("break at: " + i + " " + j); 1685 } 1686 mat[i][j]=sum; 1687 } 1688 } 1689 else 1690 { 1691 for(i=nSeq1-1; i>=0; i--) 1692 for(j=nSeq2-1; j>=0; j--) 1693 { 1694 double maxSum ; 1695 brk_flg[i][j]=false; 1696 if(j<nSeq2-1 && i<nSeq1-1) 1697 { 1698 // any row/column which is not the last 1699 maxSum=mat[i+1][j+1]; 1700 tracebackMatrix1[i][j] = i+1; 1701 tracebackMatrix2[i][j] = j+1; 1702 } 1703 else 1704 { 1705 // sets the last row and column 1706 maxSum=0.0; 1707 if(isGlobal1 && i!=nSeq1-1) maxSum=-gapI-gapE*(nSeq1-i-1); 1708 if(isGlobal2 && j!=nSeq2-1) maxSum=-gapI-gapE*(nSeq2-j-1); 1709 tracebackMatrix1[i][j] = -1; 1710 tracebackMatrix2[i][j] = -1; 1711 1712 } 1713 1714 // do only for rows/columns which are not the last: 1715 if(j+1<nSeq2) 1716 for(k=i+2; k<nSeq1; k++) { 1717 if(mat[k][j+1]-gapI-gapE*(k-i-1)>maxSum) { 1718 maxSum=mat[k][j+1]-gapI-gapE*(k-i-1); 1719 tracebackMatrix1[i][j] = k; 1720 tracebackMatrix2[i][j] = j+1; 1721 1722 } 1723 } 1724 if(i+1<nSeq1) 1725 for(k=j+2; k<nSeq2; k++) { 1726 if(mat[i+1][k]-gapI-gapE*(k-j-1)>maxSum) { 1727 maxSum=mat[i+1][k]-gapI-gapE*(k-j-1); 1728 tracebackMatrix1[i][j] = i+1; 1729 tracebackMatrix2[i][j] = k; 1730 1731 } 1732 } 1733 1734 maxSum+= mat[i][j]; 1735 1736 1737 sum_brk=(isGlobal1?(-gapI-gapE*(nSeq1-1-i)):0.0)+(isGlobal2?(-gapI-gapE*(nSeq2-1-j)):0.0); 1738 if(maxSum<sum_brk) 1739 { 1740 maxSum=sum_brk; 1741 brk_flg[i][j]=true; 1742 } 1743 mat[i][j]=maxSum; 1744 } 1745 } 1746 1747 1748 1749 1750 iStart=0; jStart=0; alignmentPositionOrLength=0; 1751 // no nc-end penalty - begin 1752 sum_ret=mat[0][0]; 1753 1754 // look for the highest score in mat[i][j] 1755 // TODO: move this up ?? 1756 1757 for(i=0; i<nSeq1; i++) 1758 for(j=0; j<nSeq2; j++) 1759 { 1760 if(i==0 && j==0) continue; 1761 double sum=mat[i][j]; 1762 if(isGlobal1) sum+=-gapI-gapE*i; 1763 if(isGlobal2) sum+=-gapI-gapE*j; 1764 if(sum>sum_ret) 1765 { 1766 sum_ret=sum; 1767 iStart=i; jStart=j; 1768 } 1769 } 1770 1771 1772 // ok we got the maximum score in sum_ret and the start position in iStart, jStart 1773 1774 //System.out.println("start at " + is + " " + js); 1775 //for(k=0; k<is; k++) align1[k]=-1; 1776 //for(k=0; k<js; k++) align2[k]=-1; 1777 // no nc-end penalty - end 1778 int prevGapEnd = -1; 1779 // 1780 for(i=iStart, j=jStart; i<nSeq1 && j<nSeq2; i++, j++) 1781 { 1782 iMax=i; jMax=j; 1783 double localMaxScore=mat[i][j]; 1784 if(!hasGapExtensionPenalty) 1785 { 1786 for(k=i+1; k<nSeq1; k++) 1787 if(mat[k][j]-gapI>localMaxScore) 1788 { 1789 iMax=k; jMax=j; 1790 localMaxScore=mat[k][j]-gapI; 1791 } 1792 1793 for(k=j+1; k<nSeq2; k++) 1794 if(mat[i][k]-gapI>localMaxScore) 1795 { 1796 iMax=i; jMax=k; 1797 localMaxScore=mat[i][k]-gapI; 1798 } 1799 } 1800 else 1801 { 1802 for(k=i+1; k<nSeq1; k++) { 1803 if(mat[k][j]-gapI-gapE*(k-i)>localMaxScore) 1804 { 1805 System.out.println(" gap1 " + alignmentPositionOrLength + " " + k + " " + j + " " + localMaxScore + "<" +(mat[k][j]-gapI-gapE*(k-i))); 1806 iMax=k; jMax=j; 1807 localMaxScore=mat[k][j]-gapI-gapE*(k-i); 1808 } 1809 } 1810 for(k=j+1; k<nSeq2; k++) { 1811 if(mat[i][k]-gapI-gapE*(k-j)>localMaxScore) 1812 { 1813 System.out.println(" gap2 " + alignmentPositionOrLength + " " + k + " " + i + " " + localMaxScore + "<"+ (mat[i][k]-gapI-gapE*(k-j))); 1814 iMax=i; jMax=k; 1815 localMaxScore=mat[i][k]-gapI-gapE*(k-j); 1816 } 1817 } 1818 } 1819 1820 //boolean doubleGap = false; 1821 boolean gapPosition = false; 1822 if ( i != iMax || j != jMax ) { 1823 int l1 = iMax - i; 1824 int l2 = jMax - j ; 1825 System.out.println(String.format("FOUND GAP AT: lcmp:%d l1: %d l2: %d | i:%d iMax: %d j:%d jMax:%d ",alignmentPositionOrLength, l1,l2, i, iMax , j, jMax)); 1826 if ( l1 > 0) { 1827 System.out.println(" -- G1 : " + alignmentPositionOrLength + " ->" + (alignmentPositionOrLength + l1) + " " ); 1828 gapPosition = true; 1829 } 1830 if ( l2 > 0) { 1831 System.out.println(" -- G2 : " + alignmentPositionOrLength + " ->" + (alignmentPositionOrLength + l2) + " "); 1832 gapPosition = true; 1833 } 1834 if ( prevGapEnd == alignmentPositionOrLength -1){ 1835 // double gap! 1836 System.out.println( " !! FOUND DOUBLE GAP AT: "+ alignmentPositionOrLength + " | "+ i+ " " + iMax + " " + j + " " + jMax + " " + String.format("%f", mat[i][j]) + " " + getTraceBack(tracebackMatrix1,tracebackMatrix2,i,j)); 1837 //doubleGap = true; 1838 1839 // if ( i != iMax){ 1840 // int pos = align_se1[ alignmentPositionOrLength -1]; 1841 // align_se1[ alignmentPositionOrLength -1] = -1; 1842 // align_se1[ alignmentPositionOrLength ] = pos; 1843 // align_se2[ alignmentPositionOrLength ] = -1; 1844 // } else { 1845 // int pos = align_se2[ alignmentPositionOrLength -1]; 1846 // align_se2[ alignmentPositionOrLength -1] = -1; 1847 // align_se2[ alignmentPositionOrLength ] = pos; 1848 // align_se1[ alignmentPositionOrLength ] = -1; 1849 // } 1850 // 1851 // alignmentPositionOrLength++; 1852 } 1853 } 1854 1855 1856 1857 //System.out.println(" iMax " + iMax + " jMax " + jMax); 1858 // set the gap positions: 1859 //lcmp:53 i:41 j:173 imax:70 jmax:173 1860 System.out.println(String.format(" lcmp:%d i:%d j:%d imax:%d jmax:%d score: %.2f",alignmentPositionOrLength,i,j, iMax, jMax, mat[iMax][jMax])); 1861 1862 1863 for(k=i; k<iMax; k++, i++) { 1864 align_se1[alignmentPositionOrLength]=k; 1865 align_se2[alignmentPositionOrLength]=-1; 1866 1867 1868 alignmentPositionOrLength++; 1869 } 1870 1871 for(k=j; k<jMax; k++, j++) { 1872 align_se1[alignmentPositionOrLength]=-1; 1873 align_se2[alignmentPositionOrLength]=k; 1874 1875 1876 alignmentPositionOrLength++; 1877 } 1878 1879 1880 align_se1[alignmentPositionOrLength]=iMax; 1881 align_se2[alignmentPositionOrLength]=jMax; 1882 1883 1884 1885 if ( gapPosition) 1886 prevGapEnd = alignmentPositionOrLength; 1887 alignmentPositionOrLength++; 1888 1889 if(brk_flg[i][j]) { 1890 //System.out.println("hit break flag at: " + i + " " + j + " sum " + sum_ret + " lcmp " + alignmentPositionOrLength); 1891 break; 1892 1893 } 1894 } 1895 1896 1897 return sum_ret; 1898 } 1899 1900 1901 1902 1903 private String getTraceBack(int[][] tracebackMatrix1, 1904 int[][] tracebackMatrix2, int i, int j) { 1905 1906 1907 if ( tracebackMatrix1[i][j] == -1 ) 1908 return ""; 1909 if ( tracebackMatrix2[i][j] == -1 ) 1910 return ""; 1911 1912 int t1 = tracebackMatrix1[i][j]; 1913 int t2 = tracebackMatrix2[i][j]; 1914 1915 1916 String s = "[ "+t1+","+t2+"] "; 1917 1918 return s + getTraceBack(tracebackMatrix1, tracebackMatrix2, t1, t2); 1919 1920 1921 } 1922 1923 private void rot_mol(Atom[] caA, Atom[] caB, int nse2, Matrix m , Atom shift) throws StructureException{ 1924 1925 1926 1927 for(int l=0; l<nse2; l++) { 1928 Atom a = caA[l]; 1929 Group g = (Group)a.getGroup().clone(); 1930 //Group g = (Group)a.getParent(); 1931 1932 Calc.rotate( g, m); 1933 Calc.shift( g, shift); 1934 caB[l] = g.getAtom(a.getName()); 1935 } 1936 } 1937 1938 //void rot_mol(XYZ *molA, XYZ *molB, int nAtom, double d_[20] ) { 1939 // double dx, dy, dz; 1940 // for(int l=0; l<nAtom; l++) { 1941 // if(molA[l].X<1e10) { 1942 // dx=molA[l].X; 1943 // dy=molA[l].Y; 1944 // dz=molA[l].Z; 1945 // molB[l].X=dx*d_[0]+dy*d_[1]+dz*d_[2]+d_[9]; 1946 // molB[l].Y=dx*d_[3]+dy*d_[4]+dz*d_[5]+d_[10]; 1947 // molB[l].Z=dx*d_[6]+dy*d_[7]+dz*d_[8]+d_[11]; 1948 // } 1949 // else { 1950 // molB[l]=molA[l]; 1951 // } 1952 // } 1953 // } 1954 // 1955 1956 1957 /** superimpose and get rmsd 1958 * 1959 * @param pro1 1960 * @param pro2 1961 * @param strLen 1962 * @param storeTransform 1963 * @param show 1964 * @return RMSD 1965 * @throws StructureException 1966 */ 1967 public double calc_rmsd(Atom[] pro1, Atom[] pro2, int strLen, boolean storeTransform, boolean show) throws StructureException { 1968 1969 Atom[] cod1 = getAtoms(pro1, strLen,false); 1970 Atom[] cod2 = getAtoms(pro2, strLen,true); 1971 1972 Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(cod1), 1973 Calc.atomsToPoints(cod2)); 1974 1975 Matrix matrix = Matrices.getRotationJAMA(trans); 1976 Atom shift = Calc.getTranslationVector(trans); 1977 1978 if ( debug){ 1979 matrix.print(3,3); 1980 } 1981 1982 if ( storeTransform) { 1983 r = matrix; 1984 t = shift; 1985 } 1986 for (Atom a : cod2) 1987 Calc.transform(a.getGroup(), trans); 1988 1989 //Calc.rotate(a,r); 1990 //Calc.shift(a,t); 1991 // if ( show){ 1992 // StructureAlignmentJmol jmol = new StructureAlignmentJmol(); 1993 // 1994 // jmol.setAtoms(cod1); 1995 // jmol.evalString("select * ; wireframe off; spacefill off; backbone on; color chain; select ligand;color cpk; wireframe 40;spacefill 120; "); 1996 // jmol.setTitle("calCaRmsd - pdb1"); 1997 // 1998 // StructureAlignmentJmol jmol2 = new StructureAlignmentJmol(); 1999 // jmol2.setAtoms(cod2); 2000 // jmol2.evalString("select * ; wireframe off; spacefill off; backbone on; color chain; select ligand;color cpk; wireframe 40;spacefill 120; "); 2001 // jmol2.setTitle("calCaRmsd - pdb2"); 2002 // } 2003 return Calc.rmsd(cod1, cod2); 2004 2005 } 2006 2007 /** 2008 * Copies the first length atoms from the input array 2009 * @param ca The array to copy 2010 * @param length the number of atoms to copy 2011 * @param clone If true, preform a deep copy, cloning the underlying Groups 2012 * @return An array with the first length items of ca, possibly cloning the Atoms. 2013 * @throws StructureException 2014 */ 2015 private Atom[] getAtoms(Atom[] ca, int length, boolean clone) throws StructureException{ 2016 2017 List<Atom> atoms = new ArrayList<Atom>(); 2018 for ( int i = 0 ; i < length ; i++){ 2019 2020 Atom a; 2021 if ( clone ){ 2022 Group g = (Group)ca[i].getGroup().clone(); 2023 a = g.getAtom(ca[i].getName()); 2024 } 2025 else { 2026 a = ca[i]; 2027 } 2028 atoms.add(a); 2029 } 2030 return atoms.toArray(new Atom[atoms.size()]); 2031 } 2032 2033 2034 2035 private void noBestTrace(){ 2036 2037 if(isPrint) { 2038 timeEnd = System.currentTimeMillis(); 2039 long time_q=(timeEnd-timeStart); 2040 2041 String msg = String.format("size=0 time=%d comb=%d\n", (int)(time_q), nTraces); 2042 System.out.println(msg); 2043 } 2044 } 2045 2046 2047 2048 private double zToP(double z) { 2049 int ind=(int)(z/0.1); 2050 if(ind<0) ind=0; 2051 if(ind>149) ind=149; 2052 return(tableZtoP[ind]); 2053 } 2054 /////////////////////////////////////////////////////////////////////////// 2055 private double pToZ(double p) { 2056 int ind=(int)(-Math.log10(p)*3.0); 2057 if(ind<0) ind=0; 2058 if(ind>149) ind=149; 2059 return(tablePtoZ[ind]); 2060 } 2061 /////////////////////////////////////////////////////////////////////////// 2062 private double zByZ(double z1, double z2) { 2063 double p1=zToP(z1); 2064 double p2=zToP(z2); 2065 return(pToZ(p1*p2)); 2066 } 2067 2068 protected double zStrAlign(int winSize, int nTrace, double score, int nGaps) { 2069 double z1=zScore(winSize, nTrace, score); 2070 double z2=zGaps(winSize, nTrace, nGaps); 2071 return(zByZ(z1, z2)); 2072 } 2073 2074 double zScore(int winSize, int nTrace, double score) { 2075 2076 if(winSize==8) { 2077 2078 if(nTrace<1) return(0.0); 2079 2080 double scoreAv_, scoreSd_; 2081 if(nTrace<21) { 2082 scoreAv_=scoreAv8[nTrace-1]; 2083 scoreSd_=scoreSd8[nTrace-1]; 2084 } 2085 else { 2086 scoreAv_=0.209874*nTrace+2.944714; 2087 scoreSd_=0.039487*nTrace+0.675735; 2088 } 2089 if(score>scoreAv_) return(0.0); 2090 return((scoreAv_-score)/scoreSd_); 2091 } 2092 2093 if(winSize==6) { 2094 2095 if(nTrace<1) return(0.0); 2096 2097 double scoreAv_, scoreSd_; 2098 if(nTrace<21) { 2099 scoreAv_=scoreAv6[nTrace-1]; 2100 scoreSd_=scoreSd6[nTrace-1]; 2101 } 2102 else { 2103 scoreAv_=0.198534*nTrace+2.636477; 2104 scoreSd_=0.040922*nTrace+0.715636; 2105 } 2106 if(score>scoreAv_) return(0.0); 2107 return((scoreAv_-score)/scoreSd_); 2108 } 2109 2110 return(0.0); 2111 2112 } 2113 /////////////////////////////////////////////////////////////////////////// 2114 double zGaps(int winSize, int nTrace, int nGaps) { 2115 2116 if(nTrace<2) return(0.0); 2117 double scoreAv_, scoreSd_; 2118 2119 if(winSize==8) { 2120 if(nTrace<21) { 2121 scoreAv_=gapsAv8[nTrace-1]; 2122 scoreSd_=gapsSd8[nTrace-1]; 2123 } 2124 else { 2125 scoreAv_=14.949173*nTrace-14.581193; 2126 scoreSd_=2.045067*nTrace+13.191095; 2127 } 2128 if(nGaps>scoreAv_) return(0.0); 2129 return((scoreAv_-nGaps)/scoreSd_); 2130 } 2131 2132 if(winSize==6) { 2133 if(nTrace<21) { 2134 scoreAv_=gapsAv6[nTrace-1]; 2135 scoreSd_=gapsSd6[nTrace-1]; 2136 } 2137 else { 2138 scoreAv_=13.574490*nTrace-13.977223; 2139 scoreSd_=1.719977*nTrace+19.615014; 2140 } 2141 if(nGaps>scoreAv_) return(0.0); 2142 return((scoreAv_-nGaps)/scoreSd_); 2143 } 2144 2145 return(0.0); 2146 } 2147 2148 private static final double[] scoreAv8={2.54, 2.51, 2.72, 3.01, 3.31, 3.61, 3.90, 4.19, 4.47, 4.74, 2149 4.99, 5.22, 5.46, 5.70, 5.94, 6.13, 6.36, 6.52, 6.68, 6.91}; 2150 private static final double[] scoreSd8={1.33, 0.88, 0.73, 0.71, 0.74, 0.80, 0.86, 0.92, 0.98, 1.04, 2151 1.08, 1.10, 1.15, 1.19, 1.23, 1.25, 1.32, 1.34, 1.36, 1.45}; 2152 private static final double[] gapsAv8={0.00, 11.50, 23.32, 35.95, 49.02, 62.44, 76.28, 90.26, 2153 104.86, 119.97, 134.86, 150.54, 164.86, 179.57, 194.39, 2154 209.38, 224.74, 238.96, 253.72, 270.79}; 2155 private static final double[] gapsSd8={0.00, 9.88, 14.34, 17.99, 21.10, 23.89, 26.55, 29.00, 31.11, 2156 33.10, 35.02, 36.03, 37.19, 38.82, 41.04, 43.35, 45.45, 2157 48.41, 50.87, 52.27}; 2158 private static final double[] scoreAv6={1.98, 1.97, 2.22, 2.54, 2.87, 3.18, 3.48, 3.77, 4.05, 4.31, 2159 4.57, 4.82, 5.03, 5.24, 5.43, 5.64, 5.82, 6.02, 6.21, 6.42}; 2160 private static final double[] scoreSd6={1.15, 0.73, 0.63, 0.64, 0.71, 0.80, 0.87, 0.95, 1.01, 1.07, 2161 1.13, 1.19, 1.22, 1.25, 1.28, 1.32, 1.35, 1.39, 1.45, 1.50}; 2162 private static final double[] gapsAv6={0.00, 10.12, 20.25, 31.29, 42.95, 55.20, 67.53, 80.15, 2163 93.30, 106.47, 120.52, 134.38, 148.59, 162.58, 176.64, 2164 191.23, 204.12, 218.64, 231.82, 243.43}; 2165 private static final double[] gapsSd6={0.00, 9.80, 14.44, 18.14, 21.35, 24.37, 27.00, 29.68, 32.22, 2166 34.37, 36.65, 38.63, 40.31, 42.16, 43.78, 44.98, 47.08, 2167 49.09, 50.78, 52.15}; 2168 2169 2170 private static final double[] tableZtoP={ 2171 1.0, 9.20e-01,8.41e-01,7.64e-01,6.89e-01,6.17e-01,5.49e-01,4.84e-01,4.24e-01,3.68e-01, 2172 3.17e-01,2.71e-01,2.30e-01,1.94e-01,1.62e-01,1.34e-01,1.10e-01,8.91e-02,7.19e-02,5.74e-02, 2173 4.55e-02,3.57e-02,2.78e-02,2.14e-02,1.64e-02,1.24e-02,9.32e-03,6.93e-03,5.11e-03,3.73e-03, 2174 2.70e-03,1.94e-03,1.37e-03,9.67e-04,6.74e-04,4.65e-04,3.18e-04,2.16e-04,1.45e-04,9.62e-05, 2175 6.33e-05,4.13e-05,2.67e-05,1.71e-05,1.08e-05,6.80e-06,4.22e-06,2.60e-06,1.59e-06,9.58e-07, 2176 5.73e-07,3.40e-07,1.99e-07,1.16e-07,6.66e-08,3.80e-08,2.14e-08,1.20e-08,6.63e-09,3.64e-09, 2177 1.97e-09,1.06e-09,5.65e-10,2.98e-10,1.55e-10,8.03e-11,4.11e-11,2.08e-11,1.05e-11,5.20e-12, 2178 2.56e-12,1.25e-12,6.02e-13,2.88e-13,1.36e-13,6.38e-14,2.96e-14,1.36e-14,6.19e-15,2.79e-15, 2179 1.24e-15,5.50e-16,2.40e-16,1.04e-16,4.46e-17,1.90e-17,7.97e-18,3.32e-18,1.37e-18,5.58e-19, 2180 2.26e-19,9.03e-20,3.58e-20,1.40e-20,5.46e-21,2.10e-21,7.99e-22,3.02e-22,1.13e-22,4.16e-23, 2181 1.52e-23,5.52e-24,1.98e-24,7.05e-25,2.48e-25,8.64e-26,2.98e-26,1.02e-26,3.44e-27,1.15e-27, 2182 3.82e-28,1.25e-28,4.08e-29,1.31e-29,4.18e-30,1.32e-30,4.12e-31,1.27e-31,3.90e-32,1.18e-32, 2183 3.55e-33,1.06e-33,3.11e-34,9.06e-35,2.61e-35,7.47e-36,2.11e-36,5.91e-37,1.64e-37,4.50e-38, 2184 1.22e-38,3.29e-39,8.77e-40,2.31e-40,6.05e-41,1.56e-41,4.00e-42,1.02e-42,2.55e-43,6.33e-44, 2185 1.56e-44,3.80e-45,9.16e-46,2.19e-46,5.17e-47,1.21e-47,2.81e-48,6.45e-49,1.46e-49,3.30e-50}; 2186 private static final double[] tablePtoZ={ 2187 0.00,0.73,1.24,1.64,1.99,2.30,2.58,2.83,3.07,3.29, 2188 3.50,3.70,3.89,4.07,4.25,4.42,4.58,4.74,4.89,5.04, 2189 5.19,5.33,5.46,5.60,5.73,5.86,5.99,6.11,6.23,6.35, 2190 6.47,6.58,6.70,6.81,6.92,7.02,7.13,7.24,7.34,7.44, 2191 7.54,7.64,7.74,7.84,7.93,8.03,8.12,8.21,8.30,8.40, 2192 8.49,8.57,8.66,8.75,8.84,8.92,9.01,9.09,9.17,9.25, 2193 9.34,9.42,9.50,9.58,9.66,9.73,9.81,9.89,9.97,10.04, 2194 10.12,10.19,10.27,10.34,10.41,10.49,10.56,10.63,10.70,10.77, 2195 10.84,10.91,10.98,11.05,11.12,11.19,11.26,11.32,11.39,11.46, 2196 11.52,11.59,11.66,11.72,11.79,11.85,11.91,11.98,12.04,12.10, 2197 12.17,12.23,12.29,12.35,12.42,12.48,12.54,12.60,12.66,12.72, 2198 12.78,12.84,12.90,12.96,13.02,13.07,13.13,13.19,13.25,13.31, 2199 13.36,13.42,13.48,13.53,13.59,13.65,13.70,13.76,13.81,13.87, 2200 13.92,13.98,14.03,14.09,14.14,14.19,14.25,14.30,14.35,14.41, 2201 14.46,14.51,14.57,14.62,14.67,14.72,14.77,14.83,14.88,14.93}; 2202 2203 /** copy data from this class into AFPChain container object. 2204 * 2205 * @param afpChain 2206 * @param ca1 2207 * @param ca2 2208 */ 2209 public void convertAfpChain(AFPChain afpChain, Atom[] ca1, Atom[] ca2) { 2210 2211 afpChain.setBlockNum(1); 2212 //afpChain.setAlignScore(z); 2213 Matrix[] m ; 2214 2215 if ( r != null ) { 2216 m = new Matrix[1]; 2217 m[0] = r; 2218 } else { 2219 m = new Matrix[0]; 2220 } 2221 2222 Atom[] as ; 2223 if ( t != null) { 2224 as = new Atom[1]; 2225 as[0] = t; 2226 } else { 2227 as = new Atom[0]; 2228 } 2229 2230 afpChain.setBlockRotationMatrix(m); 2231 afpChain.setBlockShiftVector(as); 2232 2233 int nse1 = ca1.length; 2234 int nse2 = ca2.length; 2235 //System.out.println("dist1 :" + dist1.length + " " + dist2.length); 2236 2237 if ( nse1 > 0 && dist1.length > 0 ) 2238 afpChain.setDisTable1(new Matrix(dist1)); 2239 else 2240 afpChain.setDisTable1 (Matrix.identity(3, 3)); 2241 if ( nse2 > 0 && dist2.length > 0 ) 2242 afpChain.setDisTable2(new Matrix(dist2)); 2243 else 2244 afpChain.setDisTable2(Matrix.identity(3, 3)); 2245 2246 2247 char[] alnseq1 = new char[nse1+nse2+1]; 2248 char[] alnseq2 = new char[nse1+nse2+1] ; 2249 char[] alnsymb = new char[nse1+nse2+1]; 2250 2251 int[][][] optAln = new int[1][2][nAtom]; 2252 afpChain.setOptAln(optAln); 2253 2254 int pos = 0; 2255 int nrIdent = 0; 2256 int nrSim = 0; 2257 for(int ia=0; ia<alignmentPositionOrLength; ia++) { 2258 2259 // no gap 2260 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 2261 //System.out.println("ia " + ia + " pos " + pos + " " + align_se1[ia] + " " + align_se2[ia]); 2262 optAln[0][0][pos] = align_se1[ia]; 2263 optAln[0][1][pos] = align_se2[ia]; 2264 2265 char l1 = getOneLetter(ca1[align_se1[ia]].getGroup()); 2266 char l2 = getOneLetter(ca2[align_se2[ia]].getGroup()); 2267 2268 alnseq1[ia] = Character.toUpperCase(l1); 2269 alnseq2[ia] = Character.toUpperCase(l2); 2270 alnsymb[ia] = ' '; 2271 if ( l1 == l2) { 2272 nrIdent++; 2273 nrSim++; 2274 alnsymb[ia] = '|'; 2275 } else if ( AFPAlignmentDisplay.aaScore(l1, l2) > 0){ 2276 nrSim++; 2277 alnsymb[ia] = ':'; 2278 } 2279 2280 pos++; 2281 2282 } else { 2283 // there is a gap at this position 2284 alnsymb[ia] = ' '; 2285 if (align_se1[ia] == -1 ) { 2286 alnseq1[ia] = '-'; 2287 } else { 2288 char l1 = getOneLetter(ca1[align_se1[ia]].getGroup()); 2289 alnseq1[ia] = Character.toUpperCase(l1); 2290 } 2291 if ( align_se2[ia] == -1 ) { 2292 alnseq2[ia] = '-'; 2293 } else { 2294 char l2 = getOneLetter(ca2[align_se2[ia]].getGroup()); 2295 alnseq2[ia] = Character.toUpperCase(l2); 2296 } 2297 2298 } 2299 } 2300 2301 2302 afpChain.setAlnseq1(alnseq1); 2303 afpChain.setAlnseq2(alnseq2); 2304 afpChain.setAlnsymb(alnsymb); 2305 2306 2307 // CE uses the aligned pairs as reference not the whole alignment including gaps... 2308 if ( pos > 0) { 2309 afpChain.setIdentity(nrIdent*1.0/pos); 2310 afpChain.setSimilarity(nrSim*1.0/pos); 2311 } else { 2312 afpChain.setIdentity(0); 2313 afpChain.setSimilarity(0); 2314 } 2315 2316 //AFPAlignmentDisplay.getAlign( afpChain,ca1,ca2); 2317 2318 } 2319 2320 public int getnAtom() { 2321 return nAtom; 2322 } 2323 2324 2325 2326 public void setnAtom(int nAtom) { 2327 this.nAtom = nAtom; 2328 } 2329 2330 2331 2332 public int getLcmp() { 2333 return alignmentPositionOrLength; 2334 } 2335 2336 2337 2338 public void setLcmp(int lcmp) { 2339 this.alignmentPositionOrLength = lcmp; 2340 } 2341 2342 2343 2344 public int[] getAlign_se1() { 2345 return align_se1; 2346 } 2347 2348 2349 2350 public void setAlign_se1(int[] alignSe1) { 2351 align_se1 = alignSe1; 2352 } 2353 2354 2355 2356 public int[] getAlign_se2() { 2357 return align_se2; 2358 } 2359 2360 2361 2362 public void setAlign_se2(int[] alignSe2) { 2363 align_se2 = alignSe2; 2364 } 2365 2366 /** 2367 * Caution: this matrix is overwriten with very different data at several 2368 * points in the alignment algorithm. After 2369 * {@link #initSumOfDistances(int, int, int, int, Atom[], Atom[]) initSumOfDistances} 2370 * is run, this will hold the distance matrix between AFPs. 2371 * @return mat 2372 */ 2373 public double[][] getMatMatrix() { 2374 return mat; 2375 } 2376 2377 public void setMatMatrix(double[][] matrix){ 2378 mat = matrix; 2379 } 2380 2381 /** 2382 * Gets the rotation matrix from the last call to 2383 * {@link #calc_rmsd(Atom[], Atom[], int, boolean, boolean) calc_rmsd}. 2384 * @return The rotatiokn matrix 2385 */ 2386 public Matrix getRotationMatrix() { 2387 return r; 2388 } 2389 2390 /** 2391 * Gets the shift from the last call to 2392 * {@link #calc_rmsd(Atom[], Atom[], int, boolean, boolean) calc_rmsd}. 2393 * @return The shift 2394 */ 2395 public Atom getShift() { 2396 return t; 2397 } 2398 2399 public double[][] getDist1() { 2400 return dist1; 2401 } 2402 2403 public void setDist1(double[][] dist1) { 2404 this.dist1 = dist1; 2405 } 2406 2407 public double[][] getDist2() { 2408 return dist2; 2409 } 2410 2411 public void setDist2(double[][] dist2) { 2412 this.dist2 = dist2; 2413 } 2414 2415 2416}