001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Sep 25, 2009 021 * Author: Andreas Prlic 022 * 023 */ 024 025package org.biojava.nbio.structure.align.ce; 026 027import org.biojava.nbio.core.alignment.matrices.ScaledSubstitutionMatrix; 028import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 029import org.biojava.nbio.structure.*; 030import org.biojava.nbio.structure.align.model.AFP; 031import org.biojava.nbio.structure.align.model.AFPChain; 032import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay; 033import org.biojava.nbio.structure.geometry.Matrices; 034import org.biojava.nbio.structure.geometry.SuperPositions; 035import org.biojava.nbio.structure.jama.Matrix; 036import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 037import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 038 039import java.util.ArrayList; 040import java.util.List; 041 042import javax.vecmath.Matrix4d; 043 044 045 046/** 047 * This is based on the original Combinatorial Extension (CE) source code from 2003 or 2004 (CE version 2.3), 048 * as has been originally developed by I. Shindyalov and P.Bourne (1998). 049 * The original CE paper is available from here: <a href="http://peds.oxfordjournals.org/cgi/content/short/11/9/739">http://peds.oxfordjournals.org/cgi/content/short/11/9/739</a>. 050 * 051 * This class is a pretty much exact 1:1 port from C, where I cared about exact reproduce of the CE results 052 * and not about Java style. 053 * 054 * @author Spencer Bliven 055 * @author Andreas Prlic 056 * 057 */ 058public class CeCalculatorEnhanced { 059 060 protected static final boolean isPrint = true; 061 private static final boolean showAlignmentSteps = true; 062 private static final boolean debug = true; 063 064 int[] f1; 065 int[] f2; 066 double[][]dist1; 067 double[][]dist2; 068 protected double[][]mat; 069 protected int[] bestTrace1; 070 protected int[] bestTrace2; 071 protected int[][] bestTraces1; 072 protected int[][] bestTraces2; 073 protected int nBestTrace; 074 protected int nBestTraces; 075 double[] d_ = new double[20]; 076 protected int[] bestTracesN; 077 protected double bestTraceScore; 078 protected int nTrace; 079 protected double[] bestTracesScores; 080 protected int[] trace1; 081 protected int[] trace2; 082 083 protected static final double zThr=-0.1; 084 085 long timeStart ; 086 long timeEnd; 087 private int nAtom; 088 089 // the equivalent positions in the alignment... 090 private int[] align_se1; 091 private int[] align_se2; 092 093 094 private int alignmentPositionOrLength; 095 private int[] bestTraceLen; 096 private Matrix r; 097 private Atom t; 098 protected int nTraces; 099 100 private double z; 101 private int[] traceIndexContainer; 102 103 protected CeParameters params; 104 // SHOULD these fields be PARAMETERS? 105 106 protected static final int nIter = 1; 107 private static final boolean distAll = false; 108 109 List<MatrixListener> matrixListeners; 110 111 public static final boolean GLOBAL_ALIGN1 = false; 112 public static final boolean GLOBAL_ALIGN2 = false; 113 114 public CeCalculatorEnhanced(CeParameters params){ 115 timeStart = System.currentTimeMillis(); 116 dist1= new double[0][0]; 117 dist2= new double[0][0]; 118 this.params = params; 119 matrixListeners = new ArrayList<>(); 120 121 } 122 123 /** 124 * 125 * @param afpChain A new AFPChain, which will be filled in by this function 126 * @param ca1 127 * @param ca2 128 * @return afpChain 129 * @throws StructureException 130 */ 131 public AFPChain extractFragments(AFPChain afpChain, 132 Atom[] ca1, Atom[] ca2) throws StructureException{ 133 134 int nse1 = ca1.length; 135 int nse2 = ca2.length; 136 137 afpChain.setCa1Length(nse1); 138 afpChain.setCa2Length(nse2); 139 140 int traceMaxSize=nse1<nse2?nse1:nse2; 141 142 f1 = new int[nse1]; 143 f2 = new int[nse2]; 144 145 dist1 = initIntraDistmatrix(ca1, nse1); 146 dist2 = initIntraDistmatrix(ca2, nse2); 147 148 149 if ( debug ) 150 System.out.println("parameters: " + params); 151 152 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 153 if ( params.getSeqWeight() < 1) 154 params.setSeqWeight(2); 155 } 156 157 int winSize = params.getWinSize(); 158 159 int winSizeComb1 = (winSize-1)*(winSize-2)/2; 160 161 traceIndexContainer = new int[traceMaxSize]; 162 163 // CE: unused code. distAll is always false and both loops do the same??? 164 // CE v2.3 calls this Weight factors for trace extension 165 if(distAll ) { 166 for(int i=0; i<traceMaxSize; i++) 167 traceIndexContainer[i]=(i+1)*i*winSize*winSize/2+(i+1)*winSizeComb1; 168 } else { 169 for(int i=0; i<traceMaxSize; i++) { 170 traceIndexContainer[i]=(i+1)*i*winSize/2+(i+1)*winSizeComb1; 171 172 173 } 174 } 175 176 // verified: a[] is set correctly. 177 178 mat = initSumOfDistances(nse1, nse2, winSize, winSizeComb1, ca1, ca2); 179 180 181 182 // try { 183 // Matrix m2 = new Matrix(mat).copy(); 184 // JPanel panel = GuiWrapper.getScaleableMatrixPanel(m2); 185 // JFrame frame = new JFrame(); 186 // frame.addWindowListener(new WindowAdapter(){ 187 // public void windowClosing(WindowEvent e){ 188 // JFrame f = (JFrame) e.getSource(); 189 // f.setVisible(false); 190 // f.dispose(); 191 // } 192 // }); 193 // 194 // 195 // frame.getContentPane().add(panel); 196 // 197 // frame.pack(); 198 // frame.setVisible(true); 199 // } catch (Exception e) { 200 // e.printStackTrace(); 201 // } 202 203 204 // Set the distance matrix 205 //afpChain.setDistanceMatrix(new Matrix(mat.clone())); 206 207 208 // 209 // double rmsdThr = params.getRmsdThr(); 210 // StringBuffer buf = new StringBuffer(" "); 211 // for(int i=0; i<nse2; i++) 212 // buf.append(String.format("%c", i%10==0?(i%100)/10+48:32)); 213 // buf.append("\n"); 214 // for(int i=0; i<nse1; i++) { 215 // buf.append(String.format("%c ", i%10==0?(i%100)/10+48:32)); 216 // for(int j=0; j<nse2; j++) 217 // buf.append(String.format("%c", (mat[i][j])<rmsdThr?'+':'X')); 218 // //printf("%c", ((int)*(mat[i]+j)/40)>9?'*':((int)*(mat[i]+j)/40)+48); 219 // buf.append("\n"); 220 // } 221 // buf.append("\n"); 222 // 223 // System.out.println(buf.toString()); 224 // 225 226 227 return afpChain; 228 } 229 230 /** 231 * Evaluates the distance between two atoms 232 * Several scoring functions are implemented and can be changed by calling 233 * {@link CeParameters#setScoringStrategy(Integer) setScoringStrategy()} 234 * on {@link CeParameters parameter} object this CECalculator was created with. 235 * <p> 236 * Scoring Strategies:<dl> 237 * <dt>DEFAULT_SCORING_STRATEGY</dt> 238 * <dd>Strategy of the original CE publication; CA-CA distance</dd> 239 * 240 * <dt>SIDE_CHAIN_SCORING</dt> 241 * <dd>CB-CB distance. This performs better for sheets and helices than CA.</dd> 242 * 243 * <dt>SIDE_CHAIN_ANGLE_SCORING</dt> 244 * <dd>Use the dot product (eg the cosine) of the two CA-CB vectors.</dd> 245 * 246 * <dt>CA_AND_SIDE_CHAIN_ANGLE_SCORING</dt> 247 * <dd>Equivalent to DEFAULT_SCORING_STRATEGY + SIDE_CHAIN_ANGLE_SCORING</dd> 248 * </dl> 249 * 250 * <dt>SEQUENCE_CONSERVATION</dt> 251 * <dd>A mix between the DEFAULT_SCORING_STRATEGY and a scoring function that favors the alignment of sequence conserved positions in the alignment</dd> 252 * </dl> 253 * 254 * 255 * 256 * @param ca1 The CA of the first residue 257 * @param ca2 The CA of the second residue 258 * @return The distance between the two fragments, according to the selected 259 * scoring strategy. Lower distances are better alignments. 260 * @throws StructureException 261 */ 262 private double getDistanceWithSidechain(Atom ca1, Atom ca2) throws StructureException { 263 264 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.CA_SCORING) { 265 266 return Calc.getDistance(ca1,ca2); 267 268 } 269 270 double dist; 271 Group g1 = ca1.getGroup(); 272 Atom cb1 = null; 273 if ( g1.hasAtom(StructureTools.CB_ATOM_NAME)) { 274 cb1 = g1.getAtom(StructureTools.CB_ATOM_NAME); 275 } 276 // 277 Group g2 = ca2.getGroup(); 278 Atom cb2 = null; 279 if ( g2.hasAtom(StructureTools.CB_ATOM_NAME)) { 280 cb2 = g2.getAtom(StructureTools.CB_ATOM_NAME); 281 } 282 283 284 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SIDE_CHAIN_SCORING) { 285 286 287 // here we are using side chain orientation for scoring... 288 289 290 // score type 1 consider side chain distances 291 if ( cb1 != null && cb2 != null) { 292 // CB distance 293 dist = Calc.getDistance(cb1,cb2); 294 //dist = dist / 2.; 295 } else { 296 dist = Calc.getDistance(ca1,ca2); 297 } 298 299 return dist; 300 } 301 302 else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SIDE_CHAIN_ANGLE_SCORING){ 303 304 // score type 2 add angle info 305 306 307 if ( cb1 != null && cb2 != null) { 308 // If the CA were overlaid, what is the distance between the CB? 309 // Recall c^2 = a^2 + b^2 -2ab*cos(theta), so this is a function of angle 310 Atom c1 = Calc.subtract(cb1, ca1); 311 Atom c2 = Calc.subtract(cb2, ca2); 312 Atom newA = Calc.subtract(c2, c1); 313 dist = Calc.amount(newA); 314 } else { 315 //dist += Calc.getDistance(ca1,ca2); 316 dist = 0; 317 } 318 319 return dist; 320 321 } 322 else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.CA_AND_SIDE_CHAIN_ANGLE_SCORING){ 323 324 // score type 3 325 // CA distance + cos(angle) 326 dist = 0; 327 if ( cb1 != null && cb2 != null) { 328 Atom cacb1 = Calc.subtract(cb1, ca1); 329 Atom cacb2 = Calc.subtract(cb2, ca2); 330 Atom newA = Calc.subtract(cacb2, cacb1); 331 //System.out.format("CACB 1: %s\nCACB 2: %s\ndiff: %s\nd: %f\n",cacb1.toString(),cacb2.toString(),newA.toString(),Calc.amount(newA)); 332 dist += Calc.amount(newA); 333 } 334 dist += Calc.getDistance(ca1,ca2); 335 336 return dist; 337 338 } else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 339 if ( cb1 != null && cb2 != null) { 340 // CB distance 341 dist = Calc.getDistance(cb1,cb2); 342 //dist = dist / 2.; 343 } else { 344 dist = Calc.getDistance(ca1,ca2); 345 } 346 return dist; 347 348 349 } 350 else { 351 // unsupported scoring scheme 352 return Calc.getDistance(ca1,ca2); 353 } 354 } 355 356 /** build up intramolecular distance matrix dist1 & dist2 357 * 358 * @param ca 359 * @param nse 360 * @return 361 * @throws StructureException 362 */ 363 private double[][] initIntraDistmatrix(Atom[] ca, int nse) throws StructureException 364 { 365 366 367 double[][] intraDist = new double[nse][nse]; 368 369 // 370 for(int ise1=0; ise1<nse; ise1++) { 371 372 for(int ise2=0; ise2<nse; ise2++) { 373 intraDist[ise1][ise2] = getDistanceWithSidechain(ca[ise1], ca[ise2]); 374 375 } 376 } 377 return intraDist; 378 } 379 380 381 public double[][] initSumOfDistances(int nse1, int nse2, int winSize, int winSizeComb1, Atom[] ca1, Atom[] ca2) { 382 383 double d; 384 385 double[][] mat = new double[nse1][nse2]; 386 387 // init the initial mat[] array. 388 // at this stage mat contains the sum of the distances of fragments of the matrices dist1, dist 389 for(int ise1=0; ise1<nse1; ise1++) { 390 for(int ise2=0; ise2<nse2; ise2++) { 391 392 mat[ise1][ise2]=-1.0; 393 394 if(ise1>nse1-winSize || ise2>nse2-winSize) continue; 395 396 d=0.0; 397 // this sums up over the distances of the fragments 398 for(int is1=0; is1<winSize-2; is1++) 399 for(int is2=is1+2; is2<winSize; is2++) { 400 //System.out.println("pos1 :" + (ise1+is1) + " " + (ise1+is2) + " " + (ise2+is1) + " " + (ise2+is2)); 401 // is this abs or floor? check! 402 d+=Math.abs(dist1[ise1+is1][ise1+is2]-dist2[ise2+is1][ise2+is2]); 403 } 404 mat[ise1][ise2]=d/winSizeComb1; 405 406 //System.out.println("mat ["+ise1+"]["+ise2+"]="+mat[ise1][ise2]); 407 } 408 409 } 410 411 // verified: mat[][] probably ok. 412 413 return mat; 414 } 415 416 417 418 419 420 421 @SuppressWarnings("unused") 422 public void traceFragmentMatrix( AFPChain afpChain, 423 Atom[] ca1, Atom[] ca2) { 424 425 double rmsdThr = params.getRmsdThr(); 426 427 428 double oldBestTraceScore=10000.0; 429 bestTraceScore = 100.0; 430 nBestTrace=0; 431 int nBestTrace0 = 0; 432 int winSize = params.getWinSize(); 433 int winSizeComb1=(winSize-1)*(winSize-2)/2; 434 boolean distAll = false; 435 436 int winSizeComb2=distAll?winSize*winSize:winSize; 437 double rmsdThrJoin = params.getRmsdThrJoin(); 438 439 double z0; 440 441 //double bestTraceZScore=-1.0; 442 443 int nse1 = ca1.length; 444 int nse2 = ca2.length; 445 446 //System.out.println("nse1 :" +nse1 + " nse2: " + nse2); 447 448 int traceMaxSize=nse1<nse2?nse1:nse2; 449 450 bestTrace1 = new int [traceMaxSize]; 451 bestTrace2 = new int [traceMaxSize]; 452 trace1 = new int [traceMaxSize]; 453 trace2 = new int [traceMaxSize]; 454 455 int[] traceIndex = new int [traceMaxSize]; 456 int[] traceIterLevel = new int [traceMaxSize]; 457 458 int ise11; 459 int ise12; 460 int ise21; 461 int ise22; 462 463 int ise1; 464 int ise2; 465 466 int gapMax=params.getMaxGapSize(); 467 468 int iterDepth; 469 if ( gapMax > 0){ 470 iterDepth =gapMax*2+1; 471 } else { 472 iterDepth = traceMaxSize; 473 } 474 double[][] traceScore = new double[traceMaxSize][iterDepth]; 475 476 nTraces =0; 477 long tracesLimit=(long)5e7; 478 double score =-1; 479 double score0 = -1; 480 double score1 = -1 ; 481 double score2 = -1; 482 483 int mse1; 484 int mse2; 485 int jgap; 486 int jdir; 487 int jse1=0; 488 int jse2=0; 489 490 int bestTracesMax=30; 491 bestTraces1 = new int[bestTracesMax][traceMaxSize]; 492 bestTraces2 = new int[bestTracesMax][ traceMaxSize]; 493 bestTracesN=new int [bestTracesMax]; 494 bestTracesScores = new double [bestTracesMax]; 495 for(int it=0; it<bestTracesMax; it++) { 496 bestTracesN[it]=0; 497 bestTracesScores[it]=100; 498 } 499 500 nBestTraces=0; 501 int newBestTrace=0; 502 503 double traceTotalScore=0; 504 double traceScoreMax =0; 505 double userRMSDMax = params.getMaxOptRMSD(); 506 int kse1; 507 int kse2; 508 509 iterLoop: 510 for(int iter=0; iter<nIter; iter++) { 511 512 if(iter>2) { 513 if(oldBestTraceScore<=bestTraceScore) break; 514 } 515 oldBestTraceScore=bestTraceScore; 516 517 if(iter==1) { 518 z0=zStrAlign(winSize, nBestTrace, bestTraceScore, 519 bestTrace1[nBestTrace]+winSize-bestTrace1[0]+ 520 bestTrace2[nBestTrace]+winSize-bestTrace2[0]- 521 nBestTrace*2*winSize); 522 if(z0<zThr) break; 523 nBestTrace0=nBestTrace; 524 nBestTrace=0; 525 bestTraceScore=100.0; 526 527 nTraces=0; 528 } 529 530 531 if(iter==0) { 532 ise11=0; ise12=nse1; 533 ise21=0; ise22=nse2; 534 535 } 536 else { 537 if(iter==1) { 538 ise11=bestTrace1[0]; ise12=bestTrace1[0]+1; 539 ise21=bestTrace2[0]; ise22=bestTrace2[0]+1; 540 } 541 else { 542 ise11=bestTrace1[0]-1; ise12=bestTrace1[0]+2; 543 ise21=bestTrace2[0]-1; ise22=bestTrace2[0]+2; 544 } 545 if(ise11<0) ise11=0; 546 if(ise12>nse1) ise12=nse1; 547 if(ise21<0) ise21=0; 548 if(ise22>nse2) ise22=nse2; 549 } 550 551 //System.out.println("ise1Loop: " + ise11 + " " + ise12 + " " + ise21 + " " + ise22); 552 ise1Loop: 553 for(int ise1_=ise11; ise1_<ise12; ise1_++) { 554 ise2Loop: 555 for(int ise2_=ise21; ise2_<ise22; ise2_++) { 556 557 ise1=ise1_; 558 ise2=ise2_; 559 if(iter>1 && ise1==ise11+1 && ise2==ise21+1) continue ise2Loop; 560 561 //if(ise2==ise21) System.out.println(String.format("(%d, %d)",ise1, nTraces)); 562 563 564 if(iter==0 && (ise1>nse1-winSize*(nBestTrace-1) || 565 ise2>nse2-winSize*(nBestTrace-1))) continue ise2Loop; 566 567 if(mat[ise1][ise2]<0.0) continue ise2Loop; 568 if(mat[ise1][ise2]>rmsdThr) continue ise2Loop; 569 if (mat[ise1][ise2]>userRMSDMax) continue ise2Loop; 570 nTrace=0; 571 trace1[nTrace]=ise1; 572 trace2[nTrace]=ise2; 573 traceIndex[nTrace]=0; 574 traceIterLevel[nTrace]=0; 575 576 score0=mat[ise1][ise2]; 577 578 579 nTrace++; 580 boolean isTraceUp=true; 581 int traceIndex_=0; 582 583 traceLoop: 584 while(nTrace>0) { 585 586 kse1=trace1[nTrace-1]+winSize; 587 kse2=trace2[nTrace-1]+winSize; 588 589 //System.out.println("isTraceUp " + isTraceUp + " " + nTrace + " " + kse1 + " " + kse2); 590 591 while(true) { 592 if(kse1>nse1-winSize-1) break; 593 if(kse2>nse2-winSize-1) break; 594 if(mat[kse1][kse2]>=0.0) break; 595 kse1++; 596 kse2++; 597 } 598 599 600 traceIndex_=-1; 601 602 if(isTraceUp) { 603 604 int nBestExtTrace=nTrace; 605 double bestExtScore=100.0; 606 607 608 // extension of the alignment path 609 // condition 4, 5 610 itLoop: 611 for(int it=0; it<iterDepth; it++) { 612 613 jgap=(it+1)/2; 614 jdir=(it+1)%2; 615 616 if(jdir==0) { 617 mse1=kse1+jgap; 618 mse2=kse2; 619 } 620 else { 621 mse1=kse1; 622 mse2=kse2+jgap; 623 } 624 625 if(mse1>nse1-winSize-1) continue itLoop; 626 if(mse2>nse2-winSize-1) continue itLoop; 627 628 if(mat[mse1][mse2]<0.0) continue itLoop; 629 if(mat[mse1][mse2]>rmsdThr) continue itLoop; 630 if(mat[mse1][mse2]>userRMSDMax) continue itLoop; 631 632 nTraces++; 633 if(nTraces>tracesLimit) { 634 635 return; 636 } 637 638 score=0.0; 639 640 // if(!distAll) { 641 //System.out.println("getting score " + mse1 + " " + mse2 + " " + winSize + " " + jgap + " " + jdir + " " + it + " " + kse1 + " " + kse2); 642 score = getScoreFromDistanceMatrices(mse1,mse2,winSize); 643 //System.out.println("got score: " + score); 644 score1=score/(nTrace*winSize); 645 646 // } else { 647 // // all dist 648 // for(int itrace=0; itrace<nTrace; itrace++) { 649 // for(int is1=0; is1<winSize; is1++) 650 // for(int is2=0; is2<winSize; is2++) 651 // score+=Math.abs(dist1[trace1[itrace]+is1][mse1+is2]- 652 // dist2[trace2[itrace]+is1][mse2+is2]); 653 // } 654 // score1=score/(nTrace*winSize*winSize); 655 // } 656 657 658 //System.out.println("up: " + nTrace + " " + score + " " + score0 + " " + score1 + " " + winSize + " " + traceIndex_ + " " + it + " "); 659 if(score1>rmsdThrJoin) 660 continue itLoop; 661 if(score1>userRMSDMax) 662 continue itLoop; 663 score2=score1; 664 665 // this just got checked, no need to check again.. 666 //if(score2>rmsdThrJoin) 667 // continue itLoop; 668 669 if(nTrace>nBestExtTrace || (nTrace==nBestExtTrace && 670 score2<bestExtScore)) { 671 //System.out.println("setting traceindex to " + it + " " + score2); 672 bestExtScore=score2; 673 nBestExtTrace=nTrace; 674 traceIndex_=it; 675 traceScore[nTrace-1][traceIndex_]=score1; 676 } 677 678 } 679 } 680 681 if(traceIndex_!=-1) { 682 jgap=(traceIndex_+1)/2; 683 jdir=(traceIndex_+1)%2; 684 if(jdir==0) { 685 jse1=kse1+jgap; 686 jse2=kse2; 687 } 688 else { 689 jse1=kse1; 690 jse2=kse2+jgap; 691 } 692 693 if(iter==0){ 694 695 score1=(traceScore[nTrace-1][traceIndex_]*winSizeComb2*nTrace+ 696 mat[jse1][jse2]*winSizeComb1)/(winSizeComb2*nTrace+ 697 winSizeComb1); 698 699 score2 = getScore2(jse1, jse2, traceScore, traceIndex_, traceIndex, winSizeComb1, winSizeComb2, score0, score1); 700 701 if(score2>rmsdThrJoin) 702 traceIndex_=-1; 703 else if ( score2 > userRMSDMax) 704 traceIndex_=-1; 705 else { 706 traceScore[nTrace-1][traceIndex_]=score2; 707 708 traceTotalScore=score2; 709 } 710 711 } 712 else { 713 if(traceScoreMax>rmsdThrJoin && nBestTrace>=nBestTrace0) 714 traceIndex_=-1; 715 traceTotalScore=traceScoreMax; 716 } 717 } 718 719 //System.out.println("middle: " + nTrace + " " + score + " " + score0 + " " + score1 + " " + score2 + " " + traceIndex_); 720 721 if(traceIndex_==-1) { 722 //System.out.println("continue traceLoop " + nTrace); 723 //if(iterLevel==1) break; 724 nTrace--; 725 isTraceUp=false; 726 continue traceLoop; 727 } 728 else { 729 traceIterLevel[nTrace-1]++; 730 trace1[nTrace]=jse1; 731 trace2[nTrace]=jse2; 732 traceIndex[nTrace]=traceIndex_; 733 traceIterLevel[nTrace]=0; 734 nTrace++; 735 isTraceUp=true; 736 737 if(nTrace>nBestTrace || 738 (nTrace==nBestTrace && 739 bestTraceScore>traceTotalScore)) { 740 741 for(int itrace=0; itrace<nTrace; itrace++) { 742 bestTrace1[itrace]=trace1[itrace]; 743 bestTrace2[itrace]=trace2[itrace]; 744 } 745 bestTraceScore=traceTotalScore; 746 nBestTrace=nTrace; 747 } 748 749 if(iter==0) { 750 //System.out.println("doing iter0 " + newBestTrace + " " + traceTotalScore + " " + bestTracesMax); 751 newBestTrace = doIter0(newBestTrace,traceTotalScore, bestTracesMax); 752 753 754 } 755 } 756 } 757 } 758 } 759 760 if ( isPrint) { 761 System.out.println("fragment length: " + params.getWinSize()); 762 System.out.println("ntraces : " + nTraces ); 763 } 764 765 766 767 } 768 769 // try { 770 // Matrix m2 = new Matrix(traceScore).copy(); 771 // JPanel panel = GuiWrapper.getScaleableMatrixPanel(m2); 772 // JFrame frame = new JFrame(); 773 // frame.addWindowListener(new WindowAdapter(){ 774 // public void windowClosing(WindowEvent e){ 775 // JFrame f = (JFrame) e.getSource(); 776 // f.setVisible(false); 777 // f.dispose(); 778 // } 779 // }); 780 // 781 // 782 // frame.getContentPane().add(panel); 783 // 784 // frame.pack(); 785 // frame.setVisible(true); 786 // } catch (Exception e) { 787 // e.printStackTrace(); 788 // } 789 790 791 if ( params.isShowAFPRanges()){ 792 System.out.println("fragment length: " + params.getWinSize()); 793 System.out.println("ntraces : " + nTraces ); 794 795 } 796 797 } 798 799 protected double getScore2(int jse1, int jse2, double[][] traceScore, int traceIndex_,int[] traceIndex,int winSizeComb1, int winSizeComb2, double score0, double score1 ) { 800 801 802 803 /*double score2= 804 ((nTrace>1?traceScore[nTrace-2][traceIndex[nTrace-1]]:score0) 805 *a[nTrace-1]+score1*(a[nTrace]-a[nTrace-1]))/a[nTrace]; 806 */ 807 double val = 0; 808 if ( nTrace>1) 809 val =traceScore[nTrace-2][traceIndex[nTrace-1]]; 810 else 811 val = score0; 812 813 double score2 = (val * traceIndexContainer[nTrace-1]+score1*(traceIndexContainer[nTrace]-traceIndexContainer[nTrace-1]))/traceIndexContainer[nTrace]; 814 815 //System.out.println("check: score0 " + score0 + " score 1 " + score1 + " sc2: " + score2 + " val: " + val + " nTrace:" + nTrace+ " " + traceIndexContainer[nTrace-1] + " " + traceIndexContainer[nTrace-1] + " " + traceIndexContainer[nTrace] ); 816 817 return score2; 818 819 820 } 821 822 protected int doIter0(int newBestTrace, double traceTotalScore, double bestTracesMax) { 823 824 825 // only do the whole method if this criteria is fulfilled... 826 if(nTrace>bestTracesN[newBestTrace] || 827 (nTrace==bestTracesN[newBestTrace] && 828 bestTracesScores[newBestTrace]>traceTotalScore)) { 829 830 831 for(int itrace=0; itrace<nTrace; itrace++) { 832 bestTraces1[newBestTrace][itrace]=trace1[itrace]; 833 bestTraces2[newBestTrace][itrace]=trace2[itrace]; 834 bestTracesN[newBestTrace]=nTrace; 835 bestTracesScores[newBestTrace]=traceTotalScore; 836 //System.out.println("bestTracesScrores ["+newBestTrace+"]=" +traceTotalScore); 837 } 838 839 if(nTrace>nBestTrace) nBestTrace=nTrace; 840 841 if(nBestTraces<bestTracesMax) { 842 nBestTraces++; 843 newBestTrace++; 844 } 845 846 if(nBestTraces==bestTracesMax) { 847 //System.out.println("nBestTraces == bestTracesmax " + nBestTraces); 848 newBestTrace=0; 849 double scoreTmp=bestTracesScores[0]; 850 int nTraceTmp=bestTracesN[0]; 851 for(int ir=1; ir<nBestTraces; ir++) { 852 if(bestTracesN[ir]<nTraceTmp || 853 (bestTracesN[ir]==nTraceTmp && 854 scoreTmp<bestTracesScores[ir])) { 855 nTraceTmp=bestTracesN[ir]; 856 scoreTmp=bestTracesScores[ir]; 857 newBestTrace=ir; 858 //System.out.println("setting new bestTracesScore to " + ir + " " + scoreTmp); 859 } 860 } 861 } 862 } 863 864 //System.out.println("iter0 : " + newBestTrace + " " + bestTracesN[newBestTrace] + " " + traceTotalScore + " " + nTrace); 865 866 867 868 /* 869z=zStrAlign(winSize, nTrace, traceTotalScore, 870trace1[nTrace-1]-trace1[0]+trace2[nTrace-1]-trace2[0]- 8712*(nTrace-1)*winSize); 872if(z>bestTraceZScore) { 873for(int itrace=0; itrace<nTrace; itrace++) { 874bestTrace1[itrace]=trace1[itrace]; 875bestTrace2[itrace]=trace2[itrace]; 876} 877bestTraceZScore=z; 878bestTraceScore=*(traceScore[nTrace-2]+traceIndex_); 879nBestTrace=nTrace; 880} 881 */ 882 return newBestTrace; 883 884 } 885 886 887 protected double getScoreFromDistanceMatrices(int mse1, int mse2,int winSize) { 888 889 double score = 0; 890 // (winSize) "best" dist 891 892 // reduce sign. values to C code.. 6 digits.. 893 894 for(int itrace=0; itrace<nTrace; itrace++) { 895 score+= Math.abs(dist1[trace1[itrace]][mse1]- 896 dist2[trace2[itrace]][mse2]); 897 898 score+= Math.abs(dist1[trace1[itrace]+winSize-1] 899 [mse1+winSize-1]- 900 dist2[trace2[itrace]+winSize-1][mse2+winSize-1]); 901 902 for(int id=1; id<winSize-1; id++) 903 score+= Math.abs(dist1[trace1[itrace]+id][mse1+winSize-1-id]- 904 dist2[trace2[itrace]+id][mse2+winSize-1-id]); 905 906 } 907 908 return score; 909 } 910 911 public void nextStep( AFPChain afpChain, 912 Atom[] ca1, Atom[] ca2) throws StructureException{ 913 914 915 if(nBestTrace>0) { 916 checkBestTraces(afpChain,ca1,ca2); 917 } else { 918 noBestTrace(); 919 } 920 921 convertAfpChain(afpChain, ca1, ca2); 922 AFPAlignmentDisplay.getAlign(afpChain, ca1, ca2); 923 } 924 925 926 927 // this part is modified from the original CeCalculator 928 @SuppressWarnings("unused") 929 private void checkBestTraces( AFPChain afpChain, 930 Atom[] ca1, Atom[] ca2) throws StructureException{ 931 932 z=0.0; 933 934 935 int nGaps; 936 int winSize = params.getWinSize(); 937 int nse1 = ca1.length; 938 int nse2 = ca2.length; 939 int traceMaxSize=nse1<nse2?nse1:nse2; 940 int idir; 941 942 943 align_se1=new int [nse1+nse2]; 944 align_se2=new int [nse1+nse2]; 945 alignmentPositionOrLength = 0; 946 947 // we now support alignment using any particular atoms.. 948 949 Atom[] strBuf1 = new Atom[traceMaxSize]; 950 Atom[] strBuf2 = new Atom[traceMaxSize]; 951 952 double rmsdNew; 953 954 955 956 // removing some loops that are run in orig CE 957 // and which did not do anything 958 if ( debug ){ 959 checkPrintRmsdNew(traceMaxSize, winSize, ca1, ca2); 960 } 961 962 double rmsd=100.0; 963 964 int iBestTrace=0; 965 966 for(int ir=0; ir<nBestTraces; ir++) { 967 if(bestTracesN[ir]!=nBestTrace) continue; 968 969 rmsdNew = getRMSDForBestTrace(ir, strBuf1, strBuf2, bestTracesN,bestTraces1, bestTrace2,winSize,ca1,ca2); 970 if ( isPrint) 971 System.out.println(String.format("%d %d %d %.2f", ir, bestTracesN[ir], nBestTrace, rmsdNew)); 972 973 if(rmsd>rmsdNew) { 974 iBestTrace=ir; 975 rmsd=rmsdNew; 976 //System.out.println(" iBestTrace:" + iBestTrace + " new rmsd = " + rmsd); 977 } 978 } 979 for(int it=0; it<bestTracesN[iBestTrace]; it++) { 980 bestTrace1[it]=bestTraces1[iBestTrace][it]; 981 bestTrace2[it]=bestTraces2[iBestTrace][it]; 982 } 983 984 //System.out.println("iBestTrace: "+iBestTrace+" = bestTracesScores " + bestTracesScores[iBestTrace]); 985 986 nBestTrace=bestTracesN[iBestTrace]; 987 988 bestTraceScore=bestTracesScores[iBestTrace]; 989 990 991 //printf("\nOptimizing gaps...\n"); 992 993 int[] traceLen=new int [traceMaxSize]; 994 bestTraceLen=new int [traceMaxSize]; 995 996 997 int strLen=0; 998 999 int jt; 1000 strLen=0; 1001 nGaps=0; 1002 nTrace=nBestTrace; 1003 1004 for(jt=0; jt<nBestTrace; jt++) { 1005 trace1[jt]=bestTrace1[jt]; 1006 trace2[jt]=bestTrace2[jt]; 1007 traceLen[jt]=winSize; 1008 1009 if(jt<nBestTrace-1) { 1010 nGaps+=bestTrace1[jt+1]-bestTrace1[jt]-winSize+ 1011 bestTrace2[jt+1]-bestTrace2[jt]-winSize; 1012 } 1013 } 1014 nBestTrace=0; 1015 for(int it=0; it<nTrace; ) { 1016 int cSize=traceLen[it]; 1017 for(jt=it+1; jt<nTrace; jt++) { 1018 if(trace1[jt]-trace1[jt-1]-traceLen[jt-1]!=0 || 1019 trace2[jt]-trace2[jt-1]-traceLen[jt-1]!=0) break; 1020 cSize+=traceLen[jt]; 1021 } 1022 bestTrace1[nBestTrace]=trace1[it]; 1023 bestTrace2[nBestTrace]=trace2[it]; 1024 bestTraceLen[nBestTrace]=cSize; 1025 nBestTrace++; 1026 strLen+=cSize; 1027 it=jt; 1028 } 1029 1030 1031 int is=0; 1032 for(jt=0; jt<nBestTrace; jt++) { 1033 for(int i=0; i<bestTraceLen[jt]; i++) { 1034 setStrBuf(strBuf1,is+i,ca1,bestTrace1[jt]+i ); 1035 setStrBuf(strBuf2,is+i,ca2,bestTrace2[jt]+i); 1036 } 1037 is+=bestTraceLen[jt]; 1038 } 1039 //sup_str(strBuf1, strBuf2, strLen, d_); 1040 1041 rmsd=calc_rmsd(strBuf1, strBuf2, strLen,true,showAlignmentSteps); 1042 1043 if ( isPrint) 1044 System.out.println("got first rmsd: " + rmsd); 1045 boolean isCopied=false; 1046 1047 outer_loop: 1048 for(int it=1; it<nBestTrace; it++) { 1049 1050 /* not needed... 1051 int igap; 1052 if(bestTrace1[it]-bestTrace1[it-1]-bestTraceLen[it-1]>0) igap=0; 1053 if(bestTrace2[it]-bestTrace2[it-1]-bestTraceLen[it-1]>0) igap=1; 1054 */ 1055 1056 1057 boolean wasBest=false; 1058 main_loop: 1059 for(idir=-1; idir<=1; idir+=2) { 1060 if(wasBest) break; 1061 1062 inner_loop: 1063 for(int idep=1; idep<=winSize/2; idep++) { 1064 1065 if(!isCopied) 1066 for(jt=0; jt<nBestTrace; jt++) { 1067 trace1[jt]=bestTrace1[jt]; 1068 trace2[jt]=bestTrace2[jt]; 1069 traceLen[jt]=bestTraceLen[jt]; 1070 } 1071 isCopied=false; 1072 1073 traceLen[it-1]+=idir; 1074 traceLen[it]-=idir; 1075 trace1[it]+=idir; 1076 trace2[it]+=idir; 1077 1078 is=0; 1079 for(jt=0; jt<nBestTrace; jt++) { 1080 for(int i=0; i<traceLen[jt]; i++) { 1081 if(ca1[trace1[jt]+i].getX()>1e10 || ca2[trace2[jt]+i].getX()>1e10) 1082 continue main_loop; 1083 strBuf1[is+i]=ca1[trace1[jt]+i]; 1084 strBuf2[is+i]=ca2[trace2[jt]+i]; 1085 } 1086 is+=traceLen[jt]; 1087 } 1088 //sup_str(strBuf1, strBuf2, strLen, d_); 1089 rmsdNew=calc_rmsd(strBuf1, strBuf2, strLen, true, false); 1090 //System.out.println(String.format("step %d %d %d %.2f old: %.2f", it, idir, idep, rmsdNew, rmsd)); 1091 if(rmsdNew<rmsd) { 1092 1093 for(jt=0; jt<nBestTrace; jt++) { 1094 bestTrace1[jt] = trace1[jt]; 1095 bestTrace2[jt] = trace2[jt]; 1096 bestTraceLen[jt]= traceLen[jt]; 1097 } 1098 isCopied=true; 1099 wasBest=true; 1100 rmsd=rmsdNew; 1101 continue inner_loop; 1102 } 1103 // AP 1104 //bad_ca: break; 1105 continue main_loop; 1106 } 1107 } 1108 } 1109 //if ( showAlignmentSteps) 1110 rmsdNew=calc_rmsd(strBuf1, strBuf2, strLen,true, showAlignmentSteps); 1111 if ( isPrint) 1112 System.out.println("rmsdNew: " + rmsdNew + " rmsd " + rmsd); 1113 afpChain.setTotalRmsdIni(rmsdNew); 1114 afpChain.setTotalLenIni(strBuf1.length); 1115 1116 1117 nAtom = strLen; 1118 1119 System.out.println("zStrAlign: " + winSize + " strLen " + strLen + " s/w " + (strLen/winSize) + " " + bestTraceScore + " " + nGaps); 1120 z=zStrAlign(winSize, strLen/winSize, bestTraceScore, nGaps); 1121 1122 if(params.isShowAFPRanges()) { 1123 System.out.println("win size: " + winSize + " strLen/winSize: " + strLen/winSize + " best trace score: " + String.format("%.2f",bestTraceScore) + " nr gaps: " + nGaps + " nr residues: " + nAtom); 1124 1125 System.out.println(String.format("size=%d rmsd=%.2f z=%.1f gaps=%d(%.1f%%) comb=%d", 1126 nAtom, rmsd, z, nGaps, nGaps*100.0/nAtom, 1127 nTraces)); 1128 1129 System.out.println("Best Trace, before optimization"); 1130 for(int k=0; k<nBestTrace; k++) 1131 System.out.println(String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, 1132 bestTraceLen[k])); 1133 1134 } 1135 1136 // start to convert CE internal datastructure to generic AFPChain one... 1137 List<AFP> afpSet = new ArrayList<>(); 1138 for (int afp=0;afp<nBestTrace;afp++){ 1139 // fill in data from nBestTrace into AFP 1140 1141 AFP afpI = new AFP(); 1142 1143 afpI.setFragLen(bestTraceLen[afp]); 1144 afpI.setP1(bestTrace1[afp]+1); 1145 afpI.setP2(bestTrace2[afp]+1); 1146 1147 afpSet.add( afpI); 1148 } 1149 1150 afpChain.setAfpSet(afpSet); 1151 1152 1153 //System.out.println("z:"+z + " zThr" + zThr+ " bestTraceScore " + bestTraceScore + " " + nGaps ); 1154 if(z>=zThr) { 1155 nGaps = optimizeSuperposition(afpChain,nse1, nse2, strLen, rmsd, ca1, ca2,nGaps,strBuf1,strBuf2); 1156 // if(isPrint) { 1157 // /* 1158 // FILE *f=fopen("homologies", "a"); 1159 // fprintf(f, "%s(%d) %s(%d) %3d %4.1f %4.1f %d(%d) ", 1160 // name1, nse1, name2, nse2, nAtom, rmsd, z, 1161 // nGaps, nGaps*100/nAtom); 1162 // for(int k=0; k<nBestTrace; k++) 1163 // fprintf(f, "(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, 1164 // bestTraceLen[k]); 1165 // fprintf(f, "\n"); 1166 // fclose(f); 1167 // */ 1168 // } 1169 } 1170 else { 1171 int lali_x_ = 0; 1172 for(int k=0; k<nBestTrace; k++) { 1173 for(int l=0; l<bestTraceLen[k]; l++) { 1174 align_se1[alignmentPositionOrLength+l]=bestTrace1[k]+l; 1175 align_se2[alignmentPositionOrLength+l]=bestTrace2[k]+l; 1176 } 1177 lali_x_+=bestTraceLen[k]; 1178 if(k<nBestTrace-1) { 1179 if(bestTrace1[k]+bestTraceLen[k]!=bestTrace1[k+1]) 1180 for(int l=bestTrace1[k]+bestTraceLen[k]; l<bestTrace1[k+1]; l++) { 1181 align_se1[alignmentPositionOrLength]=l; 1182 align_se2[alignmentPositionOrLength]=-1; 1183 alignmentPositionOrLength++; 1184 } 1185 if(bestTrace2[k]+bestTraceLen[k]!=bestTrace2[k+1]) 1186 for(int l=bestTrace2[k]+bestTraceLen[k]; l<bestTrace2[k+1]; l++) { 1187 align_se1[alignmentPositionOrLength]=-1; 1188 align_se2[alignmentPositionOrLength]=l; 1189 alignmentPositionOrLength++; 1190 } 1191 } 1192 } 1193 nAtom=lali_x_; 1194 } 1195 1196 timeEnd = System.currentTimeMillis(); 1197 long time_q=(timeEnd-timeStart); 1198 1199 double gapsP = ( nGaps*100.0/nAtom) ; 1200 if(isPrint) { 1201 String msg = String.format("Alignment length = %d Rmsd = %.2fA Z-Score = %.1f Gaps = %d(%.1f%%)",nAtom,rmsd,z,nGaps, gapsP); 1202 System.out.println(msg + " CPU = " + time_q); 1203 } 1204 1205 // if ( params.isShowAFPRanges()){ 1206 1207 // this is actually the final alignment... 1208 System.out.println("Best Trace: (index1,index2,len)"); 1209 for(int k=0; k<nBestTrace; k++) 1210 System.out.println( 1211 String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, bestTraceLen[k])); 1212 1213 1214 1215 // } 1216 1217 afpChain.setCalculationTime(time_q); 1218 afpChain.setGapLen(nGaps); 1219 1220 int[] optLen = new int[]{nAtom}; 1221 afpChain.setOptLen(optLen); 1222 afpChain.setOptLength(nAtom); 1223 afpChain.setAlnLength(alignmentPositionOrLength); 1224 1225 afpChain.setProbability(z); 1226 1227 1228 } 1229 1230 /** set the Atoms for a particular residue position. 1231 * Requires that atom.getParent returns the correct group! 1232 * take care during cloning of atoms. Best to use StructureTools.cloneCaAtoms(); 1233 * 1234 * @param strBuf 1235 * @param i 1236 * @param ca 1237 * @param j 1238 */ 1239 private void setStrBuf(Atom[] strBuf, int i, Atom[] ca, int j) { 1240 // TODO Auto-generated method stub 1241 //TODO 1242 Group parent = ca[j].getGroup(); 1243 int pos = 0; 1244 String atomName = ca[j].getName(); 1245 1246 Atom a = null; 1247 1248 a= parent.getAtom(atomName); 1249 if ( a != null){ 1250 strBuf[i]=a; 1251 } 1252 else { 1253 // probably a GLY and no CB was found... 1254 //e.printStackTrace(); 1255 } 1256 strBuf[i+pos] = a; 1257 pos++; 1258 1259 1260 1261 } 1262 1263 // TODO: consider all requested Atoms? 1264 private double getRMSDForBestTrace(int ir, Atom[] strBuf1, Atom[] strBuf2, 1265 int[] bestTracesN2, int[][] bestTraces12, int[] bestTrace22, 1266 int winSize,Atom[] ca1, Atom[] ca2 ) throws StructureException { 1267 int is=0; 1268 for(int jt=0; jt<bestTracesN[ir]; jt++) { 1269 for(int i=0; i<winSize; i++) { 1270 1271 setStrBuf(strBuf1, is+i, ca1, bestTraces1[ir][jt]+i); 1272 setStrBuf(strBuf2, is+i, ca2, bestTraces2[ir][jt]+i); 1273 } 1274 is+=winSize; 1275 } 1276 //sup_str(strBuf1, strBuf2, bestTracesN[ir]*winSize, d_); 1277 double rmsdNew=calc_rmsd(strBuf1, strBuf2, bestTracesN[ir]*winSize, true, false); 1278 return rmsdNew; 1279 1280 } 1281 1282 1283 1284 1285 1286 1287 /** calc initial RMSD for bestTrace1 in debug only 1288 * 1289 */ 1290 private void checkPrintRmsdNew(int traceMaxSize, int winSize, Atom[] ca1, Atom[] ca2) throws StructureException{ 1291 1292 int is = 0; 1293 // calc initial RMSD for bestTrace1 1294 Atom[] strBuf1 = new Atom[traceMaxSize]; 1295 Atom[] strBuf2 = new Atom[traceMaxSize]; 1296 for(int jt=0; jt<nBestTrace; jt++) { 1297 for(int i=0; i<winSize; i++) { 1298 setStrBuf(strBuf1, is+i, ca1, bestTrace1[jt]+i ); 1299 setStrBuf(strBuf2, is+i, ca2, bestTrace2[jt]+i ); 1300 } 1301 is+=winSize; 1302 } 1303 1304 //sup_str(strBuf1, strBuf2, nBestTrace*winSize, d_); 1305 double rmsdNew=calc_rmsd(strBuf1, strBuf2, nBestTrace*winSize, true, false); 1306 //afpChain.setTotalRmsdIni(rmsdNew); 1307 1308 1309 if ( isPrint){ 1310 System.out.println("rmsdNew after trace: " +rmsdNew); 1311 1312 for(int k=0; k<nBestTrace; k++) 1313 System.out.println(String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1,8)); 1314 } 1315 1316 if ( isPrint){ 1317 System.out.println("best traces: " + nBestTraces); 1318 } 1319 1320 1321 } 1322 1323 1324 1325 1326 1327 1328 private static char getOneLetter(Group g){ 1329 1330 if (g==null) return StructureTools.UNKNOWN_GROUP_LABEL; 1331 1332 return StructureTools.get1LetterCode(g.getPDBName()); 1333 } 1334 1335 1336 1337 private int optimizeSuperposition(AFPChain afpChain, int nse1, int nse2, int strLen, double rmsd, Atom[] ca1, Atom[] ca2,int nGaps, 1338 Atom[] strBuf1, Atom[] strBuf2 ) throws StructureException { 1339 1340 //System.out.println("optimizing Superimposition..."); 1341 1342 //nAtom=strLen; 1343 // optimization on superposition 1344 Atom[] ca3=new Atom[nse2]; 1345 1346 double rmsdLen = 0.0; 1347 1348 // this flag tests if the RMSDLen has been assigned. 1349 // this is to enforce that the alignment ends up 1350 // smaller than 95% of the original alignment. 1351 // +/- 1352 boolean isRmsdLenAssigned=false; 1353 int nAtomPrev=-1; 1354 1355 double oRmsdThr = params.getORmsdThr(); 1356 1357 double distanceIncrement = params.getDistanceIncrement(); 1358 double maxUserRMSD = params.getMaxOptRMSD(); 1359 nAtom=0; 1360 int counter = -1; 1361 1362 int maxNrIterations = params.getMaxNrIterationsForOptimization(); 1363 1364 //double[][] mat = new double[nse1][nse2]; 1365 while((nAtom<strLen*0.95 || 1366 (isRmsdLenAssigned && rmsd<rmsdLen*1.1 && nAtomPrev!=nAtom)) && ( counter< maxNrIterations)) { 1367 1368 counter++; 1369 if ( debug) 1370 System.out.println("nAtom: " + nAtom + " " + nAtomPrev + " " + rmsdLen + " " + isRmsdLenAssigned + " strLen:" + strLen + " nse1,nse2:" + nse1 + " " + nse2); 1371 nAtomPrev=nAtom; 1372 oRmsdThr += distanceIncrement; 1373 1374 rot_mol(ca2, ca3, nse2, r,t); 1375 1376 for(int ise1=0; ise1<nse1; ise1++) { 1377 for(int ise2=0; ise2<nse2; ise2++) { 1378 1379 //mat[ise1][ise2]=-0.001; 1380 1381 // this needs to be a parameter... 1382 1383 1384 double dist = getDistanceWithSidechain(ca1[ise1], ca3[ise2]); 1385 mat[ise1][ise2] = oRmsdThr - dist; 1386 1387 //double distold = Calc.getDistance(ca1[ise1],ca3[ise2]); 1388 //double scoreOld = oRmsdThr - distold ; 1389 //mat[ise1][ise2] = scoreOld; 1390 //mat[ise1][ise2] = oRmsdThr - Calc.getDistance(ca1[ise1],ca3[ise2]); 1391 1392 //if ( counter == 0 && ise1 == ise2) { 1393 1394 // System.out.println("mat[" + ise1 + "][" + ise2 + "] " + mat[ise1][ise2] + " scoreOld:" + scoreOld + " oRmsdThr: " + oRmsdThr +" dist: " + dist + " distold:" + distold ); 1395 // } 1396 1397 1398 } 1399 } 1400 1401 mat = notifyMatrixListener(mat); 1402 1403 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 1404 mat = updateMatrixWithSequenceConservation(mat,ca1,ca2, params); 1405 } 1406 1407 double gapOpen = params.getGapOpen(); 1408 double gapExtension = params.getGapExtension(); 1409 1410 // by default we always do local alignment 1411 double score = dpAlign( nse1, nse2, gapOpen , gapExtension , GLOBAL_ALIGN1, GLOBAL_ALIGN2); 1412 1413 if (debug) { 1414 System.out.println("iter: "+ counter + " score:" + score + " " + " nAtomPrev: " + nAtomPrev + " nAtom:" + nAtom + " oRmsdThr: " + oRmsdThr); 1415 1416 for (int i=0 ; i<alignmentPositionOrLength ; i++){ 1417 if ( align_se2[i] == 172 || align_se2[i] == 173) { 1418 System.out.println("BREAK POINT IS ALIGNED!!!!"); 1419 System.out.println(align_se2[i-1] + " " + align_se2[i] + " " + align_se2[i+1]); 1420 } 1421 } 1422 } 1423 afpChain.setAlignScore(score); 1424 1425 1426 nAtom=0; nGaps=0; 1427 for(int ia=0; ia<alignmentPositionOrLength; ia++) { 1428 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1429 1430 strBuf1[nAtom]=ca1[align_se1[ia]]; 1431 strBuf2[nAtom]=ca2[align_se2[ia]]; 1432 1433 nAtom++; 1434 1435 } 1436 else { 1437 nGaps++; 1438 } 1439 } 1440 1441 for ( int i =0 ; i < strBuf1.length; i++){ 1442 if ( strBuf1[i] == null) 1443 break; 1444 System.out.print(strBuf1[i].getGroup().getChemComp().getOneLetterCode()); 1445 } 1446 System.out.println(); 1447 1448 if(nAtom<4) continue; 1449 1450 //sup_str(strBuf1, strBuf2, nAtom, _d); 1451 // here we don't store the rotation matrix for the user! 1452 rmsd= calc_rmsd(strBuf1, strBuf2, nAtom,false, false); 1453 if ( isPrint ) 1454 System.out.println("iter: " + counter + " nAtom " + nAtom + " rmsd: " + rmsd); 1455 //afpChain.setTotalRmsdOpt(rmsd); 1456 //System.out.println("rmsd: " + rmsd); 1457 1458 if(!(nAtom<strLen*0.95) && (!isRmsdLenAssigned)) { 1459 rmsdLen=rmsd; 1460 isRmsdLenAssigned=true; 1461 } 1462 //System.out.println(String.format("nAtom %d %d rmsd %.1f", nAtom, nAtomPrev, rmsd)); 1463 1464 1465 afpChain.setBlockRmsd(new double[]{rmsd}); 1466 afpChain.setOptRmsd(new double[]{rmsd}); 1467 afpChain.setTotalRmsdOpt(rmsd); 1468 afpChain.setChainRmsd(rmsd); 1469 1470 if ( rmsd >= maxUserRMSD) { 1471 break; 1472 } 1473 1474 } 1475 1476 1477 1478 //System.out.println("done optimizing"); 1479 /* 1480 nAtom=0; nGaps=0; 1481 for(int ia=0; ia<lcmp; ia++) 1482 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1483 if(ca1[align_se1[ia]].X<1e10 && ca2[align_se2[ia]].X<1e10) { 1484 strBuf1[nAtom]=ca1[align_se1[ia]]; 1485 strBuf2[nAtom]=ca2[align_se2[ia]]; 1486 nAtom++; 1487 } 1488 } 1489 else { 1490 nGaps++; 1491 } 1492 1493 sup_str(strBuf1, strBuf2, nAtom, _d); 1494 rmsd=calc_rmsd(strBuf1, strBuf2, nAtom, _d); 1495 */ 1496 nBestTrace=0; 1497 boolean newBestTrace=true; 1498 for(int ia=0; ia<alignmentPositionOrLength; ia++) { 1499 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1500 //System.out.println(" " +align_se1[ia] + " " + align_se2[ia]); 1501 1502 if(newBestTrace) { 1503 bestTrace1[nBestTrace]=align_se1[ia]; 1504 bestTrace2[nBestTrace]=align_se2[ia]; 1505 bestTraceLen[nBestTrace]=0; 1506 newBestTrace=false; 1507 nBestTrace++; 1508 } 1509 bestTraceLen[nBestTrace-1]++; 1510 1511 } 1512 else { 1513 newBestTrace=true; 1514 } 1515 } 1516 1517 return nGaps; 1518 1519 // end of optimization on superposition 1520 1521 } 1522 1523 /** Modifies an alignment matrix by favoring the alignment of similar and identical amino acids and penalizing the alignment of unrelated ones. 1524 * 1525 * @param max alignment matrix 1526 * @param ca1 Atoms for protein 1 1527 * @param ca2 Atoms for Protein 2 1528 * @param params alignment parameters 1529 * @return modified alignment matrix 1530 */ 1531 public static double[][] updateMatrixWithSequenceConservation(double[][] max, Atom[] ca1, Atom[] ca2, CeParameters params) { 1532 Matrix origM = new Matrix(max); 1533 1534 SubstitutionMatrix<AminoAcidCompound> substMatrix = 1535 params.getSubstitutionMatrix(); 1536 1537 int internalScale = 1; 1538 if ( substMatrix instanceof ScaledSubstitutionMatrix) { 1539 ScaledSubstitutionMatrix scaledMatrix = (ScaledSubstitutionMatrix) substMatrix; 1540 internalScale = scaledMatrix.getScale(); 1541 } 1542 1543 1544 AminoAcidCompoundSet set = AminoAcidCompoundSet.getAminoAcidCompoundSet(); 1545 1546 for (int i = 0 ; i < origM.getRowDimension() ; i++){ 1547 for ( int j =0; j < origM.getColumnDimension() ; j ++ ) { 1548 double val = origM.get(i,j); 1549 Atom a1 = ca1[i]; 1550 Atom a2 = ca2[j]; 1551 1552 AminoAcidCompound ac1 = 1553 set.getCompoundForString(a1.getGroup().getChemComp().getOneLetterCode()); 1554 AminoAcidCompound ac2 = 1555 set.getCompoundForString(a2.getGroup().getChemComp().getOneLetterCode()); 1556 1557 1558 if ( ac1 == null || ac2 == null) 1559 continue; 1560 1561 short aaScore = substMatrix.getValue(ac1,ac2); 1562 1563 double weightedScore = (aaScore / internalScale) * params.getSeqWeight(); 1564 1565 1566 val += weightedScore; 1567 origM.set(i,j,val); 1568 1569 } 1570 } 1571 max = origM.getArray(); 1572 1573 //SymmetryTools.showMatrix((Matrix)origM.clone(), "in optimizer " + loopCount ); 1574 //SymmetryTools.showMatrix(origM, "iteration matrix " + loopCount + " after"); 1575 1576 return max; 1577 } 1578 1579 private double[][] notifyMatrixListener(double[][] mat2) { 1580 for (MatrixListener li : matrixListeners) { 1581 mat2 = li.matrixInOptimizer(mat2); 1582 } 1583 return mat2; 1584 } 1585 1586 private boolean[][] notifyBreakFlagListener(boolean[][] brkFlag){ 1587 for (MatrixListener li : matrixListeners) { 1588 brkFlag = li.initializeBreakFlag(brkFlag); 1589 } 1590 return brkFlag; 1591 } 1592 1593 public void addMatrixListener(MatrixListener li){ 1594 matrixListeners.add(li); 1595 } 1596 1597 1598 private double dpAlign(int nSeq1, int nSeq2, double gapI, double gapE, 1599 boolean isGlobal1, boolean isGlobal2) { 1600 1601 1602 // isGlobal1,isGlobal2 are always false... 1603 // i.e. we do local alignments by default 1604 1605 int i, j, iStart, jStart, iMax, jMax, k; 1606 boolean hasGapExtensionPenalty=(gapE!=0.0?true:false); 1607 double sum_ret, sum_brk; 1608 1609 boolean[][] brk_flg=new boolean [nSeq1][nSeq2]; 1610 for(i=0; i<nSeq1; i++) brk_flg[i]=new boolean [nSeq2]; 1611 1612 brk_flg = notifyBreakFlagListener(brk_flg); 1613 1614 // ge = true here... 1615 /* 1616 for(i=0; i<nSeq1; i++) 1617 { 1618 printf("\n"); 1619 for(j=0; j<nSeq2; j++) 1620 { 1621 printf("%4d", (int)(*(mat[i]+j)*10)); 1622 } 1623 } 1624 printf("\n\n\n"); 1625 */ 1626 int[][] tracebackMatrix1 = new int[nSeq1][nSeq2]; 1627 int[][] tracebackMatrix2 = new int[nSeq1][nSeq2]; 1628 1629 // System.out.println("===== " + mat[0][0]); 1630 // for ( i = 39 ; i < 42 ;i ++){ 1631 // //System.out.print(" " + i + " "); 1632 // for ( j = 155 ; j < 157 ; j++) { 1633 // System.out.print(String.format("[%s %s %.2f] ",i,j,mat[i][j])); 1634 // } 1635 // System.out.println(); 1636 // } 1637 // System.out.println("----"); 1638 // 1639 // for ( i = 69 ; i < 72 ;i ++){ 1640 // //System.out.println(" " + i + " "); 1641 // for ( j = 170 ; j < 175 ; j++) { 1642 // System.out.print(String.format("[%s %s %.2f] ",i,j,mat[i][j])); 1643 // } 1644 // System.out.println(); 1645 // } 1646 1647 //double sum = 0; 1648 if(!hasGapExtensionPenalty) 1649 { 1650 for(i=nSeq1-1; i>=0; i--) 1651 for(j=nSeq2-1; j>=0; j--) 1652 { 1653 double sum ; 1654 brk_flg[i][j]=false; 1655 if(j<nSeq2-1 && i<nSeq1-1) 1656 { 1657 sum=mat[i+1][j+1]; 1658 } 1659 else 1660 { 1661 sum=0.0; 1662 if((isGlobal1 && i!=nSeq1-1) || (isGlobal2 && j!=nSeq2-1)) 1663 sum=-gapI; 1664 } 1665 if(j+1<nSeq2) 1666 for(k=i+2; k<nSeq1; k++) 1667 { 1668 if(mat[k][j+1]-gapI>sum) 1669 sum=mat[k][j+1]-gapI; 1670 } 1671 if(i+1<nSeq1) 1672 for(k=j+2; k<nSeq2; k++) 1673 { 1674 if(mat[i+1][k]-gapI>sum) 1675 sum=mat[i+1][k]-gapI; 1676 } 1677 sum+=mat[i][j]; 1678 sum_brk=(isGlobal1?-gapI:0.0)+(isGlobal2?-gapI:0.0); 1679 if(sum<sum_brk) 1680 { 1681 sum=sum_brk; 1682 brk_flg[i][j]=true; 1683 //System.out.println("break at: " + i + " " + j); 1684 } 1685 mat[i][j]=sum; 1686 } 1687 } 1688 else 1689 { 1690 for(i=nSeq1-1; i>=0; i--) 1691 for(j=nSeq2-1; j>=0; j--) 1692 { 1693 double maxSum ; 1694 brk_flg[i][j]=false; 1695 if(j<nSeq2-1 && i<nSeq1-1) 1696 { 1697 // any row/column which is not the last 1698 maxSum=mat[i+1][j+1]; 1699 tracebackMatrix1[i][j] = i+1; 1700 tracebackMatrix2[i][j] = j+1; 1701 } 1702 else 1703 { 1704 // sets the last row and column 1705 maxSum=0.0; 1706 if(isGlobal1 && i!=nSeq1-1) maxSum=-gapI-gapE*(nSeq1-i-1); 1707 if(isGlobal2 && j!=nSeq2-1) maxSum=-gapI-gapE*(nSeq2-j-1); 1708 tracebackMatrix1[i][j] = -1; 1709 tracebackMatrix2[i][j] = -1; 1710 1711 } 1712 1713 // do only for rows/columns which are not the last: 1714 if(j+1<nSeq2) 1715 for(k=i+2; k<nSeq1; k++) { 1716 if(mat[k][j+1]-gapI-gapE*(k-i-1)>maxSum) { 1717 maxSum=mat[k][j+1]-gapI-gapE*(k-i-1); 1718 tracebackMatrix1[i][j] = k; 1719 tracebackMatrix2[i][j] = j+1; 1720 1721 } 1722 } 1723 if(i+1<nSeq1) 1724 for(k=j+2; k<nSeq2; k++) { 1725 if(mat[i+1][k]-gapI-gapE*(k-j-1)>maxSum) { 1726 maxSum=mat[i+1][k]-gapI-gapE*(k-j-1); 1727 tracebackMatrix1[i][j] = i+1; 1728 tracebackMatrix2[i][j] = k; 1729 1730 } 1731 } 1732 1733 maxSum+= mat[i][j]; 1734 1735 1736 sum_brk=(isGlobal1?(-gapI-gapE*(nSeq1-1-i)):0.0)+(isGlobal2?(-gapI-gapE*(nSeq2-1-j)):0.0); 1737 if(maxSum<sum_brk) 1738 { 1739 maxSum=sum_brk; 1740 brk_flg[i][j]=true; 1741 } 1742 mat[i][j]=maxSum; 1743 } 1744 } 1745 1746 1747 1748 1749 iStart=0; jStart=0; alignmentPositionOrLength=0; 1750 // no nc-end penalty - begin 1751 sum_ret=mat[0][0]; 1752 1753 // look for the highest score in mat[i][j] 1754 // TODO: move this up ?? 1755 1756 for(i=0; i<nSeq1; i++) 1757 for(j=0; j<nSeq2; j++) 1758 { 1759 if(i==0 && j==0) continue; 1760 double sum=mat[i][j]; 1761 if(isGlobal1) sum+=-gapI-gapE*i; 1762 if(isGlobal2) sum+=-gapI-gapE*j; 1763 if(sum>sum_ret) 1764 { 1765 sum_ret=sum; 1766 iStart=i; jStart=j; 1767 } 1768 } 1769 1770 1771 // ok we got the maximum score in sum_ret and the start position in iStart, jStart 1772 1773 //System.out.println("start at " + is + " " + js); 1774 //for(k=0; k<is; k++) align1[k]=-1; 1775 //for(k=0; k<js; k++) align2[k]=-1; 1776 // no nc-end penalty - end 1777 int prevGapEnd = -1; 1778 // 1779 for(i=iStart, j=jStart; i<nSeq1 && j<nSeq2; i++, j++) 1780 { 1781 iMax=i; jMax=j; 1782 double localMaxScore=mat[i][j]; 1783 if(!hasGapExtensionPenalty) 1784 { 1785 for(k=i+1; k<nSeq1; k++) 1786 if(mat[k][j]-gapI>localMaxScore) 1787 { 1788 iMax=k; jMax=j; 1789 localMaxScore=mat[k][j]-gapI; 1790 } 1791 1792 for(k=j+1; k<nSeq2; k++) 1793 if(mat[i][k]-gapI>localMaxScore) 1794 { 1795 iMax=i; jMax=k; 1796 localMaxScore=mat[i][k]-gapI; 1797 } 1798 } 1799 else 1800 { 1801 for(k=i+1; k<nSeq1; k++) { 1802 if(mat[k][j]-gapI-gapE*(k-i)>localMaxScore) 1803 { 1804 System.out.println(" gap1 " + alignmentPositionOrLength + " " + k + " " + j + " " + localMaxScore + "<" +(mat[k][j]-gapI-gapE*(k-i))); 1805 iMax=k; jMax=j; 1806 localMaxScore=mat[k][j]-gapI-gapE*(k-i); 1807 } 1808 } 1809 for(k=j+1; k<nSeq2; k++) { 1810 if(mat[i][k]-gapI-gapE*(k-j)>localMaxScore) 1811 { 1812 System.out.println(" gap2 " + alignmentPositionOrLength + " " + k + " " + i + " " + localMaxScore + "<"+ (mat[i][k]-gapI-gapE*(k-j))); 1813 iMax=i; jMax=k; 1814 localMaxScore=mat[i][k]-gapI-gapE*(k-j); 1815 } 1816 } 1817 } 1818 1819 //boolean doubleGap = false; 1820 boolean gapPosition = false; 1821 if ( i != iMax || j != jMax ) { 1822 int l1 = iMax - i; 1823 int l2 = jMax - j ; 1824 System.out.println(String.format("FOUND GAP AT: lcmp:%d l1: %d l2: %d | i:%d iMax: %d j:%d jMax:%d ",alignmentPositionOrLength, l1,l2, i, iMax , j, jMax)); 1825 if ( l1 > 0) { 1826 System.out.println(" -- G1 : " + alignmentPositionOrLength + " ->" + (alignmentPositionOrLength + l1) + " " ); 1827 gapPosition = true; 1828 } 1829 if ( l2 > 0) { 1830 System.out.println(" -- G2 : " + alignmentPositionOrLength + " ->" + (alignmentPositionOrLength + l2) + " "); 1831 gapPosition = true; 1832 } 1833 if ( prevGapEnd == alignmentPositionOrLength -1){ 1834 // double gap! 1835 System.out.println( " !! FOUND DOUBLE GAP AT: "+ alignmentPositionOrLength + " | "+ i+ " " + iMax + " " + j + " " + jMax + " " + String.format("%f", mat[i][j]) + " " + getTraceBack(tracebackMatrix1,tracebackMatrix2,i,j)); 1836 //doubleGap = true; 1837 1838 // if ( i != iMax){ 1839 // int pos = align_se1[ alignmentPositionOrLength -1]; 1840 // align_se1[ alignmentPositionOrLength -1] = -1; 1841 // align_se1[ alignmentPositionOrLength ] = pos; 1842 // align_se2[ alignmentPositionOrLength ] = -1; 1843 // } else { 1844 // int pos = align_se2[ alignmentPositionOrLength -1]; 1845 // align_se2[ alignmentPositionOrLength -1] = -1; 1846 // align_se2[ alignmentPositionOrLength ] = pos; 1847 // align_se1[ alignmentPositionOrLength ] = -1; 1848 // } 1849 // 1850 // alignmentPositionOrLength++; 1851 } 1852 } 1853 1854 1855 1856 //System.out.println(" iMax " + iMax + " jMax " + jMax); 1857 // set the gap positions: 1858 //lcmp:53 i:41 j:173 imax:70 jmax:173 1859 System.out.println(String.format(" lcmp:%d i:%d j:%d imax:%d jmax:%d score: %.2f",alignmentPositionOrLength,i,j, iMax, jMax, mat[iMax][jMax])); 1860 1861 1862 for(k=i; k<iMax; k++, i++) { 1863 align_se1[alignmentPositionOrLength]=k; 1864 align_se2[alignmentPositionOrLength]=-1; 1865 1866 1867 alignmentPositionOrLength++; 1868 } 1869 1870 for(k=j; k<jMax; k++, j++) { 1871 align_se1[alignmentPositionOrLength]=-1; 1872 align_se2[alignmentPositionOrLength]=k; 1873 1874 1875 alignmentPositionOrLength++; 1876 } 1877 1878 1879 align_se1[alignmentPositionOrLength]=iMax; 1880 align_se2[alignmentPositionOrLength]=jMax; 1881 1882 1883 1884 if ( gapPosition) 1885 prevGapEnd = alignmentPositionOrLength; 1886 alignmentPositionOrLength++; 1887 1888 if(brk_flg[i][j]) { 1889 //System.out.println("hit break flag at: " + i + " " + j + " sum " + sum_ret + " lcmp " + alignmentPositionOrLength); 1890 break; 1891 1892 } 1893 } 1894 1895 1896 return sum_ret; 1897 } 1898 1899 1900 1901 1902 private String getTraceBack(int[][] tracebackMatrix1, 1903 int[][] tracebackMatrix2, int i, int j) { 1904 1905 1906 if ( tracebackMatrix1[i][j] == -1 ) 1907 return ""; 1908 if ( tracebackMatrix2[i][j] == -1 ) 1909 return ""; 1910 1911 int t1 = tracebackMatrix1[i][j]; 1912 int t2 = tracebackMatrix2[i][j]; 1913 1914 1915 String s = "[ "+t1+","+t2+"] "; 1916 1917 return s + getTraceBack(tracebackMatrix1, tracebackMatrix2, t1, t2); 1918 1919 1920 } 1921 1922 private void rot_mol(Atom[] caA, Atom[] caB, int nse2, Matrix m , Atom shift) throws StructureException{ 1923 1924 1925 1926 for(int l=0; l<nse2; l++) { 1927 Atom a = caA[l]; 1928 Group g = (Group)a.getGroup().clone(); 1929 //Group g = (Group)a.getParent(); 1930 1931 Calc.rotate( g, m); 1932 Calc.shift( g, shift); 1933 caB[l] = g.getAtom(a.getName()); 1934 } 1935 } 1936 1937 //void rot_mol(XYZ *molA, XYZ *molB, int nAtom, double d_[20] ) { 1938 // double dx, dy, dz; 1939 // for(int l=0; l<nAtom; l++) { 1940 // if(molA[l].X<1e10) { 1941 // dx=molA[l].X; 1942 // dy=molA[l].Y; 1943 // dz=molA[l].Z; 1944 // molB[l].X=dx*d_[0]+dy*d_[1]+dz*d_[2]+d_[9]; 1945 // molB[l].Y=dx*d_[3]+dy*d_[4]+dz*d_[5]+d_[10]; 1946 // molB[l].Z=dx*d_[6]+dy*d_[7]+dz*d_[8]+d_[11]; 1947 // } 1948 // else { 1949 // molB[l]=molA[l]; 1950 // } 1951 // } 1952 // } 1953 // 1954 1955 1956 /** superimpose and get rmsd 1957 * 1958 * @param pro1 1959 * @param pro2 1960 * @param strLen 1961 * @param storeTransform 1962 * @param show 1963 * @return RMSD 1964 * @throws StructureException 1965 */ 1966 public double calc_rmsd(Atom[] pro1, Atom[] pro2, int strLen, boolean storeTransform, boolean show) throws StructureException { 1967 1968 Atom[] cod1 = getAtoms(pro1, strLen,false); 1969 Atom[] cod2 = getAtoms(pro2, strLen,true); 1970 1971 Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(cod1), 1972 Calc.atomsToPoints(cod2)); 1973 1974 Matrix matrix = Matrices.getRotationJAMA(trans); 1975 Atom shift = Calc.getTranslationVector(trans); 1976 1977 if ( debug){ 1978 matrix.print(3,3); 1979 } 1980 1981 if ( storeTransform) { 1982 r = matrix; 1983 t = shift; 1984 } 1985 for (Atom a : cod2) 1986 Calc.transform(a.getGroup(), trans); 1987 1988 //Calc.rotate(a,r); 1989 //Calc.shift(a,t); 1990 // if ( show){ 1991 // StructureAlignmentJmol jmol = new StructureAlignmentJmol(); 1992 // 1993 // jmol.setAtoms(cod1); 1994 // jmol.evalString("select * ; wireframe off; spacefill off; backbone on; color chain; select ligand;color cpk; wireframe 40;spacefill 120; "); 1995 // jmol.setTitle("calCaRmsd - pdb1"); 1996 // 1997 // StructureAlignmentJmol jmol2 = new StructureAlignmentJmol(); 1998 // jmol2.setAtoms(cod2); 1999 // jmol2.evalString("select * ; wireframe off; spacefill off; backbone on; color chain; select ligand;color cpk; wireframe 40;spacefill 120; "); 2000 // jmol2.setTitle("calCaRmsd - pdb2"); 2001 // } 2002 return Calc.rmsd(cod1, cod2); 2003 2004 } 2005 2006 /** 2007 * Copies the first length atoms from the input array 2008 * @param ca The array to copy 2009 * @param length the number of atoms to copy 2010 * @param clone If true, preform a deep copy, cloning the underlying Groups 2011 * @return An array with the first length items of ca, possibly cloning the Atoms. 2012 * @throws StructureException 2013 */ 2014 private Atom[] getAtoms(Atom[] ca, int length, boolean clone) throws StructureException{ 2015 2016 List<Atom> atoms = new ArrayList<>(); 2017 for ( int i = 0 ; i < length ; i++){ 2018 2019 Atom a; 2020 if ( clone ){ 2021 Group g = (Group)ca[i].getGroup().clone(); 2022 a = g.getAtom(ca[i].getName()); 2023 } 2024 else { 2025 a = ca[i]; 2026 } 2027 atoms.add(a); 2028 } 2029 return atoms.toArray(new Atom[atoms.size()]); 2030 } 2031 2032 2033 2034 private void noBestTrace(){ 2035 2036 if(isPrint) { 2037 timeEnd = System.currentTimeMillis(); 2038 long time_q=(timeEnd-timeStart); 2039 2040 String msg = String.format("size=0 time=%d comb=%d\n", (int)(time_q), nTraces); 2041 System.out.println(msg); 2042 } 2043 } 2044 2045 2046 2047 private double zToP(double z) { 2048 int ind=(int)(z/0.1); 2049 if(ind<0) ind=0; 2050 if(ind>149) ind=149; 2051 return(tableZtoP[ind]); 2052 } 2053 /////////////////////////////////////////////////////////////////////////// 2054 private double pToZ(double p) { 2055 int ind=(int)(-Math.log10(p)*3.0); 2056 if(ind<0) ind=0; 2057 if(ind>149) ind=149; 2058 return(tablePtoZ[ind]); 2059 } 2060 /////////////////////////////////////////////////////////////////////////// 2061 private double zByZ(double z1, double z2) { 2062 double p1=zToP(z1); 2063 double p2=zToP(z2); 2064 return(pToZ(p1*p2)); 2065 } 2066 2067 protected double zStrAlign(int winSize, int nTrace, double score, int nGaps) { 2068 double z1=zScore(winSize, nTrace, score); 2069 double z2=zGaps(winSize, nTrace, nGaps); 2070 return(zByZ(z1, z2)); 2071 } 2072 2073 double zScore(int winSize, int nTrace, double score) { 2074 2075 if(winSize==8) { 2076 2077 if(nTrace<1) return(0.0); 2078 2079 double scoreAv_, scoreSd_; 2080 if(nTrace<21) { 2081 scoreAv_=scoreAv8[nTrace-1]; 2082 scoreSd_=scoreSd8[nTrace-1]; 2083 } 2084 else { 2085 scoreAv_=0.209874*nTrace+2.944714; 2086 scoreSd_=0.039487*nTrace+0.675735; 2087 } 2088 if(score>scoreAv_) return(0.0); 2089 return((scoreAv_-score)/scoreSd_); 2090 } 2091 2092 if(winSize==6) { 2093 2094 if(nTrace<1) return(0.0); 2095 2096 double scoreAv_, scoreSd_; 2097 if(nTrace<21) { 2098 scoreAv_=scoreAv6[nTrace-1]; 2099 scoreSd_=scoreSd6[nTrace-1]; 2100 } 2101 else { 2102 scoreAv_=0.198534*nTrace+2.636477; 2103 scoreSd_=0.040922*nTrace+0.715636; 2104 } 2105 if(score>scoreAv_) return(0.0); 2106 return((scoreAv_-score)/scoreSd_); 2107 } 2108 2109 return(0.0); 2110 2111 } 2112 /////////////////////////////////////////////////////////////////////////// 2113 double zGaps(int winSize, int nTrace, int nGaps) { 2114 2115 if(nTrace<2) return(0.0); 2116 double scoreAv_, scoreSd_; 2117 2118 if(winSize==8) { 2119 if(nTrace<21) { 2120 scoreAv_=gapsAv8[nTrace-1]; 2121 scoreSd_=gapsSd8[nTrace-1]; 2122 } 2123 else { 2124 scoreAv_=14.949173*nTrace-14.581193; 2125 scoreSd_=2.045067*nTrace+13.191095; 2126 } 2127 if(nGaps>scoreAv_) return(0.0); 2128 return((scoreAv_-nGaps)/scoreSd_); 2129 } 2130 2131 if(winSize==6) { 2132 if(nTrace<21) { 2133 scoreAv_=gapsAv6[nTrace-1]; 2134 scoreSd_=gapsSd6[nTrace-1]; 2135 } 2136 else { 2137 scoreAv_=13.574490*nTrace-13.977223; 2138 scoreSd_=1.719977*nTrace+19.615014; 2139 } 2140 if(nGaps>scoreAv_) return(0.0); 2141 return((scoreAv_-nGaps)/scoreSd_); 2142 } 2143 2144 return(0.0); 2145 } 2146 2147 private static final double[] scoreAv8={2.54, 2.51, 2.72, 3.01, 3.31, 3.61, 3.90, 4.19, 4.47, 4.74, 2148 4.99, 5.22, 5.46, 5.70, 5.94, 6.13, 6.36, 6.52, 6.68, 6.91}; 2149 private static final double[] scoreSd8={1.33, 0.88, 0.73, 0.71, 0.74, 0.80, 0.86, 0.92, 0.98, 1.04, 2150 1.08, 1.10, 1.15, 1.19, 1.23, 1.25, 1.32, 1.34, 1.36, 1.45}; 2151 private static final double[] gapsAv8={0.00, 11.50, 23.32, 35.95, 49.02, 62.44, 76.28, 90.26, 2152 104.86, 119.97, 134.86, 150.54, 164.86, 179.57, 194.39, 2153 209.38, 224.74, 238.96, 253.72, 270.79}; 2154 private static final double[] gapsSd8={0.00, 9.88, 14.34, 17.99, 21.10, 23.89, 26.55, 29.00, 31.11, 2155 33.10, 35.02, 36.03, 37.19, 38.82, 41.04, 43.35, 45.45, 2156 48.41, 50.87, 52.27}; 2157 private static final double[] scoreAv6={1.98, 1.97, 2.22, 2.54, 2.87, 3.18, 3.48, 3.77, 4.05, 4.31, 2158 4.57, 4.82, 5.03, 5.24, 5.43, 5.64, 5.82, 6.02, 6.21, 6.42}; 2159 private static final double[] scoreSd6={1.15, 0.73, 0.63, 0.64, 0.71, 0.80, 0.87, 0.95, 1.01, 1.07, 2160 1.13, 1.19, 1.22, 1.25, 1.28, 1.32, 1.35, 1.39, 1.45, 1.50}; 2161 private static final double[] gapsAv6={0.00, 10.12, 20.25, 31.29, 42.95, 55.20, 67.53, 80.15, 2162 93.30, 106.47, 120.52, 134.38, 148.59, 162.58, 176.64, 2163 191.23, 204.12, 218.64, 231.82, 243.43}; 2164 private static final double[] gapsSd6={0.00, 9.80, 14.44, 18.14, 21.35, 24.37, 27.00, 29.68, 32.22, 2165 34.37, 36.65, 38.63, 40.31, 42.16, 43.78, 44.98, 47.08, 2166 49.09, 50.78, 52.15}; 2167 2168 2169 private static final double[] tableZtoP={ 2170 1.0, 9.20e-01,8.41e-01,7.64e-01,6.89e-01,6.17e-01,5.49e-01,4.84e-01,4.24e-01,3.68e-01, 2171 3.17e-01,2.71e-01,2.30e-01,1.94e-01,1.62e-01,1.34e-01,1.10e-01,8.91e-02,7.19e-02,5.74e-02, 2172 4.55e-02,3.57e-02,2.78e-02,2.14e-02,1.64e-02,1.24e-02,9.32e-03,6.93e-03,5.11e-03,3.73e-03, 2173 2.70e-03,1.94e-03,1.37e-03,9.67e-04,6.74e-04,4.65e-04,3.18e-04,2.16e-04,1.45e-04,9.62e-05, 2174 6.33e-05,4.13e-05,2.67e-05,1.71e-05,1.08e-05,6.80e-06,4.22e-06,2.60e-06,1.59e-06,9.58e-07, 2175 5.73e-07,3.40e-07,1.99e-07,1.16e-07,6.66e-08,3.80e-08,2.14e-08,1.20e-08,6.63e-09,3.64e-09, 2176 1.97e-09,1.06e-09,5.65e-10,2.98e-10,1.55e-10,8.03e-11,4.11e-11,2.08e-11,1.05e-11,5.20e-12, 2177 2.56e-12,1.25e-12,6.02e-13,2.88e-13,1.36e-13,6.38e-14,2.96e-14,1.36e-14,6.19e-15,2.79e-15, 2178 1.24e-15,5.50e-16,2.40e-16,1.04e-16,4.46e-17,1.90e-17,7.97e-18,3.32e-18,1.37e-18,5.58e-19, 2179 2.26e-19,9.03e-20,3.58e-20,1.40e-20,5.46e-21,2.10e-21,7.99e-22,3.02e-22,1.13e-22,4.16e-23, 2180 1.52e-23,5.52e-24,1.98e-24,7.05e-25,2.48e-25,8.64e-26,2.98e-26,1.02e-26,3.44e-27,1.15e-27, 2181 3.82e-28,1.25e-28,4.08e-29,1.31e-29,4.18e-30,1.32e-30,4.12e-31,1.27e-31,3.90e-32,1.18e-32, 2182 3.55e-33,1.06e-33,3.11e-34,9.06e-35,2.61e-35,7.47e-36,2.11e-36,5.91e-37,1.64e-37,4.50e-38, 2183 1.22e-38,3.29e-39,8.77e-40,2.31e-40,6.05e-41,1.56e-41,4.00e-42,1.02e-42,2.55e-43,6.33e-44, 2184 1.56e-44,3.80e-45,9.16e-46,2.19e-46,5.17e-47,1.21e-47,2.81e-48,6.45e-49,1.46e-49,3.30e-50}; 2185 private static final double[] tablePtoZ={ 2186 0.00,0.73,1.24,1.64,1.99,2.30,2.58,2.83,3.07,3.29, 2187 3.50,3.70,3.89,4.07,4.25,4.42,4.58,4.74,4.89,5.04, 2188 5.19,5.33,5.46,5.60,5.73,5.86,5.99,6.11,6.23,6.35, 2189 6.47,6.58,6.70,6.81,6.92,7.02,7.13,7.24,7.34,7.44, 2190 7.54,7.64,7.74,7.84,7.93,8.03,8.12,8.21,8.30,8.40, 2191 8.49,8.57,8.66,8.75,8.84,8.92,9.01,9.09,9.17,9.25, 2192 9.34,9.42,9.50,9.58,9.66,9.73,9.81,9.89,9.97,10.04, 2193 10.12,10.19,10.27,10.34,10.41,10.49,10.56,10.63,10.70,10.77, 2194 10.84,10.91,10.98,11.05,11.12,11.19,11.26,11.32,11.39,11.46, 2195 11.52,11.59,11.66,11.72,11.79,11.85,11.91,11.98,12.04,12.10, 2196 12.17,12.23,12.29,12.35,12.42,12.48,12.54,12.60,12.66,12.72, 2197 12.78,12.84,12.90,12.96,13.02,13.07,13.13,13.19,13.25,13.31, 2198 13.36,13.42,13.48,13.53,13.59,13.65,13.70,13.76,13.81,13.87, 2199 13.92,13.98,14.03,14.09,14.14,14.19,14.25,14.30,14.35,14.41, 2200 14.46,14.51,14.57,14.62,14.67,14.72,14.77,14.83,14.88,14.93}; 2201 2202 /** copy data from this class into AFPChain container object. 2203 * 2204 * @param afpChain 2205 * @param ca1 2206 * @param ca2 2207 */ 2208 public void convertAfpChain(AFPChain afpChain, Atom[] ca1, Atom[] ca2) { 2209 2210 afpChain.setBlockNum(1); 2211 //afpChain.setAlignScore(z); 2212 Matrix[] m ; 2213 2214 if ( r != null ) { 2215 m = new Matrix[1]; 2216 m[0] = r; 2217 } else { 2218 m = new Matrix[0]; 2219 } 2220 2221 Atom[] as ; 2222 if ( t != null) { 2223 as = new Atom[1]; 2224 as[0] = t; 2225 } else { 2226 as = new Atom[0]; 2227 } 2228 2229 afpChain.setBlockRotationMatrix(m); 2230 afpChain.setBlockShiftVector(as); 2231 2232 int nse1 = ca1.length; 2233 int nse2 = ca2.length; 2234 //System.out.println("dist1 :" + dist1.length + " " + dist2.length); 2235 2236 if ( nse1 > 0 && dist1.length > 0 ) 2237 afpChain.setDisTable1(new Matrix(dist1)); 2238 else 2239 afpChain.setDisTable1 (Matrix.identity(3, 3)); 2240 if ( nse2 > 0 && dist2.length > 0 ) 2241 afpChain.setDisTable2(new Matrix(dist2)); 2242 else 2243 afpChain.setDisTable2(Matrix.identity(3, 3)); 2244 2245 2246 char[] alnseq1 = new char[nse1+nse2+1]; 2247 char[] alnseq2 = new char[nse1+nse2+1] ; 2248 char[] alnsymb = new char[nse1+nse2+1]; 2249 2250 int[][][] optAln = new int[1][2][nAtom]; 2251 afpChain.setOptAln(optAln); 2252 2253 int pos = 0; 2254 int nrIdent = 0; 2255 int nrSim = 0; 2256 for(int ia=0; ia<alignmentPositionOrLength; ia++) { 2257 2258 // no gap 2259 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 2260 //System.out.println("ia " + ia + " pos " + pos + " " + align_se1[ia] + " " + align_se2[ia]); 2261 optAln[0][0][pos] = align_se1[ia]; 2262 optAln[0][1][pos] = align_se2[ia]; 2263 2264 char l1 = getOneLetter(ca1[align_se1[ia]].getGroup()); 2265 char l2 = getOneLetter(ca2[align_se2[ia]].getGroup()); 2266 2267 alnseq1[ia] = Character.toUpperCase(l1); 2268 alnseq2[ia] = Character.toUpperCase(l2); 2269 alnsymb[ia] = ' '; 2270 if ( l1 == l2) { 2271 nrIdent++; 2272 nrSim++; 2273 alnsymb[ia] = '|'; 2274 } else if ( AFPAlignmentDisplay.aaScore(l1, l2) > 0){ 2275 nrSim++; 2276 alnsymb[ia] = ':'; 2277 } 2278 2279 pos++; 2280 2281 } else { 2282 // there is a gap at this position 2283 alnsymb[ia] = ' '; 2284 if (align_se1[ia] == -1 ) { 2285 alnseq1[ia] = '-'; 2286 } else { 2287 char l1 = getOneLetter(ca1[align_se1[ia]].getGroup()); 2288 alnseq1[ia] = Character.toUpperCase(l1); 2289 } 2290 if ( align_se2[ia] == -1 ) { 2291 alnseq2[ia] = '-'; 2292 } else { 2293 char l2 = getOneLetter(ca2[align_se2[ia]].getGroup()); 2294 alnseq2[ia] = Character.toUpperCase(l2); 2295 } 2296 2297 } 2298 } 2299 2300 2301 afpChain.setAlnseq1(alnseq1); 2302 afpChain.setAlnseq2(alnseq2); 2303 afpChain.setAlnsymb(alnsymb); 2304 2305 2306 // CE uses the aligned pairs as reference not the whole alignment including gaps... 2307 if ( pos > 0) { 2308 afpChain.setIdentity(nrIdent*1.0/pos); 2309 afpChain.setSimilarity(nrSim*1.0/pos); 2310 } else { 2311 afpChain.setIdentity(0); 2312 afpChain.setSimilarity(0); 2313 } 2314 2315 //AFPAlignmentDisplay.getAlign( afpChain,ca1,ca2); 2316 2317 } 2318 2319 public int getnAtom() { 2320 return nAtom; 2321 } 2322 2323 2324 2325 public void setnAtom(int nAtom) { 2326 this.nAtom = nAtom; 2327 } 2328 2329 2330 2331 public int getLcmp() { 2332 return alignmentPositionOrLength; 2333 } 2334 2335 2336 2337 public void setLcmp(int lcmp) { 2338 this.alignmentPositionOrLength = lcmp; 2339 } 2340 2341 2342 2343 public int[] getAlign_se1() { 2344 return align_se1; 2345 } 2346 2347 2348 2349 public void setAlign_se1(int[] alignSe1) { 2350 align_se1 = alignSe1; 2351 } 2352 2353 2354 2355 public int[] getAlign_se2() { 2356 return align_se2; 2357 } 2358 2359 2360 2361 public void setAlign_se2(int[] alignSe2) { 2362 align_se2 = alignSe2; 2363 } 2364 2365 /** 2366 * Caution: this matrix is overwriten with very different data at several 2367 * points in the alignment algorithm. After 2368 * {@link #initSumOfDistances(int, int, int, int, Atom[], Atom[]) initSumOfDistances} 2369 * is run, this will hold the distance matrix between AFPs. 2370 * @return mat 2371 */ 2372 public double[][] getMatMatrix() { 2373 return mat; 2374 } 2375 2376 public void setMatMatrix(double[][] matrix){ 2377 mat = matrix; 2378 } 2379 2380 /** 2381 * Gets the rotation matrix from the last call to 2382 * {@link #calc_rmsd(Atom[], Atom[], int, boolean, boolean) calc_rmsd}. 2383 * @return The rotatiokn matrix 2384 */ 2385 public Matrix getRotationMatrix() { 2386 return r; 2387 } 2388 2389 /** 2390 * Gets the shift from the last call to 2391 * {@link #calc_rmsd(Atom[], Atom[], int, boolean, boolean) calc_rmsd}. 2392 * @return The shift 2393 */ 2394 public Atom getShift() { 2395 return t; 2396 } 2397 2398 public double[][] getDist1() { 2399 return dist1; 2400 } 2401 2402 public void setDist1(double[][] dist1) { 2403 this.dist1 = dist1; 2404 } 2405 2406 public double[][] getDist2() { 2407 return dist2; 2408 } 2409 2410 public void setDist2(double[][] dist2) { 2411 this.dist2 = dist2; 2412 } 2413 2414 2415}