001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Sep 25, 2009 021 * Author: Andreas Prlic 022 * 023 */ 024 025package org.biojava.nbio.structure.align.ce; 026 027import org.biojava.nbio.core.alignment.matrices.ScaledSubstitutionMatrix; 028import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 029import org.biojava.nbio.structure.Atom; 030import org.biojava.nbio.structure.Calc; 031import org.biojava.nbio.structure.Group; 032import org.biojava.nbio.structure.StructureException; 033import org.biojava.nbio.structure.StructureTools; 034import org.biojava.nbio.structure.align.model.AFP; 035import org.biojava.nbio.structure.align.model.AFPChain; 036import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay; 037import org.biojava.nbio.structure.align.util.AFPChainScorer; 038import org.biojava.nbio.structure.geometry.Matrices; 039import org.biojava.nbio.structure.geometry.SuperPositions; 040import org.biojava.nbio.structure.jama.Matrix; 041import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 042import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 043 044import java.util.ArrayList; 045import java.util.List; 046 047import javax.vecmath.Matrix4d; 048 049 050 051/** 052 * This is based on the original Combinatorial Extension (CE) source code from 2003 or 2004 (CE version 2.3), 053 * as has been originally developed by I. Shindyalov and P.Bourne (1998). 054 * The original CE paper is available from here: <a href="http://peds.oxfordjournals.org/cgi/content/short/11/9/739">http://peds.oxfordjournals.org/cgi/content/short/11/9/739</a>. 055 * 056 * This class is a pretty much exact 1:1 port from C, where I cared about exact reproduce of the CE results 057 * and not about Java style. 058 * 059 * @author Andreas Prlic 060 * 061 */ 062public class CECalculator { 063 064 protected static final boolean isPrint = false; 065 private static final boolean debug = false; 066 067 int[] f1; 068 int[] f2; 069 double[][]dist1; 070 double[][]dist2; 071 protected double[][]mat; 072 protected int[] bestTrace1; 073 protected int[] bestTrace2; 074 protected int[][] bestTraces1; 075 protected int[][] bestTraces2; 076 protected int nBestTrace; 077 protected int nBestTraces; 078 double[] d_ = new double[20]; 079 protected int[] bestTracesN; 080 protected double bestTraceScore; 081 protected int nTrace; 082 protected double[] bestTracesScores; 083 protected int[] trace1; 084 protected int[] trace2; 085 086 protected static final double zThr=-0.1; 087 088 long timeStart ; 089 long timeEnd; 090 private int nAtom; 091 092 // the equivalent positions in the alignment... 093 private int[] align_se1; 094 private int[] align_se2; 095 096 097 private int lcmp; 098 private int[] bestTraceLen; 099 private Matrix r; 100 private Atom t; 101 protected int nTraces; 102 103 private double z; 104 private int[] traceIndexContainer; 105 106 protected CeParameters params; 107 // SHOULD these fields be PARAMETERS? 108 109 protected static final int nIter = 1; 110 private static final boolean distAll = false; 111 112 List<MatrixListener> matrixListeners; 113 114 115 116 public CECalculator(CeParameters params){ 117 timeStart = System.currentTimeMillis(); 118 dist1= new double[0][0]; 119 dist2= new double[0][0]; 120 this.params = params; 121 matrixListeners = new ArrayList<>(); 122 123 } 124 125 /** 126 * 127 * @param afpChain A new AFPChain, which will be filled in by this function 128 * @param ca1 129 * @param ca2 130 * @return afpChain 131 * @throws StructureException 132 */ 133 public AFPChain extractFragments(AFPChain afpChain, 134 Atom[] ca1, Atom[] ca2) throws StructureException{ 135 136 int nse1 = ca1.length; 137 int nse2 = ca2.length; 138 139 afpChain.setCa1Length(nse1); 140 afpChain.setCa2Length(nse2); 141 142 int traceMaxSize=nse1<nse2?nse1:nse2; 143 144 f1 = new int[nse1]; 145 f2 = new int[nse2]; 146 147 dist1 = initIntraDistmatrix(ca1, nse1); 148 dist2 = initIntraDistmatrix(ca2, nse2); 149 150 151 if ( debug ) 152 System.out.println("parameters: " + params); 153 154 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 155 if ( params.getSeqWeight() < 1) 156 params.setSeqWeight(2); 157 } 158 159 int winSize = params.getWinSize(); 160 161 int winSizeComb1 = (winSize-1)*(winSize-2)/2; 162 163 traceIndexContainer = new int[traceMaxSize]; 164 165 // CE: unused code. distAll is always false and both loops do the same??? 166 // CE v2.3 calls this Weight factors for trace extension 167 if(distAll ) { 168 for(int i=0; i<traceMaxSize; i++) 169 traceIndexContainer[i]=(i+1)*i*winSize*winSize/2+(i+1)*winSizeComb1; 170 } else { 171 for(int i=0; i<traceMaxSize; i++) { 172 traceIndexContainer[i]=(i+1)*i*winSize/2+(i+1)*winSizeComb1; 173 174 175 } 176 } 177 178 // verified: a[] is set correctly. 179 180 mat = initSumOfDistances(nse1, nse2, winSize, winSizeComb1, ca1, ca2); 181 182 183 184// try { 185// Matrix m2 = new Matrix(mat).copy(); 186// JPanel panel = GuiWrapper.getScaleableMatrixPanel(m2); 187// JFrame frame = new JFrame(); 188// frame.addWindowListener(new WindowAdapter(){ 189// public void windowClosing(WindowEvent e){ 190// JFrame f = (JFrame) e.getSource(); 191// f.setVisible(false); 192// f.dispose(); 193// } 194// }); 195// 196// 197// frame.getContentPane().add(panel); 198// 199// frame.pack(); 200// frame.setVisible(true); 201// } catch (Exception e) { 202// e.printStackTrace(); 203// } 204 205 206 // Set the distance matrix 207 //afpChain.setDistanceMatrix(new Matrix(mat.clone())); 208 209 210 // 211 // double rmsdThr = params.getRmsdThr(); 212 // StringBuffer buf = new StringBuffer(" "); 213 // for(int i=0; i<nse2; i++) 214 // buf.append(String.format("%c", i%10==0?(i%100)/10+48:32)); 215 // buf.append("\n"); 216 // for(int i=0; i<nse1; i++) { 217 // buf.append(String.format("%c ", i%10==0?(i%100)/10+48:32)); 218 // for(int j=0; j<nse2; j++) 219 // buf.append(String.format("%c", (mat[i][j])<rmsdThr?'+':'X')); 220 // //printf("%c", ((int)*(mat[i]+j)/40)>9?'*':((int)*(mat[i]+j)/40)+48); 221 // buf.append("\n"); 222 // } 223 // buf.append("\n"); 224 // 225 // System.out.println(buf.toString()); 226 // 227 228 229 return afpChain; 230 } 231 232 /** 233 * Evaluates the distance between two atoms 234 * Several scoring functions are implemented and can be changed by calling 235 * {@link CeParameters#setScoringStrategy(Integer) setScoringStrategy()} 236 * on {@link CeParameters parameter} object this CECalculator was created with. 237 * <p> 238 * Scoring Strategies:<dl> 239 * <dt>DEFAULT_SCORING_STRATEGY</dt> 240 * <dd>Strategy of the original CE publication; CA-CA distance</dd> 241 * 242 * <dt>SIDE_CHAIN_SCORING</dt> 243 * <dd>CB-CB distance. This performs better for sheets and helices than CA.</dd> 244 * 245 * <dt>SIDE_CHAIN_ANGLE_SCORING</dt> 246 * <dd>Use the dot product (eg the cosine) of the two CA-CB vectors.</dd> 247 * 248 * <dt>CA_AND_SIDE_CHAIN_ANGLE_SCORING</dt> 249 * <dd>Equivalent to DEFAULT_SCORING_STRATEGY + SIDE_CHAIN_ANGLE_SCORING</dd> 250 * </dl> 251 * 252 * <dt>SEQUENCE_CONSERVATION</dt> 253 * <dd>A mix between the DEFAULT_SCORING_STRATEGY and a scoring function that favors the alignment of sequence conserved positions in the alignment</dd> 254 * </dl> 255 * 256 * 257 * 258 * @param ca1 The CA of the first residue 259 * @param ca2 The CA of the second residue 260 * @return The distance between the two fragments, according to the selected 261 * scoring strategy. Lower distances are better alignments. 262 * @throws StructureException 263 */ 264 private double getDistanceWithSidechain(Atom ca1, Atom ca2) throws StructureException { 265 266 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.CA_SCORING) { 267 268 return Calc.getDistance(ca1,ca2); 269 270 } 271 272 double dist; 273 Group g1 = ca1.getGroup(); 274 Atom cb1 = null; 275 if ( g1.hasAtom(StructureTools.CB_ATOM_NAME)) { 276 cb1 = g1.getAtom(StructureTools.CB_ATOM_NAME); 277 } 278 // 279 Group g2 = ca2.getGroup(); 280 Atom cb2 = null; 281 if ( g2.hasAtom(StructureTools.CB_ATOM_NAME)) { 282 cb2 = g2.getAtom(StructureTools.CB_ATOM_NAME); 283 } 284 285 286 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SIDE_CHAIN_SCORING) { 287 288 289 // here we are using side chain orientation for scoring... 290 291 292 // score type 1 consider side chain distances 293 if ( cb1 != null && cb2 != null) { 294 // CB distance 295 dist = Calc.getDistance(cb1,cb2); 296 //dist = dist / 2.; 297 } else { 298 dist = Calc.getDistance(ca1,ca2); 299 } 300 301 return dist; 302 } 303 304 else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SIDE_CHAIN_ANGLE_SCORING){ 305 306 // score type 2 add angle info 307 308 309 if ( cb1 != null && cb2 != null) { 310 // If the CA were overlaid, what is the distance between the CB? 311 // Recall c^2 = a^2 + b^2 -2ab*cos(theta), so this is a function of angle 312 Atom c1 = Calc.subtract(cb1, ca1); 313 Atom c2 = Calc.subtract(cb2, ca2); 314 Atom newA = Calc.subtract(c2, c1); 315 dist = Calc.amount(newA); 316 } else { 317 //dist += Calc.getDistance(ca1,ca2); 318 dist = 0; 319 } 320 321 return dist; 322 323 } 324 else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.CA_AND_SIDE_CHAIN_ANGLE_SCORING){ 325 326 // score type 3 327 // CA distance + cos(angle) 328 dist = 0; 329 if ( cb1 != null && cb2 != null) { 330 Atom cacb1 = Calc.subtract(cb1, ca1); 331 Atom cacb2 = Calc.subtract(cb2, ca2); 332 Atom newA = Calc.subtract(cacb2, cacb1); 333 //System.out.format("CACB 1: %s\nCACB 2: %s\ndiff: %s\nd: %f\n",cacb1.toString(),cacb2.toString(),newA.toString(),Calc.amount(newA)); 334 dist += Calc.amount(newA); 335 } 336 dist += Calc.getDistance(ca1,ca2); 337 338 return dist; 339 340 } else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 341 if ( cb1 != null && cb2 != null) { 342 // CB distance 343 dist = Calc.getDistance(cb1,cb2); 344 //dist = dist / 2.; 345 } else { 346 dist = Calc.getDistance(ca1,ca2); 347 } 348 return dist; 349 350 351 } 352 else { 353 // unsupported scoring scheme 354 return Calc.getDistance(ca1,ca2); 355 } 356 } 357 358 /** build up intramolecular distance matrix dist1 & dist2 359 * 360 * @param ca 361 * @param nse 362 * @return 363 * @throws StructureException 364 */ 365 private double[][] initIntraDistmatrix(Atom[] ca, int nse) throws StructureException 366 { 367 368 369 double[][] intraDist = new double[nse][nse]; 370 371 // 372 for(int ise1=0; ise1<nse; ise1++) { 373 374 for(int ise2=0; ise2<nse; ise2++) { 375 intraDist[ise1][ise2] = getDistanceWithSidechain(ca[ise1], ca[ise2]); 376 377 } 378 } 379 return intraDist; 380 } 381 382 383 public double[][] initSumOfDistances(int nse1, int nse2, int winSize, int winSizeComb1, Atom[] ca1, Atom[] ca2) { 384 385 double d; 386 387 double[][] mat = new double[nse1][nse2]; 388 389 // init the initial mat[] array. 390 // at this stage mat contains the sum of the distances of fragments of the matrices dist1, dist 391 for(int ise1=0; ise1<nse1; ise1++) { 392 for(int ise2=0; ise2<nse2; ise2++) { 393 394 mat[ise1][ise2]=-1.0; 395 396 if(ise1>nse1-winSize || ise2>nse2-winSize) continue; 397 398 d=0.0; 399 // this sums up over the distances of the fragments 400 for(int is1=0; is1<winSize-2; is1++) 401 for(int is2=is1+2; is2<winSize; is2++) { 402 //System.out.println("pos1 :" + (ise1+is1) + " " + (ise1+is2) + " " + (ise2+is1) + " " + (ise2+is2)); 403 // is this abs or floor? check! 404 d+=Math.abs(dist1[ise1+is1][ise1+is2]-dist2[ise2+is1][ise2+is2]); 405 } 406 mat[ise1][ise2]=d/winSizeComb1; 407 408 //System.out.println("mat ["+ise1+"]["+ise2+"]="+mat[ise1][ise2]); 409 } 410 411 } 412 413 // verified: mat[][] probably ok. 414 415 return mat; 416 } 417 418 419 420 421 422 423 @SuppressWarnings("unused") 424 public void traceFragmentMatrix( AFPChain afpChain, 425 Atom[] ca1, Atom[] ca2) { 426 427 double rmsdThr = params.getRmsdThr(); 428 429 430 double oldBestTraceScore=10000.0; 431 bestTraceScore = 100.0; 432 nBestTrace=0; 433 int nBestTrace0 = 0; 434 int winSize = params.getWinSize(); 435 int winSizeComb1=(winSize-1)*(winSize-2)/2; 436 boolean distAll = false; 437 438 int winSizeComb2=distAll?winSize*winSize:winSize; 439 double rmsdThrJoin = params.getRmsdThrJoin(); 440 441 double z0; 442 443 //double bestTraceZScore=-1.0; 444 445 int nse1 = ca1.length; 446 int nse2 = ca2.length; 447 448 //System.out.println("nse1 :" +nse1 + " nse2: " + nse2); 449 450 int traceMaxSize=nse1<nse2?nse1:nse2; 451 452 bestTrace1 = new int [traceMaxSize]; 453 bestTrace2 = new int [traceMaxSize]; 454 trace1 = new int [traceMaxSize]; 455 trace2 = new int [traceMaxSize]; 456 457 int[] traceIndex = new int [traceMaxSize]; 458 int[] traceIterLevel = new int [traceMaxSize]; 459 460 int ise11; 461 int ise12; 462 int ise21; 463 int ise22; 464 465 int ise1; 466 int ise2; 467 468 int gapMax=params.getMaxGapSize(); 469 470 int iterDepth; 471 if ( gapMax > 0){ 472 iterDepth =gapMax*2+1; 473 } else { 474 iterDepth = traceMaxSize; 475 } 476 double[][] traceScore = new double[traceMaxSize][iterDepth]; 477 478 nTraces =0; 479 long tracesLimit=(long)5e7; 480 double score =-1; 481 double score0 = -1; 482 double score1 = -1 ; 483 double score2 = -1; 484 485 int mse1; 486 int mse2; 487 int jgap; 488 int jdir; 489 int jse1=0; 490 int jse2=0; 491 492 int bestTracesMax=30; 493 bestTraces1 = new int[bestTracesMax][traceMaxSize]; 494 bestTraces2 = new int[bestTracesMax][ traceMaxSize]; 495 bestTracesN=new int [bestTracesMax]; 496 bestTracesScores = new double [bestTracesMax]; 497 for(int it=0; it<bestTracesMax; it++) { 498 bestTracesN[it]=0; 499 bestTracesScores[it]=100; 500 } 501 502 nBestTraces=0; 503 int newBestTrace=0; 504 505 double traceTotalScore=0; 506 double traceScoreMax =0; 507 double userRMSDMax = params.getMaxOptRMSD(); 508 int kse1; 509 int kse2; 510 511 iterLoop: 512 for(int iter=0; iter<nIter; iter++) { 513 514 if(iter>2) { 515 if(oldBestTraceScore<=bestTraceScore) break; 516 } 517 oldBestTraceScore=bestTraceScore; 518 519 if(iter==1) { 520 z0=zStrAlign(winSize, nBestTrace, bestTraceScore, 521 bestTrace1[nBestTrace]+winSize-bestTrace1[0]+ 522 bestTrace2[nBestTrace]+winSize-bestTrace2[0]- 523 nBestTrace*2*winSize); 524 if(z0<zThr) break; 525 nBestTrace0=nBestTrace; 526 nBestTrace=0; 527 bestTraceScore=100.0; 528 529 nTraces=0; 530 } 531 532 533 if(iter==0) { 534 ise11=0; ise12=nse1; 535 ise21=0; ise22=nse2; 536 537 } 538 else { 539 if(iter==1) { 540 ise11=bestTrace1[0]; ise12=bestTrace1[0]+1; 541 ise21=bestTrace2[0]; ise22=bestTrace2[0]+1; 542 } 543 else { 544 ise11=bestTrace1[0]-1; ise12=bestTrace1[0]+2; 545 ise21=bestTrace2[0]-1; ise22=bestTrace2[0]+2; 546 } 547 if(ise11<0) ise11=0; 548 if(ise12>nse1) ise12=nse1; 549 if(ise21<0) ise21=0; 550 if(ise22>nse2) ise22=nse2; 551 } 552 553 //System.out.println("ise1Loop: " + ise11 + " " + ise12 + " " + ise21 + " " + ise22); 554 ise1Loop: 555 for(int ise1_=ise11; ise1_<ise12; ise1_++) { 556 ise2Loop: 557 for(int ise2_=ise21; ise2_<ise22; ise2_++) { 558 559 ise1=ise1_; 560 ise2=ise2_; 561 if(iter>1 && ise1==ise11+1 && ise2==ise21+1) continue ise2Loop; 562 563 //if(ise2==ise21) System.out.println(String.format("(%d, %d)",ise1, nTraces)); 564 565 566 if(iter==0 && (ise1>nse1-winSize*(nBestTrace-1) || 567 ise2>nse2-winSize*(nBestTrace-1))) continue ise2Loop; 568 569 if(mat[ise1][ise2]<0.0) continue ise2Loop; 570 if(mat[ise1][ise2]>rmsdThr) continue ise2Loop; 571 if (mat[ise1][ise2]>userRMSDMax) continue ise2Loop; 572 nTrace=0; 573 trace1[nTrace]=ise1; 574 trace2[nTrace]=ise2; 575 traceIndex[nTrace]=0; 576 traceIterLevel[nTrace]=0; 577 578 score0=mat[ise1][ise2]; 579 580 581 nTrace++; 582 boolean isTraceUp=true; 583 int traceIndex_=0; 584 585 traceLoop: 586 while(nTrace>0) { 587 588 kse1=trace1[nTrace-1]+winSize; 589 kse2=trace2[nTrace-1]+winSize; 590 591 //System.out.println("isTraceUp " + isTraceUp + " " + nTrace + " " + kse1 + " " + kse2); 592 593 while(true) { 594 if(kse1>nse1-winSize-1) break; 595 if(kse2>nse2-winSize-1) break; 596 if(mat[kse1][kse2]>=0.0) break; 597 kse1++; 598 kse2++; 599 } 600 601 602 traceIndex_=-1; 603 604 if(isTraceUp) { 605 606 int nBestExtTrace=nTrace; 607 double bestExtScore=100.0; 608 609 610 // extension of the alignment path 611 // condition 4, 5 612 itLoop: 613 for(int it=0; it<iterDepth; it++) { 614 615 jgap=(it+1)/2; 616 jdir=(it+1)%2; 617 618 if(jdir==0) { 619 mse1=kse1+jgap; 620 mse2=kse2; 621 } 622 else { 623 mse1=kse1; 624 mse2=kse2+jgap; 625 } 626 627 if(mse1>nse1-winSize-1) continue itLoop; 628 if(mse2>nse2-winSize-1) continue itLoop; 629 630 if(mat[mse1][mse2]<0.0) continue itLoop; 631 if(mat[mse1][mse2]>rmsdThr) continue itLoop; 632 if(mat[mse1][mse2]>userRMSDMax) continue itLoop; 633 634 nTraces++; 635 if(nTraces>tracesLimit) { 636 637 return; 638 } 639 640 score=0.0; 641 642 // if(!distAll) { 643 //System.out.println("getting score " + mse1 + " " + mse2 + " " + winSize + " " + jgap + " " + jdir + " " + it + " " + kse1 + " " + kse2); 644 score = getScoreFromDistanceMatrices(mse1,mse2,winSize); 645 //System.out.println("got score: " + score); 646 score1=score/(nTrace*winSize); 647 648 // } else { 649 // // all dist 650 // for(int itrace=0; itrace<nTrace; itrace++) { 651 // for(int is1=0; is1<winSize; is1++) 652 // for(int is2=0; is2<winSize; is2++) 653 // score+=Math.abs(dist1[trace1[itrace]+is1][mse1+is2]- 654 // dist2[trace2[itrace]+is1][mse2+is2]); 655 // } 656 // score1=score/(nTrace*winSize*winSize); 657 // } 658 659 660 //System.out.println("up: " + nTrace + " " + score + " " + score0 + " " + score1 + " " + winSize + " " + traceIndex_ + " " + it + " "); 661 if(score1>rmsdThrJoin) 662 continue itLoop; 663 if(score1>userRMSDMax) 664 continue itLoop; 665 score2=score1; 666 667 // this just got checked, no need to check again.. 668 //if(score2>rmsdThrJoin) 669 // continue itLoop; 670 671 if(nTrace>nBestExtTrace || (nTrace==nBestExtTrace && 672 score2<bestExtScore)) { 673 //System.out.println("setting traceindex to " + it + " " + score2); 674 bestExtScore=score2; 675 nBestExtTrace=nTrace; 676 traceIndex_=it; 677 traceScore[nTrace-1][traceIndex_]=score1; 678 } 679 680 } 681 } 682 683 if(traceIndex_!=-1) { 684 jgap=(traceIndex_+1)/2; 685 jdir=(traceIndex_+1)%2; 686 if(jdir==0) { 687 jse1=kse1+jgap; 688 jse2=kse2; 689 } 690 else { 691 jse1=kse1; 692 jse2=kse2+jgap; 693 } 694 695 if(iter==0){ 696 697 score1=(traceScore[nTrace-1][traceIndex_]*winSizeComb2*nTrace+ 698 mat[jse1][jse2]*winSizeComb1)/(winSizeComb2*nTrace+ 699 winSizeComb1); 700 701 score2 = getScore2(jse1, jse2, traceScore, traceIndex_, traceIndex, winSizeComb1, winSizeComb2, score0, score1); 702 703 if(score2>rmsdThrJoin) 704 traceIndex_=-1; 705 else if ( score2 > userRMSDMax) 706 traceIndex_=-1; 707 else { 708 traceScore[nTrace-1][traceIndex_]=score2; 709 710 traceTotalScore=score2; 711 } 712 713 } 714 else { 715 if(traceScoreMax>rmsdThrJoin && nBestTrace>=nBestTrace0) 716 traceIndex_=-1; 717 traceTotalScore=traceScoreMax; 718 } 719 } 720 721 //System.out.println("middle: " + nTrace + " " + score + " " + score0 + " " + score1 + " " + score2 + " " + traceIndex_); 722 723 if(traceIndex_==-1) { 724 //System.out.println("continue traceLoop " + nTrace); 725 //if(iterLevel==1) break; 726 nTrace--; 727 isTraceUp=false; 728 continue traceLoop; 729 } 730 else { 731 traceIterLevel[nTrace-1]++; 732 trace1[nTrace]=jse1; 733 trace2[nTrace]=jse2; 734 traceIndex[nTrace]=traceIndex_; 735 traceIterLevel[nTrace]=0; 736 nTrace++; 737 isTraceUp=true; 738 739 if(nTrace>nBestTrace || 740 (nTrace==nBestTrace && 741 bestTraceScore>traceTotalScore)) { 742 743 for(int itrace=0; itrace<nTrace; itrace++) { 744 bestTrace1[itrace]=trace1[itrace]; 745 bestTrace2[itrace]=trace2[itrace]; 746 } 747 bestTraceScore=traceTotalScore; 748 nBestTrace=nTrace; 749 } 750 751 if(iter==0) { 752 //System.out.println("doing iter0 " + newBestTrace + " " + traceTotalScore + " " + bestTracesMax); 753 newBestTrace = doIter0(newBestTrace,traceTotalScore, bestTracesMax); 754 755 756 } 757 } 758 } 759 } 760 } 761 762 if ( isPrint) { 763 System.out.println("fragment length: " + params.getWinSize()); 764 System.out.println("ntraces : " + nTraces ); 765 } 766 767 768 769 } 770 771// try { 772// Matrix m2 = new Matrix(traceScore).copy(); 773// JPanel panel = GuiWrapper.getScaleableMatrixPanel(m2); 774// JFrame frame = new JFrame(); 775// frame.addWindowListener(new WindowAdapter(){ 776// public void windowClosing(WindowEvent e){ 777// JFrame f = (JFrame) e.getSource(); 778// f.setVisible(false); 779// f.dispose(); 780// } 781// }); 782// 783// 784// frame.getContentPane().add(panel); 785// 786// frame.pack(); 787// frame.setVisible(true); 788// } catch (Exception e) { 789// e.printStackTrace(); 790// } 791 792 793 if ( params.isShowAFPRanges()){ 794 System.out.println("fragment length: " + params.getWinSize()); 795 System.out.println("ntraces : " + nTraces ); 796 797 } 798 799 } 800 801 protected double getScore2(int jse1, int jse2, double[][] traceScore, int traceIndex_,int[] traceIndex,int winSizeComb1, int winSizeComb2, double score0, double score1 ) { 802 803 804 805 /*double score2= 806 ((nTrace>1?traceScore[nTrace-2][traceIndex[nTrace-1]]:score0) 807 *a[nTrace-1]+score1*(a[nTrace]-a[nTrace-1]))/a[nTrace]; 808 */ 809 double val = 0; 810 if ( nTrace>1) 811 val =traceScore[nTrace-2][traceIndex[nTrace-1]]; 812 else 813 val = score0; 814 815 double score2 = (val * traceIndexContainer[nTrace-1]+score1*(traceIndexContainer[nTrace]-traceIndexContainer[nTrace-1]))/traceIndexContainer[nTrace]; 816 817 //System.out.println("check: score0 " + score0 + " score 1 " + score1 + " sc2: " + score2 + " val: " + val + " nTrace:" + nTrace+ " " + traceIndexContainer[nTrace-1] + " " + traceIndexContainer[nTrace-1] + " " + traceIndexContainer[nTrace] ); 818 819 return score2; 820 821 822 } 823 824 protected int doIter0(int newBestTrace, double traceTotalScore, double bestTracesMax) { 825 826 827 // only do the whole method if this criteria is fulfilled... 828 if(nTrace>bestTracesN[newBestTrace] || 829 (nTrace==bestTracesN[newBestTrace] && 830 bestTracesScores[newBestTrace]>traceTotalScore)) { 831 832 833 for(int itrace=0; itrace<nTrace; itrace++) { 834 bestTraces1[newBestTrace][itrace]=trace1[itrace]; 835 bestTraces2[newBestTrace][itrace]=trace2[itrace]; 836 bestTracesN[newBestTrace]=nTrace; 837 bestTracesScores[newBestTrace]=traceTotalScore; 838 //System.out.println("bestTracesScrores ["+newBestTrace+"]=" +traceTotalScore); 839 } 840 841 if(nTrace>nBestTrace) nBestTrace=nTrace; 842 843 if(nBestTraces<bestTracesMax) { 844 nBestTraces++; 845 newBestTrace++; 846 } 847 848 if(nBestTraces==bestTracesMax) { 849 //System.out.println("nBestTraces == bestTracesmax " + nBestTraces); 850 newBestTrace=0; 851 double scoreTmp=bestTracesScores[0]; 852 int nTraceTmp=bestTracesN[0]; 853 for(int ir=1; ir<nBestTraces; ir++) { 854 if(bestTracesN[ir]<nTraceTmp || 855 (bestTracesN[ir]==nTraceTmp && 856 scoreTmp<bestTracesScores[ir])) { 857 nTraceTmp=bestTracesN[ir]; 858 scoreTmp=bestTracesScores[ir]; 859 newBestTrace=ir; 860 //System.out.println("setting new bestTracesScore to " + ir + " " + scoreTmp); 861 } 862 } 863 } 864 } 865 866 //System.out.println("iter0 : " + newBestTrace + " " + bestTracesN[newBestTrace] + " " + traceTotalScore + " " + nTrace); 867 868 869 870 /* 871z=zStrAlign(winSize, nTrace, traceTotalScore, 872trace1[nTrace-1]-trace1[0]+trace2[nTrace-1]-trace2[0]- 8732*(nTrace-1)*winSize); 874if(z>bestTraceZScore) { 875for(int itrace=0; itrace<nTrace; itrace++) { 876bestTrace1[itrace]=trace1[itrace]; 877bestTrace2[itrace]=trace2[itrace]; 878} 879bestTraceZScore=z; 880bestTraceScore=*(traceScore[nTrace-2]+traceIndex_); 881nBestTrace=nTrace; 882} 883 */ 884 return newBestTrace; 885 886 } 887 888 889 protected double getScoreFromDistanceMatrices(int mse1, int mse2,int winSize) { 890 891 double score = 0; 892 // (winSize) "best" dist 893 894 // reduce sign. values to C code.. 6 digits.. 895 896 for(int itrace=0; itrace<nTrace; itrace++) { 897 score+= Math.abs(dist1[trace1[itrace]][mse1]- 898 dist2[trace2[itrace]][mse2]); 899 900 score+= Math.abs(dist1[trace1[itrace]+winSize-1] 901 [mse1+winSize-1]- 902 dist2[trace2[itrace]+winSize-1][mse2+winSize-1]); 903 904 for(int id=1; id<winSize-1; id++) 905 score+= Math.abs(dist1[trace1[itrace]+id][mse1+winSize-1-id]- 906 dist2[trace2[itrace]+id][mse2+winSize-1-id]); 907 908 } 909 910 return score; 911 } 912 913 public void nextStep( AFPChain afpChain, 914 Atom[] ca1, Atom[] ca2) throws StructureException{ 915 916 917 if(nBestTrace>0) { 918 checkBestTraces(afpChain,ca1,ca2); 919 } else { 920 noBestTrace(); 921 } 922 923 convertAfpChain(afpChain, ca1, ca2); 924 AFPAlignmentDisplay.getAlign(afpChain, ca1, ca2); 925 double tmScore = AFPChainScorer.getTMScore(afpChain, ca1, ca2,false); 926 afpChain.setTMScore(tmScore); 927 } 928 929 930 931 // this part is modified from the original CeCalculator 932 @SuppressWarnings("unused") 933 private void checkBestTraces( AFPChain afpChain, 934 Atom[] ca1, Atom[] ca2) throws StructureException{ 935 936 z=0.0; 937 938 939 int nGaps; 940 int winSize = params.getWinSize(); 941 int nse1 = ca1.length; 942 int nse2 = ca2.length; 943 int traceMaxSize=nse1<nse2?nse1:nse2; 944 int idir; 945 946 947 align_se1=new int [nse1+nse2]; 948 align_se2=new int [nse1+nse2]; 949 lcmp = 0; 950 951 // we now support alignment using any particular atoms.. 952 953 Atom[] strBuf1 = new Atom[traceMaxSize]; 954 Atom[] strBuf2 = new Atom[traceMaxSize]; 955 956 double rmsdNew; 957 958 959 960 // removing some loops that are run in orig CE 961 // and which did not do anything 962 if ( debug ){ 963 checkPrintRmsdNew(traceMaxSize, winSize, ca1, ca2); 964 } 965 966 double rmsd=100.0; 967 968 int iBestTrace=0; 969 970 for(int ir=0; ir<nBestTraces; ir++) { 971 if(bestTracesN[ir]!=nBestTrace) continue; 972 973 rmsdNew = getRMSDForBestTrace(ir, strBuf1, strBuf2, bestTracesN,bestTraces1, bestTrace2,winSize,ca1,ca2); 974 if ( isPrint) 975 System.out.println(String.format("%d %d %d %.2f", ir, bestTracesN[ir], nBestTrace, rmsdNew)); 976 977 if(rmsd>rmsdNew) { 978 iBestTrace=ir; 979 rmsd=rmsdNew; 980 //System.out.println(" iBestTrace:" + iBestTrace + " new rmsd = " + rmsd); 981 } 982 } 983 for(int it=0; it<bestTracesN[iBestTrace]; it++) { 984 bestTrace1[it]=bestTraces1[iBestTrace][it]; 985 bestTrace2[it]=bestTraces2[iBestTrace][it]; 986 } 987 988 //System.out.println("iBestTrace: "+iBestTrace+" = bestTracesScores " + bestTracesScores[iBestTrace]); 989 990 nBestTrace=bestTracesN[iBestTrace]; 991 992 bestTraceScore=bestTracesScores[iBestTrace]; 993 994 995 //printf("\nOptimizing gaps...\n"); 996 997 int[] traceLen=new int [traceMaxSize]; 998 bestTraceLen=new int [traceMaxSize]; 999 1000 1001 int strLen=0; 1002 1003 int jt; 1004 strLen=0; 1005 nGaps=0; 1006 nTrace=nBestTrace; 1007 1008 for(jt=0; jt<nBestTrace; jt++) { 1009 trace1[jt]=bestTrace1[jt]; 1010 trace2[jt]=bestTrace2[jt]; 1011 traceLen[jt]=winSize; 1012 1013 if(jt<nBestTrace-1) { 1014 nGaps+=bestTrace1[jt+1]-bestTrace1[jt]-winSize+ 1015 bestTrace2[jt+1]-bestTrace2[jt]-winSize; 1016 } 1017 } 1018 nBestTrace=0; 1019 for(int it=0; it<nTrace; ) { 1020 int cSize=traceLen[it]; 1021 for(jt=it+1; jt<nTrace; jt++) { 1022 if(trace1[jt]-trace1[jt-1]-traceLen[jt-1]!=0 || 1023 trace2[jt]-trace2[jt-1]-traceLen[jt-1]!=0) break; 1024 cSize+=traceLen[jt]; 1025 } 1026 bestTrace1[nBestTrace]=trace1[it]; 1027 bestTrace2[nBestTrace]=trace2[it]; 1028 bestTraceLen[nBestTrace]=cSize; 1029 nBestTrace++; 1030 strLen+=cSize; 1031 it=jt; 1032 } 1033 1034 1035 int is=0; 1036 for(jt=0; jt<nBestTrace; jt++) { 1037 for(int i=0; i<bestTraceLen[jt]; i++) { 1038 setStrBuf(strBuf1,is+i,ca1,bestTrace1[jt]+i ); 1039 setStrBuf(strBuf2,is+i,ca2,bestTrace2[jt]+i); 1040 } 1041 is+=bestTraceLen[jt]; 1042 } 1043 //sup_str(strBuf1, strBuf2, strLen, d_); 1044 1045 rmsd=calc_rmsd(strBuf1, strBuf2, strLen,true); 1046 1047 if ( isPrint) 1048 System.out.println("got first rmsd: " + rmsd); 1049 boolean isCopied=false; 1050 1051 outer_loop: 1052 for(int it=1; it<nBestTrace; it++) { 1053 1054 /* not needed... 1055 int igap; 1056 if(bestTrace1[it]-bestTrace1[it-1]-bestTraceLen[it-1]>0) igap=0; 1057 if(bestTrace2[it]-bestTrace2[it-1]-bestTraceLen[it-1]>0) igap=1; 1058 */ 1059 1060 1061 boolean wasBest=false; 1062 main_loop: 1063 for(idir=-1; idir<=1; idir+=2) { 1064 if(wasBest) break; 1065 1066 inner_loop: 1067 for(int idep=1; idep<=winSize/2; idep++) { 1068 1069 // isCopied indicates that bestTrace has changed and needs to be re-copied 1070 if(!isCopied) 1071 for(jt=0; jt<nBestTrace; jt++) { 1072 trace1[jt]=bestTrace1[jt]; 1073 trace2[jt]=bestTrace2[jt]; 1074 traceLen[jt]=bestTraceLen[jt]; 1075 } 1076 isCopied=false; 1077 1078 // Move an atom from the previous trace to the current on, or vice versa 1079 traceLen[it-1]+=idir; 1080 traceLen[it]-=idir; 1081 trace1[it]+=idir; 1082 trace2[it]+=idir; 1083 1084 // Copy atoms from the current trace into strBuf 1085 is=0; 1086 for(jt=0; jt<nBestTrace; jt++) { 1087 for(int i=0; i<traceLen[jt]; i++) { 1088 if(ca1[trace1[jt]+i].getX()>1e10 || ca2[trace2[jt]+i].getX()>1e10) 1089 continue main_loop; 1090 strBuf1[is+i]=ca1[trace1[jt]+i]; 1091 strBuf2[is+i]=ca2[trace2[jt]+i]; 1092 } 1093 is+=traceLen[jt]; 1094 } 1095 // Check new RMSD 1096 //sup_str(strBuf1, strBuf2, strLen, d_); 1097 rmsdNew=calc_rmsd(strBuf1, strBuf2, strLen, true); 1098 //System.out.println(String.format("step %d %d %d %.2f old: %.2f", it, idir, idep, rmsdNew, rmsd)); 1099 1100 // Update best trace if RMSD improved 1101 if(rmsdNew<rmsd) { 1102 1103 for(jt=0; jt<nBestTrace; jt++) { 1104 bestTrace1[jt] = trace1[jt]; 1105 bestTrace2[jt] = trace2[jt]; 1106 bestTraceLen[jt]= traceLen[jt]; 1107 } 1108 isCopied=true; 1109 wasBest=true; 1110 rmsd=rmsdNew; 1111 continue inner_loop; 1112 } 1113 // AP 1114 //bad_ca: break; 1115 continue main_loop; 1116 } 1117 } 1118 } 1119 rmsdNew=calc_rmsd(strBuf1, strBuf2, strLen,true); 1120 if ( isPrint) 1121 System.out.println("rmsdNew: " + rmsdNew + " rmsd " + rmsd); 1122 afpChain.setTotalRmsdIni(rmsdNew); 1123 afpChain.setTotalLenIni(strBuf1.length); 1124 1125 1126 nAtom = strLen; 1127 1128 //System.out.println("zStrAlign: " + winSize + " strLen " + strLen + " s/w " + (strLen/winSize) + " " + bestTraceScore + " " + nGaps); 1129 z=zStrAlign(winSize, strLen/winSize, bestTraceScore, nGaps); 1130 1131 if(params.isShowAFPRanges()) { 1132 System.out.println("win size: " + winSize + " strLen/winSize: " + strLen/winSize + " best trace score: " + String.format("%.2f",bestTraceScore) + " nr gaps: " + nGaps + " nr residues: " + nAtom); 1133 1134 System.out.println(String.format("size=%d rmsd=%.2f z=%.1f gaps=%d(%.1f%%) comb=%d", 1135 nAtom, rmsd, z, nGaps, nGaps*100.0/nAtom, 1136 nTraces)); 1137 1138 System.out.println("Best Trace, before optimization"); 1139 for(int k=0; k<nBestTrace; k++) 1140 System.out.println(String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, 1141 bestTraceLen[k])); 1142 1143 } 1144 1145 // start to convert CE internal datastructure to generic AFPChain one... 1146 List<AFP> afpSet = new ArrayList<>(); 1147 for (int afp=0;afp<nBestTrace;afp++){ 1148 // fill in data from nBestTrace into AFP 1149 1150 AFP afpI = new AFP(); 1151 1152 afpI.setFragLen(bestTraceLen[afp]); 1153 afpI.setP1(bestTrace1[afp]+1); 1154 afpI.setP2(bestTrace2[afp]+1); 1155 1156 afpSet.add( afpI); 1157 } 1158 1159 afpChain.setAfpSet(afpSet); 1160 1161 //System.out.println("z:"+z + " zThr" + zThr+ " bestTraceScore " + bestTraceScore + " " + nGaps ); 1162 if(params.isOptimizeAlignment() && z>=zThr) { 1163 nGaps = optimizeSuperposition(afpChain,nse1, nse2, strLen, rmsd, ca1, ca2,nGaps,strBuf1,strBuf2); 1164 // if(isPrint) { 1165 // /* 1166 // FILE *f=fopen("homologies", "a"); 1167 // fprintf(f, "%s(%d) %s(%d) %3d %4.1f %4.1f %d(%d) ", 1168 // name1, nse1, name2, nse2, nAtom, rmsd, z, 1169 // nGaps, nGaps*100/nAtom); 1170 // for(int k=0; k<nBestTrace; k++) 1171 // fprintf(f, "(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, 1172 // bestTraceLen[k]); 1173 // fprintf(f, "\n"); 1174 // fclose(f); 1175 // */ 1176 // } 1177 } 1178 else { 1179 int lali_x_ = 0; 1180 for(int k=0; k<nBestTrace; k++) { 1181 for(int l=0; l<bestTraceLen[k]; l++) { 1182 align_se1[lcmp+l]=bestTrace1[k]+l; 1183 align_se2[lcmp+l]=bestTrace2[k]+l; 1184 } 1185 lali_x_+=bestTraceLen[k]; 1186 lcmp+=bestTraceLen[k]; 1187 if(k<nBestTrace-1) { 1188 if(bestTrace1[k]+bestTraceLen[k]!=bestTrace1[k+1]) 1189 for(int l=bestTrace1[k]+bestTraceLen[k]; l<bestTrace1[k+1]; l++) { 1190 align_se1[lcmp]=l; 1191 align_se2[lcmp]=-1; 1192 lcmp++; 1193 } 1194 if(bestTrace2[k]+bestTraceLen[k]!=bestTrace2[k+1]) 1195 for(int l=bestTrace2[k]+bestTraceLen[k]; l<bestTrace2[k+1]; l++) { 1196 align_se1[lcmp]=-1; 1197 align_se2[lcmp]=l; 1198 lcmp++; 1199 } 1200 } 1201 } 1202 nAtom=lali_x_; 1203 afpChain.setTotalRmsdOpt(afpChain.getTotalRmsdIni()); 1204 } 1205 1206 timeEnd = System.currentTimeMillis(); 1207 long time_q=(timeEnd-timeStart); 1208 1209 double gapsP = ( nGaps*100.0/nAtom) ; 1210 if(isPrint) { 1211 String msg = String.format("Alignment length = %d Rmsd = %.2fA Z-Score = %.1f Gaps = %d(%.1f%%)",nAtom,rmsd,z,nGaps, gapsP); 1212 System.out.println(msg + " CPU = " + time_q); 1213 } 1214 1215 // if ( params.isShowAFPRanges()){ 1216 1217 // this is actually the final alignment... 1218 // System.out.println("Best Trace: (index1,index2,len)"); 1219 // for(int k=0; k<nBestTrace; k++) 1220 // System.out.println( 1221 // String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, bestTraceLen[k])); 1222 // 1223 // 1224 // 1225 // } 1226 1227 afpChain.setCalculationTime(time_q); 1228 afpChain.setGapLen(nGaps); 1229 1230 int[] optLen = new int[]{nAtom}; 1231 afpChain.setOptLen(optLen); 1232 afpChain.setOptLength(nAtom); 1233 afpChain.setAlnLength(lcmp); 1234 1235 afpChain.setProbability(z); 1236 1237 1238 } 1239 1240 /** set the Atoms for a particular residue position. 1241 * Requires that atom.getParent returns the correct group! 1242 * take care during cloning of atoms. Best to use StructureTools.cloneCaAtoms(); 1243 * 1244 * @param strBuf 1245 * @param i 1246 * @param ca 1247 * @param j 1248 */ 1249 private void setStrBuf(Atom[] strBuf, int i, Atom[] ca, int j) { 1250 // TODO Auto-generated method stub 1251 //TODO 1252 Group parent = ca[j].getGroup(); 1253 int pos = 0; 1254 String atomName = ca[j].getName(); 1255 1256 Atom a = null; 1257 1258 a= parent.getAtom(atomName); 1259 if ( a != null){ 1260 strBuf[i]=a; 1261 } else { 1262 // probably a GLY and no CB was found... 1263 //e.printStackTrace(); 1264 } 1265 strBuf[i+pos] = a; 1266 pos++; 1267 1268 1269 1270 } 1271 1272 // TODO: consider all requested Atoms? 1273 private double getRMSDForBestTrace(int ir, Atom[] strBuf1, Atom[] strBuf2, 1274 int[] bestTracesN2, int[][] bestTraces12, int[] bestTrace22, 1275 int winSize,Atom[] ca1, Atom[] ca2 ) throws StructureException { 1276 int is=0; 1277 for(int jt=0; jt<bestTracesN[ir]; jt++) { 1278 for(int i=0; i<winSize; i++) { 1279 1280 setStrBuf(strBuf1, is+i, ca1, bestTraces1[ir][jt]+i); 1281 setStrBuf(strBuf2, is+i, ca2, bestTraces2[ir][jt]+i); 1282 } 1283 is+=winSize; 1284 } 1285 //sup_str(strBuf1, strBuf2, bestTracesN[ir]*winSize, d_); 1286 double rmsdNew=calc_rmsd(strBuf1, strBuf2, bestTracesN[ir]*winSize, true); 1287 return rmsdNew; 1288 1289 } 1290 1291 1292 1293 1294 1295 1296 /** calc initial RMSD for bestTrace1 in debug only 1297 * 1298 */ 1299 private void checkPrintRmsdNew(int traceMaxSize, int winSize, Atom[] ca1, Atom[] ca2) throws StructureException{ 1300 1301 int is = 0; 1302 // calc initial RMSD for bestTrace1 1303 Atom[] strBuf1 = new Atom[traceMaxSize]; 1304 Atom[] strBuf2 = new Atom[traceMaxSize]; 1305 for(int jt=0; jt<nBestTrace; jt++) { 1306 for(int i=0; i<winSize; i++) { 1307 setStrBuf(strBuf1, is+i, ca1, bestTrace1[jt]+i ); 1308 setStrBuf(strBuf2, is+i, ca2, bestTrace2[jt]+i ); 1309 } 1310 is+=winSize; 1311 } 1312 1313 //sup_str(strBuf1, strBuf2, nBestTrace*winSize, d_); 1314 double rmsdNew=calc_rmsd(strBuf1, strBuf2, nBestTrace*winSize, true); 1315 //afpChain.setTotalRmsdIni(rmsdNew); 1316 1317 1318 if ( isPrint){ 1319 System.out.println("rmsdNew after trace: " +rmsdNew); 1320 1321 for(int k=0; k<nBestTrace; k++) 1322 System.out.println(String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1,8)); 1323 } 1324 1325 if ( isPrint){ 1326 System.out.println("best traces: " + nBestTraces); 1327 } 1328 1329 1330 } 1331 1332 1333 1334 1335 1336 1337 private static char getOneLetter(Group g){ 1338 1339 if (g==null) return StructureTools.UNKNOWN_GROUP_LABEL; 1340 1341 return StructureTools.get1LetterCode(g.getPDBName()); 1342 } 1343 1344 1345 1346 private int optimizeSuperposition(AFPChain afpChain, int nse1, int nse2, int strLen, double rmsd, Atom[] ca1, Atom[] ca2,int nGaps, 1347 Atom[] strBuf1, Atom[] strBuf2 ) throws StructureException { 1348 1349 //System.out.println("optimizing Superimposition..."); 1350 1351 //nAtom=strLen; 1352 // optimization on superposition 1353 Atom[] ca3=new Atom[nse2]; 1354 1355 1356 double rmsdLen = 0.0; 1357 1358 // this flag tests if the RMSDLen has been assigned. 1359 // this is to enforce that the alignment ends up 1360 // smaller than 95% of the original alignment. 1361 // +/- 1362 boolean isRmsdLenAssigned=false; 1363 int nAtomPrev=-1; 1364 1365 double oRmsdThr = params.getORmsdThr(); 1366 1367 double distanceIncrement = params.getDistanceIncrement(); 1368 double maxUserRMSD = params.getMaxOptRMSD(); 1369 nAtom=0; 1370 int counter = -1; 1371 1372 int maxNrIterations = params.getMaxNrIterationsForOptimization(); 1373 while((nAtom<strLen*0.95 || 1374 (isRmsdLenAssigned && rmsd<rmsdLen*1.1 && nAtomPrev!=nAtom)) && ( counter< maxNrIterations)) { 1375 1376 counter++; 1377 if ( debug) 1378 System.out.println("nAtom: " + nAtom + " " + nAtomPrev + " " + rmsdLen + " " + isRmsdLenAssigned + " strLen:" + strLen); 1379 nAtomPrev=nAtom; 1380 oRmsdThr += distanceIncrement; 1381 1382 rot_mol(ca2, ca3, nse2, r,t); 1383 1384 for(int ise1=0; ise1<nse1; ise1++) { 1385 for(int ise2=0; ise2<nse2; ise2++) { 1386 1387 //mat[ise1][ise2]=-0.001; 1388 1389 // this needs to be a parameter... 1390 1391 1392 double dist = getDistanceWithSidechain(ca1[ise1], ca3[ise2]); 1393 mat[ise1][ise2] = oRmsdThr - dist; 1394 1395 //double distold = Calc.getDistance(ca1[ise1],ca3[ise2]); 1396 //double scoreOld = oRmsdThr - distold ; 1397 //mat[ise1][ise2] = scoreOld; 1398 //mat[ise1][ise2] = oRmsdThr - Calc.getDistance(ca1[ise1],ca3[ise2]); 1399 1400 //if ( counter == 0 && ise1 == ise2) { 1401 1402 // System.out.println("mat[" + ise1 + "][" + ise2 + "] " + mat[ise1][ise2] + " scoreOld:" + scoreOld + " oRmsdThr: " + oRmsdThr +" dist: " + dist + " distold:" + distold ); 1403 // } 1404 1405 1406 } 1407 } 1408 1409 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 1410 mat = updateMatrixWithSequenceConservation(mat,ca1,ca2, params); 1411 } 1412 1413 mat = notifyMatrixListener(mat); 1414 1415 double gapOpen = params.getGapOpen(); 1416 double gapExtension = params.getGapExtension(); 1417 1418 double score = dpAlign( nse1, nse2, gapOpen , gapExtension , false, false); 1419 1420 if (debug) 1421 System.out.println("iter: "+ counter + " score:" + score + " " + " nAtomPrev: " + nAtomPrev + " nAtom:" + nAtom + " oRmsdThr: " + oRmsdThr); 1422 1423 afpChain.setAlignScore(score); 1424 1425 1426 nAtom=0; nGaps=0; 1427 for(int ia=0; ia<lcmp; ia++) { 1428 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1429 1430 strBuf1[nAtom]=ca1[align_se1[ia]]; 1431 strBuf2[nAtom]=ca2[align_se2[ia]]; 1432 1433 nAtom++; 1434 1435 } 1436 else { 1437 nGaps++; 1438 } 1439 } 1440 1441 1442 1443 if(nAtom<4) continue; 1444 1445 //sup_str(strBuf1, strBuf2, nAtom, _d); 1446 // here we don't store the rotation matrix for the user! 1447 rmsd= calc_rmsd(strBuf1, strBuf2, nAtom,false); 1448 if ( isPrint ) 1449 System.out.println("iter: " + counter + " nAtom " + nAtom + " rmsd: " + rmsd); 1450 //afpChain.setTotalRmsdOpt(rmsd); 1451 //System.out.println("rmsd: " + rmsd); 1452 1453 if(!(nAtom<strLen*0.95) && (!isRmsdLenAssigned)) { 1454 rmsdLen=rmsd; 1455 isRmsdLenAssigned=true; 1456 } 1457 //System.out.println(String.format("nAtom %d %d rmsd %.1f", nAtom, nAtomPrev, rmsd)); 1458 1459 1460 afpChain.setBlockRmsd(new double[]{rmsd}); 1461 afpChain.setOptRmsd(new double[]{rmsd}); 1462 afpChain.setTotalRmsdOpt(rmsd); 1463 afpChain.setChainRmsd(rmsd); 1464 1465 if ( rmsd >= maxUserRMSD) { 1466 break; 1467 } 1468 1469 } 1470 1471 1472 1473 //System.out.println("done optimizing"); 1474 /* 1475 nAtom=0; nGaps=0; 1476 for(int ia=0; ia<lcmp; ia++) 1477 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1478 if(ca1[align_se1[ia]].X<1e10 && ca2[align_se2[ia]].X<1e10) { 1479 strBuf1[nAtom]=ca1[align_se1[ia]]; 1480 strBuf2[nAtom]=ca2[align_se2[ia]]; 1481 nAtom++; 1482 } 1483 } 1484 else { 1485 nGaps++; 1486 } 1487 1488 sup_str(strBuf1, strBuf2, nAtom, _d); 1489 rmsd=calc_rmsd(strBuf1, strBuf2, nAtom, _d); 1490 */ 1491 nBestTrace=0; 1492 boolean newBestTrace=true; 1493 for(int ia=0; ia<lcmp; ia++) { 1494 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1495 //System.out.println(" " +align_se1[ia] + " " + align_se2[ia]); 1496 1497 if(newBestTrace) { 1498 bestTrace1[nBestTrace]=align_se1[ia]; 1499 bestTrace2[nBestTrace]=align_se2[ia]; 1500 bestTraceLen[nBestTrace]=0; 1501 newBestTrace=false; 1502 nBestTrace++; 1503 } 1504 bestTraceLen[nBestTrace-1]++; 1505 1506 } 1507 else { 1508 newBestTrace=true; 1509 } 1510 } 1511 1512 return nGaps; 1513 1514 // end of optimization on superposition 1515 1516 } 1517 1518 /** Modifies an alignment matrix by favoring the alignment of similar and identical amino acids and penalizing the alignment of unrelated ones. 1519 * 1520 * @param max alignment matrix 1521 * @param ca1 Atoms for protein 1 1522 * @param ca2 Atoms for Protein 2 1523 * @param params alignment parameters 1524 * @return modified alignment matrix 1525 */ 1526 public static double[][] updateMatrixWithSequenceConservation(double[][] max, Atom[] ca1, Atom[] ca2, CeParameters params) { 1527 Matrix origM = new Matrix(max); 1528 1529 SubstitutionMatrix<AminoAcidCompound> substMatrix = 1530 params.getSubstitutionMatrix(); 1531 1532 int internalScale = 1; 1533 if ( substMatrix instanceof ScaledSubstitutionMatrix) { 1534 ScaledSubstitutionMatrix scaledMatrix = (ScaledSubstitutionMatrix) substMatrix; 1535 internalScale = scaledMatrix.getScale(); 1536 } 1537 1538 1539 AminoAcidCompoundSet set = AminoAcidCompoundSet.getAminoAcidCompoundSet(); 1540 1541 for (int i = 0 ; i < origM.getRowDimension() ; i++){ 1542 for ( int j =0; j < origM.getColumnDimension() ; j ++ ) { 1543 double val = origM.get(i,j); 1544 Atom a1 = ca1[i]; 1545 Atom a2 = ca2[j]; 1546 1547 AminoAcidCompound ac1 = 1548 set.getCompoundForString(a1.getGroup().getChemComp().getOneLetterCode()); 1549 AminoAcidCompound ac2 = 1550 set.getCompoundForString(a2.getGroup().getChemComp().getOneLetterCode()); 1551 1552 1553 if ( ac1 == null || ac2 == null) 1554 continue; 1555 1556 short aaScore = substMatrix.getValue(ac1,ac2); 1557 1558 double weightedScore = (aaScore / internalScale) * params.getSeqWeight(); 1559 1560 1561 val += weightedScore; 1562 origM.set(i,j,val); 1563 1564 } 1565 } 1566 max = origM.getArray(); 1567 1568 //SymmetryTools.showMatrix((Matrix)origM.clone(), "in optimizer " + loopCount ); 1569 //SymmetryTools.showMatrix(origM, "iteration matrix " + loopCount + " after"); 1570 1571 return max; 1572 } 1573 1574 private double[][] notifyMatrixListener(double[][] mat2) { 1575 for (MatrixListener li : matrixListeners) { 1576 mat2 = li.matrixInOptimizer(mat2); 1577 } 1578 return mat2; 1579 } 1580 1581 private boolean[][] notifyBreakFlagListener(boolean[][] brkFlag){ 1582 for (MatrixListener li : matrixListeners) { 1583 brkFlag = li.initializeBreakFlag(brkFlag); 1584 } 1585 return brkFlag; 1586 } 1587 1588 public void addMatrixListener(MatrixListener li){ 1589 matrixListeners.add(li); 1590 } 1591 1592 1593 /** 1594 * On input, mat[i][j] should give the score for aligning positions i and j. 1595 * On output, mat[i][j] gives the maximum score possible for aligning 1..i 1596 * of protein 1 with 1..j of protein 2. 1597 * 1598 * @param nSeq1 The length of protein 1 (mat.length) 1599 * @param nSeq2 The length of protein 2 (mat[0].length) 1600 * @param gapI gap initiation penalty 1601 * @param gapE gap extension penalty 1602 * @param isGlobal1 The alignment is global for protein 1 1603 * @param isGlobal2 The alignment is global for protein 2 1604 * @return The maximum score 1605 */ 1606 private double dpAlign(int nSeq1, int nSeq2, double gapI, double gapE, 1607 boolean isGlobal1, boolean isGlobal2) { 1608 1609 // isGlobal1,isGlobal2 are always false... 1610 1611 int i, j, is, js, iMax, jMax, k; 1612 boolean ge=(gapE!=0.0?true:false); 1613 double sum, sum_ret, sum_brk; 1614 1615 boolean[][] brk_flg=new boolean [nSeq1][nSeq2]; 1616 for(i=0; i<nSeq1; i++) brk_flg[i]=new boolean [nSeq2]; 1617 1618 brk_flg = notifyBreakFlagListener(brk_flg); 1619 1620 // ge = true here... 1621 /* 1622 for(i=0; i<nSeq1; i++) 1623 { 1624 printf("\n"); 1625 for(j=0; j<nSeq2; j++) 1626 { 1627 printf("%4d", (int)(*(mat[i]+j)*10)); 1628 } 1629 } 1630 printf("\n\n\n"); 1631 */ 1632 if(!ge) 1633 { 1634 for(i=nSeq1-1; i>=0; i--) 1635 for(j=nSeq2-1; j>=0; j--) 1636 { 1637 brk_flg[i][j]=false; 1638 if(j<nSeq2-1 && i<nSeq1-1) 1639 { 1640 sum=mat[i+1][j+1]; 1641 } 1642 else 1643 { 1644 sum=0.0; 1645 if((isGlobal1 && i!=nSeq1-1) || (isGlobal2 && j!=nSeq2-1)) 1646 sum=-gapI; 1647 } 1648 if(j+1<nSeq2) 1649 for(k=i+2; k<nSeq1; k++) 1650 { 1651 if(mat[k][j+1]-gapI>sum) 1652 sum=mat[k][j+1]-gapI; 1653 } 1654 if(i+1<nSeq1) 1655 for(k=j+2; k<nSeq2; k++) 1656 { 1657 if(mat[i+1][k]-gapI>sum) 1658 sum=mat[i+1][k]-gapI; 1659 } 1660 sum+=mat[i][j]; 1661 sum_brk=(isGlobal1?-gapI:0.0)+(isGlobal2?-gapI:0.0); 1662 if(sum<sum_brk) 1663 { 1664 sum=sum_brk; 1665 brk_flg[i][j]=true; 1666 //System.out.println("break at: " + i + " " + j); 1667 } 1668 mat[i][j]=sum; 1669 } 1670 } 1671 else 1672 { 1673 for(i=nSeq1-1; i>=0; i--) 1674 for(j=nSeq2-1; j>=0; j--) 1675 { 1676 brk_flg[i][j]=false; 1677 if(j<nSeq2-1 && i<nSeq1-1) 1678 { 1679 sum=mat[i+1][j+1]; 1680 } 1681 else 1682 { 1683 sum=0.0; 1684 if(isGlobal1 && i!=nSeq1-1) sum=-gapI-gapE*(nSeq1-i-1); 1685 if(isGlobal2 && j!=nSeq2-1) sum=-gapI-gapE*(nSeq2-j-1); 1686 } 1687 if(j+1<nSeq2) 1688 for(k=i+2; k<nSeq1; k++) 1689 if(mat[k][j+1]-gapI-gapE*(k-i-1)>sum) 1690 sum=mat[k][j+1]-gapI-gapE*(k-i-1); 1691 if(i+1<nSeq1) 1692 for(k=j+2; k<nSeq2; k++) 1693 if(mat[i+1][k]-gapI-gapE*(k-j-1)>sum) 1694 sum=mat[i+1][k]-gapI-gapE*(k-j-1); 1695 sum+=mat[i][j]; 1696 sum_brk=(isGlobal1?(-gapI-gapE*(nSeq1-1-i)):0.0)+(isGlobal2?(-gapI-gapE*(nSeq2-1-j)):0.0); 1697 if(sum<sum_brk) 1698 { 1699 sum=sum_brk; 1700 brk_flg[i][j]=true; 1701 } 1702 mat[i][j]=sum; 1703 } 1704 } 1705 1706 // if (debug ){ 1707 // ScaleableMatrixPanel smp = new ScaleableMatrixPanel(); 1708 // JFrame frame = new JFrame("CE alignment matrix in dpAlign " ); 1709 // frame.addWindowListener(new WindowAdapter(){ 1710 // public void windowClosing(WindowEvent e){ 1711 // JFrame f = (JFrame) e.getSource(); 1712 // f.setVisible(false); 1713 // f.dispose(); 1714 // } 1715 // 1716 // }); 1717 // 1718 // smp.getMatrixPanel().setScalevalue(100); 1719 // Matrix mx = (Matrix) new Matrix(mat).clone(); 1720 // smp.setMatrix(mx); 1721 // 1722 // frame.getContentPane().add(smp); 1723 // 1724 // frame.pack(); 1725 // frame.setVisible(true); 1726 // } 1727 1728 /* 1729 for(i=0; i<nSeq1; i++) 1730 { 1731 printf("\n"); 1732 for(j=0; j<nSeq2; j++) 1733 { 1734 printf("%4d", (int)(*(mat[i]+j)*10)); 1735 } 1736 } 1737 printf("\n\n\n"); 1738 for(i=0; i<nSeq1; i++) 1739 { 1740 printf("\n"); 1741 for(j=0; j<nSeq2; j++) 1742 { 1743 printf("%4d", (int)(*(brk_flg[i]+j))); 1744 } 1745 } 1746 // exit(0); 1747 */ 1748 1749 is=0; js=0; lcmp=0; 1750 // no nc-end penalty - begin 1751 sum_ret=mat[0][0]; 1752 1753 // look for the highest score in mat[i][j] 1754 for(i=0; i<nSeq1; i++) 1755 for(j=0; j<nSeq2; j++) 1756 { 1757 if(i==0 && j==0) continue; 1758 sum=mat[i][j]; 1759 if(isGlobal1) sum+=-gapI-gapE*i; 1760 if(isGlobal2) sum+=-gapI-gapE*j; 1761 if(sum>sum_ret) 1762 { 1763 sum_ret=sum; 1764 is=i; js=j; 1765 } 1766 } 1767 1768 //System.out.println("start at " + is + " " + js); 1769 //for(k=0; k<is; k++) align1[k]=-1; 1770 //for(k=0; k<js; k++) align2[k]=-1; 1771 // no nc-end penalty - end 1772 1773 for(i=is, j=js; i<nSeq1 && j<nSeq2; i++, j++) 1774 { 1775 iMax=i; jMax=j; 1776 sum=mat[i][j]; 1777 if(!ge) 1778 { 1779 for(k=i+1; k<nSeq1; k++) 1780 if(mat[k][j]-gapI>sum) 1781 { 1782 iMax=k; jMax=j; 1783 sum=mat[k][j]-gapI; 1784 } 1785 1786 for(k=j+1; k<nSeq2; k++) 1787 if(mat[i][k]-gapI>sum) 1788 { 1789 iMax=i; jMax=k; 1790 sum=mat[i][k]-gapI; 1791 } 1792 } 1793 else 1794 { 1795 for(k=i+1; k<nSeq1; k++) 1796 if(mat[k][j]-gapI-gapE*(k-i)>sum) 1797 { 1798 //System.out.println("gap1 " + k + " " + j + " " + sum + "<" +(mat[k][j]-gapI-gapE*(k-i))); 1799 iMax=k; jMax=j; 1800 sum=mat[k][j]-gapI-gapE*(k-i); 1801 } 1802 1803 for(k=j+1; k<nSeq2; k++) 1804 if(mat[i][k]-gapI-gapE*(k-j)>sum) 1805 { 1806 //System.out.println("gap2 " + i + " " + k + " " + sum + "<"+ (mat[i][k]-gapI-gapE*(k-j))); 1807 iMax=i; jMax=k; 1808 sum=mat[i][k]-gapI-gapE*(k-j); 1809 } 1810 } 1811 1812 //if ( i != iMax || j != jMax ) 1813 // System.out.println("FOUND GAP AT: " + i+ " " + iMax + " " + j + " " + jMax); 1814 1815 //System.out.println(" iMax " + iMax + " jMax " + jMax); 1816 // set the gap positions: 1817 //printf("%d %d\n", iMax, jMax); 1818 1819 1820 for(k=i; k<iMax; k++, i++) { 1821 align_se1[lcmp]=k; 1822 align_se2[lcmp]=-1; 1823 1824 1825 lcmp++; 1826 } 1827 1828 for(k=j; k<jMax; k++, j++) { 1829 align_se1[lcmp]=-1; 1830 align_se2[lcmp]=k; 1831 1832 1833 lcmp++; 1834 } 1835 1836 1837 align_se1[lcmp]=iMax; 1838 align_se2[lcmp]=jMax; 1839 lcmp++; 1840 1841 if(brk_flg[i][j]) { 1842 //System.out.println("hit break flag at: " + i + " " + j + " sum " + sum_ret + " lcmp " + lcmp); 1843 break; 1844 1845 } 1846 } 1847 1848 1849 return sum_ret; 1850 } 1851 1852 1853 private void rot_mol(Atom[] caA, Atom[] caB, int nse2, Matrix m , Atom shift) throws StructureException{ 1854 1855 for(int l=0; l<nse2; l++) { 1856 Atom a = caA[l]; 1857 Group g = (Group)a.getGroup().clone(); 1858 //Group g = (Group)a.getParent(); 1859 1860 Calc.rotate( g, m); 1861 Calc.shift( g, shift); 1862 caB[l] = g.getAtom(a.getName()); 1863 } 1864 } 1865 1866 //void rot_mol(XYZ *molA, XYZ *molB, int nAtom, double d_[20] ) { 1867 // double dx, dy, dz; 1868 // for(int l=0; l<nAtom; l++) { 1869 // if(molA[l].X<1e10) { 1870 // dx=molA[l].X; 1871 // dy=molA[l].Y; 1872 // dz=molA[l].Z; 1873 // molB[l].X=dx*d_[0]+dy*d_[1]+dz*d_[2]+d_[9]; 1874 // molB[l].Y=dx*d_[3]+dy*d_[4]+dz*d_[5]+d_[10]; 1875 // molB[l].Z=dx*d_[6]+dy*d_[7]+dz*d_[8]+d_[11]; 1876 // } 1877 // else { 1878 // molB[l]=molA[l]; 1879 // } 1880 // } 1881 // } 1882 // 1883 1884 /** superimpose and get rmsd 1885 * 1886 * @param pro1 1887 * @param pro2 1888 * @param strLen Number of atoms from pro1 and pro2 to use 1889 * @param storeTransform Store rotation and shift matrices locally 1890 * @return RMSD 1891 * @throws StructureException 1892 */ 1893 public double calc_rmsd(Atom[] pro1, Atom[] pro2, int strLen, 1894 boolean storeTransform) throws StructureException { 1895 1896 Atom[] cod1 = getAtoms(pro1, strLen,false); 1897 Atom[] cod2 = getAtoms(pro2, strLen,true); 1898 1899 Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(cod1), 1900 Calc.atomsToPoints(cod2)); 1901 1902 Matrix matrix = Matrices.getRotationJAMA(trans); 1903 Atom shift = Calc.getTranslationVector(trans); 1904 1905 if ( storeTransform) { 1906 r = matrix; 1907 t = shift; 1908 } 1909 for (Atom a : cod2) 1910 Calc.transform(a.getGroup(), trans); 1911 1912 return Calc.rmsd(cod1, cod2); 1913 1914 } 1915 1916 /** 1917 * Copies the first length atoms from the input array 1918 * @param ca The array to copy 1919 * @param length the number of atoms to copy 1920 * @param clone If true, preform a deep copy, cloning the underlying Groups 1921 * @return An array with the first length items of ca, possibly cloning the Atoms. 1922 * @throws StructureException 1923 */ 1924 private Atom[] getAtoms(Atom[] ca, int length, boolean clone) throws StructureException{ 1925 1926 List<Atom> atoms = new ArrayList<>(); 1927 for ( int i = 0 ; i < length ; i++){ 1928 1929 Atom a; 1930 if ( clone ){ 1931 Group g = (Group)ca[i].getGroup().clone(); 1932 a = g.getAtom(ca[i].getName()); 1933 } 1934 else { 1935 a = ca[i]; 1936 } 1937 atoms.add(a); 1938 } 1939 return atoms.toArray(new Atom[atoms.size()]); 1940 } 1941 1942 1943 1944 private void noBestTrace(){ 1945 1946 if(isPrint) { 1947 timeEnd = System.currentTimeMillis(); 1948 long time_q=(timeEnd-timeStart); 1949 1950 String msg = String.format("size=0 time=%d comb=%d\n", (int)(time_q), nTraces); 1951 System.out.println(msg); 1952 } 1953 } 1954 1955 1956 1957 private double zToP(double z) { 1958 int ind=(int)(z/0.1); 1959 if(ind<0) ind=0; 1960 if(ind>149) ind=149; 1961 return(tableZtoP[ind]); 1962 } 1963 /////////////////////////////////////////////////////////////////////////// 1964 private double pToZ(double p) { 1965 int ind=(int)(-Math.log10(p)*3.0); 1966 if(ind<0) ind=0; 1967 if(ind>149) ind=149; 1968 return(tablePtoZ[ind]); 1969 } 1970 /////////////////////////////////////////////////////////////////////////// 1971 private double zByZ(double z1, double z2) { 1972 double p1=zToP(z1); 1973 double p2=zToP(z2); 1974 return(pToZ(p1*p2)); 1975 } 1976 1977 protected double zStrAlign(int winSize, int nTrace, double score, int nGaps) { 1978 double z1=zScore(winSize, nTrace, score); 1979 double z2=zGaps(winSize, nTrace, nGaps); 1980 return(zByZ(z1, z2)); 1981 } 1982 1983 double zScore(int winSize, int nTrace, double score) { 1984 1985 if(winSize==8) { 1986 1987 if(nTrace<1) return(0.0); 1988 1989 double scoreAv_, scoreSd_; 1990 if(nTrace<21) { 1991 scoreAv_=scoreAv8[nTrace-1]; 1992 scoreSd_=scoreSd8[nTrace-1]; 1993 } 1994 else { 1995 scoreAv_=0.209874*nTrace+2.944714; 1996 scoreSd_=0.039487*nTrace+0.675735; 1997 } 1998 if(score>scoreAv_) return(0.0); 1999 return((scoreAv_-score)/scoreSd_); 2000 } 2001 2002 if(winSize==6) { 2003 2004 if(nTrace<1) return(0.0); 2005 2006 double scoreAv_, scoreSd_; 2007 if(nTrace<21) { 2008 scoreAv_=scoreAv6[nTrace-1]; 2009 scoreSd_=scoreSd6[nTrace-1]; 2010 } 2011 else { 2012 scoreAv_=0.198534*nTrace+2.636477; 2013 scoreSd_=0.040922*nTrace+0.715636; 2014 } 2015 if(score>scoreAv_) return(0.0); 2016 return((scoreAv_-score)/scoreSd_); 2017 } 2018 2019 return(0.0); 2020 2021 } 2022 /////////////////////////////////////////////////////////////////////////// 2023 double zGaps(int winSize, int nTrace, int nGaps) { 2024 2025 if(nTrace<2) return(0.0); 2026 double scoreAv_, scoreSd_; 2027 2028 if(winSize==8) { 2029 if(nTrace<21) { 2030 scoreAv_=gapsAv8[nTrace-1]; 2031 scoreSd_=gapsSd8[nTrace-1]; 2032 } 2033 else { 2034 scoreAv_=14.949173*nTrace-14.581193; 2035 scoreSd_=2.045067*nTrace+13.191095; 2036 } 2037 if(nGaps>scoreAv_) return(0.0); 2038 return((scoreAv_-nGaps)/scoreSd_); 2039 } 2040 2041 if(winSize==6) { 2042 if(nTrace<21) { 2043 scoreAv_=gapsAv6[nTrace-1]; 2044 scoreSd_=gapsSd6[nTrace-1]; 2045 } 2046 else { 2047 scoreAv_=13.574490*nTrace-13.977223; 2048 scoreSd_=1.719977*nTrace+19.615014; 2049 } 2050 if(nGaps>scoreAv_) return(0.0); 2051 return((scoreAv_-nGaps)/scoreSd_); 2052 } 2053 2054 return(0.0); 2055 } 2056 2057 private static final double[] scoreAv8 ={2.54, 2.51, 2.72, 3.01, 3.31, 3.61, 3.90, 4.19, 4.47, 4.74, 2058 4.99, 5.22, 5.46, 5.70, 5.94, 6.13, 6.36, 6.52, 6.68, 6.91}; 2059 private static final double[] scoreSd8 ={1.33, 0.88, 0.73, 0.71, 0.74, 0.80, 0.86, 0.92, 0.98, 1.04, 2060 1.08, 1.10, 1.15, 1.19, 1.23, 1.25, 1.32, 1.34, 1.36, 1.45}; 2061 private static final double[] gapsAv8 ={0.00, 11.50, 23.32, 35.95, 49.02, 62.44, 76.28, 90.26, 2062 104.86, 119.97, 134.86, 150.54, 164.86, 179.57, 194.39, 2063 209.38, 224.74, 238.96, 253.72, 270.79}; 2064 private static final double[] gapsSd8 ={0.00, 9.88, 14.34, 17.99, 21.10, 23.89, 26.55, 29.00, 31.11, 2065 33.10, 35.02, 36.03, 37.19, 38.82, 41.04, 43.35, 45.45, 2066 48.41, 50.87, 52.27}; 2067 private static final double[] scoreAv6 ={1.98, 1.97, 2.22, 2.54, 2.87, 3.18, 3.48, 3.77, 4.05, 4.31, 2068 4.57, 4.82, 5.03, 5.24, 5.43, 5.64, 5.82, 6.02, 6.21, 6.42}; 2069 private static final double[] scoreSd6 ={1.15, 0.73, 0.63, 0.64, 0.71, 0.80, 0.87, 0.95, 1.01, 1.07, 2070 1.13, 1.19, 1.22, 1.25, 1.28, 1.32, 1.35, 1.39, 1.45, 1.50}; 2071 private static final double[] gapsAv6 ={0.00, 10.12, 20.25, 31.29, 42.95, 55.20, 67.53, 80.15, 2072 93.30, 106.47, 120.52, 134.38, 148.59, 162.58, 176.64, 2073 191.23, 204.12, 218.64, 231.82, 243.43}; 2074 private static final double[] gapsSd6 ={0.00, 9.80, 14.44, 18.14, 21.35, 24.37, 27.00, 29.68, 32.22, 2075 34.37, 36.65, 38.63, 40.31, 42.16, 43.78, 44.98, 47.08, 2076 49.09, 50.78, 52.15}; 2077 2078 2079 private static final double[] tableZtoP={ 2080 1.0, 9.20e-01,8.41e-01,7.64e-01,6.89e-01,6.17e-01,5.49e-01,4.84e-01,4.24e-01,3.68e-01, 2081 3.17e-01,2.71e-01,2.30e-01,1.94e-01,1.62e-01,1.34e-01,1.10e-01,8.91e-02,7.19e-02,5.74e-02, 2082 4.55e-02,3.57e-02,2.78e-02,2.14e-02,1.64e-02,1.24e-02,9.32e-03,6.93e-03,5.11e-03,3.73e-03, 2083 2.70e-03,1.94e-03,1.37e-03,9.67e-04,6.74e-04,4.65e-04,3.18e-04,2.16e-04,1.45e-04,9.62e-05, 2084 6.33e-05,4.13e-05,2.67e-05,1.71e-05,1.08e-05,6.80e-06,4.22e-06,2.60e-06,1.59e-06,9.58e-07, 2085 5.73e-07,3.40e-07,1.99e-07,1.16e-07,6.66e-08,3.80e-08,2.14e-08,1.20e-08,6.63e-09,3.64e-09, 2086 1.97e-09,1.06e-09,5.65e-10,2.98e-10,1.55e-10,8.03e-11,4.11e-11,2.08e-11,1.05e-11,5.20e-12, 2087 2.56e-12,1.25e-12,6.02e-13,2.88e-13,1.36e-13,6.38e-14,2.96e-14,1.36e-14,6.19e-15,2.79e-15, 2088 1.24e-15,5.50e-16,2.40e-16,1.04e-16,4.46e-17,1.90e-17,7.97e-18,3.32e-18,1.37e-18,5.58e-19, 2089 2.26e-19,9.03e-20,3.58e-20,1.40e-20,5.46e-21,2.10e-21,7.99e-22,3.02e-22,1.13e-22,4.16e-23, 2090 1.52e-23,5.52e-24,1.98e-24,7.05e-25,2.48e-25,8.64e-26,2.98e-26,1.02e-26,3.44e-27,1.15e-27, 2091 3.82e-28,1.25e-28,4.08e-29,1.31e-29,4.18e-30,1.32e-30,4.12e-31,1.27e-31,3.90e-32,1.18e-32, 2092 3.55e-33,1.06e-33,3.11e-34,9.06e-35,2.61e-35,7.47e-36,2.11e-36,5.91e-37,1.64e-37,4.50e-38, 2093 1.22e-38,3.29e-39,8.77e-40,2.31e-40,6.05e-41,1.56e-41,4.00e-42,1.02e-42,2.55e-43,6.33e-44, 2094 1.56e-44,3.80e-45,9.16e-46,2.19e-46,5.17e-47,1.21e-47,2.81e-48,6.45e-49,1.46e-49,3.30e-50}; 2095 private static final double[] tablePtoZ ={ 2096 0.00,0.73,1.24,1.64,1.99,2.30,2.58,2.83,3.07,3.29, 2097 3.50,3.70,3.89,4.07,4.25,4.42,4.58,4.74,4.89,5.04, 2098 5.19,5.33,5.46,5.60,5.73,5.86,5.99,6.11,6.23,6.35, 2099 6.47,6.58,6.70,6.81,6.92,7.02,7.13,7.24,7.34,7.44, 2100 7.54,7.64,7.74,7.84,7.93,8.03,8.12,8.21,8.30,8.40, 2101 8.49,8.57,8.66,8.75,8.84,8.92,9.01,9.09,9.17,9.25, 2102 9.34,9.42,9.50,9.58,9.66,9.73,9.81,9.89,9.97,10.04, 2103 10.12,10.19,10.27,10.34,10.41,10.49,10.56,10.63,10.70,10.77, 2104 10.84,10.91,10.98,11.05,11.12,11.19,11.26,11.32,11.39,11.46, 2105 11.52,11.59,11.66,11.72,11.79,11.85,11.91,11.98,12.04,12.10, 2106 12.17,12.23,12.29,12.35,12.42,12.48,12.54,12.60,12.66,12.72, 2107 12.78,12.84,12.90,12.96,13.02,13.07,13.13,13.19,13.25,13.31, 2108 13.36,13.42,13.48,13.53,13.59,13.65,13.70,13.76,13.81,13.87, 2109 13.92,13.98,14.03,14.09,14.14,14.19,14.25,14.30,14.35,14.41, 2110 14.46,14.51,14.57,14.62,14.67,14.72,14.77,14.83,14.88,14.93}; 2111 2112 /** copy data from this class into AFPChain container object. 2113 * 2114 * @param afpChain 2115 * @param ca1 2116 * @param ca2 2117 */ 2118 public void convertAfpChain(AFPChain afpChain, Atom[] ca1, Atom[] ca2) { 2119 2120 afpChain.setBlockNum(1); 2121 //afpChain.setAlignScore(z); 2122 Matrix[] m ; 2123 2124 if ( r != null ) { 2125 m = new Matrix[1]; 2126 m[0] = r; 2127 } else { 2128 m = new Matrix[0]; 2129 } 2130 2131 Atom[] as ; 2132 if ( t != null) { 2133 as = new Atom[1]; 2134 as[0] = t; 2135 } else { 2136 as = new Atom[0]; 2137 } 2138 2139 afpChain.setBlockRotationMatrix(m); 2140 afpChain.setBlockShiftVector(as); 2141 2142 int nse1 = ca1.length; 2143 int nse2 = ca2.length; 2144 //System.out.println("dist1 :" + dist1.length + " " + dist2.length); 2145 2146 if ( nse1 > 0 && dist1.length > 0 ) 2147 afpChain.setDisTable1(new Matrix(dist1)); 2148 else 2149 afpChain.setDisTable1 (Matrix.identity(3, 3)); 2150 if ( nse2 > 0 && dist2.length > 0 ) 2151 afpChain.setDisTable2(new Matrix(dist2)); 2152 else 2153 afpChain.setDisTable2(Matrix.identity(3, 3)); 2154 2155 2156 char[] alnseq1 = new char[nse1+nse2+1]; 2157 char[] alnseq2 = new char[nse1+nse2+1] ; 2158 char[] alnsymb = new char[nse1+nse2+1]; 2159 2160 int[][][] optAln = new int[1][2][nAtom]; 2161 afpChain.setOptAln(optAln); 2162 2163 int pos = 0; 2164 int nrIdent = 0; 2165 int nrSim = 0; 2166 for(int ia=0; ia<lcmp; ia++) { 2167 2168 // no gap 2169 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 2170 //System.out.println("ia " + ia + " pos " + pos + " " + align_se1[ia] + " " + align_se2[ia]); 2171 optAln[0][0][pos] = align_se1[ia]; 2172 optAln[0][1][pos] = align_se2[ia]; 2173 2174 char l1 = getOneLetter(ca1[align_se1[ia]].getGroup()); 2175 char l2 = getOneLetter(ca2[align_se2[ia]].getGroup()); 2176 2177 alnseq1[ia] = Character.toUpperCase(l1); 2178 alnseq2[ia] = Character.toUpperCase(l2); 2179 alnsymb[ia] = ' '; 2180 if ( l1 == l2) { 2181 nrIdent++; 2182 nrSim++; 2183 alnsymb[ia] = '|'; 2184 } else if ( AFPAlignmentDisplay.aaScore(l1, l2) > 0){ 2185 nrSim++; 2186 alnsymb[ia] = ':'; 2187 } 2188 2189 pos++; 2190 2191 } else { 2192 // there is a gap at this position 2193 alnsymb[ia] = ' '; 2194 if (align_se1[ia] == -1 ) { 2195 alnseq1[ia] = '-'; 2196 } else { 2197 char l1 = getOneLetter(ca1[align_se1[ia]].getGroup()); 2198 alnseq1[ia] = Character.toUpperCase(l1); 2199 } 2200 if ( align_se2[ia] == -1 ) { 2201 alnseq2[ia] = '-'; 2202 } else { 2203 char l2 = getOneLetter(ca2[align_se2[ia]].getGroup()); 2204 alnseq2[ia] = Character.toUpperCase(l2); 2205 } 2206 2207 } 2208 } 2209 2210 2211 afpChain.setAlnseq1(alnseq1); 2212 afpChain.setAlnseq2(alnseq2); 2213 afpChain.setAlnsymb(alnsymb); 2214 2215 2216 // CE uses the aligned pairs as reference not the whole alignment including gaps... 2217 if ( pos > 0) { 2218 afpChain.setIdentity(nrIdent*1.0/pos); 2219 afpChain.setSimilarity(nrSim*1.0/pos); 2220 } else { 2221 afpChain.setIdentity(0); 2222 afpChain.setSimilarity(0); 2223 } 2224 2225 //AFPAlignmentDisplay.getAlign( afpChain,ca1,ca2); 2226 2227 } 2228 2229 public int getnAtom() { 2230 return nAtom; 2231 } 2232 2233 2234 2235 public void setnAtom(int nAtom) { 2236 this.nAtom = nAtom; 2237 } 2238 2239 2240 2241 public int getLcmp() { 2242 return lcmp; 2243 } 2244 2245 2246 2247 public void setLcmp(int lcmp) { 2248 this.lcmp = lcmp; 2249 } 2250 2251 2252 2253 public int[] getAlign_se1() { 2254 return align_se1; 2255 } 2256 2257 2258 2259 public void setAlign_se1(int[] alignSe1) { 2260 align_se1 = alignSe1; 2261 } 2262 2263 2264 2265 public int[] getAlign_se2() { 2266 return align_se2; 2267 } 2268 2269 2270 2271 public void setAlign_se2(int[] alignSe2) { 2272 align_se2 = alignSe2; 2273 } 2274 2275 /** 2276 * Caution: this matrix is overwriten with very different data at several 2277 * points in the alignment algorithm. After 2278 * {@link #initSumOfDistances(int, int, int, int, Atom[], Atom[]) initSumOfDistances} 2279 * is run, this will hold the distance matrix between AFPs. 2280 * @return mat 2281 */ 2282 public double[][] getMatMatrix() { 2283 return mat; 2284 } 2285 2286 public void setMatMatrix(double[][] matrix){ 2287 mat = matrix; 2288 } 2289 2290 /** 2291 * Gets the rotation matrix from the last call to 2292 * {@link #calc_rmsd(Atom[], Atom[], int, boolean) calc_rmsd}. 2293 * @return The rotatiokn matrix 2294 */ 2295 public Matrix getRotationMatrix() { 2296 return r; 2297 } 2298 2299 /** 2300 * Gets the shift from the last call to 2301 * {@link #calc_rmsd(Atom[], Atom[], int, boolean) calc_rmsd}. 2302 * @return The shift 2303 */ 2304 public Atom getShift() { 2305 return t; 2306 } 2307 2308 public double[][] getDist1() { 2309 return dist1; 2310 } 2311 2312 public void setDist1(double[][] dist1) { 2313 this.dist1 = dist1; 2314 } 2315 2316 public double[][] getDist2() { 2317 return dist2; 2318 } 2319 2320 public void setDist2(double[][] dist2) { 2321 this.dist2 = dist2; 2322 } 2323 2324 2325}