001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Sep 25, 2009 021 * Author: Andreas Prlic 022 * 023 */ 024 025package org.biojava.nbio.structure.align.ce; 026 027import org.biojava.nbio.core.alignment.matrices.ScaledSubstitutionMatrix; 028import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 029import org.biojava.nbio.structure.Atom; 030import org.biojava.nbio.structure.Calc; 031import org.biojava.nbio.structure.Group; 032import org.biojava.nbio.structure.SVDSuperimposer; 033import org.biojava.nbio.structure.StructureException; 034import org.biojava.nbio.structure.StructureTools; 035import org.biojava.nbio.structure.align.model.AFP; 036import org.biojava.nbio.structure.align.model.AFPChain; 037import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay; 038import org.biojava.nbio.structure.jama.Matrix; 039import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 040import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 041 042import java.util.ArrayList; 043import java.util.List; 044 045 046 047/** This is based on the original Combinatorial Extension (CE) source code from 2003 or 2004 (CE version 2.3), 048 * as has been originally developed by I. Shindyalov and P.Bourne (1998). 049 * The original CE paper is available from here: <a href="http://peds.oxfordjournals.org/cgi/content/short/11/9/739">http://peds.oxfordjournals.org/cgi/content/short/11/9/739</a>. 050 * 051 * This class is a pretty much exact 1:1 port from C, where I cared about exact reproduce of the CE results 052 * and not about Java style. 053 * 054 * @author Andreas Prlic 055 056 * 057 */ 058public class CECalculator { 059 060 protected static final boolean isPrint = false; 061 private static final boolean debug = false; 062 063 int[] f1; 064 int[] f2; 065 double[][]dist1; 066 double[][]dist2; 067 protected double[][]mat; 068 protected int[] bestTrace1; 069 protected int[] bestTrace2; 070 protected int[][] bestTraces1; 071 protected int[][] bestTraces2; 072 protected int nBestTrace; 073 protected int nBestTraces; 074 double d_[] = new double[20]; 075 protected int[] bestTracesN; 076 protected double bestTraceScore; 077 protected int nTrace; 078 protected double[] bestTracesScores; 079 protected int[] trace1; 080 protected int[] trace2; 081 082 protected static final double zThr=-0.1; 083 084 long timeStart ; 085 long timeEnd; 086 private int nAtom; 087 088 // the equivalent positions in the alignment... 089 private int[] align_se1; 090 private int[] align_se2; 091 092 093 private int lcmp; 094 private int[] bestTraceLen; 095 private Matrix r; 096 private Atom t; 097 protected int nTraces; 098 099 private double z; 100 private int[] traceIndexContainer; 101 102 protected CeParameters params; 103 // SHOULD these fields be PARAMETERS? 104 105 protected static final int nIter = 1; 106 private static final boolean distAll = false; 107 108 List<MatrixListener> matrixListeners; 109 110 111 112 public CECalculator(CeParameters params){ 113 timeStart = System.currentTimeMillis(); 114 dist1= new double[0][0]; 115 dist2= new double[0][0]; 116 this.params = params; 117 matrixListeners = new ArrayList<MatrixListener>(); 118 119 } 120 121 /** 122 * 123 * @param afpChain A new AFPChain, which will be filled in by this function 124 * @param ca1 125 * @param ca2 126 * @return afpChain 127 * @throws StructureException 128 */ 129 public AFPChain extractFragments(AFPChain afpChain, 130 Atom[] ca1, Atom[] ca2) throws StructureException{ 131 132 int nse1 = ca1.length; 133 int nse2 = ca2.length; 134 135 afpChain.setCa1Length(nse1); 136 afpChain.setCa2Length(nse2); 137 138 int traceMaxSize=nse1<nse2?nse1:nse2; 139 140 f1 = new int[nse1]; 141 f2 = new int[nse2]; 142 143 dist1 = initIntraDistmatrix(ca1, nse1); 144 dist2 = initIntraDistmatrix(ca2, nse2); 145 146 147 if ( debug ) 148 System.out.println("parameters: " + params); 149 150 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 151 if ( params.getSeqWeight() < 1) 152 params.setSeqWeight(2); 153 } 154 155 int winSize = params.getWinSize(); 156 157 int winSizeComb1 = (winSize-1)*(winSize-2)/2; 158 159 traceIndexContainer = new int[traceMaxSize]; 160 161 // CE: unused code. distAll is always false and both loops do the same??? 162 // CE v2.3 calls this Weight factors for trace extension 163 if(distAll ) { 164 for(int i=0; i<traceMaxSize; i++) 165 traceIndexContainer[i]=(i+1)*i*winSize*winSize/2+(i+1)*winSizeComb1; 166 } else { 167 for(int i=0; i<traceMaxSize; i++) { 168 traceIndexContainer[i]=(i+1)*i*winSize/2+(i+1)*winSizeComb1; 169 170 171 } 172 } 173 174 // verified: a[] is set correctly. 175 176 mat = initSumOfDistances(nse1, nse2, winSize, winSizeComb1, ca1, ca2); 177 178 179 180// try { 181// Matrix m2 = new Matrix(mat).copy(); 182// JPanel panel = GuiWrapper.getScaleableMatrixPanel(m2); 183// JFrame frame = new JFrame(); 184// frame.addWindowListener(new WindowAdapter(){ 185// public void windowClosing(WindowEvent e){ 186// JFrame f = (JFrame) e.getSource(); 187// f.setVisible(false); 188// f.dispose(); 189// } 190// }); 191// 192// 193// frame.getContentPane().add(panel); 194// 195// frame.pack(); 196// frame.setVisible(true); 197// } catch (Exception e) { 198// e.printStackTrace(); 199// } 200 201 202 // Set the distance matrix 203 //afpChain.setDistanceMatrix(new Matrix(mat.clone())); 204 205 206 // 207 // double rmsdThr = params.getRmsdThr(); 208 // StringBuffer buf = new StringBuffer(" "); 209 // for(int i=0; i<nse2; i++) 210 // buf.append(String.format("%c", i%10==0?(i%100)/10+48:32)); 211 // buf.append("\n"); 212 // for(int i=0; i<nse1; i++) { 213 // buf.append(String.format("%c ", i%10==0?(i%100)/10+48:32)); 214 // for(int j=0; j<nse2; j++) 215 // buf.append(String.format("%c", (mat[i][j])<rmsdThr?'+':'X')); 216 // //printf("%c", ((int)*(mat[i]+j)/40)>9?'*':((int)*(mat[i]+j)/40)+48); 217 // buf.append("\n"); 218 // } 219 // buf.append("\n"); 220 // 221 // System.out.println(buf.toString()); 222 // 223 224 225 return afpChain; 226 } 227 228 /** 229 * Evaluates the distance between two atoms 230 * Several scoring functions are implemented and can be changed by calling 231 * {@link CeParameters#setScoringStrategy(Integer) setScoringStrategy()} 232 * on {@link CeParameters parameter} object this CECalculator was created with. 233 * <p> 234 * Scoring Strategies:<dl> 235 * <dt>DEFAULT_SCORING_STRATEGY</dt> 236 * <dd>Strategy of the original CE publication; CA-CA distance</dd> 237 * 238 * <dt>SIDE_CHAIN_SCORING</dt> 239 * <dd>CB-CB distance. This performs better for sheets and helices than CA.</dd> 240 * 241 * <dt>SIDE_CHAIN_ANGLE_SCORING</dt> 242 * <dd>Use the dot product (eg the cosine) of the two CA-CB vectors.</dd> 243 * 244 * <dt>CA_AND_SIDE_CHAIN_ANGLE_SCORING</dt> 245 * <dd>Equivalent to DEFAULT_SCORING_STRATEGY + SIDE_CHAIN_ANGLE_SCORING</dd> 246 * </dl> 247 * 248 * <dt>SEQUENCE_CONSERVATION</dt> 249 * <dd>A mix between the DEFAULT_SCORING_STRATEGY and a scoring function that favors the alignment of sequence conserved positions in the alignment</dd> 250 * </dl> 251 * 252 * 253 * 254 * @param ca1 The CA of the first residue 255 * @param ca2 The CA of the second residue 256 * @return The distance between the two fragments, according to the selected 257 * scoring strategy. Lower distances are better alignments. 258 * @throws StructureException 259 */ 260 private double getDistanceWithSidechain(Atom ca1, Atom ca2) throws StructureException { 261 262 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.CA_SCORING) { 263 264 return Calc.getDistance(ca1,ca2); 265 266 } 267 268 double dist; 269 Group g1 = ca1.getGroup(); 270 Atom cb1 = null; 271 if ( g1.hasAtom(StructureTools.CB_ATOM_NAME)) { 272 cb1 = g1.getAtom(StructureTools.CB_ATOM_NAME); 273 } 274 // 275 Group g2 = ca2.getGroup(); 276 Atom cb2 = null; 277 if ( g2.hasAtom(StructureTools.CB_ATOM_NAME)) { 278 cb2 = g2.getAtom(StructureTools.CB_ATOM_NAME); 279 } 280 281 282 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SIDE_CHAIN_SCORING) { 283 284 285 // here we are using side chain orientation for scoring... 286 287 288 // score type 1 consider side chain distances 289 if ( cb1 != null && cb2 != null) { 290 // CB distance 291 dist = Calc.getDistance(cb1,cb2); 292 //dist = dist / 2.; 293 } else { 294 dist = Calc.getDistance(ca1,ca2); 295 } 296 297 return dist; 298 } 299 300 else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SIDE_CHAIN_ANGLE_SCORING){ 301 302 // score type 2 add angle info 303 304 305 if ( cb1 != null && cb2 != null) { 306 // If the CA were overlaid, what is the distance between the CB? 307 // Recall c^2 = a^2 + b^2 -2ab*cos(theta), so this is a function of angle 308 Atom c1 = Calc.subtract(cb1, ca1); 309 Atom c2 = Calc.subtract(cb2, ca2); 310 Atom newA = Calc.subtract(c2, c1); 311 dist = Calc.amount(newA); 312 } else { 313 //dist += Calc.getDistance(ca1,ca2); 314 dist = 0; 315 } 316 317 return dist; 318 319 } 320 else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.CA_AND_SIDE_CHAIN_ANGLE_SCORING){ 321 322 // score type 3 323 // CA distance + cos(angle) 324 dist = 0; 325 if ( cb1 != null && cb2 != null) { 326 Atom cacb1 = Calc.subtract(cb1, ca1); 327 Atom cacb2 = Calc.subtract(cb2, ca2); 328 Atom newA = Calc.subtract(cacb2, cacb1); 329 //System.out.format("CACB 1: %s\nCACB 2: %s\ndiff: %s\nd: %f\n",cacb1.toString(),cacb2.toString(),newA.toString(),Calc.amount(newA)); 330 dist += Calc.amount(newA); 331 } 332 dist += Calc.getDistance(ca1,ca2); 333 334 return dist; 335 336 } else if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 337 if ( cb1 != null && cb2 != null) { 338 // CB distance 339 dist = Calc.getDistance(cb1,cb2); 340 //dist = dist / 2.; 341 } else { 342 dist = Calc.getDistance(ca1,ca2); 343 } 344 return dist; 345 346 347 } 348 else { 349 // unsupported scoring scheme 350 return Calc.getDistance(ca1,ca2); 351 } 352 } 353 354 /** build up intramolecular distance matrix dist1 & dist2 355 * 356 * @param ca 357 * @param nse 358 * @return 359 * @throws StructureException 360 */ 361 private double[][] initIntraDistmatrix(Atom[] ca, int nse) throws StructureException 362 { 363 364 365 double[][] intraDist = new double[nse][nse]; 366 367 // 368 for(int ise1=0; ise1<nse; ise1++) { 369 370 for(int ise2=0; ise2<nse; ise2++) { 371 intraDist[ise1][ise2] = getDistanceWithSidechain(ca[ise1], ca[ise2]); 372 373 } 374 } 375 return intraDist; 376 } 377 378 379 public double[][] initSumOfDistances(int nse1, int nse2, int winSize, int winSizeComb1, Atom[] ca1, Atom[] ca2) { 380 381 double d; 382 383 double[][] mat = new double[nse1][nse2]; 384 385 // init the initial mat[] array. 386 // at this stage mat contains the sum of the distances of fragments of the matrices dist1, dist 387 for(int ise1=0; ise1<nse1; ise1++) { 388 for(int ise2=0; ise2<nse2; ise2++) { 389 390 mat[ise1][ise2]=-1.0; 391 392 if(ise1>nse1-winSize || ise2>nse2-winSize) continue; 393 394 d=0.0; 395 // this sums up over the distances of the fragments 396 for(int is1=0; is1<winSize-2; is1++) 397 for(int is2=is1+2; is2<winSize; is2++) { 398 //System.out.println("pos1 :" + (ise1+is1) + " " + (ise1+is2) + " " + (ise2+is1) + " " + (ise2+is2)); 399 // is this abs or floor? check! 400 d+=Math.abs(dist1[ise1+is1][ise1+is2]-dist2[ise2+is1][ise2+is2]); 401 } 402 mat[ise1][ise2]=d/winSizeComb1; 403 404 //System.out.println("mat ["+ise1+"]["+ise2+"]="+mat[ise1][ise2]); 405 } 406 407 } 408 409 // verified: mat[][] probably ok. 410 411 return mat; 412 } 413 414 415 416 417 418 419 @SuppressWarnings("unused") 420 public void traceFragmentMatrix( AFPChain afpChain, 421 Atom[] ca1, Atom[] ca2) { 422 423 double rmsdThr = params.getRmsdThr(); 424 425 426 double oldBestTraceScore=10000.0; 427 bestTraceScore = 100.0; 428 nBestTrace=0; 429 int nBestTrace0 = 0; 430 int winSize = params.getWinSize(); 431 int winSizeComb1=(winSize-1)*(winSize-2)/2; 432 boolean distAll = false; 433 434 int winSizeComb2=distAll?winSize*winSize:winSize; 435 double rmsdThrJoin = params.getRmsdThrJoin(); 436 437 double z0; 438 439 //double bestTraceZScore=-1.0; 440 441 int nse1 = ca1.length; 442 int nse2 = ca2.length; 443 444 //System.out.println("nse1 :" +nse1 + " nse2: " + nse2); 445 446 int traceMaxSize=nse1<nse2?nse1:nse2; 447 448 bestTrace1 = new int [traceMaxSize]; 449 bestTrace2 = new int [traceMaxSize]; 450 trace1 = new int [traceMaxSize]; 451 trace2 = new int [traceMaxSize]; 452 453 int[] traceIndex = new int [traceMaxSize]; 454 int[] traceIterLevel = new int [traceMaxSize]; 455 456 int ise11; 457 int ise12; 458 int ise21; 459 int ise22; 460 461 int ise1; 462 int ise2; 463 464 int gapMax=params.getMaxGapSize(); 465 466 int iterDepth; 467 if ( gapMax > 0){ 468 iterDepth =gapMax*2+1; 469 } else { 470 iterDepth = traceMaxSize; 471 } 472 double[][] traceScore = new double[traceMaxSize][iterDepth]; 473 474 nTraces =0; 475 long tracesLimit=(long)5e7; 476 double score =-1; 477 double score0 = -1; 478 double score1 = -1 ; 479 double score2 = -1; 480 481 int mse1; 482 int mse2; 483 int jgap; 484 int jdir; 485 int jse1=0; 486 int jse2=0; 487 488 int bestTracesMax=30; 489 bestTraces1 = new int[bestTracesMax][traceMaxSize]; 490 bestTraces2 = new int[bestTracesMax][ traceMaxSize]; 491 bestTracesN=new int [bestTracesMax]; 492 bestTracesScores = new double [bestTracesMax]; 493 for(int it=0; it<bestTracesMax; it++) { 494 bestTracesN[it]=0; 495 bestTracesScores[it]=100; 496 } 497 498 nBestTraces=0; 499 int newBestTrace=0; 500 501 double traceTotalScore=0; 502 double traceScoreMax =0; 503 double userRMSDMax = params.getMaxOptRMSD(); 504 int kse1; 505 int kse2; 506 507 iterLoop: 508 for(int iter=0; iter<nIter; iter++) { 509 510 if(iter>2) { 511 if(oldBestTraceScore<=bestTraceScore) break; 512 } 513 oldBestTraceScore=bestTraceScore; 514 515 if(iter==1) { 516 z0=zStrAlign(winSize, nBestTrace, bestTraceScore, 517 bestTrace1[nBestTrace]+winSize-bestTrace1[0]+ 518 bestTrace2[nBestTrace]+winSize-bestTrace2[0]- 519 nBestTrace*2*winSize); 520 if(z0<zThr) break; 521 nBestTrace0=nBestTrace; 522 nBestTrace=0; 523 bestTraceScore=100.0; 524 525 nTraces=0; 526 } 527 528 529 if(iter==0) { 530 ise11=0; ise12=nse1; 531 ise21=0; ise22=nse2; 532 533 } 534 else { 535 if(iter==1) { 536 ise11=bestTrace1[0]; ise12=bestTrace1[0]+1; 537 ise21=bestTrace2[0]; ise22=bestTrace2[0]+1; 538 } 539 else { 540 ise11=bestTrace1[0]-1; ise12=bestTrace1[0]+2; 541 ise21=bestTrace2[0]-1; ise22=bestTrace2[0]+2; 542 } 543 if(ise11<0) ise11=0; 544 if(ise12>nse1) ise12=nse1; 545 if(ise21<0) ise21=0; 546 if(ise22>nse2) ise22=nse2; 547 } 548 549 //System.out.println("ise1Loop: " + ise11 + " " + ise12 + " " + ise21 + " " + ise22); 550 ise1Loop: 551 for(int ise1_=ise11; ise1_<ise12; ise1_++) { 552 ise2Loop: 553 for(int ise2_=ise21; ise2_<ise22; ise2_++) { 554 555 ise1=ise1_; 556 ise2=ise2_; 557 if(iter>1 && ise1==ise11+1 && ise2==ise21+1) continue ise2Loop; 558 559 //if(ise2==ise21) System.out.println(String.format("(%d, %d)",ise1, nTraces)); 560 561 562 if(iter==0 && (ise1>nse1-winSize*(nBestTrace-1) || 563 ise2>nse2-winSize*(nBestTrace-1))) continue ise2Loop; 564 565 if(mat[ise1][ise2]<0.0) continue ise2Loop; 566 if(mat[ise1][ise2]>rmsdThr) continue ise2Loop; 567 if (mat[ise1][ise2]>userRMSDMax) continue ise2Loop; 568 nTrace=0; 569 trace1[nTrace]=ise1; 570 trace2[nTrace]=ise2; 571 traceIndex[nTrace]=0; 572 traceIterLevel[nTrace]=0; 573 574 score0=mat[ise1][ise2]; 575 576 577 nTrace++; 578 boolean isTraceUp=true; 579 int traceIndex_=0; 580 581 traceLoop: 582 while(nTrace>0) { 583 584 kse1=trace1[nTrace-1]+winSize; 585 kse2=trace2[nTrace-1]+winSize; 586 587 //System.out.println("isTraceUp " + isTraceUp + " " + nTrace + " " + kse1 + " " + kse2); 588 589 while(true) { 590 if(kse1>nse1-winSize-1) break; 591 if(kse2>nse2-winSize-1) break; 592 if(mat[kse1][kse2]>=0.0) break; 593 kse1++; 594 kse2++; 595 } 596 597 598 traceIndex_=-1; 599 600 if(isTraceUp) { 601 602 int nBestExtTrace=nTrace; 603 double bestExtScore=100.0; 604 605 606 // extension of the alignment path 607 // condition 4, 5 608 itLoop: 609 for(int it=0; it<iterDepth; it++) { 610 611 jgap=(it+1)/2; 612 jdir=(it+1)%2; 613 614 if(jdir==0) { 615 mse1=kse1+jgap; 616 mse2=kse2; 617 } 618 else { 619 mse1=kse1; 620 mse2=kse2+jgap; 621 } 622 623 if(mse1>nse1-winSize-1) continue itLoop; 624 if(mse2>nse2-winSize-1) continue itLoop; 625 626 if(mat[mse1][mse2]<0.0) continue itLoop; 627 if(mat[mse1][mse2]>rmsdThr) continue itLoop; 628 if(mat[mse1][mse2]>userRMSDMax) continue itLoop; 629 630 nTraces++; 631 if(nTraces>tracesLimit) { 632 633 return; 634 } 635 636 score=0.0; 637 638 // if(!distAll) { 639 //System.out.println("getting score " + mse1 + " " + mse2 + " " + winSize + " " + jgap + " " + jdir + " " + it + " " + kse1 + " " + kse2); 640 score = getScoreFromDistanceMatrices(mse1,mse2,winSize); 641 //System.out.println("got score: " + score); 642 score1=score/(nTrace*winSize); 643 644 // } else { 645 // // all dist 646 // for(int itrace=0; itrace<nTrace; itrace++) { 647 // for(int is1=0; is1<winSize; is1++) 648 // for(int is2=0; is2<winSize; is2++) 649 // score+=Math.abs(dist1[trace1[itrace]+is1][mse1+is2]- 650 // dist2[trace2[itrace]+is1][mse2+is2]); 651 // } 652 // score1=score/(nTrace*winSize*winSize); 653 // } 654 655 656 //System.out.println("up: " + nTrace + " " + score + " " + score0 + " " + score1 + " " + winSize + " " + traceIndex_ + " " + it + " "); 657 if(score1>rmsdThrJoin) 658 continue itLoop; 659 if(score1>userRMSDMax) 660 continue itLoop; 661 score2=score1; 662 663 // this just got checked, no need to check again.. 664 //if(score2>rmsdThrJoin) 665 // continue itLoop; 666 667 if(nTrace>nBestExtTrace || (nTrace==nBestExtTrace && 668 score2<bestExtScore)) { 669 //System.out.println("setting traceindex to " + it + " " + score2); 670 bestExtScore=score2; 671 nBestExtTrace=nTrace; 672 traceIndex_=it; 673 traceScore[nTrace-1][traceIndex_]=score1; 674 } 675 676 } 677 } 678 679 if(traceIndex_!=-1) { 680 jgap=(traceIndex_+1)/2; 681 jdir=(traceIndex_+1)%2; 682 if(jdir==0) { 683 jse1=kse1+jgap; 684 jse2=kse2; 685 } 686 else { 687 jse1=kse1; 688 jse2=kse2+jgap; 689 } 690 691 if(iter==0){ 692 693 score1=(traceScore[nTrace-1][traceIndex_]*winSizeComb2*nTrace+ 694 mat[jse1][jse2]*winSizeComb1)/(winSizeComb2*nTrace+ 695 winSizeComb1); 696 697 score2 = getScore2(jse1, jse2, traceScore, traceIndex_, traceIndex, winSizeComb1, winSizeComb2, score0, score1); 698 699 if(score2>rmsdThrJoin) 700 traceIndex_=-1; 701 else if ( score2 > userRMSDMax) 702 traceIndex_=-1; 703 else { 704 traceScore[nTrace-1][traceIndex_]=score2; 705 706 traceTotalScore=score2; 707 } 708 709 } 710 else { 711 if(traceScoreMax>rmsdThrJoin && nBestTrace>=nBestTrace0) 712 traceIndex_=-1; 713 traceTotalScore=traceScoreMax; 714 } 715 } 716 717 //System.out.println("middle: " + nTrace + " " + score + " " + score0 + " " + score1 + " " + score2 + " " + traceIndex_); 718 719 if(traceIndex_==-1) { 720 //System.out.println("continue traceLoop " + nTrace); 721 //if(iterLevel==1) break; 722 nTrace--; 723 isTraceUp=false; 724 continue traceLoop; 725 } 726 else { 727 traceIterLevel[nTrace-1]++; 728 trace1[nTrace]=jse1; 729 trace2[nTrace]=jse2; 730 traceIndex[nTrace]=traceIndex_; 731 traceIterLevel[nTrace]=0; 732 nTrace++; 733 isTraceUp=true; 734 735 if(nTrace>nBestTrace || 736 (nTrace==nBestTrace && 737 bestTraceScore>traceTotalScore)) { 738 739 for(int itrace=0; itrace<nTrace; itrace++) { 740 bestTrace1[itrace]=trace1[itrace]; 741 bestTrace2[itrace]=trace2[itrace]; 742 } 743 bestTraceScore=traceTotalScore; 744 nBestTrace=nTrace; 745 } 746 747 if(iter==0) { 748 //System.out.println("doing iter0 " + newBestTrace + " " + traceTotalScore + " " + bestTracesMax); 749 newBestTrace = doIter0(newBestTrace,traceTotalScore, bestTracesMax); 750 751 752 } 753 } 754 } 755 } 756 } 757 758 if ( isPrint) { 759 System.out.println("fragment length: " + params.getWinSize()); 760 System.out.println("ntraces : " + nTraces ); 761 } 762 763 764 765 } 766 767// try { 768// Matrix m2 = new Matrix(traceScore).copy(); 769// JPanel panel = GuiWrapper.getScaleableMatrixPanel(m2); 770// JFrame frame = new JFrame(); 771// frame.addWindowListener(new WindowAdapter(){ 772// public void windowClosing(WindowEvent e){ 773// JFrame f = (JFrame) e.getSource(); 774// f.setVisible(false); 775// f.dispose(); 776// } 777// }); 778// 779// 780// frame.getContentPane().add(panel); 781// 782// frame.pack(); 783// frame.setVisible(true); 784// } catch (Exception e) { 785// e.printStackTrace(); 786// } 787 788 789 if ( params.isShowAFPRanges()){ 790 System.out.println("fragment length: " + params.getWinSize()); 791 System.out.println("ntraces : " + nTraces ); 792 793 } 794 795 } 796 797 protected double getScore2(int jse1, int jse2, double[][] traceScore, int traceIndex_,int[] traceIndex,int winSizeComb1, int winSizeComb2, double score0, double score1 ) { 798 799 800 801 /*double score2= 802 ((nTrace>1?traceScore[nTrace-2][traceIndex[nTrace-1]]:score0) 803 *a[nTrace-1]+score1*(a[nTrace]-a[nTrace-1]))/a[nTrace]; 804 */ 805 double val = 0; 806 if ( nTrace>1) 807 val =traceScore[nTrace-2][traceIndex[nTrace-1]]; 808 else 809 val = score0; 810 811 double score2 = (val * traceIndexContainer[nTrace-1]+score1*(traceIndexContainer[nTrace]-traceIndexContainer[nTrace-1]))/traceIndexContainer[nTrace]; 812 813 //System.out.println("check: score0 " + score0 + " score 1 " + score1 + " sc2: " + score2 + " val: " + val + " nTrace:" + nTrace+ " " + traceIndexContainer[nTrace-1] + " " + traceIndexContainer[nTrace-1] + " " + traceIndexContainer[nTrace] ); 814 815 return score2; 816 817 818 } 819 820 protected int doIter0(int newBestTrace, double traceTotalScore, double bestTracesMax) { 821 822 823 // only do the whole method if this criteria is fulfilled... 824 if(nTrace>bestTracesN[newBestTrace] || 825 (nTrace==bestTracesN[newBestTrace] && 826 bestTracesScores[newBestTrace]>traceTotalScore)) { 827 828 829 for(int itrace=0; itrace<nTrace; itrace++) { 830 bestTraces1[newBestTrace][itrace]=trace1[itrace]; 831 bestTraces2[newBestTrace][itrace]=trace2[itrace]; 832 bestTracesN[newBestTrace]=nTrace; 833 bestTracesScores[newBestTrace]=traceTotalScore; 834 //System.out.println("bestTracesScrores ["+newBestTrace+"]=" +traceTotalScore); 835 } 836 837 if(nTrace>nBestTrace) nBestTrace=nTrace; 838 839 if(nBestTraces<bestTracesMax) { 840 nBestTraces++; 841 newBestTrace++; 842 } 843 844 if(nBestTraces==bestTracesMax) { 845 //System.out.println("nBestTraces == bestTracesmax " + nBestTraces); 846 newBestTrace=0; 847 double scoreTmp=bestTracesScores[0]; 848 int nTraceTmp=bestTracesN[0]; 849 for(int ir=1; ir<nBestTraces; ir++) { 850 if(bestTracesN[ir]<nTraceTmp || 851 (bestTracesN[ir]==nTraceTmp && 852 scoreTmp<bestTracesScores[ir])) { 853 nTraceTmp=bestTracesN[ir]; 854 scoreTmp=bestTracesScores[ir]; 855 newBestTrace=ir; 856 //System.out.println("setting new bestTracesScore to " + ir + " " + scoreTmp); 857 } 858 } 859 } 860 } 861 862 //System.out.println("iter0 : " + newBestTrace + " " + bestTracesN[newBestTrace] + " " + traceTotalScore + " " + nTrace); 863 864 865 866 /* 867z=zStrAlign(winSize, nTrace, traceTotalScore, 868trace1[nTrace-1]-trace1[0]+trace2[nTrace-1]-trace2[0]- 8692*(nTrace-1)*winSize); 870if(z>bestTraceZScore) { 871for(int itrace=0; itrace<nTrace; itrace++) { 872bestTrace1[itrace]=trace1[itrace]; 873bestTrace2[itrace]=trace2[itrace]; 874} 875bestTraceZScore=z; 876bestTraceScore=*(traceScore[nTrace-2]+traceIndex_); 877nBestTrace=nTrace; 878} 879 */ 880 return newBestTrace; 881 882 } 883 884 885 protected double getScoreFromDistanceMatrices(int mse1, int mse2,int winSize) { 886 887 double score = 0; 888 // (winSize) "best" dist 889 890 // reduce sign. values to C code.. 6 digits.. 891 892 for(int itrace=0; itrace<nTrace; itrace++) { 893 score+= Math.abs(dist1[trace1[itrace]][mse1]- 894 dist2[trace2[itrace]][mse2]); 895 896 score+= Math.abs(dist1[trace1[itrace]+winSize-1] 897 [mse1+winSize-1]- 898 dist2[trace2[itrace]+winSize-1][mse2+winSize-1]); 899 900 for(int id=1; id<winSize-1; id++) 901 score+= Math.abs(dist1[trace1[itrace]+id][mse1+winSize-1-id]- 902 dist2[trace2[itrace]+id][mse2+winSize-1-id]); 903 904 } 905 906 return score; 907 } 908 909 public void nextStep( AFPChain afpChain, 910 Atom[] ca1, Atom[] ca2) throws StructureException{ 911 912 913 if(nBestTrace>0) { 914 checkBestTraces(afpChain,ca1,ca2); 915 } else { 916 noBestTrace(); 917 } 918 919 convertAfpChain(afpChain, ca1, ca2); 920 AFPAlignmentDisplay.getAlign(afpChain, ca1, ca2); 921 } 922 923 924 925 // this part is modified from the original CeCalculator 926 @SuppressWarnings("unused") 927 private void checkBestTraces( AFPChain afpChain, 928 Atom[] ca1, Atom[] ca2) throws StructureException{ 929 930 z=0.0; 931 932 933 int nGaps; 934 int winSize = params.getWinSize(); 935 int nse1 = ca1.length; 936 int nse2 = ca2.length; 937 int traceMaxSize=nse1<nse2?nse1:nse2; 938 int idir; 939 940 941 align_se1=new int [nse1+nse2]; 942 align_se2=new int [nse1+nse2]; 943 lcmp = 0; 944 945 // we now support alignment using any particular atoms.. 946 947 Atom[] strBuf1 = new Atom[traceMaxSize]; 948 Atom[] strBuf2 = new Atom[traceMaxSize]; 949 950 double rmsdNew; 951 952 953 954 // removing some loops that are run in orig CE 955 // and which did not do anything 956 if ( debug ){ 957 checkPrintRmsdNew(traceMaxSize, winSize, ca1, ca2); 958 } 959 960 double rmsd=100.0; 961 962 int iBestTrace=0; 963 964 for(int ir=0; ir<nBestTraces; ir++) { 965 if(bestTracesN[ir]!=nBestTrace) continue; 966 967 rmsdNew = getRMSDForBestTrace(ir, strBuf1, strBuf2, bestTracesN,bestTraces1, bestTrace2,winSize,ca1,ca2); 968 if ( isPrint) 969 System.out.println(String.format("%d %d %d %.2f", ir, bestTracesN[ir], nBestTrace, rmsdNew)); 970 971 if(rmsd>rmsdNew) { 972 iBestTrace=ir; 973 rmsd=rmsdNew; 974 //System.out.println(" iBestTrace:" + iBestTrace + " new rmsd = " + rmsd); 975 } 976 } 977 for(int it=0; it<bestTracesN[iBestTrace]; it++) { 978 bestTrace1[it]=bestTraces1[iBestTrace][it]; 979 bestTrace2[it]=bestTraces2[iBestTrace][it]; 980 } 981 982 //System.out.println("iBestTrace: "+iBestTrace+" = bestTracesScores " + bestTracesScores[iBestTrace]); 983 984 nBestTrace=bestTracesN[iBestTrace]; 985 986 bestTraceScore=bestTracesScores[iBestTrace]; 987 988 989 //printf("\nOptimizing gaps...\n"); 990 991 int[] traceLen=new int [traceMaxSize]; 992 bestTraceLen=new int [traceMaxSize]; 993 994 995 int strLen=0; 996 997 int jt; 998 strLen=0; 999 nGaps=0; 1000 nTrace=nBestTrace; 1001 1002 for(jt=0; jt<nBestTrace; jt++) { 1003 trace1[jt]=bestTrace1[jt]; 1004 trace2[jt]=bestTrace2[jt]; 1005 traceLen[jt]=winSize; 1006 1007 if(jt<nBestTrace-1) { 1008 nGaps+=bestTrace1[jt+1]-bestTrace1[jt]-winSize+ 1009 bestTrace2[jt+1]-bestTrace2[jt]-winSize; 1010 } 1011 } 1012 nBestTrace=0; 1013 for(int it=0; it<nTrace; ) { 1014 int cSize=traceLen[it]; 1015 for(jt=it+1; jt<nTrace; jt++) { 1016 if(trace1[jt]-trace1[jt-1]-traceLen[jt-1]!=0 || 1017 trace2[jt]-trace2[jt-1]-traceLen[jt-1]!=0) break; 1018 cSize+=traceLen[jt]; 1019 } 1020 bestTrace1[nBestTrace]=trace1[it]; 1021 bestTrace2[nBestTrace]=trace2[it]; 1022 bestTraceLen[nBestTrace]=cSize; 1023 nBestTrace++; 1024 strLen+=cSize; 1025 it=jt; 1026 } 1027 1028 1029 int is=0; 1030 for(jt=0; jt<nBestTrace; jt++) { 1031 for(int i=0; i<bestTraceLen[jt]; i++) { 1032 setStrBuf(strBuf1,is+i,ca1,bestTrace1[jt]+i ); 1033 setStrBuf(strBuf2,is+i,ca2,bestTrace2[jt]+i); 1034 } 1035 is+=bestTraceLen[jt]; 1036 } 1037 //sup_str(strBuf1, strBuf2, strLen, d_); 1038 1039 rmsd=calc_rmsd(strBuf1, strBuf2, strLen,true); 1040 1041 if ( isPrint) 1042 System.out.println("got first rmsd: " + rmsd); 1043 boolean isCopied=false; 1044 1045 outer_loop: 1046 for(int it=1; it<nBestTrace; it++) { 1047 1048 /* not needed... 1049 int igap; 1050 if(bestTrace1[it]-bestTrace1[it-1]-bestTraceLen[it-1]>0) igap=0; 1051 if(bestTrace2[it]-bestTrace2[it-1]-bestTraceLen[it-1]>0) igap=1; 1052 */ 1053 1054 1055 boolean wasBest=false; 1056 main_loop: 1057 for(idir=-1; idir<=1; idir+=2) { 1058 if(wasBest) break; 1059 1060 inner_loop: 1061 for(int idep=1; idep<=winSize/2; idep++) { 1062 1063 // isCopied indicates that bestTrace has changed and needs to be re-copied 1064 if(!isCopied) 1065 for(jt=0; jt<nBestTrace; jt++) { 1066 trace1[jt]=bestTrace1[jt]; 1067 trace2[jt]=bestTrace2[jt]; 1068 traceLen[jt]=bestTraceLen[jt]; 1069 } 1070 isCopied=false; 1071 1072 // Move an atom from the previous trace to the current on, or vice versa 1073 traceLen[it-1]+=idir; 1074 traceLen[it]-=idir; 1075 trace1[it]+=idir; 1076 trace2[it]+=idir; 1077 1078 // Copy atoms from the current trace into strBuf 1079 is=0; 1080 for(jt=0; jt<nBestTrace; jt++) { 1081 for(int i=0; i<traceLen[jt]; i++) { 1082 if(ca1[trace1[jt]+i].getX()>1e10 || ca2[trace2[jt]+i].getX()>1e10) 1083 continue main_loop; 1084 strBuf1[is+i]=ca1[trace1[jt]+i]; 1085 strBuf2[is+i]=ca2[trace2[jt]+i]; 1086 } 1087 is+=traceLen[jt]; 1088 } 1089 // Check new RMSD 1090 //sup_str(strBuf1, strBuf2, strLen, d_); 1091 rmsdNew=calc_rmsd(strBuf1, strBuf2, strLen, true); 1092 //System.out.println(String.format("step %d %d %d %.2f old: %.2f", it, idir, idep, rmsdNew, rmsd)); 1093 1094 // Update best trace if RMSD improved 1095 if(rmsdNew<rmsd) { 1096 1097 for(jt=0; jt<nBestTrace; jt++) { 1098 bestTrace1[jt] = trace1[jt]; 1099 bestTrace2[jt] = trace2[jt]; 1100 bestTraceLen[jt]= traceLen[jt]; 1101 } 1102 isCopied=true; 1103 wasBest=true; 1104 rmsd=rmsdNew; 1105 continue inner_loop; 1106 } 1107 // AP 1108 //bad_ca: break; 1109 continue main_loop; 1110 } 1111 } 1112 } 1113 rmsdNew=calc_rmsd(strBuf1, strBuf2, strLen,true); 1114 if ( isPrint) 1115 System.out.println("rmsdNew: " + rmsdNew + " rmsd " + rmsd); 1116 afpChain.setTotalRmsdIni(rmsdNew); 1117 afpChain.setTotalLenIni(strBuf1.length); 1118 1119 1120 nAtom = strLen; 1121 1122 //System.out.println("zStrAlign: " + winSize + " strLen " + strLen + " s/w " + (strLen/winSize) + " " + bestTraceScore + " " + nGaps); 1123 z=zStrAlign(winSize, strLen/winSize, bestTraceScore, nGaps); 1124 1125 if(params.isShowAFPRanges()) { 1126 System.out.println("win size: " + winSize + " strLen/winSize: " + strLen/winSize + " best trace score: " + String.format("%.2f",bestTraceScore) + " nr gaps: " + nGaps + " nr residues: " + nAtom); 1127 1128 System.out.println(String.format("size=%d rmsd=%.2f z=%.1f gaps=%d(%.1f%%) comb=%d", 1129 nAtom, rmsd, z, nGaps, nGaps*100.0/nAtom, 1130 nTraces)); 1131 1132 System.out.println("Best Trace, before optimization"); 1133 for(int k=0; k<nBestTrace; k++) 1134 System.out.println(String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, 1135 bestTraceLen[k])); 1136 1137 } 1138 1139 // start to convert CE internal datastructure to generic AFPChain one... 1140 List<AFP> afpSet = new ArrayList<AFP>(); 1141 for (int afp=0;afp<nBestTrace;afp++){ 1142 // fill in data from nBestTrace into AFP 1143 1144 AFP afpI = new AFP(); 1145 1146 afpI.setFragLen(bestTraceLen[afp]); 1147 afpI.setP1(bestTrace1[afp]+1); 1148 afpI.setP2(bestTrace2[afp]+1); 1149 1150 afpSet.add( afpI); 1151 } 1152 1153 afpChain.setAfpSet(afpSet); 1154 1155 1156 1157 1158 //System.out.println("z:"+z + " zThr" + zThr+ " bestTraceScore " + bestTraceScore + " " + nGaps ); 1159 if(z>=zThr) { 1160 nGaps = optimizeSuperposition(afpChain,nse1, nse2, strLen, rmsd, ca1, ca2,nGaps,strBuf1,strBuf2); 1161 // if(isPrint) { 1162 // /* 1163 // FILE *f=fopen("homologies", "a"); 1164 // fprintf(f, "%s(%d) %s(%d) %3d %4.1f %4.1f %d(%d) ", 1165 // name1, nse1, name2, nse2, nAtom, rmsd, z, 1166 // nGaps, nGaps*100/nAtom); 1167 // for(int k=0; k<nBestTrace; k++) 1168 // fprintf(f, "(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, 1169 // bestTraceLen[k]); 1170 // fprintf(f, "\n"); 1171 // fclose(f); 1172 // */ 1173 // } 1174 } 1175 else { 1176 int lali_x_ = 0; 1177 for(int k=0; k<nBestTrace; k++) { 1178 for(int l=0; l<bestTraceLen[k]; l++) { 1179 align_se1[lcmp+l]=bestTrace1[k]+l; 1180 align_se2[lcmp+l]=bestTrace2[k]+l; 1181 } 1182 lali_x_+=bestTraceLen[k]; 1183 if(k<nBestTrace-1) { 1184 if(bestTrace1[k]+bestTraceLen[k]!=bestTrace1[k+1]) 1185 for(int l=bestTrace1[k]+bestTraceLen[k]; l<bestTrace1[k+1]; l++) { 1186 align_se1[lcmp]=l; 1187 align_se2[lcmp]=-1; 1188 lcmp++; 1189 } 1190 if(bestTrace2[k]+bestTraceLen[k]!=bestTrace2[k+1]) 1191 for(int l=bestTrace2[k]+bestTraceLen[k]; l<bestTrace2[k+1]; l++) { 1192 align_se1[lcmp]=-1; 1193 align_se2[lcmp]=l; 1194 lcmp++; 1195 } 1196 } 1197 } 1198 nAtom=lali_x_; 1199 } 1200 1201 timeEnd = System.currentTimeMillis(); 1202 long time_q=(timeEnd-timeStart); 1203 1204 double gapsP = ( nGaps*100.0/nAtom) ; 1205 if(isPrint) { 1206 String msg = String.format("Alignment length = %d Rmsd = %.2fA Z-Score = %.1f Gaps = %d(%.1f%%)",nAtom,rmsd,z,nGaps, gapsP); 1207 System.out.println(msg + " CPU = " + time_q); 1208 } 1209 1210 // if ( params.isShowAFPRanges()){ 1211 1212 // this is actually the final alignment... 1213 // System.out.println("Best Trace: (index1,index2,len)"); 1214 // for(int k=0; k<nBestTrace; k++) 1215 // System.out.println( 1216 // String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1, bestTraceLen[k])); 1217 // 1218 // 1219 // 1220 // } 1221 1222 afpChain.setCalculationTime(time_q); 1223 afpChain.setGapLen(nGaps); 1224 1225 int[] optLen = new int[]{nAtom}; 1226 afpChain.setOptLen(optLen); 1227 afpChain.setOptLength(nAtom); 1228 afpChain.setAlnLength(lcmp); 1229 1230 afpChain.setProbability(z); 1231 1232 1233 } 1234 1235 /** set the Atoms for a particular residue position. 1236 * Requires that atom.getParent returns the correct group! 1237 * take care during cloning of atoms. Best to use StructureTools.cloneCaAtoms(); 1238 * 1239 * @param strBuf 1240 * @param i 1241 * @param ca 1242 * @param j 1243 */ 1244 private void setStrBuf(Atom[] strBuf, int i, Atom[] ca, int j) { 1245 // TODO Auto-generated method stub 1246 //TODO 1247 Group parent = ca[j].getGroup(); 1248 int pos = 0; 1249 String atomName = ca[j].getName(); 1250 1251 Atom a = null; 1252 1253 a= parent.getAtom(atomName); 1254 if ( a != null){ 1255 strBuf[i]=a; 1256 } else { 1257 // probably a GLY and no CB was found... 1258 //e.printStackTrace(); 1259 } 1260 strBuf[i+pos] = a; 1261 pos++; 1262 1263 1264 1265 } 1266 1267 // TODO: consider all requested Atoms? 1268 private double getRMSDForBestTrace(int ir, Atom[] strBuf1, Atom[] strBuf2, 1269 int[] bestTracesN2, int[][] bestTraces12, int[] bestTrace22, 1270 int winSize,Atom[] ca1, Atom[] ca2 ) throws StructureException { 1271 int is=0; 1272 for(int jt=0; jt<bestTracesN[ir]; jt++) { 1273 for(int i=0; i<winSize; i++) { 1274 1275 setStrBuf(strBuf1, is+i, ca1, bestTraces1[ir][jt]+i); 1276 setStrBuf(strBuf2, is+i, ca2, bestTraces2[ir][jt]+i); 1277 } 1278 is+=winSize; 1279 } 1280 //sup_str(strBuf1, strBuf2, bestTracesN[ir]*winSize, d_); 1281 double rmsdNew=calc_rmsd(strBuf1, strBuf2, bestTracesN[ir]*winSize, true); 1282 return rmsdNew; 1283 1284 } 1285 1286 1287 1288 1289 1290 1291 /** calc initial RMSD for bestTrace1 in debug only 1292 * 1293 */ 1294 private void checkPrintRmsdNew(int traceMaxSize, int winSize, Atom[] ca1, Atom[] ca2) throws StructureException{ 1295 1296 int is = 0; 1297 // calc initial RMSD for bestTrace1 1298 Atom[] strBuf1 = new Atom[traceMaxSize]; 1299 Atom[] strBuf2 = new Atom[traceMaxSize]; 1300 for(int jt=0; jt<nBestTrace; jt++) { 1301 for(int i=0; i<winSize; i++) { 1302 setStrBuf(strBuf1, is+i, ca1, bestTrace1[jt]+i ); 1303 setStrBuf(strBuf2, is+i, ca2, bestTrace2[jt]+i ); 1304 } 1305 is+=winSize; 1306 } 1307 1308 //sup_str(strBuf1, strBuf2, nBestTrace*winSize, d_); 1309 double rmsdNew=calc_rmsd(strBuf1, strBuf2, nBestTrace*winSize, true); 1310 //afpChain.setTotalRmsdIni(rmsdNew); 1311 1312 1313 if ( isPrint){ 1314 System.out.println("rmsdNew after trace: " +rmsdNew); 1315 1316 for(int k=0; k<nBestTrace; k++) 1317 System.out.println(String.format("(%d,%d,%d) ", bestTrace1[k]+1, bestTrace2[k]+1,8)); 1318 } 1319 1320 if ( isPrint){ 1321 System.out.println("best traces: " + nBestTraces); 1322 } 1323 1324 1325 } 1326 1327 1328 1329 1330 1331 1332 private static char getOneLetter(Group g){ 1333 1334 if (g==null) return StructureTools.UNKNOWN_GROUP_LABEL; 1335 1336 return StructureTools.get1LetterCode(g.getPDBName()); 1337 } 1338 1339 1340 1341 private int optimizeSuperposition(AFPChain afpChain, int nse1, int nse2, int strLen, double rmsd, Atom[] ca1, Atom[] ca2,int nGaps, 1342 Atom[] strBuf1, Atom[] strBuf2 ) throws StructureException { 1343 1344 //System.out.println("optimizing Superimposition..."); 1345 1346 //nAtom=strLen; 1347 // optimization on superposition 1348 Atom[] ca3=new Atom[nse2]; 1349 1350 1351 double rmsdLen = 0.0; 1352 1353 // this flag tests if the RMSDLen has been assigned. 1354 // this is to enforce that the alignment ends up 1355 // smaller than 95% of the original alignment. 1356 // +/- 1357 boolean isRmsdLenAssigned=false; 1358 int nAtomPrev=-1; 1359 1360 double oRmsdThr = params.getORmsdThr(); 1361 1362 double distanceIncrement = params.getDistanceIncrement(); 1363 double maxUserRMSD = params.getMaxOptRMSD(); 1364 nAtom=0; 1365 int counter = -1; 1366 1367 int maxNrIterations = params.getMaxNrIterationsForOptimization(); 1368 while((nAtom<strLen*0.95 || 1369 (isRmsdLenAssigned && rmsd<rmsdLen*1.1 && nAtomPrev!=nAtom)) && ( counter< maxNrIterations)) { 1370 1371 counter++; 1372 if ( debug) 1373 System.out.println("nAtom: " + nAtom + " " + nAtomPrev + " " + rmsdLen + " " + isRmsdLenAssigned + " strLen:" + strLen); 1374 nAtomPrev=nAtom; 1375 oRmsdThr += distanceIncrement; 1376 1377 rot_mol(ca2, ca3, nse2, r,t); 1378 1379 for(int ise1=0; ise1<nse1; ise1++) { 1380 for(int ise2=0; ise2<nse2; ise2++) { 1381 1382 //mat[ise1][ise2]=-0.001; 1383 1384 // this needs to be a parameter... 1385 1386 1387 double dist = getDistanceWithSidechain(ca1[ise1], ca3[ise2]); 1388 mat[ise1][ise2] = oRmsdThr - dist; 1389 1390 //double distold = Calc.getDistance(ca1[ise1],ca3[ise2]); 1391 //double scoreOld = oRmsdThr - distold ; 1392 //mat[ise1][ise2] = scoreOld; 1393 //mat[ise1][ise2] = oRmsdThr - Calc.getDistance(ca1[ise1],ca3[ise2]); 1394 1395 //if ( counter == 0 && ise1 == ise2) { 1396 1397 // System.out.println("mat[" + ise1 + "][" + ise2 + "] " + mat[ise1][ise2] + " scoreOld:" + scoreOld + " oRmsdThr: " + oRmsdThr +" dist: " + dist + " distold:" + distold ); 1398 // } 1399 1400 1401 } 1402 } 1403 1404 if ( params.getScoringStrategy() == CeParameters.ScoringStrategy.SEQUENCE_CONSERVATION){ 1405 mat = updateMatrixWithSequenceConservation(mat,ca1,ca2, params); 1406 } 1407 1408 mat = notifyMatrixListener(mat); 1409 1410 double gapOpen = params.getGapOpen(); 1411 double gapExtension = params.getGapExtension(); 1412 1413 double score = dpAlign( nse1, nse2, gapOpen , gapExtension , false, false); 1414 1415 if (debug) 1416 System.out.println("iter: "+ counter + " score:" + score + " " + " nAtomPrev: " + nAtomPrev + " nAtom:" + nAtom + " oRmsdThr: " + oRmsdThr); 1417 1418 afpChain.setAlignScore(score); 1419 1420 1421 nAtom=0; nGaps=0; 1422 for(int ia=0; ia<lcmp; ia++) { 1423 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1424 1425 strBuf1[nAtom]=ca1[align_se1[ia]]; 1426 strBuf2[nAtom]=ca2[align_se2[ia]]; 1427 1428 nAtom++; 1429 1430 } 1431 else { 1432 nGaps++; 1433 } 1434 } 1435 1436 1437 1438 if(nAtom<4) continue; 1439 1440 //sup_str(strBuf1, strBuf2, nAtom, _d); 1441 // here we don't store the rotation matrix for the user! 1442 rmsd= calc_rmsd(strBuf1, strBuf2, nAtom,false); 1443 if ( isPrint ) 1444 System.out.println("iter: " + counter + " nAtom " + nAtom + " rmsd: " + rmsd); 1445 //afpChain.setTotalRmsdOpt(rmsd); 1446 //System.out.println("rmsd: " + rmsd); 1447 1448 if(!(nAtom<strLen*0.95) && (!isRmsdLenAssigned)) { 1449 rmsdLen=rmsd; 1450 isRmsdLenAssigned=true; 1451 } 1452 //System.out.println(String.format("nAtom %d %d rmsd %.1f", nAtom, nAtomPrev, rmsd)); 1453 1454 1455 afpChain.setBlockRmsd(new double[]{rmsd}); 1456 afpChain.setOptRmsd(new double[]{rmsd}); 1457 afpChain.setTotalRmsdOpt(rmsd); 1458 afpChain.setChainRmsd(rmsd); 1459 1460 if ( rmsd >= maxUserRMSD) { 1461 break; 1462 } 1463 1464 } 1465 1466 1467 1468 //System.out.println("done optimizing"); 1469 /* 1470 nAtom=0; nGaps=0; 1471 for(int ia=0; ia<lcmp; ia++) 1472 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1473 if(ca1[align_se1[ia]].X<1e10 && ca2[align_se2[ia]].X<1e10) { 1474 strBuf1[nAtom]=ca1[align_se1[ia]]; 1475 strBuf2[nAtom]=ca2[align_se2[ia]]; 1476 nAtom++; 1477 } 1478 } 1479 else { 1480 nGaps++; 1481 } 1482 1483 sup_str(strBuf1, strBuf2, nAtom, _d); 1484 rmsd=calc_rmsd(strBuf1, strBuf2, nAtom, _d); 1485 */ 1486 nBestTrace=0; 1487 boolean newBestTrace=true; 1488 for(int ia=0; ia<lcmp; ia++) { 1489 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 1490 //System.out.println(" " +align_se1[ia] + " " + align_se2[ia]); 1491 1492 if(newBestTrace) { 1493 bestTrace1[nBestTrace]=align_se1[ia]; 1494 bestTrace2[nBestTrace]=align_se2[ia]; 1495 bestTraceLen[nBestTrace]=0; 1496 newBestTrace=false; 1497 nBestTrace++; 1498 } 1499 bestTraceLen[nBestTrace-1]++; 1500 1501 } 1502 else { 1503 newBestTrace=true; 1504 } 1505 } 1506 1507 return nGaps; 1508 1509 // end of optimization on superposition 1510 1511 } 1512 1513 /** Modifies an alignment matrix by favoring the alignment of similar and identical amino acids and penalizing the alignment of unrelated ones. 1514 * 1515 * @param max alignment matrix 1516 * @param ca1 Atoms for protein 1 1517 * @param ca2 Atoms for Protein 2 1518 * @param params alignment parameters 1519 * @return modified alignment matrix 1520 */ 1521 public static double[][] updateMatrixWithSequenceConservation(double[][] max, Atom[] ca1, Atom[] ca2, CeParameters params) { 1522 Matrix origM = new Matrix(max); 1523 1524 SubstitutionMatrix<AminoAcidCompound> substMatrix = 1525 params.getSubstitutionMatrix(); 1526 1527 int internalScale = 1; 1528 if ( substMatrix instanceof ScaledSubstitutionMatrix) { 1529 ScaledSubstitutionMatrix scaledMatrix = (ScaledSubstitutionMatrix) substMatrix; 1530 internalScale = scaledMatrix.getScale(); 1531 } 1532 1533 1534 AminoAcidCompoundSet set = AminoAcidCompoundSet.getAminoAcidCompoundSet(); 1535 1536 for (int i = 0 ; i < origM.getRowDimension() ; i++){ 1537 for ( int j =0; j < origM.getColumnDimension() ; j ++ ) { 1538 double val = origM.get(i,j); 1539 Atom a1 = ca1[i]; 1540 Atom a2 = ca2[j]; 1541 1542 AminoAcidCompound ac1 = 1543 set.getCompoundForString(a1.getGroup().getChemComp().getOne_letter_code()); 1544 AminoAcidCompound ac2 = 1545 set.getCompoundForString(a2.getGroup().getChemComp().getOne_letter_code()); 1546 1547 1548 if ( ac1 == null || ac2 == null) 1549 continue; 1550 1551 short aaScore = substMatrix.getValue(ac1,ac2); 1552 1553 double weightedScore = (aaScore / internalScale) * params.getSeqWeight(); 1554 1555 1556 val += weightedScore; 1557 origM.set(i,j,val); 1558 1559 } 1560 } 1561 max = origM.getArray(); 1562 1563 //SymmetryTools.showMatrix((Matrix)origM.clone(), "in optimizer " + loopCount ); 1564 //SymmetryTools.showMatrix(origM, "iteration matrix " + loopCount + " after"); 1565 1566 return max; 1567 } 1568 1569 private double[][] notifyMatrixListener(double[][] mat2) { 1570 for (MatrixListener li : matrixListeners) { 1571 mat2 = li.matrixInOptimizer(mat2); 1572 } 1573 return mat2; 1574 } 1575 1576 private boolean[][] notifyBreakFlagListener(boolean[][] brkFlag){ 1577 for (MatrixListener li : matrixListeners) { 1578 brkFlag = li.initializeBreakFlag(brkFlag); 1579 } 1580 return brkFlag; 1581 } 1582 1583 public void addMatrixListener(MatrixListener li){ 1584 matrixListeners.add(li); 1585 } 1586 1587 1588 /** 1589 * On input, mat[i][j] should give the score for aligning positions i and j. 1590 * On output, mat[i][j] gives the maximum score possible for aligning 1..i 1591 * of protein 1 with 1..j of protein 2. 1592 * 1593 * @param nSeq1 The length of protein 1 (mat.length) 1594 * @param nSeq2 The length of protein 2 (mat[0].length) 1595 * @param gapI gap initiation penalty 1596 * @param gapE gap extension penalty 1597 * @param isGlobal1 The alignment is global for protein 1 1598 * @param isGlobal2 The alignment is global for protein 2 1599 * @return The maximum score 1600 */ 1601 private double dpAlign(int nSeq1, int nSeq2, double gapI, double gapE, 1602 boolean isGlobal1, boolean isGlobal2) { 1603 1604 // isGlobal1,isGlobal2 are always false... 1605 1606 int i, j, is, js, iMax, jMax, k; 1607 boolean ge=(gapE!=0.0?true:false); 1608 double sum, sum_ret, sum_brk; 1609 1610 boolean[][] brk_flg=new boolean [nSeq1][nSeq2]; 1611 for(i=0; i<nSeq1; i++) brk_flg[i]=new boolean [nSeq2]; 1612 1613 brk_flg = notifyBreakFlagListener(brk_flg); 1614 1615 // ge = true here... 1616 /* 1617 for(i=0; i<nSeq1; i++) 1618 { 1619 printf("\n"); 1620 for(j=0; j<nSeq2; j++) 1621 { 1622 printf("%4d", (int)(*(mat[i]+j)*10)); 1623 } 1624 } 1625 printf("\n\n\n"); 1626 */ 1627 if(!ge) 1628 { 1629 for(i=nSeq1-1; i>=0; i--) 1630 for(j=nSeq2-1; j>=0; j--) 1631 { 1632 brk_flg[i][j]=false; 1633 if(j<nSeq2-1 && i<nSeq1-1) 1634 { 1635 sum=mat[i+1][j+1]; 1636 } 1637 else 1638 { 1639 sum=0.0; 1640 if((isGlobal1 && i!=nSeq1-1) || (isGlobal2 && j!=nSeq2-1)) 1641 sum=-gapI; 1642 } 1643 if(j+1<nSeq2) 1644 for(k=i+2; k<nSeq1; k++) 1645 { 1646 if(mat[k][j+1]-gapI>sum) 1647 sum=mat[k][j+1]-gapI; 1648 } 1649 if(i+1<nSeq1) 1650 for(k=j+2; k<nSeq2; k++) 1651 { 1652 if(mat[i+1][k]-gapI>sum) 1653 sum=mat[i+1][k]-gapI; 1654 } 1655 sum+=mat[i][j]; 1656 sum_brk=(isGlobal1?-gapI:0.0)+(isGlobal2?-gapI:0.0); 1657 if(sum<sum_brk) 1658 { 1659 sum=sum_brk; 1660 brk_flg[i][j]=true; 1661 //System.out.println("break at: " + i + " " + j); 1662 } 1663 mat[i][j]=sum; 1664 } 1665 } 1666 else 1667 { 1668 for(i=nSeq1-1; i>=0; i--) 1669 for(j=nSeq2-1; j>=0; j--) 1670 { 1671 brk_flg[i][j]=false; 1672 if(j<nSeq2-1 && i<nSeq1-1) 1673 { 1674 sum=mat[i+1][j+1]; 1675 } 1676 else 1677 { 1678 sum=0.0; 1679 if(isGlobal1 && i!=nSeq1-1) sum=-gapI-gapE*(nSeq1-i-1); 1680 if(isGlobal2 && j!=nSeq2-1) sum=-gapI-gapE*(nSeq2-j-1); 1681 } 1682 if(j+1<nSeq2) 1683 for(k=i+2; k<nSeq1; k++) 1684 if(mat[k][j+1]-gapI-gapE*(k-i-1)>sum) 1685 sum=mat[k][j+1]-gapI-gapE*(k-i-1); 1686 if(i+1<nSeq1) 1687 for(k=j+2; k<nSeq2; k++) 1688 if(mat[i+1][k]-gapI-gapE*(k-j-1)>sum) 1689 sum=mat[i+1][k]-gapI-gapE*(k-j-1); 1690 sum+=mat[i][j]; 1691 sum_brk=(isGlobal1?(-gapI-gapE*(nSeq1-1-i)):0.0)+(isGlobal2?(-gapI-gapE*(nSeq2-1-j)):0.0); 1692 if(sum<sum_brk) 1693 { 1694 sum=sum_brk; 1695 brk_flg[i][j]=true; 1696 } 1697 mat[i][j]=sum; 1698 } 1699 } 1700 1701 // if (debug ){ 1702 // ScaleableMatrixPanel smp = new ScaleableMatrixPanel(); 1703 // JFrame frame = new JFrame("CE alignment matrix in dpAlign " ); 1704 // frame.addWindowListener(new WindowAdapter(){ 1705 // public void windowClosing(WindowEvent e){ 1706 // JFrame f = (JFrame) e.getSource(); 1707 // f.setVisible(false); 1708 // f.dispose(); 1709 // } 1710 // 1711 // }); 1712 // 1713 // smp.getMatrixPanel().setScalevalue(100); 1714 // Matrix mx = (Matrix) new Matrix(mat).clone(); 1715 // smp.setMatrix(mx); 1716 // 1717 // frame.getContentPane().add(smp); 1718 // 1719 // frame.pack(); 1720 // frame.setVisible(true); 1721 // } 1722 1723 /* 1724 for(i=0; i<nSeq1; i++) 1725 { 1726 printf("\n"); 1727 for(j=0; j<nSeq2; j++) 1728 { 1729 printf("%4d", (int)(*(mat[i]+j)*10)); 1730 } 1731 } 1732 printf("\n\n\n"); 1733 for(i=0; i<nSeq1; i++) 1734 { 1735 printf("\n"); 1736 for(j=0; j<nSeq2; j++) 1737 { 1738 printf("%4d", (int)(*(brk_flg[i]+j))); 1739 } 1740 } 1741 // exit(0); 1742 */ 1743 1744 is=0; js=0; lcmp=0; 1745 // no nc-end penalty - begin 1746 sum_ret=mat[0][0]; 1747 1748 // look for the highest score in mat[i][j] 1749 for(i=0; i<nSeq1; i++) 1750 for(j=0; j<nSeq2; j++) 1751 { 1752 if(i==0 && j==0) continue; 1753 sum=mat[i][j]; 1754 if(isGlobal1) sum+=-gapI-gapE*i; 1755 if(isGlobal2) sum+=-gapI-gapE*j; 1756 if(sum>sum_ret) 1757 { 1758 sum_ret=sum; 1759 is=i; js=j; 1760 } 1761 } 1762 1763 //System.out.println("start at " + is + " " + js); 1764 //for(k=0; k<is; k++) align1[k]=-1; 1765 //for(k=0; k<js; k++) align2[k]=-1; 1766 // no nc-end penalty - end 1767 1768 for(i=is, j=js; i<nSeq1 && j<nSeq2; i++, j++) 1769 { 1770 iMax=i; jMax=j; 1771 sum=mat[i][j]; 1772 if(!ge) 1773 { 1774 for(k=i+1; k<nSeq1; k++) 1775 if(mat[k][j]-gapI>sum) 1776 { 1777 iMax=k; jMax=j; 1778 sum=mat[k][j]-gapI; 1779 } 1780 1781 for(k=j+1; k<nSeq2; k++) 1782 if(mat[i][k]-gapI>sum) 1783 { 1784 iMax=i; jMax=k; 1785 sum=mat[i][k]-gapI; 1786 } 1787 } 1788 else 1789 { 1790 for(k=i+1; k<nSeq1; k++) 1791 if(mat[k][j]-gapI-gapE*(k-i)>sum) 1792 { 1793 //System.out.println("gap1 " + k + " " + j + " " + sum + "<" +(mat[k][j]-gapI-gapE*(k-i))); 1794 iMax=k; jMax=j; 1795 sum=mat[k][j]-gapI-gapE*(k-i); 1796 } 1797 1798 for(k=j+1; k<nSeq2; k++) 1799 if(mat[i][k]-gapI-gapE*(k-j)>sum) 1800 { 1801 //System.out.println("gap2 " + i + " " + k + " " + sum + "<"+ (mat[i][k]-gapI-gapE*(k-j))); 1802 iMax=i; jMax=k; 1803 sum=mat[i][k]-gapI-gapE*(k-j); 1804 } 1805 } 1806 1807 //if ( i != iMax || j != jMax ) 1808 // System.out.println("FOUND GAP AT: " + i+ " " + iMax + " " + j + " " + jMax); 1809 1810 //System.out.println(" iMax " + iMax + " jMax " + jMax); 1811 // set the gap positions: 1812 //printf("%d %d\n", iMax, jMax); 1813 1814 1815 for(k=i; k<iMax; k++, i++) { 1816 align_se1[lcmp]=k; 1817 align_se2[lcmp]=-1; 1818 1819 1820 lcmp++; 1821 } 1822 1823 for(k=j; k<jMax; k++, j++) { 1824 align_se1[lcmp]=-1; 1825 align_se2[lcmp]=k; 1826 1827 1828 lcmp++; 1829 } 1830 1831 1832 align_se1[lcmp]=iMax; 1833 align_se2[lcmp]=jMax; 1834 lcmp++; 1835 1836 if(brk_flg[i][j]) { 1837 //System.out.println("hit break flag at: " + i + " " + j + " sum " + sum_ret + " lcmp " + lcmp); 1838 break; 1839 1840 } 1841 } 1842 1843 1844 return sum_ret; 1845 } 1846 1847 1848 1849 1850 private void rot_mol(Atom[] caA, Atom[] caB, int nse2, Matrix m , Atom shift) throws StructureException{ 1851 1852 1853 1854 for(int l=0; l<nse2; l++) { 1855 Atom a = caA[l]; 1856 Group g = (Group)a.getGroup().clone(); 1857 //Group g = (Group)a.getParent(); 1858 1859 Calc.rotate( g, m); 1860 Calc.shift( g, shift); 1861 caB[l] = g.getAtom(a.getName()); 1862 } 1863 } 1864 1865 //void rot_mol(XYZ *molA, XYZ *molB, int nAtom, double d_[20] ) { 1866 // double dx, dy, dz; 1867 // for(int l=0; l<nAtom; l++) { 1868 // if(molA[l].X<1e10) { 1869 // dx=molA[l].X; 1870 // dy=molA[l].Y; 1871 // dz=molA[l].Z; 1872 // molB[l].X=dx*d_[0]+dy*d_[1]+dz*d_[2]+d_[9]; 1873 // molB[l].Y=dx*d_[3]+dy*d_[4]+dz*d_[5]+d_[10]; 1874 // molB[l].Z=dx*d_[6]+dy*d_[7]+dz*d_[8]+d_[11]; 1875 // } 1876 // else { 1877 // molB[l]=molA[l]; 1878 // } 1879 // } 1880 // } 1881 // 1882 1883 1884 /** superimpose and get rmsd 1885 * 1886 * @param pro1 1887 * @param pro2 1888 * @param strLen 1889 * @param storeTransform 1890 * @param show Ignored. Formerly displayed the superposition with jmol. 1891 * @return RMSD 1892 * @throws StructureException 1893 * @deprecated Use {@link #calc_rmsd(Atom[],Atom[],int,boolean)} instead 1894 */ 1895 @Deprecated 1896 public double calc_rmsd(Atom[] pro1, Atom[] pro2, int strLen, boolean storeTransform, boolean show) throws StructureException { 1897 return calc_rmsd(pro1, pro2, strLen, storeTransform); 1898 } 1899 1900 /** superimpose and get rmsd 1901 * 1902 * @param pro1 1903 * @param pro2 1904 * @param strLen Number of atoms from pro1 and pro2 to use 1905 * @param storeTransform Store rotation and shift matrices locally 1906 * @return RMSD 1907 * @throws StructureException 1908 */ 1909 public double calc_rmsd(Atom[] pro1, Atom[] pro2, int strLen, boolean storeTransform) throws StructureException { 1910 1911 Atom[] cod1 = getAtoms(pro1, strLen,false); 1912 Atom[] cod2 = getAtoms(pro2, strLen,true); 1913 1914 assert(cod1.length == cod2.length); 1915 SVDSuperimposer svd = new SVDSuperimposer(cod1, cod2); 1916 1917 Matrix matrix = svd.getRotation(); 1918 Atom shift = svd.getTranslation(); 1919 1920 if ( storeTransform) { 1921 r=matrix; 1922 t = shift; 1923 } 1924 for (Atom a : cod2){ 1925 Calc.rotate(a.getGroup(), matrix); 1926 Calc.shift(a.getGroup(), shift); 1927 } 1928 return SVDSuperimposer.getRMS(cod1, cod2); 1929 1930 } 1931 1932 /** 1933 * Copies the first length atoms from the input array 1934 * @param ca The array to copy 1935 * @param length the number of atoms to copy 1936 * @param clone If true, preform a deep copy, cloning the underlying Groups 1937 * @return An array with the first length items of ca, possibly cloning the Atoms. 1938 * @throws StructureException 1939 */ 1940 private Atom[] getAtoms(Atom[] ca, int length, boolean clone) throws StructureException{ 1941 1942 List<Atom> atoms = new ArrayList<Atom>(); 1943 for ( int i = 0 ; i < length ; i++){ 1944 1945 Atom a; 1946 if ( clone ){ 1947 Group g = (Group)ca[i].getGroup().clone(); 1948 a = g.getAtom(ca[i].getName()); 1949 } 1950 else { 1951 a = ca[i]; 1952 } 1953 atoms.add(a); 1954 } 1955 return atoms.toArray(new Atom[atoms.size()]); 1956 } 1957 1958 1959 1960 private void noBestTrace(){ 1961 1962 if(isPrint) { 1963 timeEnd = System.currentTimeMillis(); 1964 long time_q=(timeEnd-timeStart); 1965 1966 String msg = String.format("size=0 time=%d comb=%d\n", (int)(time_q), nTraces); 1967 System.out.println(msg); 1968 } 1969 } 1970 1971 1972 1973 private double zToP(double z) { 1974 int ind=(int)(z/0.1); 1975 if(ind<0) ind=0; 1976 if(ind>149) ind=149; 1977 return(tableZtoP[ind]); 1978 } 1979 /////////////////////////////////////////////////////////////////////////// 1980 private double pToZ(double p) { 1981 int ind=(int)(-Math.log10(p)*3.0); 1982 if(ind<0) ind=0; 1983 if(ind>149) ind=149; 1984 return(tablePtoZ[ind]); 1985 } 1986 /////////////////////////////////////////////////////////////////////////// 1987 private double zByZ(double z1, double z2) { 1988 double p1=zToP(z1); 1989 double p2=zToP(z2); 1990 return(pToZ(p1*p2)); 1991 } 1992 1993 protected double zStrAlign(int winSize, int nTrace, double score, int nGaps) { 1994 double z1=zScore(winSize, nTrace, score); 1995 double z2=zGaps(winSize, nTrace, nGaps); 1996 return(zByZ(z1, z2)); 1997 } 1998 1999 double zScore(int winSize, int nTrace, double score) { 2000 2001 if(winSize==8) { 2002 2003 if(nTrace<1) return(0.0); 2004 2005 double scoreAv_, scoreSd_; 2006 if(nTrace<21) { 2007 scoreAv_=scoreAv8[nTrace-1]; 2008 scoreSd_=scoreSd8[nTrace-1]; 2009 } 2010 else { 2011 scoreAv_=0.209874*nTrace+2.944714; 2012 scoreSd_=0.039487*nTrace+0.675735; 2013 } 2014 if(score>scoreAv_) return(0.0); 2015 return((scoreAv_-score)/scoreSd_); 2016 } 2017 2018 if(winSize==6) { 2019 2020 if(nTrace<1) return(0.0); 2021 2022 double scoreAv_, scoreSd_; 2023 if(nTrace<21) { 2024 scoreAv_=scoreAv6[nTrace-1]; 2025 scoreSd_=scoreSd6[nTrace-1]; 2026 } 2027 else { 2028 scoreAv_=0.198534*nTrace+2.636477; 2029 scoreSd_=0.040922*nTrace+0.715636; 2030 } 2031 if(score>scoreAv_) return(0.0); 2032 return((scoreAv_-score)/scoreSd_); 2033 } 2034 2035 return(0.0); 2036 2037 } 2038 /////////////////////////////////////////////////////////////////////////// 2039 double zGaps(int winSize, int nTrace, int nGaps) { 2040 2041 if(nTrace<2) return(0.0); 2042 double scoreAv_, scoreSd_; 2043 2044 if(winSize==8) { 2045 if(nTrace<21) { 2046 scoreAv_=gapsAv8[nTrace-1]; 2047 scoreSd_=gapsSd8[nTrace-1]; 2048 } 2049 else { 2050 scoreAv_=14.949173*nTrace-14.581193; 2051 scoreSd_=2.045067*nTrace+13.191095; 2052 } 2053 if(nGaps>scoreAv_) return(0.0); 2054 return((scoreAv_-nGaps)/scoreSd_); 2055 } 2056 2057 if(winSize==6) { 2058 if(nTrace<21) { 2059 scoreAv_=gapsAv6[nTrace-1]; 2060 scoreSd_=gapsSd6[nTrace-1]; 2061 } 2062 else { 2063 scoreAv_=13.574490*nTrace-13.977223; 2064 scoreSd_=1.719977*nTrace+19.615014; 2065 } 2066 if(nGaps>scoreAv_) return(0.0); 2067 return((scoreAv_-nGaps)/scoreSd_); 2068 } 2069 2070 return(0.0); 2071 } 2072 2073 private static final double scoreAv8[]={2.54, 2.51, 2.72, 3.01, 3.31, 3.61, 3.90, 4.19, 4.47, 4.74, 2074 4.99, 5.22, 5.46, 5.70, 5.94, 6.13, 6.36, 6.52, 6.68, 6.91}; 2075 private static final double scoreSd8[]={1.33, 0.88, 0.73, 0.71, 0.74, 0.80, 0.86, 0.92, 0.98, 1.04, 2076 1.08, 1.10, 1.15, 1.19, 1.23, 1.25, 1.32, 1.34, 1.36, 1.45}; 2077 private static final double gapsAv8[]={0.00, 11.50, 23.32, 35.95, 49.02, 62.44, 76.28, 90.26, 2078 104.86, 119.97, 134.86, 150.54, 164.86, 179.57, 194.39, 2079 209.38, 224.74, 238.96, 253.72, 270.79}; 2080 private static final double gapsSd8[]={0.00, 9.88, 14.34, 17.99, 21.10, 23.89, 26.55, 29.00, 31.11, 2081 33.10, 35.02, 36.03, 37.19, 38.82, 41.04, 43.35, 45.45, 2082 48.41, 50.87, 52.27}; 2083 private static final double scoreAv6[]={1.98, 1.97, 2.22, 2.54, 2.87, 3.18, 3.48, 3.77, 4.05, 4.31, 2084 4.57, 4.82, 5.03, 5.24, 5.43, 5.64, 5.82, 6.02, 6.21, 6.42}; 2085 private static final double scoreSd6[]={1.15, 0.73, 0.63, 0.64, 0.71, 0.80, 0.87, 0.95, 1.01, 1.07, 2086 1.13, 1.19, 1.22, 1.25, 1.28, 1.32, 1.35, 1.39, 1.45, 1.50}; 2087 private static final double gapsAv6[]={0.00, 10.12, 20.25, 31.29, 42.95, 55.20, 67.53, 80.15, 2088 93.30, 106.47, 120.52, 134.38, 148.59, 162.58, 176.64, 2089 191.23, 204.12, 218.64, 231.82, 243.43}; 2090 private static final double gapsSd6[]={0.00, 9.80, 14.44, 18.14, 21.35, 24.37, 27.00, 29.68, 32.22, 2091 34.37, 36.65, 38.63, 40.31, 42.16, 43.78, 44.98, 47.08, 2092 49.09, 50.78, 52.15}; 2093 2094 2095 private static final double tableZtoP[]={ 2096 1.0, 9.20e-01,8.41e-01,7.64e-01,6.89e-01,6.17e-01,5.49e-01,4.84e-01,4.24e-01,3.68e-01, 2097 3.17e-01,2.71e-01,2.30e-01,1.94e-01,1.62e-01,1.34e-01,1.10e-01,8.91e-02,7.19e-02,5.74e-02, 2098 4.55e-02,3.57e-02,2.78e-02,2.14e-02,1.64e-02,1.24e-02,9.32e-03,6.93e-03,5.11e-03,3.73e-03, 2099 2.70e-03,1.94e-03,1.37e-03,9.67e-04,6.74e-04,4.65e-04,3.18e-04,2.16e-04,1.45e-04,9.62e-05, 2100 6.33e-05,4.13e-05,2.67e-05,1.71e-05,1.08e-05,6.80e-06,4.22e-06,2.60e-06,1.59e-06,9.58e-07, 2101 5.73e-07,3.40e-07,1.99e-07,1.16e-07,6.66e-08,3.80e-08,2.14e-08,1.20e-08,6.63e-09,3.64e-09, 2102 1.97e-09,1.06e-09,5.65e-10,2.98e-10,1.55e-10,8.03e-11,4.11e-11,2.08e-11,1.05e-11,5.20e-12, 2103 2.56e-12,1.25e-12,6.02e-13,2.88e-13,1.36e-13,6.38e-14,2.96e-14,1.36e-14,6.19e-15,2.79e-15, 2104 1.24e-15,5.50e-16,2.40e-16,1.04e-16,4.46e-17,1.90e-17,7.97e-18,3.32e-18,1.37e-18,5.58e-19, 2105 2.26e-19,9.03e-20,3.58e-20,1.40e-20,5.46e-21,2.10e-21,7.99e-22,3.02e-22,1.13e-22,4.16e-23, 2106 1.52e-23,5.52e-24,1.98e-24,7.05e-25,2.48e-25,8.64e-26,2.98e-26,1.02e-26,3.44e-27,1.15e-27, 2107 3.82e-28,1.25e-28,4.08e-29,1.31e-29,4.18e-30,1.32e-30,4.12e-31,1.27e-31,3.90e-32,1.18e-32, 2108 3.55e-33,1.06e-33,3.11e-34,9.06e-35,2.61e-35,7.47e-36,2.11e-36,5.91e-37,1.64e-37,4.50e-38, 2109 1.22e-38,3.29e-39,8.77e-40,2.31e-40,6.05e-41,1.56e-41,4.00e-42,1.02e-42,2.55e-43,6.33e-44, 2110 1.56e-44,3.80e-45,9.16e-46,2.19e-46,5.17e-47,1.21e-47,2.81e-48,6.45e-49,1.46e-49,3.30e-50}; 2111 private static final double tablePtoZ[]={ 2112 0.00,0.73,1.24,1.64,1.99,2.30,2.58,2.83,3.07,3.29, 2113 3.50,3.70,3.89,4.07,4.25,4.42,4.58,4.74,4.89,5.04, 2114 5.19,5.33,5.46,5.60,5.73,5.86,5.99,6.11,6.23,6.35, 2115 6.47,6.58,6.70,6.81,6.92,7.02,7.13,7.24,7.34,7.44, 2116 7.54,7.64,7.74,7.84,7.93,8.03,8.12,8.21,8.30,8.40, 2117 8.49,8.57,8.66,8.75,8.84,8.92,9.01,9.09,9.17,9.25, 2118 9.34,9.42,9.50,9.58,9.66,9.73,9.81,9.89,9.97,10.04, 2119 10.12,10.19,10.27,10.34,10.41,10.49,10.56,10.63,10.70,10.77, 2120 10.84,10.91,10.98,11.05,11.12,11.19,11.26,11.32,11.39,11.46, 2121 11.52,11.59,11.66,11.72,11.79,11.85,11.91,11.98,12.04,12.10, 2122 12.17,12.23,12.29,12.35,12.42,12.48,12.54,12.60,12.66,12.72, 2123 12.78,12.84,12.90,12.96,13.02,13.07,13.13,13.19,13.25,13.31, 2124 13.36,13.42,13.48,13.53,13.59,13.65,13.70,13.76,13.81,13.87, 2125 13.92,13.98,14.03,14.09,14.14,14.19,14.25,14.30,14.35,14.41, 2126 14.46,14.51,14.57,14.62,14.67,14.72,14.77,14.83,14.88,14.93}; 2127 2128 /** copy data from this class into AFPChain container object. 2129 * 2130 * @param afpChain 2131 * @param ca1 2132 * @param ca2 2133 */ 2134 public void convertAfpChain(AFPChain afpChain, Atom[] ca1, Atom[] ca2) { 2135 2136 afpChain.setBlockNum(1); 2137 //afpChain.setAlignScore(z); 2138 Matrix[] m ; 2139 2140 if ( r != null ) { 2141 m = new Matrix[1]; 2142 m[0] = r; 2143 } else { 2144 m = new Matrix[0]; 2145 } 2146 2147 Atom[] as ; 2148 if ( t != null) { 2149 as = new Atom[1]; 2150 as[0] = t; 2151 } else { 2152 as = new Atom[0]; 2153 } 2154 2155 afpChain.setBlockRotationMatrix(m); 2156 afpChain.setBlockShiftVector(as); 2157 2158 int nse1 = ca1.length; 2159 int nse2 = ca2.length; 2160 //System.out.println("dist1 :" + dist1.length + " " + dist2.length); 2161 2162 if ( nse1 > 0 && dist1.length > 0 ) 2163 afpChain.setDisTable1(new Matrix(dist1)); 2164 else 2165 afpChain.setDisTable1 (Matrix.identity(3, 3)); 2166 if ( nse2 > 0 && dist2.length > 0 ) 2167 afpChain.setDisTable2(new Matrix(dist2)); 2168 else 2169 afpChain.setDisTable2(Matrix.identity(3, 3)); 2170 2171 2172 char[] alnseq1 = new char[nse1+nse2+1]; 2173 char[] alnseq2 = new char[nse1+nse2+1] ; 2174 char[] alnsymb = new char[nse1+nse2+1]; 2175 2176 int[][][] optAln = new int[1][2][nAtom]; 2177 afpChain.setOptAln(optAln); 2178 2179 int pos = 0; 2180 int nrIdent = 0; 2181 int nrSim = 0; 2182 for(int ia=0; ia<lcmp; ia++) { 2183 2184 // no gap 2185 if(align_se1[ia]!=-1 && align_se2[ia]!=-1) { 2186 //System.out.println("ia " + ia + " pos " + pos + " " + align_se1[ia] + " " + align_se2[ia]); 2187 optAln[0][0][pos] = align_se1[ia]; 2188 optAln[0][1][pos] = align_se2[ia]; 2189 2190 char l1 = getOneLetter(ca1[align_se1[ia]].getGroup()); 2191 char l2 = getOneLetter(ca2[align_se2[ia]].getGroup()); 2192 2193 alnseq1[ia] = Character.toUpperCase(l1); 2194 alnseq2[ia] = Character.toUpperCase(l2); 2195 alnsymb[ia] = ' '; 2196 if ( l1 == l2) { 2197 nrIdent++; 2198 nrSim++; 2199 alnsymb[ia] = '|'; 2200 } else if ( AFPAlignmentDisplay.aaScore(l1, l2) > 0){ 2201 nrSim++; 2202 alnsymb[ia] = ':'; 2203 } 2204 2205 pos++; 2206 2207 } else { 2208 // there is a gap at this position 2209 alnsymb[ia] = ' '; 2210 if (align_se1[ia] == -1 ) { 2211 alnseq1[ia] = '-'; 2212 } else { 2213 char l1 = getOneLetter(ca1[align_se1[ia]].getGroup()); 2214 alnseq1[ia] = Character.toUpperCase(l1); 2215 } 2216 if ( align_se2[ia] == -1 ) { 2217 alnseq2[ia] = '-'; 2218 } else { 2219 char l2 = getOneLetter(ca2[align_se2[ia]].getGroup()); 2220 alnseq2[ia] = Character.toUpperCase(l2); 2221 } 2222 2223 } 2224 } 2225 2226 2227 afpChain.setAlnseq1(alnseq1); 2228 afpChain.setAlnseq2(alnseq2); 2229 afpChain.setAlnsymb(alnsymb); 2230 2231 2232 // CE uses the aligned pairs as reference not the whole alignment including gaps... 2233 if ( pos > 0) { 2234 afpChain.setIdentity(nrIdent*1.0/pos); 2235 afpChain.setSimilarity(nrSim*1.0/pos); 2236 } else { 2237 afpChain.setIdentity(0); 2238 afpChain.setSimilarity(0); 2239 } 2240 2241 //AFPAlignmentDisplay.getAlign( afpChain,ca1,ca2); 2242 2243 } 2244 2245 public int getnAtom() { 2246 return nAtom; 2247 } 2248 2249 2250 2251 public void setnAtom(int nAtom) { 2252 this.nAtom = nAtom; 2253 } 2254 2255 2256 2257 public int getLcmp() { 2258 return lcmp; 2259 } 2260 2261 2262 2263 public void setLcmp(int lcmp) { 2264 this.lcmp = lcmp; 2265 } 2266 2267 2268 2269 public int[] getAlign_se1() { 2270 return align_se1; 2271 } 2272 2273 2274 2275 public void setAlign_se1(int[] alignSe1) { 2276 align_se1 = alignSe1; 2277 } 2278 2279 2280 2281 public int[] getAlign_se2() { 2282 return align_se2; 2283 } 2284 2285 2286 2287 public void setAlign_se2(int[] alignSe2) { 2288 align_se2 = alignSe2; 2289 } 2290 2291 /** 2292 * Caution: this matrix is overwriten with very different data at several 2293 * points in the alignment algorithm. After 2294 * {@link #initSumOfDistances(int, int, int, int, Atom[], Atom[]) initSumOfDistances} 2295 * is run, this will hold the distance matrix between AFPs. 2296 * @return mat 2297 */ 2298 public double[][] getMatMatrix() { 2299 return mat; 2300 } 2301 2302 public void setMatMatrix(double[][] matrix){ 2303 mat = matrix; 2304 } 2305 2306 /** 2307 * Gets the rotation matrix from the last call to 2308 * {@link #calc_rmsd(Atom[], Atom[], int, boolean) calc_rmsd}. 2309 * @return The rotatiokn matrix 2310 */ 2311 public Matrix getRotationMatrix() { 2312 return r; 2313 } 2314 2315 /** 2316 * Gets the shift from the last call to 2317 * {@link #calc_rmsd(Atom[], Atom[], int, boolean) calc_rmsd}. 2318 * @return The shift 2319 */ 2320 public Atom getShift() { 2321 return t; 2322 } 2323 2324 public double[][] getDist1() { 2325 return dist1; 2326 } 2327 2328 public void setDist1(double[][] dist1) { 2329 this.dist1 = dist1; 2330 } 2331 2332 public double[][] getDist2() { 2333 return dist2; 2334 } 2335 2336 public void setDist2(double[][] dist2) { 2337 this.dist2 = dist2; 2338 } 2339 2340 2341}