001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Mar 9, 2010 021 * Author: Spencer Bliven 022 * 023 */ 024 025package org.biojava.nbio.structure.align.ce; 026 027 028import org.biojava.nbio.structure.Atom; 029import org.biojava.nbio.structure.StructureException; 030import org.biojava.nbio.structure.StructureTools; 031import org.biojava.nbio.structure.align.model.AFPChain; 032import org.biojava.nbio.structure.align.util.AFPChainScorer; 033import org.biojava.nbio.structure.align.util.AtomCache; 034import org.biojava.nbio.structure.jama.Matrix; 035 036import java.lang.reflect.InvocationTargetException; 037import java.util.ArrayList; 038import java.util.Collections; 039import java.util.List; 040import java.util.Scanner; 041 042/** 043 * A wrapper for {@link CeMain} which sets default parameters to be appropriate for finding 044 * circular permutations. 045 * <p> 046 * A circular permutation consists of a single cleavage point and rearrangement 047 * between two structures, for example: 048 * <pre> 049 * ABCDEFG 050 * DEFGABC 051 * </pre> 052 * @author Spencer Bliven. 053 * 054 */ 055public class OptimalCECPMain extends CeMain { 056 private static final boolean debug = true; 057 058 059 public static final String algorithmName = "jCE Optimal Circular Permutation"; 060 061 /** 062 * version history: 063 * 1.0 - Initial version 064 */ 065 public static final String version = "1.0"; 066 067 protected OptimalCECPParameters params; 068 069 /** 070 * 071 */ 072 public OptimalCECPMain() { 073 super(); 074 params = new OptimalCECPParameters(); 075 } 076 077 @Override 078 public String getAlgorithmName() { 079 return OptimalCECPMain.algorithmName; 080 } 081 082 @Override 083 public String getVersion() { 084 return OptimalCECPMain.version; 085 } 086 087 /** 088 * @return an {@link OptimalCECPParameters} object 089 */ 090 @Override 091 public ConfigStrucAligParams getParameters() { 092 return params; 093 } 094 095 /** 096 * @param params Should be an {@link OptimalCECPParameters} object specifying alignment options 097 */ 098 @Override 099 public void setParameters(ConfigStrucAligParams params){ 100 if (! (params instanceof OptimalCECPParameters )){ 101 throw new IllegalArgumentException("provided parameter object is not of type CeParameter"); 102 } 103 this.params = (OptimalCECPParameters) params; 104 } 105 106 /** 107 * Circularly permutes arr in place. 108 * 109 * <p>Similar to {@link Collections#rotate(List, int)} but with reversed 110 * direction. Perhaps it would be more efficient to use the Collections version? 111 * @param <T> 112 * @param arr The array to be permuted 113 * @param cp The number of residues to shift leftward, or equivalently, the index of 114 * the first element after the permutation point. 115 */ 116 private static <T> void permuteArray(T[] arr, int cp) { 117 // Allow negative cp points for convenience. 118 if(cp == 0) { 119 return; 120 } 121 if(cp < 0) { 122 cp = arr.length+cp; 123 } 124 if(cp < 0 || cp >= arr.length) { 125 throw new ArrayIndexOutOfBoundsException( 126 "Permutation point ("+cp+") must be between -ca2.length and ca2.length-1" ); 127 } 128 129 List<T> temp = new ArrayList<T>(cp); 130 131 // shift residues left 132 for(int i=0;i<cp;i++) { 133 temp.add(arr[i]); 134 } 135 for(int j=cp;j<arr.length;j++) { 136 arr[j-cp]=arr[j]; 137 } 138 for(int i=0;i<cp;i++) { 139 arr[arr.length-cp+i] = temp.get(i); 140 } 141 } 142 143 /** 144 * Circularly permutes arr in place. 145 * 146 * <p>Similar to {@link Collections#rotate(List, int)} but with reversed 147 * direction. Perhaps it would be more efficient to use the Collections version? 148 * @param <T> 149 * @param arr The array to be permuted 150 * @param cp The number of residues to shift leftward, or equivalently, the index of 151 * the first element after the permutation point. 152 * 153 private static void permuteArray(int[] arr, int cp) { 154 // Allow negative cp points for convenience. 155 if(cp == 0) { 156 return; 157 } 158 if(cp < 0) { 159 cp = arr.length+cp; 160 } 161 if(cp < 0 || cp >= arr.length) { 162 throw new ArrayIndexOutOfBoundsException( 163 "Permutation point ("+cp+") must be between -ca2.length and ca2.length-1" ); 164 } 165 166 List<Integer> temp = new ArrayList<Integer>(cp); 167 168 // shift residues left 169 for(int i=0;i<cp;i++) { 170 temp.add(arr[i]); 171 } 172 for(int j=cp;j<arr.length;j++) { 173 arr[j-cp]=arr[j]; 174 } 175 for(int i=0;i<cp;i++) { 176 arr[arr.length-cp+i] = temp.get(i); 177 } 178 } 179 */ 180 181 /** 182 * Aligns ca1 with ca2 permuted by <i>cp</i> residues. 183 * <p><strong>WARNING:</strong> Modifies ca2 during the permutation. Be sure 184 * to make a copy before calling this method. 185 * 186 * @param ca1 187 * @param ca2 188 * @param param 189 * @param cp 190 * @return 191 * @throws StructureException 192 */ 193 public AFPChain alignPermuted(Atom[] ca1, Atom[] ca2, Object param, int cp) throws StructureException { 194 // initial permutation 195 permuteArray(ca2,cp); 196 197 // perform alignment 198 AFPChain afpChain = super.align(ca1, ca2, param); 199 200 // un-permute alignment 201 permuteAFPChain(afpChain, -cp); 202 203 if(afpChain.getName2() != null) { 204 afpChain.setName2(afpChain.getName2()+" CP="+cp); 205 } 206 207 // Specify the permuted 208 return afpChain; 209 } 210 211 /** 212 * Permute the second protein of afpChain by the specified number of residues. 213 * @param afpChain Input alignment 214 * @param cp Amount leftwards (or rightward, if negative) to shift the 215 * @return A new alignment equivalent to afpChain after the permutations 216 */ 217 private static void permuteAFPChain(AFPChain afpChain, int cp) { 218 219 int ca2len = afpChain.getCa2Length(); 220 221 //fix up cp to be positive 222 if(cp == 0) { 223 return; 224 } 225 if(cp < 0) { 226 cp = ca2len+cp; 227 } 228 if(cp < 0 || cp >= ca2len) { 229 throw new ArrayIndexOutOfBoundsException( 230 "Permutation point ("+cp+") must be between -ca2.length and ca2.length-1" ); 231 } 232 233 // Fix up optAln 234 permuteOptAln(afpChain,cp); 235 236 if(afpChain.getBlockNum() > 1) 237 afpChain.setSequentialAlignment(false); 238 // fix up matrices 239 // ca1 corresponds to row indices, while ca2 corresponds to column indices. 240 afpChain.setDistanceMatrix(permuteMatrix(afpChain.getDistanceMatrix(),0,-cp)); 241 // this is square, so permute both 242 afpChain.setDisTable2(permuteMatrix(afpChain.getDisTable2(),-cp,-cp)); 243 244 //TODO fix up other AFP parameters? 245 246 } 247 248 /** 249 * Permutes <i>mat</i> by moving the rows of the matrix upwards by <i>cp</i> 250 * rows. 251 * @param mat The original matrix 252 * @param cpRows Number of rows upward to move entries 253 * @param cpCols Number of columns leftward to move entries 254 * @return The permuted matrix 255 */ 256 private static Matrix permuteMatrix(Matrix mat, int cpRows, int cpCols) { 257 //fix up cp to be positive 258 if(cpRows == 0 && cpCols == 0) { 259 return mat.copy(); 260 } 261 if(cpRows < 0) { 262 cpRows = mat.getRowDimension()+cpRows; 263 } 264 if(cpRows < 0 || cpRows >= mat.getRowDimension()) { 265 throw new ArrayIndexOutOfBoundsException( String.format( 266 "Can't permute rows by %d: only %d rows.", 267 cpRows, mat.getRowDimension() ) 268 ); 269 } 270 271 if(cpCols < 0) { 272 cpCols = mat.getColumnDimension()+cpCols; 273 } 274 if(cpCols < 0 || cpCols >= mat.getColumnDimension()) { 275 throw new ArrayIndexOutOfBoundsException( String.format( 276 "Can't permute cols by %d: only %d rows.", 277 cpCols, mat.getColumnDimension() ) 278 ); 279 } 280 281 int[] rows = new int[mat.getRowDimension()]; 282 for(int i=0;i<rows.length;i++) { 283 rows[i] = (i+cpRows)%rows.length; 284 } 285 int[] cols = new int[mat.getColumnDimension()]; 286 for(int i=0;i<cols.length;i++) { 287 cols[i] = (i+cpCols)%cols.length; 288 } 289 290 Matrix newMat = mat.getMatrix(rows, cols); 291 assert(newMat.getRowDimension() == mat.getRowDimension()); 292 assert(newMat.getColumnDimension() == mat.getColumnDimension()); 293 assert(newMat.get(0, 0) == 294 mat.get(cpRows%mat.getRowDimension(), cpCols%mat.getColumnDimension())); 295 296 297 return newMat; 298 } 299 300 /** 301 * Modifies the {@link AFPChain#setOptAln(int[][][]) optAln} of an AFPChain 302 * by permuting the second protein. 303 * 304 * Sets residue numbers in the second protein to <i>(i-cp)%len</i> 305 * 306 * @param afpChain 307 * @param cp Amount leftwards (or rightward, if negative) to shift the 308 */ 309 private static void permuteOptAln(AFPChain afpChain, int cp) 310 { 311 int ca2len = afpChain.getCa2Length(); 312 313 if( ca2len <= 0) { 314 throw new IllegalArgumentException("No Ca2Length specified in "+afpChain); 315 } 316 317 // Allow negative cp points for convenience. 318 if(cp == 0) { 319 return; 320 } 321 if(cp <= -ca2len || cp >= ca2len) { 322 // could just take cp%ca2len, but probably its a bug if abs(cp)>=ca2len 323 throw new ArrayIndexOutOfBoundsException( String.format( 324 "Permutation point %d must be between %d and %d for %s", 325 cp, 1-ca2len,ca2len-1, afpChain.getName2() ) ); 326 } 327 if(cp < 0) { 328 cp = cp + ca2len; 329 } 330 331 // the unprocessed alignment 332 int[][][] optAln = afpChain.getOptAln(); 333 int[] optLen = afpChain.getOptLen(); 334 335 // the processed alignment 336 List<List<List<Integer>>> blocks = new ArrayList<List<List<Integer>>>(afpChain.getBlockNum()*2); 337 338 //Update residue indices 339 // newi = (oldi-cp) % N 340 for(int block = 0; block < afpChain.getBlockNum(); block++) { 341 if(optLen[block]<1) 342 continue; 343 344 // set up storage for the current block 345 List<List<Integer>> currBlock = new ArrayList<List<Integer>>(2); 346 currBlock.add( new ArrayList<Integer>()); 347 currBlock.add( new ArrayList<Integer>()); 348 blocks.add(currBlock); 349 350 // pos = 0 case 351 currBlock.get(0).add( optAln[block][0][0] ); 352 currBlock.get(1).add( (optAln[block][1][0]+cp ) % ca2len); 353 354 for(int pos = 1; pos < optLen[block]; pos++) { 355 //check if we need to start a new block 356 //this happens when the new alignment crosses the protein terminus 357 if( optAln[block][1][pos-1]+cp<ca2len && 358 optAln[block][1][pos]+cp >= ca2len) { 359 currBlock = new ArrayList<List<Integer>>(2); 360 currBlock.add( new ArrayList<Integer>()); 361 currBlock.add( new ArrayList<Integer>()); 362 blocks.add(currBlock); 363 } 364 currBlock.get(0).add( optAln[block][0][pos] ); 365 currBlock.get(1).add( (optAln[block][1][pos]+cp ) % ca2len); 366 } 367 } 368 369 // save permuted blocks to afpChain 370 assignOptAln(afpChain,blocks); 371 } 372 373 /** 374 * Sometimes it's convenient to store an alignment using java collections, 375 * where <tt>blocks.get(blockNum).get(0).get(pos)</tt> specifies the aligned 376 * residue at position <i>pos</i> of block <i>blockNum</i> of the first 377 * protein. 378 * 379 * This method takes such a collection and stores it into the afpChain's 380 * {@link AFPChain#setOptAln(int[][][]) optAln}, setting the associated 381 * length variables as well. 382 * 383 * @param afpChain 384 * @param blocks 385 */ 386 private static void assignOptAln(AFPChain afpChain, List<List<List<Integer>>> blocks) 387 { 388 389 int[][][] optAln = new int[blocks.size()][][]; 390 int[] optLen = new int[blocks.size()]; 391 int optLength = 0; 392 int numBlocks = blocks.size(); 393 394 for(int block = 0; block < numBlocks; block++) { 395 // block should be 2xN rectangular 396 assert(blocks.get(block).size() == 2); 397 assert( blocks.get(block).get(0).size() == blocks.get(block).get(1).size()); 398 399 optLen[block] = blocks.get(block).get(0).size(); 400 optLength+=optLen[block]; 401 402 optAln[block] = new int[][] { 403 new int[optLen[block]], 404 new int[optLen[block]] 405 }; 406 for(int pos = 0; pos < optLen[block]; pos++) { 407 optAln[block][0][pos] = blocks.get(block).get(0).get(pos); 408 optAln[block][1][pos] = blocks.get(block).get(1).get(pos); 409 } 410 } 411 412 413 afpChain.setBlockNum(numBlocks); 414 afpChain.setOptAln(optAln); 415 afpChain.setOptLen(optLen); 416 afpChain.setOptLength(optLength); 417 418 // TODO I don't know what these do. Should they be set? 419 //afpChain.setBlockSize(blockSize); 420 //afpChain.setBlockResList(blockResList); 421 //afpChain.setChainLen(chainLen); 422 423 } 424 425 /** 426 * Finds the optimal alignment between two proteins allowing for a circular 427 * permutation (CP). 428 * 429 * The precise algorithm is controlled by the 430 * {@link OptimalCECPParameters parameters}. If the parameter 431 * {@link OptimalCECPParameters#isTryAllCPs() tryAllCPs} is true, all possible 432 * CP sites are tried and the optimal site is returned. Otherwise, the 433 * {@link OptimalCECPParameters#getCPPoint() cpPoint} parameter is used to 434 * determine the CP point, greatly reducing the computation required. 435 * 436 * @param ca1 CA atoms of the first protein 437 * @param ca2 CA atoms of the second protein 438 * @param param {@link CeParameters} object 439 * @return The best-scoring alignment 440 * @throws StructureException 441 * 442 * @see #alignOptimal(Atom[], Atom[], Object, AFPChain[]) 443 */ 444 @Override 445 public AFPChain align(Atom[] ca1, Atom[] ca2, Object param) 446 throws StructureException 447 { 448 if(params.isTryAllCPs()) { 449 return alignOptimal(ca1,ca2,param,null); 450 } else { 451 int cpPoint = params.getCPPoint(); 452 return alignPermuted(ca1, ca2, param, cpPoint); 453 } 454 } 455 456 /** 457 * Finds the optimal alignment between two proteins allowing for a circular 458 * permutation (CP). 459 * 460 * This algorithm performs a CE alignment for each possible CP site. This is 461 * quite slow. Use {@link #alignHeuristic(Atom[], Atom[], Object)} for a 462 * faster algorithm. 463 * 464 * @param ca1 CA atoms of the first protein 465 * @param ca2 CA atoms of the second protein 466 * @param param {@link CeParameters} object 467 * @param alignments If not null, should be an empty array of the same length as 468 * ca2. This will be filled with the alignments from permuting ca2 by 469 * 0 to n-1 residues. 470 * @return The best-scoring alignment 471 * @throws StructureException 472 */ 473 public AFPChain alignOptimal(Atom[] ca1, Atom[] ca2, Object param, AFPChain[] alignments) 474 throws StructureException 475 { 476 long startTime = System.currentTimeMillis(); 477 478 if(alignments != null && alignments.length != ca2.length) { 479 throw new IllegalArgumentException("scores param should have same length as ca2"); 480 } 481 482 AFPChain unaligned = super.align(ca1, ca2, param); 483 AFPChain bestAlignment = unaligned; 484 485 if(debug) { 486 // print progress bar header 487 System.out.print("|"); 488 for(int cp=1;cp<ca2.length-1;cp++) { 489 System.out.print("="); 490 } 491 System.out.println("|"); 492 System.out.print("."); 493 } 494 495 if(alignments != null) { 496 alignments[0] = unaligned; 497 } 498 499 for(int cp=1;cp<ca2.length;cp++) { 500 // clone ca2 to prevent side effects from propegating 501 Atom[] ca2p = StructureTools.cloneAtomArray(ca2); 502 503 //permute one each time. Alters ca2p as a side effect 504 AFPChain currentAlignment = alignPermuted(ca1,ca2p,param,cp); 505 506 // increment progress bar 507 if(debug) System.out.print("."); 508 509 // fix up names, since cloning ca2 wipes it 510 511 if (ca2.length!=0 && ca2[0].getGroup().getChain()!=null && ca2[0].getGroup().getChain().getStructure()!=null) { 512 currentAlignment.setName2(ca2[0].getGroup().getChain().getStructure().getName()+" CP="+cp); 513 } 514 515 double currentScore = currentAlignment.getAlignScore(); 516 517 if(alignments != null) { 518 alignments[cp] = currentAlignment; 519 } 520 521 if(currentScore>bestAlignment.getAlignScore()) { 522 bestAlignment = currentAlignment; 523 } 524 } 525 if(debug) { 526 long elapsedTime = System.currentTimeMillis()-startTime; 527 System.out.println(); 528 System.out.format("%d alignments took %.4f s (%.1f ms avg)\n", 529 ca2.length, elapsedTime/1000., (double)elapsedTime/ca2.length); 530 } 531 532 533 return bestAlignment; 534 535 } 536 537 538 539 540 public static void main(String[] args){ 541 try { 542 String name1, name2; 543 544 //int[] cps= new int[] {}; 545 546 //Concanavalin 547 //name1 = "2pel.A"; 548 //name2 = "3cna"; 549 //cps = new int[] {122,0,3}; 550 551 //small case 552 name1 = "d1qdmA1"; 553 //name1 = "1QDM.A"; 554 name2 = "d1nklA_"; 555 /*cps = new int[] { 556 //41, // CECP optimum 557 19,59, // unpermuted local minima in TM-score 558 //39, // TM-score optimum 559 0, 560 };*/ 561 562 //1itb selfsymmetry 563 //name1 = "1ITB.A"; 564 //name2 = "1ITB.A"; 565 //cps = new int[] {92}; 566 567 name1=name2 = "2LSQ"; 568 569 OptimalCECPMain ce = new OptimalCECPMain(); 570 CeParameters params = (CeParameters) ce.getParameters(); 571 ce.setParameters(params); 572 573 AtomCache cache = new AtomCache(); 574 575 Atom[] ca1 = cache.getAtoms(name1); 576 Atom[] ca2 = cache.getAtoms(name2); 577 578 AFPChain afpChain; 579 580 581 582 // find optimal solution 583 AFPChain[] alignments = new AFPChain[ca2.length]; 584 afpChain = ce.alignOptimal(ca1, ca2, params, alignments); 585 System.out.format("Optimal Score: %.2f\n", afpChain.getAlignScore()); 586 587 System.out.println("Pos\tScore\tTMScore\tLen\tRMSD\tBlocks"); 588 for(int i = 0; i< alignments.length; i++) { 589 double tm = AFPChainScorer.getTMScore(alignments[i], ca1, ca2); 590 System.out.format("%d\t%.2f\t%.2f\t%d\t%.2f\t%d\n", 591 i, 592 alignments[i].getAlignScore(), 593 tm, 594 alignments[i].getOptLength(), 595 alignments[i].getTotalRmsdOpt(), 596 alignments[i].getBlockNum() 597 ); 598 } 599 600 //displayAlignment(afpChain,ca1,ca2); 601 602 // permuted alignment 603 //for(int cp : cps) { 604 // new copy of ca2, since alignPermuted has side effects 605 //Atom[] ca2clone = cache.getAtoms(name2); 606 //afpChain = ce.alignPermuted(ca1, ca2clone, params, cp); 607 //displayAlignment(afpChain, ca1, ca2); 608 609 //displayAlignment(alignments[cp],ca1,ca2); 610 //} 611 612 // CECP alignment 613 CeCPMain cecp = new CeCPMain(); 614 afpChain = cecp.align(ca1, ca2); 615 displayAlignment(afpChain,ca1,ca2); 616 617 System.out.println("Inspect additional alignments?"); 618 Scanner scanner = new Scanner(System.in); 619 System.out.print("CP location [0,"+ca2.length+"): "); 620 while(scanner.hasNext()) { 621 if(scanner.hasNextInt()) { 622 int cp = scanner.nextInt(); 623 624 if(0<=cp && cp<ca2.length) { 625 alignments[cp].setName1(name1); 626 alignments[cp].setName2(name2+"@"+cp); 627 displayAlignment(alignments[cp],ca1,ca2); 628 Thread.sleep(1000); 629 } 630 } else { 631 //String next = scanner.nextLine(); 632 } 633 System.out.print("CP location [0,"+ca2.length+"): "); 634 } 635 scanner.close(); 636 637 } catch (Exception e) { 638 e.printStackTrace(); 639 } 640 } 641 642 /** 643 * Try showing a the afpChain in a GUI. 644 * 645 * <p>requires additional dependencies biojava-structure-gui and JmolApplet 646 * 647 * @param afpChain 648 * @param ca1 649 * @param ca2 650 * @throws ClassNotFoundException 651 * @throws NoSuchMethodException 652 * @throws InvocationTargetException 653 * @throws IllegalAccessException 654 * @throws StructureException 655 */ 656 private static void displayAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, IllegalAccessException, StructureException { 657 Atom[] ca1clone = StructureTools.cloneAtomArray(ca1); 658 Atom[] ca2clone = StructureTools.cloneAtomArray(ca2); 659 if (! GuiWrapper.isGuiModuleInstalled()) { 660 System.err.println("The biojava-structure-gui and/or JmolApplet modules are not installed. Please install!"); 661 // display alignment in console 662 System.out.println(afpChain.toCE(ca1clone, ca2clone)); 663 } else { 664 Object jmol = GuiWrapper.display(afpChain,ca1clone,ca2clone); 665 GuiWrapper.showAlignmentImage(afpChain, ca1clone,ca2clone,jmol); 666 } 667 } 668}