001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.symmetry.internal; 022 023import java.io.FileWriter; 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.Collections; 027import java.util.List; 028import java.util.Random; 029 030import org.biojava.nbio.structure.Atom; 031import org.biojava.nbio.structure.StructureException; 032import org.biojava.nbio.structure.align.multiple.Block; 033import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 034import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble; 035import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer; 036import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentTools; 037import org.biojava.nbio.structure.jama.Matrix; 038import org.biojava.nbio.structure.symmetry.utils.SymmetryTools; 039import org.slf4j.Logger; 040import org.slf4j.LoggerFactory; 041 042/** 043 * Optimizes a symmetry alignment by a Monte Carlo score optimization of the 044 * repeat multiple alignment. The superposition of the repeats is not free 045 * (felxible), because it is constrained on the symmetry axes found in the 046 * structure. This is the main difference to the {@link MultipleMC} algorithm in 047 * biojava. Another major difference is that the free Pool is shared for all 048 * repeats, so that no residue can appear to more than one repeat at a time. 049 * <p> 050 * This algorithm does not use a unfiform distribution for selecting moves, 051 * farther residues have more probability to be shrinked or gapped. This 052 * modification of the algorithm improves convergence and running time. 053 * <p> 054 * Use call method to parallelize optimizations, or use optimize method instead. 055 * Because gaps are allowed in the repeats, a {@link MultipleAlignment} format 056 * is returned. 057 * 058 * @author Aleix Lafita 059 * @since 4.1.1 060 * 061 */ 062public class SymmOptimizer { 063 064 private static final Logger logger = LoggerFactory 065 .getLogger(SymmOptimizer.class); 066 067 private Random rnd; 068 069 // Optimization parameters 070 private int Rmin = 2; // min aligned repeats per column 071 private int Lmin; // min repeat length 072 private int maxIter; // max iterations 073 private double C = 20; // probability of accept bad moves constant 074 075 // Score function parameters 076 private static final double Gopen = 20.0; // Penalty for opening gap 077 private static final double Gextend = 10.0; // Penalty for extending gaps 078 private double dCutoff; 079 080 // Alignment Information 081 private MultipleAlignment msa; 082 private SymmetryAxes axes; 083 private Atom[] atoms; 084 private int order; 085 private int length; // total alignment columns (block size) 086 private int repeatCore; // core length (without gaps) 087 088 // Aligned Residues and Score 089 private List<List<Integer>> block; // residues aligned 090 private List<Integer> freePool; // residues not aligned 091 private double mcScore; // alignment score to optimize 092 093 // Variables that store the history of the optimization - slower if on 094 private static final boolean history = false; 095 private static final int saveStep = 100; 096 private static final String pathToHistory = "results/symm-opt/"; 097 private List<Long> timeHistory; 098 private List<Integer> lengthHistory; 099 private List<Double> rmsdHistory; 100 private List<Double> tmScoreHistory; 101 private List<Double> mcScoreHistory; 102 103 /** 104 * Constructor with a seed MultipleAligment storing a refined symmetry 105 * alignment of the repeats. To perform the optimization use the call or 106 * optimize methods after instantiation. 107 * 108 * @param symmResult 109 * CeSymmResult with all the information 110 * @throws RefinerFailedException 111 * @throws StructureException 112 */ 113 public SymmOptimizer(CeSymmResult symmResult) { 114 115 this.axes = symmResult.getAxes(); 116 this.rnd = new Random(symmResult.getParams().getRndSeed()); 117 this.Lmin = symmResult.getParams().getMinCoreLength(); 118 this.dCutoff = symmResult.getParams().getDistanceCutoff(); 119 120 MultipleAlignmentEnsemble e = symmResult.getMultipleAlignment() 121 .getEnsemble().clone(); 122 this.msa = e.getMultipleAlignment(0); 123 124 this.atoms = msa.getAtomArrays().get(0); 125 this.order = msa.size(); 126 this.repeatCore = msa.getCoreLength(); 127 128 // 50% of the structures aligned (minimum) or all (no gaps) 129 if (symmResult.getParams().isGaps()) 130 Rmin = Math.max(order / 2, 2); 131 else 132 Rmin = order; 133 134 maxIter = symmResult.getParams().getOptimizationSteps(); 135 if (maxIter < 1) 136 maxIter = 100 * atoms.length; 137 } 138 139 private void initialize() throws StructureException, RefinerFailedException { 140 141 if (order == 1) 142 throw new RefinerFailedException( 143 "Non-symmetric seed alignment: order = 1"); 144 if (repeatCore < 1) 145 throw new RefinerFailedException( 146 "Seed alignment too short: repeat core length < 1"); 147 148 // Initialize the history variables 149 timeHistory = new ArrayList<Long>(); 150 lengthHistory = new ArrayList<Integer>(); 151 rmsdHistory = new ArrayList<Double>(); 152 mcScoreHistory = new ArrayList<Double>(); 153 tmScoreHistory = new ArrayList<Double>(); 154 155 C = 20 * order; 156 157 // Initialize alignment variables 158 block = msa.getBlock(0).getAlignRes(); 159 freePool = new ArrayList<Integer>(); 160 length = block.get(0).size(); 161 162 // Store the residues aligned in the block 163 List<Integer> aligned = new ArrayList<Integer>(); 164 for (int su = 0; su < order; su++) 165 aligned.addAll(block.get(su)); 166 167 // Add any residue not aligned to the free pool 168 for (int i = 0; i < atoms.length; i++) { 169 if (!aligned.contains(i)) 170 freePool.add(i); 171 } 172 checkGaps(); 173 174 // Set the MC score of the initial state (seed alignment) 175 updateMultipleAlignment(); 176 mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend, 177 dCutoff); 178 } 179 180 /** 181 * Optimization method based in a Monte-Carlo approach. Starting from the 182 * refined alignment uses 4 types of moves: 183 * <p> 184 * <li>1- Shift Row: if there are enough freePool residues available. 185 * <li>2- Expand Block: add another alignment column. 186 * <li>3- Shrink Block: move a block column to the freePool. 187 * <li>4- Insert gap: insert a gap in a position of the alignment. 188 * 189 * @throws StructureException 190 * @throws RefinerFailedException 191 * if the alignment is not symmetric or too short. 192 */ 193 public MultipleAlignment optimize() throws StructureException, 194 RefinerFailedException { 195 196 initialize(); 197 198 // Save the optimal alignment 199 List<List<Integer>> optBlock = new ArrayList<List<Integer>>(); 200 List<Integer> optFreePool = new ArrayList<Integer>(); 201 optFreePool.addAll(freePool); 202 for (int k = 0; k < order; k++) { 203 List<Integer> b = new ArrayList<Integer>(); 204 b.addAll(block.get(k)); 205 optBlock.add(b); 206 } 207 double optScore = mcScore; 208 209 int conv = 0; // Number of steps without an alignment improvement 210 int i = 1; 211 int stepsToConverge = Math.max(maxIter / 50, 1000); 212 long initialTime = System.nanoTime()/1000000; 213 214 while (i < maxIter && conv < stepsToConverge) { 215 216 // Save the state of the system 217 List<List<Integer>> lastBlock = new ArrayList<List<Integer>>(); 218 List<Integer> lastFreePool = new ArrayList<Integer>(); 219 lastFreePool.addAll(freePool); 220 for (int k = 0; k < order; k++) { 221 List<Integer> b = new ArrayList<Integer>(); 222 b.addAll(block.get(k)); 223 lastBlock.add(b); 224 } 225 double lastScore = mcScore; 226 int lastRepeatCore = repeatCore; 227 228 boolean moved = false; 229 230 while (!moved) { 231 // Randomly select one of the steps to modify the alignment. 232 // Because of biased moves, the probabilities are not the same 233 double move = rnd.nextDouble(); 234 if (move < 0.4) { 235 moved = shiftRow(); 236 logger.debug("did shift"); 237 } else if (move < 0.7) { 238 moved = expandBlock(); 239 logger.debug("did expand"); 240 } else if (move < 0.85) { 241 moved = shrinkBlock(); 242 logger.debug("did shrink"); 243 } else { 244 moved = insertGap(); 245 logger.debug("did insert gap"); 246 } 247 } 248 249 // Get the properties of the new alignment 250 updateMultipleAlignment(); 251 mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend, 252 dCutoff); 253 254 // Calculate change in the optimization Score 255 double AS = mcScore - lastScore; 256 double prob = 1.0; 257 258 if (AS < 0) { 259 260 // Probability of accepting bad move 261 prob = probabilityFunction(AS, i, maxIter); 262 double p = rnd.nextDouble(); 263 264 // Reject the move 265 if (p > prob) { 266 block = lastBlock; 267 freePool = lastFreePool; 268 length = block.get(0).size(); 269 repeatCore = lastRepeatCore; 270 mcScore = lastScore; 271 conv++; // no change in score if rejected 272 273 } else 274 conv = 0; // if accepted 275 276 } else 277 conv = 0; // if positive change 278 279 logger.debug(i + ": --prob: " + prob + ", --score: " + AS 280 + ", --conv: " + conv); 281 282 // Store as the optimal alignment if better 283 if (mcScore > optScore) { 284 optBlock = new ArrayList<List<Integer>>(); 285 optFreePool = new ArrayList<Integer>(); 286 optFreePool.addAll(freePool); 287 for (int k = 0; k < order; k++) { 288 List<Integer> b = new ArrayList<Integer>(); 289 b.addAll(block.get(k)); 290 optBlock.add(b); 291 } 292 optScore = mcScore; 293 } 294 295 if (history) { 296 if (i % saveStep == 1) { 297 // Get the correct superposition again 298 updateMultipleAlignment(); 299 300 timeHistory.add(System.nanoTime()/1000000 - initialTime); 301 lengthHistory.add(length); 302 rmsdHistory.add(msa.getScore(MultipleAlignmentScorer.RMSD)); 303 tmScoreHistory.add(msa 304 .getScore(MultipleAlignmentScorer.AVGTM_SCORE)); 305 mcScoreHistory.add(mcScore); 306 } 307 } 308 309 i++; 310 } 311 312 // Use the optimal alignment of the trajectory 313 block = optBlock; 314 freePool = optFreePool; 315 mcScore = optScore; 316 317 // Superimpose and calculate final scores 318 updateMultipleAlignment(); 319 msa.putScore(MultipleAlignmentScorer.MC_SCORE, mcScore); 320 321 // Save the history to the results folder of the symmetry project 322 if (history) { 323 try { 324 saveHistory(pathToHistory); 325 } catch (Exception e) { 326 logger.warn("Could not save history file: " + e.getMessage()); 327 } 328 } 329 330 return msa; 331 } 332 333 /** 334 * This method translates the internal data structures to a 335 * MultipleAlignment of the repeats in order to use the methods to score 336 * MultipleAlignments. 337 * 338 * @throws StructureException 339 * @throws RefinerFailedException 340 */ 341 private void updateMultipleAlignment() throws StructureException, 342 RefinerFailedException { 343 344 msa.clear(); 345 346 // Override the alignment with the new information 347 Block b = msa.getBlock(0); 348 b.setAlignRes(block); 349 repeatCore = b.getCoreLength(); 350 if (repeatCore < 1) 351 throw new RefinerFailedException( 352 "Optimization converged to length 0"); 353 354 SymmetryTools.updateSymmetryTransformation(axes, msa); 355 } 356 357 /** 358 * Method that loops through all the alignment columns and checks that there 359 * are no more gaps than the maximum allowed: Rmin. 360 * <p> 361 * There must be at least Rmin residues different than null in every 362 * alignment column. In case there is a column with more gaps than allowed 363 * it will be shrinked (moved to freePool). 364 * 365 * @return true if any columns has been shrinked and false otherwise 366 */ 367 private boolean checkGaps() { 368 369 List<Integer> shrinkColumns = new ArrayList<Integer>(); 370 // Loop for each column 371 for (int res = 0; res < length; res++) { 372 int gapCount = 0; 373 // Loop for each repeat and count the gaps 374 for (int su = 0; su < order; su++) { 375 if (block.get(su).get(res) == null) 376 gapCount++; 377 } 378 if ((order - gapCount) < Rmin) { 379 // Add the column to the shrink list 380 shrinkColumns.add(res); 381 } 382 } 383 384 // Shrink the columns that have more gaps than allowed 385 for (int col = shrinkColumns.size() - 1; col >= 0; col--) { 386 for (int su = 0; su < order; su++) { 387 Integer residue = block.get(su).get(shrinkColumns.get(col)); 388 block.get(su).remove((int) shrinkColumns.get(col)); 389 if (residue != null) 390 freePool.add(residue); 391 Collections.sort(freePool); 392 } 393 length--; 394 } 395 396 if (shrinkColumns.size() != 0) 397 return true; 398 else 399 return false; 400 } 401 402 /** 403 * Insert a gap in one of the repeats into selected position (by higher 404 * distances) in the alignment. Calculates the average residue distance to 405 * make the choice. A gap is a null in the block. 406 * 407 * @throws StructureException 408 * @throws RefinerFailedException 409 */ 410 private boolean insertGap() throws StructureException, 411 RefinerFailedException { 412 413 // Let gaps only if the repeat is larger than the minimum length 414 if (repeatCore <= Lmin) 415 return false; 416 417 // Select residue by maximum distance 418 updateMultipleAlignment(); 419 Matrix residueDistances = MultipleAlignmentTools 420 .getAverageResidueDistances(msa); 421 422 double maxDist = Double.MIN_VALUE; 423 int su = 0; 424 int res = 0; 425 for (int col = 0; col < length; col++) { 426 for (int s = 0; s < order; s++) { 427 if (residueDistances.get(s, col) != -1) { 428 if (residueDistances.get(s, col) > maxDist) { 429 // geometric distribution 430 if (rnd.nextDouble() > 0.5) { 431 su = s; 432 res = col; 433 maxDist = residueDistances.get(s, col); 434 } 435 } 436 } 437 } 438 } 439 440 // Insert the gap at the position 441 Integer residueL = block.get(su).get(res); 442 if (residueL != null) { 443 freePool.add(residueL); 444 Collections.sort(freePool); 445 } else 446 return false; // If there was a gap already in the position. 447 448 block.get(su).set(res, null); 449 checkGaps(); 450 return true; 451 } 452 453 /** 454 * Move all the block residues of one repeat one position to the left or 455 * right and move the corresponding boundary residues from the freePool to 456 * the block, and viceversa. 457 * <p> 458 * The boundaries are determined by any irregularity (either a gap or a 459 * discontinuity in the alignment. 460 */ 461 private boolean shiftRow() { 462 463 int su = rnd.nextInt(order); // Select the repeat 464 int rl = rnd.nextInt(2); // Select between moving right (0) or left (1) 465 int res = rnd.nextInt(length); // Residue as a pivot 466 467 // When the pivot residue is null try to add a residue from the freePool 468 if (block.get(su).get(res) == null) { 469 470 int right = res; 471 int left = res; 472 // Find the boundary to the right abd left 473 while (block.get(su).get(right) == null && right < length - 1) { 474 right++; 475 } 476 while (block.get(su).get(left) == null && left > 0) { 477 left--; 478 } 479 480 // If they both are null the whole block is null 481 if (block.get(su).get(left) == null 482 && block.get(su).get(right) == null) { 483 return false; 484 } else if (block.get(su).get(left) == null) { 485 // Choose the sequentially previous residue of the known one 486 Integer residue = block.get(su).get(right) - 1; 487 if (freePool.contains(residue)) { 488 block.get(su).set(res, residue); 489 freePool.remove(residue); 490 } else 491 return false; 492 } else if (block.get(su).get(right) == null) { 493 // Choose the sequentially next residue of the known one 494 Integer residue = block.get(su).get(left) + 1; 495 if (freePool.contains(residue)) { 496 block.get(su).set(res, residue); 497 freePool.remove(residue); 498 } else 499 return false; 500 } else { 501 // If boundaries are consecutive swap null and position (R or L) 502 if (block.get(su).get(right) == block.get(su).get(left) + 1) { 503 switch (rl) { 504 case 0: // to the right 505 block.get(su).set(right - 1, block.get(su).get(right)); 506 block.get(su).set(right, null); 507 break; 508 case 1: // to the left 509 block.get(su).set(left + 1, block.get(su).get(left)); 510 block.get(su).set(left, null); 511 break; 512 } 513 } else { 514 // Choose randomly a residue in between left and right to 515 // add 516 Integer residue = rnd.nextInt(block.get(su).get(right) 517 - block.get(su).get(left) - 1) 518 + block.get(su).get(left) + 1; 519 520 if (freePool.contains(residue)) { 521 block.get(su).set(res, residue); 522 freePool.remove(residue); 523 } 524 } 525 } 526 return true; 527 } 528 529 // When the residue is different than null 530 switch (rl) { 531 case 0: // Move to the right 532 533 int leftBoundary = res - 1; 534 int leftPrevRes = res; 535 while (true) { 536 if (leftBoundary < 0) 537 break; 538 else { 539 if (block.get(su).get(leftBoundary) == null) { 540 break; // gap 541 } else if (block.get(su).get(leftPrevRes) > block.get(su) 542 .get(leftBoundary) + 1) { 543 break; // discontinuity 544 } 545 } 546 leftPrevRes = leftBoundary; 547 leftBoundary--; 548 } 549 leftBoundary++; 550 551 int rightBoundary = res + 1; 552 int rightPrevRes = res; 553 while (true) { 554 if (rightBoundary == length) 555 break; 556 else { 557 if (block.get(su).get(rightBoundary) == null) { 558 break; // gap 559 } else if (block.get(su).get(rightPrevRes) + 1 < block.get( 560 su).get(rightBoundary)) { 561 break; // discontinuity 562 } 563 } 564 rightPrevRes = rightBoundary; 565 rightBoundary++; 566 } 567 rightBoundary--; 568 569 // Residues at the boundary 570 Integer residueR0 = block.get(su).get(rightBoundary); 571 Integer residueL0 = block.get(su).get(leftBoundary); 572 573 // Remove residue at the right of the block and add to the freePool 574 block.get(su).remove(rightBoundary); 575 if (residueR0 != null) { 576 freePool.add(residueR0); 577 Collections.sort(freePool); 578 } 579 580 // Add the residue at the left of the block 581 residueL0 -= 1; // cannot be null, throw exception if it is 582 if (freePool.contains(residueL0)) { 583 block.get(su).add(leftBoundary, residueL0); 584 freePool.remove(residueL0); 585 } else { 586 block.get(su).add(leftBoundary, null); 587 } 588 break; 589 590 case 1: // Move to the left 591 592 int leftBoundary1 = res - 1; 593 int leftPrevRes1 = res; 594 while (true) { 595 if (leftBoundary1 < 0) 596 break; 597 else { 598 if (block.get(su).get(leftBoundary1) == null) { 599 break; // gap 600 } else if (block.get(su).get(leftPrevRes1) > block.get(su) 601 .get(leftBoundary1) + 1) { 602 break; // discontinuity 603 } 604 } 605 leftPrevRes1 = leftBoundary1; 606 leftBoundary1--; 607 } 608 leftBoundary1++; 609 610 int rightBoundary1 = res + 1; 611 int rightPrevRes1 = res; 612 while (true) { 613 if (rightBoundary1 == length) 614 break; 615 else { 616 if (block.get(su).get(rightBoundary1) == null) { 617 break; // gap 618 } else if (block.get(su).get(rightPrevRes1) + 1 < block 619 .get(su).get(rightBoundary1)) { 620 break; // discontinuity 621 } 622 } 623 rightPrevRes1 = rightBoundary1; 624 rightBoundary1++; 625 } 626 rightBoundary1--; 627 628 // Residues at the boundary 629 Integer residueR1 = block.get(su).get(rightBoundary1); 630 Integer residueL1 = block.get(su).get(leftBoundary1); 631 632 // Add the residue at the right of the block 633 residueR1 += 1; // cannot be null 634 if (freePool.contains(residueR1)) { 635 if (rightBoundary1 == length - 1) 636 block.get(su).add(residueR1); 637 else 638 block.get(su).add(rightBoundary1 + 1, residueR1); 639 freePool.remove(residueR1); 640 } else { 641 block.get(su).add(rightBoundary1 + 1, null); 642 } 643 644 // Remove the residue at the left of the block 645 block.get(su).remove(leftBoundary1); 646 freePool.add(residueL1); 647 Collections.sort(freePool); 648 break; 649 } 650 checkGaps(); 651 return true; 652 } 653 654 /** 655 * It extends the alignment one position to the right or to the left of a 656 * randomly selected position by moving the consecutive residues of each 657 * repeat (if present) from the freePool to the block. 658 * <p> 659 * If there are not enough residues in the freePool it introduces gaps. 660 */ 661 private boolean expandBlock() { 662 663 boolean moved = false; 664 665 int rl = rnd.nextInt(2); // Select between right (0) or left (1) 666 int res = rnd.nextInt(length); // Residue as a pivot 667 668 switch (rl) { 669 case 0: 670 671 int rightBoundary = res; 672 int[] previousPos = new int[order]; 673 for (int su = 0; su < order; su++) 674 previousPos[su] = -1; 675 676 // Search a position to the right that has at minimum Rmin 677 while (length - 1 > rightBoundary) { 678 int noncontinuous = 0; 679 for (int su = 0; su < order; su++) { 680 if (block.get(su).get(rightBoundary) == null) { 681 continue; 682 } else if (previousPos[su] == -1) { 683 previousPos[su] = block.get(su).get(rightBoundary); 684 } else if (block.get(su).get(rightBoundary) > previousPos[su] + 1) { 685 noncontinuous++; 686 } 687 } 688 if (noncontinuous < Rmin) 689 rightBoundary++; 690 else 691 break; 692 } 693 if (rightBoundary > 0) 694 rightBoundary--; 695 696 // Expand the block with the residues at the repeat boundaries 697 for (int su = 0; su < order; su++) { 698 Integer residueR = block.get(su).get(rightBoundary); 699 if (residueR == null) { 700 if (rightBoundary == length - 1) 701 block.get(su).add(null); 702 else 703 block.get(su).add(rightBoundary + 1, null); 704 } else if (freePool.contains(residueR + 1)) { 705 Integer residueAdd = residueR + 1; 706 if (rightBoundary == length - 1) { 707 block.get(su).add(residueAdd); 708 } else 709 block.get(su).add(rightBoundary + 1, residueAdd); 710 freePool.remove(residueAdd); 711 } else { 712 if (rightBoundary == length - 1) 713 block.get(su).add(null); 714 else 715 block.get(su).add(rightBoundary + 1, null); 716 } 717 } 718 length++; 719 moved = true; 720 break; 721 722 case 1: 723 724 int leftBoundary = res; 725 int[] nextPos = new int[order]; 726 for (int su = 0; su < order; su++) 727 nextPos[su] = -1; 728 729 // Search a position to the right that has at minimum Rmin 730 while (leftBoundary > 0) { 731 int noncontinuous = 0; 732 for (int su = 0; su < order; su++) { 733 if (block.get(su).get(leftBoundary) == null) { 734 continue; 735 } else if (nextPos[su] == -1) { 736 nextPos[su] = block.get(su).get(leftBoundary); 737 } else if (block.get(su).get(leftBoundary) < nextPos[su] - 1) { 738 noncontinuous++; 739 } 740 } 741 if (noncontinuous < Rmin) 742 leftBoundary--; 743 else 744 break; 745 } 746 747 // Expand the block with the residues at the repeat boundaries 748 for (int su = 0; su < order; su++) { 749 Integer residueL = block.get(su).get(leftBoundary); 750 if (residueL == null) { 751 block.get(su).add(leftBoundary, null); 752 } else if (freePool.contains(residueL - 1)) { 753 Integer residueAdd = residueL - 1; 754 block.get(su).add(leftBoundary, residueAdd); 755 freePool.remove(residueAdd); 756 } else { 757 block.get(su).add(leftBoundary, null); 758 } 759 } 760 length++; 761 moved = true; 762 break; 763 } 764 if (moved) 765 return !checkGaps(); 766 return moved; 767 } 768 769 /** 770 * Deletes an alignment column at a randomly selected position. 771 * 772 * @throws StructureException 773 * @throws RefinerFailedException 774 */ 775 private boolean shrinkBlock() throws StructureException, 776 RefinerFailedException { 777 778 // Let shrink moves only if the repeat is larger enough 779 if (repeatCore <= Lmin) 780 return false; 781 782 // Select column by maximum distance 783 updateMultipleAlignment(); 784 Matrix residueDistances = MultipleAlignmentTools 785 .getAverageResidueDistances(msa); 786 787 double maxDist = Double.MIN_VALUE; 788 double[] colDistances = new double[length]; 789 int res = 0; 790 for (int col = 0; col < length; col++) { 791 int normalize = 0; 792 for (int s = 0; s < order; s++) { 793 if (residueDistances.get(s, col) != -1) { 794 colDistances[col] += residueDistances.get(s, col); 795 normalize++; 796 } 797 } 798 colDistances[col] /= normalize; 799 if (colDistances[col] > maxDist) { 800 // geometric distribution 801 if (rnd.nextDouble() > 0.5) { 802 maxDist = colDistances[col]; 803 res = col; 804 } 805 } 806 } 807 808 for (int su = 0; su < order; su++) { 809 Integer residue = block.get(su).get(res); 810 block.get(su).remove(res); 811 if (residue != null) 812 freePool.add(residue); 813 Collections.sort(freePool); 814 } 815 length--; 816 checkGaps(); 817 return true; 818 } 819 820 /** 821 * Calculates the probability of accepting a bad move given the iteration 822 * step and the score change. 823 * <p> 824 * Function: p=(C-AS)/(C*sqrt(step)) Added a normalization factor so that 825 * the probability approaches 0 when the maxIter is reached. 826 */ 827 private double probabilityFunction(double AS, int m, int maxIter) { 828 829 double prob = (C + AS) / (C * Math.sqrt(m)); 830 double norm = (1 - (m * 1.0) / maxIter); // Normalization factor 831 return Math.min(Math.max(prob * norm, 0.0), 1.0); 832 } 833 834 /** 835 * Save the evolution of the optimization process as a csv file. 836 */ 837 private void saveHistory(String folder) throws IOException { 838 839 String name = msa.getStructureIdentifier(0).getIdentifier(); 840 FileWriter writer = new FileWriter(folder + name 841 + "-symm_opt.csv"); 842 writer.append("Step,Time,RepeatLength,RMSD,TMscore,MCscore\n"); 843 844 for (int i = 0; i < lengthHistory.size(); i++) { 845 writer.append(i * saveStep + ","); 846 writer.append(timeHistory.get(i) + ","); 847 writer.append(lengthHistory.get(i) + ","); 848 writer.append(rmsdHistory.get(i) + ","); 849 writer.append(tmScoreHistory.get(i) + ","); 850 writer.append(mcScoreHistory.get(i) + "\n"); 851 } 852 853 writer.flush(); 854 writer.close(); 855 } 856 857}