001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.symmetry.internal; 022 023import java.io.FileWriter; 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.Collections; 027import java.util.List; 028import java.util.Random; 029 030import org.biojava.nbio.structure.Atom; 031import org.biojava.nbio.structure.StructureException; 032import org.biojava.nbio.structure.align.multiple.Block; 033import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 034import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble; 035import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer; 036import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentTools; 037import org.biojava.nbio.structure.jama.Matrix; 038import org.biojava.nbio.structure.symmetry.utils.SymmetryTools; 039import org.slf4j.Logger; 040import org.slf4j.LoggerFactory; 041 042/** 043 * Optimizes a symmetry alignment by a Monte Carlo score optimization of the 044 * repeat multiple alignment. The superposition of the repeats is not free 045 * (felxible), because it is constrained on the symmetry axes found in the 046 * structure. This is the main difference to the {@link org.biojava.nbio.structure.align.multiple.mc.MultipleMcMain} algorithm in 047 * biojava. Another major difference is that the free Pool is shared for all 048 * repeats, so that no residue can appear to more than one repeat at a time. 049 * <p> 050 * This algorithm does not use a unfiform distribution for selecting moves, 051 * farther residues have more probability to be shrinked or gapped. This 052 * modification of the algorithm improves convergence and running time. 053 * <p> 054 * Use call method to parallelize optimizations, or use optimize method instead. 055 * Because gaps are allowed in the repeats, a {@link MultipleAlignment} format 056 * is returned. 057 * 058 * @author Aleix Lafita 059 * @since 4.1.1 060 * 061 */ 062public class SymmOptimizer { 063 064 private static final Logger logger = LoggerFactory 065 .getLogger(SymmOptimizer.class); 066 067 private Random rnd; 068 069 // Optimization parameters 070 private int Rmin = 2; // min aligned repeats per column 071 private int Lmin; // min repeat length 072 private int maxIter; // max iterations 073 private double C = 20; // probability of accept bad moves constant 074 075 // Score function parameters 076 private static final double Gopen = 20.0; // Penalty for opening gap 077 private static final double Gextend = 10.0; // Penalty for extending gaps 078 private double dCutoff; 079 080 // Alignment Information 081 private MultipleAlignment msa; 082 private SymmetryAxes axes; 083 private Atom[] atoms; 084 private int order; 085 private int length; // total alignment columns (block size) 086 private int repeatCore; // core length (without gaps) 087 088 // Aligned Residues and Score 089 private List<List<Integer>> block; // residues aligned 090 private List<Integer> freePool; // residues not aligned 091 private double mcScore; // alignment score to optimize 092 093 // Variables that store the history of the optimization - slower if on 094 private static final boolean history = false; 095 private static final int saveStep = 100; 096 private static final String pathToHistory = "results/symm-opt/"; 097 private List<Long> timeHistory; 098 private List<Integer> lengthHistory; 099 private List<Double> rmsdHistory; 100 private List<Double> tmScoreHistory; 101 private List<Double> mcScoreHistory; 102 103 /** 104 * Constructor with a seed MultipleAlignment storing a refined symmetry 105 * alignment of the repeats. To perform the optimization use the call or 106 * optimize methods after instantiation. 107 * 108 * @param symmResult 109 * CeSymmResult with all the information 110 */ 111 public SymmOptimizer(CeSymmResult symmResult) { 112 113 this.axes = symmResult.getAxes(); 114 this.rnd = new Random(symmResult.getParams().getRndSeed()); 115 this.Lmin = symmResult.getParams().getMinCoreLength(); 116 this.dCutoff = symmResult.getParams().getDistanceCutoff(); 117 118 MultipleAlignmentEnsemble e = symmResult.getMultipleAlignment() 119 .getEnsemble().clone(); 120 this.msa = e.getMultipleAlignment(0); 121 122 this.atoms = msa.getAtomArrays().get(0); 123 this.order = msa.size(); 124 this.repeatCore = msa.getCoreLength(); 125 126 // 50% of the structures aligned (minimum) or all (no gaps) 127 if (symmResult.getParams().isGaps()) 128 Rmin = Math.max(order / 2, 2); 129 else 130 Rmin = order; 131 132 maxIter = symmResult.getParams().getOptimizationSteps(); 133 if (maxIter < 1) 134 maxIter = 100 * atoms.length; 135 } 136 137 private void initialize() throws StructureException, RefinerFailedException { 138 139 if (order == 1) 140 throw new RefinerFailedException( 141 "Non-symmetric seed alignment: order = 1"); 142 if (repeatCore < 1) 143 throw new RefinerFailedException( 144 "Seed alignment too short: repeat core length < 1"); 145 146 // Initialize the history variables 147 timeHistory = new ArrayList<>(); 148 lengthHistory = new ArrayList<>(); 149 rmsdHistory = new ArrayList<>(); 150 mcScoreHistory = new ArrayList<>(); 151 tmScoreHistory = new ArrayList<>(); 152 153 C = 20 * order; 154 155 // Initialize alignment variables 156 block = msa.getBlock(0).getAlignRes(); 157 freePool = new ArrayList<>(); 158 length = block.get(0).size(); 159 160 // Store the residues aligned in the block 161 List<Integer> aligned = new ArrayList<>(); 162 for (int su = 0; su < order; su++) 163 aligned.addAll(block.get(su)); 164 165 // Add any residue not aligned to the free pool 166 for (int i = 0; i < atoms.length; i++) { 167 if (!aligned.contains(i)) 168 freePool.add(i); 169 } 170 checkGaps(); 171 172 // Set the MC score of the initial state (seed alignment) 173 updateMultipleAlignment(); 174 mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend, 175 dCutoff); 176 } 177 178 /** 179 * Optimization method based in a Monte-Carlo approach. Starting from the 180 * refined alignment uses 4 types of moves: 181 * <p> 182 * <ul> 183 * <li>1- Shift Row: if there are enough freePool residues available. 184 * <li>2- Expand Block: add another alignment column. 185 * <li>3- Shrink Block: move a block column to the freePool. 186 * <li>4- Insert gap: insert a gap in a position of the alignment. 187 * </ul> 188 * @throws StructureException 189 * @throws RefinerFailedException 190 * if the alignment is not symmetric or too short. 191 */ 192 public MultipleAlignment optimize() throws StructureException, 193 RefinerFailedException { 194 195 initialize(); 196 197 // Save the optimal alignment 198 List<List<Integer>> optBlock = new ArrayList<>(); 199 List<Integer> optFreePool = new ArrayList<>(); 200 optFreePool.addAll(freePool); 201 for (int k = 0; k < order; k++) { 202 List<Integer> b = new ArrayList<>(); 203 b.addAll(block.get(k)); 204 optBlock.add(b); 205 } 206 double optScore = mcScore; 207 208 int conv = 0; // Number of steps without an alignment improvement 209 int i = 1; 210 int stepsToConverge = Math.max(maxIter / 50, 1000); 211 long initialTime = System.nanoTime()/1000000; 212 213 while (i < maxIter && conv < stepsToConverge) { 214 215 // Save the state of the system 216 List<List<Integer>> lastBlock = new ArrayList<>(); 217 List<Integer> lastFreePool = new ArrayList<>(); 218 lastFreePool.addAll(freePool); 219 for (int k = 0; k < order; k++) { 220 List<Integer> b = new ArrayList<>(); 221 b.addAll(block.get(k)); 222 lastBlock.add(b); 223 } 224 double lastScore = mcScore; 225 int lastRepeatCore = repeatCore; 226 227 boolean moved = false; 228 229 while (!moved) { 230 // Randomly select one of the steps to modify the alignment. 231 // Because of biased moves, the probabilities are not the same 232 double move = rnd.nextDouble(); 233 if (move < 0.4) { 234 moved = shiftRow(); 235 logger.debug("did shift"); 236 } else if (move < 0.7) { 237 moved = expandBlock(); 238 logger.debug("did expand"); 239 } else if (move < 0.85) { 240 moved = shrinkBlock(); 241 logger.debug("did shrink"); 242 } else { 243 moved = insertGap(); 244 logger.debug("did insert gap"); 245 } 246 } 247 248 // Get the properties of the new alignment 249 updateMultipleAlignment(); 250 mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend, 251 dCutoff); 252 253 // Calculate change in the optimization Score 254 double AS = mcScore - lastScore; 255 double prob = 1.0; 256 257 if (AS < 0) { 258 259 // Probability of accepting bad move 260 prob = probabilityFunction(AS, i, maxIter); 261 double p = rnd.nextDouble(); 262 263 // Reject the move 264 if (p > prob) { 265 block = lastBlock; 266 freePool = lastFreePool; 267 length = block.get(0).size(); 268 repeatCore = lastRepeatCore; 269 mcScore = lastScore; 270 conv++; // no change in score if rejected 271 272 } else 273 conv = 0; // if accepted 274 275 } else 276 conv = 0; // if positive change 277 278 logger.debug(i + ": --prob: " + prob + ", --score: " + AS 279 + ", --conv: " + conv); 280 281 // Store as the optimal alignment if better 282 if (mcScore > optScore) { 283 optBlock = new ArrayList<>(); 284 optFreePool = new ArrayList<>(); 285 optFreePool.addAll(freePool); 286 for (int k = 0; k < order; k++) { 287 List<Integer> b = new ArrayList<>(); 288 b.addAll(block.get(k)); 289 optBlock.add(b); 290 } 291 optScore = mcScore; 292 } 293 294 if (history) { 295 if (i % saveStep == 1) { 296 // Get the correct superposition again 297 updateMultipleAlignment(); 298 299 timeHistory.add(System.nanoTime()/1000000 - initialTime); 300 lengthHistory.add(length); 301 rmsdHistory.add(msa.getScore(MultipleAlignmentScorer.RMSD)); 302 tmScoreHistory.add(msa 303 .getScore(MultipleAlignmentScorer.AVGTM_SCORE)); 304 mcScoreHistory.add(mcScore); 305 } 306 } 307 308 i++; 309 } 310 311 // Use the optimal alignment of the trajectory 312 block = optBlock; 313 freePool = optFreePool; 314 mcScore = optScore; 315 316 // Superimpose and calculate final scores 317 updateMultipleAlignment(); 318 msa.putScore(MultipleAlignmentScorer.MC_SCORE, mcScore); 319 320 // Save the history to the results folder of the symmetry project 321 if (history) { 322 try { 323 saveHistory(pathToHistory); 324 } catch (Exception e) { 325 logger.warn("Could not save history file: " + e.getMessage()); 326 } 327 } 328 329 return msa; 330 } 331 332 /** 333 * This method translates the internal data structures to a 334 * MultipleAlignment of the repeats in order to use the methods to score 335 * MultipleAlignments. 336 * 337 * @throws StructureException 338 * @throws RefinerFailedException 339 */ 340 private void updateMultipleAlignment() throws StructureException, 341 RefinerFailedException { 342 343 msa.clear(); 344 345 // Override the alignment with the new information 346 Block b = msa.getBlock(0); 347 b.setAlignRes(block); 348 repeatCore = b.getCoreLength(); 349 if (repeatCore < 1) 350 throw new RefinerFailedException( 351 "Optimization converged to length 0"); 352 353 SymmetryTools.updateSymmetryTransformation(axes, msa); 354 } 355 356 /** 357 * Method that loops through all the alignment columns and checks that there 358 * are no more gaps than the maximum allowed: Rmin. 359 * <p> 360 * There must be at least Rmin residues different than null in every 361 * alignment column. In case there is a column with more gaps than allowed 362 * it will be shrinked (moved to freePool). 363 * 364 * @return true if any columns has been shrinked and false otherwise 365 */ 366 private boolean checkGaps() { 367 368 List<Integer> shrinkColumns = new ArrayList<>(); 369 // Loop for each column 370 for (int res = 0; res < length; res++) { 371 int gapCount = 0; 372 // Loop for each repeat and count the gaps 373 for (int su = 0; su < order; su++) { 374 if (block.get(su).get(res) == null) 375 gapCount++; 376 } 377 if ((order - gapCount) < Rmin) { 378 // Add the column to the shrink list 379 shrinkColumns.add(res); 380 } 381 } 382 383 // Shrink the columns that have more gaps than allowed 384 for (int col = shrinkColumns.size() - 1; col >= 0; col--) { 385 for (int su = 0; su < order; su++) { 386 Integer residue = block.get(su).get(shrinkColumns.get(col)); 387 block.get(su).remove((int) shrinkColumns.get(col)); 388 if (residue != null) 389 freePool.add(residue); 390 Collections.sort(freePool); 391 } 392 length--; 393 } 394 395 if (shrinkColumns.size() != 0) 396 return true; 397 else 398 return false; 399 } 400 401 /** 402 * Insert a gap in one of the repeats into selected position (by higher 403 * distances) in the alignment. Calculates the average residue distance to 404 * make the choice. A gap is a null in the block. 405 * 406 * @throws StructureException 407 * @throws RefinerFailedException 408 */ 409 private boolean insertGap() throws StructureException, 410 RefinerFailedException { 411 412 // Let gaps only if the repeat is larger than the minimum length 413 if (repeatCore <= Lmin) 414 return false; 415 416 // Select residue by maximum distance 417 updateMultipleAlignment(); 418 Matrix residueDistances = MultipleAlignmentTools 419 .getAverageResidueDistances(msa); 420 421 double maxDist = Double.MIN_VALUE; 422 int su = 0; 423 int res = 0; 424 for (int col = 0; col < length; col++) { 425 for (int s = 0; s < order; s++) { 426 if (residueDistances.get(s, col) != -1) { 427 if (residueDistances.get(s, col) > maxDist) { 428 // geometric distribution 429 if (rnd.nextDouble() > 0.5) { 430 su = s; 431 res = col; 432 maxDist = residueDistances.get(s, col); 433 } 434 } 435 } 436 } 437 } 438 439 // Insert the gap at the position 440 Integer residueL = block.get(su).get(res); 441 if (residueL != null) { 442 freePool.add(residueL); 443 Collections.sort(freePool); 444 } else 445 return false; // If there was a gap already in the position. 446 447 block.get(su).set(res, null); 448 checkGaps(); 449 return true; 450 } 451 452 /** 453 * Move all the block residues of one repeat one position to the left or 454 * right and move the corresponding boundary residues from the freePool to 455 * the block, and viceversa. 456 * <p> 457 * The boundaries are determined by any irregularity (either a gap or a 458 * discontinuity in the alignment. 459 */ 460 private boolean shiftRow() { 461 462 int su = rnd.nextInt(order); // Select the repeat 463 int rl = rnd.nextInt(2); // Select between moving right (0) or left (1) 464 int res = rnd.nextInt(length); // Residue as a pivot 465 466 // When the pivot residue is null try to add a residue from the freePool 467 if (block.get(su).get(res) == null) { 468 469 int right = res; 470 int left = res; 471 // Find the boundary to the right abd left 472 while (block.get(su).get(right) == null && right < length - 1) { 473 right++; 474 } 475 while (block.get(su).get(left) == null && left > 0) { 476 left--; 477 } 478 479 // If they both are null the whole block is null 480 if (block.get(su).get(left) == null 481 && block.get(su).get(right) == null) { 482 return false; 483 } else if (block.get(su).get(left) == null) { 484 // Choose the sequentially previous residue of the known one 485 Integer residue = block.get(su).get(right) - 1; 486 if (freePool.contains(residue)) { 487 block.get(su).set(res, residue); 488 freePool.remove(residue); 489 } else 490 return false; 491 } else if (block.get(su).get(right) == null) { 492 // Choose the sequentially next residue of the known one 493 Integer residue = block.get(su).get(left) + 1; 494 if (freePool.contains(residue)) { 495 block.get(su).set(res, residue); 496 freePool.remove(residue); 497 } else 498 return false; 499 } else { 500 // If boundaries are consecutive swap null and position (R or L) 501 if (block.get(su).get(right) == block.get(su).get(left) + 1) { 502 switch (rl) { 503 case 0: // to the right 504 block.get(su).set(right - 1, block.get(su).get(right)); 505 block.get(su).set(right, null); 506 break; 507 case 1: // to the left 508 block.get(su).set(left + 1, block.get(su).get(left)); 509 block.get(su).set(left, null); 510 break; 511 } 512 } else { 513 // Choose randomly a residue in between left and right to 514 // add 515 Integer residue = rnd.nextInt(block.get(su).get(right) 516 - block.get(su).get(left) - 1) 517 + block.get(su).get(left) + 1; 518 519 if (freePool.contains(residue)) { 520 block.get(su).set(res, residue); 521 freePool.remove(residue); 522 } 523 } 524 } 525 return true; 526 } 527 528 // When the residue is different than null 529 switch (rl) { 530 case 0: // Move to the right 531 532 int leftBoundary = res - 1; 533 int leftPrevRes = res; 534 while (true) { 535 if (leftBoundary < 0) 536 break; 537 else { 538 if (block.get(su).get(leftBoundary) == null) { 539 break; // gap 540 } else if (block.get(su).get(leftPrevRes) > block.get(su) 541 .get(leftBoundary) + 1) { 542 break; // discontinuity 543 } 544 } 545 leftPrevRes = leftBoundary; 546 leftBoundary--; 547 } 548 leftBoundary++; 549 550 int rightBoundary = res + 1; 551 int rightPrevRes = res; 552 while (true) { 553 if (rightBoundary == length) 554 break; 555 else { 556 if (block.get(su).get(rightBoundary) == null) { 557 break; // gap 558 } else if (block.get(su).get(rightPrevRes) + 1 < block.get( 559 su).get(rightBoundary)) { 560 break; // discontinuity 561 } 562 } 563 rightPrevRes = rightBoundary; 564 rightBoundary++; 565 } 566 rightBoundary--; 567 568 // Residues at the boundary 569 Integer residueR0 = block.get(su).get(rightBoundary); 570 Integer residueL0 = block.get(su).get(leftBoundary); 571 572 // Remove residue at the right of the block and add to the freePool 573 block.get(su).remove(rightBoundary); 574 if (residueR0 != null) { 575 freePool.add(residueR0); 576 Collections.sort(freePool); 577 } 578 579 // Add the residue at the left of the block 580 residueL0 -= 1; // cannot be null, throw exception if it is 581 if (freePool.contains(residueL0)) { 582 block.get(su).add(leftBoundary, residueL0); 583 freePool.remove(residueL0); 584 } else { 585 block.get(su).add(leftBoundary, null); 586 } 587 break; 588 589 case 1: // Move to the left 590 591 int leftBoundary1 = res - 1; 592 int leftPrevRes1 = res; 593 while (true) { 594 if (leftBoundary1 < 0) 595 break; 596 else { 597 if (block.get(su).get(leftBoundary1) == null) { 598 break; // gap 599 } else if (block.get(su).get(leftPrevRes1) > block.get(su) 600 .get(leftBoundary1) + 1) { 601 break; // discontinuity 602 } 603 } 604 leftPrevRes1 = leftBoundary1; 605 leftBoundary1--; 606 } 607 leftBoundary1++; 608 609 int rightBoundary1 = res + 1; 610 int rightPrevRes1 = res; 611 while (true) { 612 if (rightBoundary1 == length) 613 break; 614 else { 615 if (block.get(su).get(rightBoundary1) == null) { 616 break; // gap 617 } else if (block.get(su).get(rightPrevRes1) + 1 < block 618 .get(su).get(rightBoundary1)) { 619 break; // discontinuity 620 } 621 } 622 rightPrevRes1 = rightBoundary1; 623 rightBoundary1++; 624 } 625 rightBoundary1--; 626 627 // Residues at the boundary 628 Integer residueR1 = block.get(su).get(rightBoundary1); 629 Integer residueL1 = block.get(su).get(leftBoundary1); 630 631 // Add the residue at the right of the block 632 residueR1 += 1; // cannot be null 633 if (freePool.contains(residueR1)) { 634 if (rightBoundary1 == length - 1) 635 block.get(su).add(residueR1); 636 else 637 block.get(su).add(rightBoundary1 + 1, residueR1); 638 freePool.remove(residueR1); 639 } else { 640 block.get(su).add(rightBoundary1 + 1, null); 641 } 642 643 // Remove the residue at the left of the block 644 block.get(su).remove(leftBoundary1); 645 freePool.add(residueL1); 646 Collections.sort(freePool); 647 break; 648 } 649 checkGaps(); 650 return true; 651 } 652 653 /** 654 * It extends the alignment one position to the right or to the left of a 655 * randomly selected position by moving the consecutive residues of each 656 * repeat (if present) from the freePool to the block. 657 * <p> 658 * If there are not enough residues in the freePool it introduces gaps. 659 */ 660 private boolean expandBlock() { 661 662 boolean moved = false; 663 664 int rl = rnd.nextInt(2); // Select between right (0) or left (1) 665 int res = rnd.nextInt(length); // Residue as a pivot 666 667 switch (rl) { 668 case 0: 669 670 int rightBoundary = res; 671 int[] previousPos = new int[order]; 672 for (int su = 0; su < order; su++) 673 previousPos[su] = -1; 674 675 // Search a position to the right that has at minimum Rmin 676 while (length - 1 > rightBoundary) { 677 int noncontinuous = 0; 678 for (int su = 0; su < order; su++) { 679 if (block.get(su).get(rightBoundary) == null) { 680 continue; 681 } else if (previousPos[su] == -1) { 682 previousPos[su] = block.get(su).get(rightBoundary); 683 } else if (block.get(su).get(rightBoundary) > previousPos[su] + 1) { 684 noncontinuous++; 685 } 686 } 687 if (noncontinuous < Rmin) 688 rightBoundary++; 689 else 690 break; 691 } 692 if (rightBoundary > 0) 693 rightBoundary--; 694 695 // Expand the block with the residues at the repeat boundaries 696 for (int su = 0; su < order; su++) { 697 Integer residueR = block.get(su).get(rightBoundary); 698 if (residueR == null) { 699 if (rightBoundary == length - 1) 700 block.get(su).add(null); 701 else 702 block.get(su).add(rightBoundary + 1, null); 703 } else if (freePool.contains(residueR + 1)) { 704 Integer residueAdd = residueR + 1; 705 if (rightBoundary == length - 1) { 706 block.get(su).add(residueAdd); 707 } else 708 block.get(su).add(rightBoundary + 1, residueAdd); 709 freePool.remove(residueAdd); 710 } else { 711 if (rightBoundary == length - 1) 712 block.get(su).add(null); 713 else 714 block.get(su).add(rightBoundary + 1, null); 715 } 716 } 717 length++; 718 moved = true; 719 break; 720 721 case 1: 722 723 int leftBoundary = res; 724 int[] nextPos = new int[order]; 725 for (int su = 0; su < order; su++) 726 nextPos[su] = -1; 727 728 // Search a position to the right that has at minimum Rmin 729 while (leftBoundary > 0) { 730 int noncontinuous = 0; 731 for (int su = 0; su < order; su++) { 732 if (block.get(su).get(leftBoundary) == null) { 733 continue; 734 } else if (nextPos[su] == -1) { 735 nextPos[su] = block.get(su).get(leftBoundary); 736 } else if (block.get(su).get(leftBoundary) < nextPos[su] - 1) { 737 noncontinuous++; 738 } 739 } 740 if (noncontinuous < Rmin) 741 leftBoundary--; 742 else 743 break; 744 } 745 746 // Expand the block with the residues at the repeat boundaries 747 for (int su = 0; su < order; su++) { 748 Integer residueL = block.get(su).get(leftBoundary); 749 if (residueL == null) { 750 block.get(su).add(leftBoundary, null); 751 } else if (freePool.contains(residueL - 1)) { 752 Integer residueAdd = residueL - 1; 753 block.get(su).add(leftBoundary, residueAdd); 754 freePool.remove(residueAdd); 755 } else { 756 block.get(su).add(leftBoundary, null); 757 } 758 } 759 length++; 760 moved = true; 761 break; 762 } 763 if (moved) 764 return !checkGaps(); 765 return moved; 766 } 767 768 /** 769 * Deletes an alignment column at a randomly selected position. 770 * 771 * @throws StructureException 772 * @throws RefinerFailedException 773 */ 774 private boolean shrinkBlock() throws StructureException, 775 RefinerFailedException { 776 777 // Let shrink moves only if the repeat is larger enough 778 if (repeatCore <= Lmin) 779 return false; 780 781 // Select column by maximum distance 782 updateMultipleAlignment(); 783 Matrix residueDistances = MultipleAlignmentTools 784 .getAverageResidueDistances(msa); 785 786 double maxDist = Double.MIN_VALUE; 787 double[] colDistances = new double[length]; 788 int res = 0; 789 for (int col = 0; col < length; col++) { 790 int normalize = 0; 791 for (int s = 0; s < order; s++) { 792 if (residueDistances.get(s, col) != -1) { 793 colDistances[col] += residueDistances.get(s, col); 794 normalize++; 795 } 796 } 797 colDistances[col] /= normalize; 798 if (colDistances[col] > maxDist) { 799 // geometric distribution 800 if (rnd.nextDouble() > 0.5) { 801 maxDist = colDistances[col]; 802 res = col; 803 } 804 } 805 } 806 807 for (int su = 0; su < order; su++) { 808 Integer residue = block.get(su).get(res); 809 block.get(su).remove(res); 810 if (residue != null) 811 freePool.add(residue); 812 Collections.sort(freePool); 813 } 814 length--; 815 checkGaps(); 816 return true; 817 } 818 819 /** 820 * Calculates the probability of accepting a bad move given the iteration 821 * step and the score change. 822 * <p> 823 * Function: p=(C-AS)/(C*sqrt(step)) Added a normalization factor so that 824 * the probability approaches 0 when the maxIter is reached. 825 */ 826 private double probabilityFunction(double AS, int m, int maxIter) { 827 828 double prob = (C + AS) / (C * Math.sqrt(m)); 829 double norm = (1 - (m * 1.0) / maxIter); // Normalization factor 830 return Math.min(Math.max(prob * norm, 0.0), 1.0); 831 } 832 833 /** 834 * Save the evolution of the optimization process as a csv file. 835 */ 836 private void saveHistory(String folder) throws IOException { 837 838 String name = msa.getStructureIdentifier(0).getIdentifier(); 839 FileWriter writer = new FileWriter(folder + name 840 + "-symm_opt.csv"); 841 writer.append("Step,Time,RepeatLength,RMSD,TMscore,MCscore\n"); 842 843 for (int i = 0; i < lengthHistory.size(); i++) { 844 writer.append(i * saveStep + ","); 845 writer.append(timeHistory.get(i) + ","); 846 writer.append(lengthHistory.get(i) + ","); 847 writer.append(rmsdHistory.get(i) + ","); 848 writer.append(tmScoreHistory.get(i) + ","); 849 writer.append(mcScoreHistory.get(i) + "\n"); 850 } 851 852 writer.flush(); 853 writer.close(); 854 } 855 856}