001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.multiple.mc; 022 023import java.io.FileWriter; 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.Collections; 027import java.util.List; 028import java.util.Random; 029import java.util.SortedSet; 030import java.util.TreeSet; 031import java.util.concurrent.Callable; 032 033import org.biojava.nbio.structure.Atom; 034import org.biojava.nbio.structure.StructureException; 035import org.biojava.nbio.structure.align.multiple.Block; 036import org.biojava.nbio.structure.align.multiple.BlockSet; 037import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 038import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble; 039import org.biojava.nbio.structure.align.multiple.util.CoreSuperimposer; 040import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer; 041import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentTools; 042import org.biojava.nbio.structure.align.multiple.util.MultipleSuperimposer; 043import org.biojava.nbio.structure.jama.Matrix; 044import org.slf4j.Logger; 045import org.slf4j.LoggerFactory; 046 047/** 048 * This class takes a MultipleAlignment seed previously generated and runs a 049 * Monte Carlo optimization in order to improve the overall score and highlight 050 * common structural motifs. 051 * <p> 052 * The seed alignment can be flexible, non-topological or include CP, but this 053 * optimization will not change the number of flexible parts {@link BlockSet}s 054 * or non-topological regions {@link Block}. Thus, the definition of those parts 055 * depend exclusively on the pairwise alignment (or user alignment) used to 056 * generate the seed multiple alignment. 057 * <p> 058 * This class implements Callable, because multiple instances of the 059 * optimization can be run in parallel. 060 * 061 * @author Aleix Lafita 062 * @since 4.1.0 063 * 064 */ 065public class MultipleMcOptimizer implements Callable<MultipleAlignment> { 066 067 private static final Logger logger = LoggerFactory 068 .getLogger(MultipleMcOptimizer.class); 069 070 private Random rnd; 071 private MultipleSuperimposer imposer; 072 073 // Optimization parameters 074 private int Rmin; // number of aligned structures without a gap 075 private int Lmin; // Minimum alignment length of a Block 076 private int convergenceSteps; // Steps without score change before stopping 077 private double C; // Probability function constant 078 079 // Score function parameters - they are defined by the user 080 private double Gopen; // Penalty for opening gap 081 private double Gextend; // Penalty for extending gaps 082 private double dCutoff; // max allowed residue distance 083 084 // Alignment Information 085 private MultipleAlignment msa; // Alignment to optimize 086 private List<SortedSet<Integer>> freePool; // unaligned residues 087 private List<Atom[]> atomArrays; 088 089 // Alignment Properties 090 private int size; // number of structures in the alignment 091 private int blockNr; // the number of Blocks in the alignment 092 private double mcScore; // Optimization score, objective function 093 094 // Variables that store the history of the optimization - slower if on 095 private static final boolean history = false; 096 private static final String pathToHistory = "McOptHistory.csv"; 097 private List<Integer> lengthHistory; 098 private List<Double> rmsdHistory; 099 private List<Double> scoreHistory; 100 101 /** 102 * Constructor. Sets the internal variables from the parameters. To run the 103 * optimization use the call (in a different thread) or optimize methods. 104 * 105 * @param seedAln 106 * MultipleAlignment to be optimized. 107 * @param params 108 * the parameter beam 109 * @param reference 110 * the index of the most similar structure to all others 111 */ 112 public MultipleMcOptimizer(MultipleAlignment seedAln, 113 MultipleMcParameters params, int reference) { 114 115 MultipleAlignmentEnsemble e = seedAln.getEnsemble().clone(); 116 msa = e.getMultipleAlignment(0); 117 atomArrays = msa.getAtomArrays(); 118 size = seedAln.size(); 119 120 rnd = new Random(params.getRandomSeed()); 121 Gopen = params.getGapOpen(); 122 Gextend = params.getGapExtension(); 123 dCutoff = params.getDistanceCutoff(); 124 imposer = new CoreSuperimposer(reference); 125 126 if (params.getConvergenceSteps() == 0) { 127 List<Integer> lens = new ArrayList<>(); 128 for (int i = 0; i < size; i++) 129 lens.add(atomArrays.get(i).length); 130 convergenceSteps = Collections.min(lens) * size; 131 } else 132 convergenceSteps = params.getConvergenceSteps(); 133 134 if (params.getMinAlignedStructures() == 0) { 135 Rmin = Math.max(size / 3, 2); // 33% of the structures aligned 136 } else { 137 Rmin = Math 138 .min(Math.max(params.getMinAlignedStructures(), 2), size); 139 } 140 C = 20 * size; 141 Lmin = params.getMinBlockLen(); 142 143 // Delete all shorter than Lmin blocks, and empty blocksets 144 List<Block> toDelete = new ArrayList<>(); 145 List<BlockSet> emptyBs = new ArrayList<>(); 146 147 for (Block b : msa.getBlocks()) { 148 if (b.getCoreLength() < Lmin) { 149 toDelete.add(b); 150 logger.warn("Deleting a Block: coreLength < Lmin."); 151 } 152 } 153 for (Block b : toDelete) { 154 for (BlockSet bs : msa.getBlockSets()) { 155 bs.getBlocks().remove(b); 156 if (bs.getBlocks().size() == 0) 157 emptyBs.add(bs); 158 } 159 } 160 for (BlockSet bs : emptyBs) { 161 msa.getBlockSets().remove(bs); 162 } 163 164 blockNr = msa.getBlocks().size(); 165 if (blockNr < 1) { 166 throw new IllegalArgumentException( 167 "Optimization: empty seed alignment, no Blocks found."); 168 } 169 } 170 171 @Override 172 public MultipleAlignment call() throws Exception { 173 return optimize(); 174 } 175 176 /** 177 * Initialize the freePool and all the variables needed for the 178 * optimization. 179 * 180 * @throws StructureException 181 */ 182 private void initialize() throws StructureException { 183 184 // Initialize alignment variables 185 freePool = new ArrayList<>(); 186 List<List<Integer>> aligned = new ArrayList<>(); 187 188 // Generate freePool residues from the ones not aligned 189 for (int i = 0; i < size; i++) { 190 List<Integer> residues = new ArrayList<>(); 191 for (BlockSet bs : msa.getBlockSets()) { 192 for (Block b : bs.getBlocks()) { 193 for (int l = 0; l < b.length(); l++) { 194 Integer residue = b.getAlignRes().get(i).get(l); 195 if (residue != null) 196 residues.add(residue); 197 } 198 } 199 } 200 aligned.add(residues); 201 freePool.add(new TreeSet<Integer>()); 202 } 203 204 // Add any residue not aligned to the free pool for every structure 205 for (int i = 0; i < size; i++) { 206 for (int k = 0; k < atomArrays.get(i).length; k++) { 207 if (!aligned.get(i).contains(k)) 208 freePool.get(i).add(k); 209 } 210 } 211 212 // Set the superposition and score for the seed alignment 213 checkGaps(); 214 msa.clear(); 215 imposer.superimpose(msa); 216 mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend, 217 dCutoff); 218 219 // Initialize the history variables 220 if (history) { 221 lengthHistory = new ArrayList<>(); 222 rmsdHistory = new ArrayList<>(); 223 scoreHistory = new ArrayList<>(); 224 } 225 } 226 227 /** 228 * Optimization method based in a Monte-Carlo approach. Starting from the 229 * refined alignment uses 4 types of moves: 230 * <p> 231 * <ul> 232 * <li>Shift Row: if there are enough freePool residues available. 233 * <li>Expand Block: add another alignment column. 234 * <li>Shrink Block: move a block column to the freePool. 235 * <li>Insert gap: insert a gap in a random position of the alignment. 236 * </ul> 237 * 238 */ 239 public MultipleAlignment optimize() throws StructureException { 240 241 initialize(); 242 243 int conv = 0; // Number of steps without an alignment improvement 244 int i = 1; 245 int maxIter = convergenceSteps * 100; 246 247 while (i < maxIter && conv < convergenceSteps) { 248 249 // Save the state of the system 250 MultipleAlignment lastMSA = msa.clone(); 251 List<SortedSet<Integer>> lastFreePool = new ArrayList<>(); 252 for (int k = 0; k < size; k++) { 253 SortedSet<Integer> p = new TreeSet<>(); 254 for (Integer l : freePool.get(k)) 255 p.add(l); 256 lastFreePool.add(p); 257 } 258 double lastScore = mcScore; 259 260 boolean moved = false; 261 262 while (!moved) { 263 // Randomly select one of the steps to modify the alignment 264 double move = rnd.nextDouble(); 265 if (move < 0.4) { 266 moved = shiftRow(); 267 logger.debug("did shift"); 268 } else if (move < 0.7) { 269 moved = expandBlock(); 270 logger.debug("did expand"); 271 } else if (move < 0.85) { 272 moved = shrinkBlock(); 273 logger.debug("did shrink"); 274 } else { 275 moved = insertGap(); 276 logger.debug("did insert gap"); 277 } 278 } 279 280 // Get the score of the new alignment 281 msa.clear(); 282 imposer.superimpose(msa); 283 mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend, 284 dCutoff); 285 286 double AS = mcScore - lastScore; 287 double prob = 1.0; 288 289 if (AS < 0) { 290 291 // Probability of accepting the move 292 prob = probabilityFunction(AS, i, maxIter); 293 double p = rnd.nextDouble(); 294 // Reject the move 295 if (p > prob) { 296 msa = lastMSA; 297 freePool = lastFreePool; 298 mcScore = lastScore; 299 conv++; 300 301 } else 302 conv = 0; 303 304 } else 305 conv = 0; 306 307 logger.debug("Step: " + i + ": --prob: " + prob 308 + ", --score change: " + AS + ", --conv: " + conv); 309 310 if (history) { 311 if (i % 100 == 1) { 312 lengthHistory.add(msa.length()); 313 rmsdHistory.add(MultipleAlignmentScorer.getRMSD(msa)); 314 scoreHistory.add(mcScore); 315 } 316 } 317 318 i++; 319 } 320 321 // Return Multiple Alignment 322 imposer.superimpose(msa); 323 MultipleAlignmentScorer.calculateScores(msa); 324 msa.putScore(MultipleAlignmentScorer.MC_SCORE, mcScore); 325 326 if (history) { 327 try { 328 saveHistory(pathToHistory); 329 } catch (Exception e) { 330 logger.warn("Could not save history file: " + e.getMessage()); 331 } 332 } 333 334 return msa; 335 } 336 337 /** 338 * Method that loops through all the alignment columns and checks that there 339 * are no more gaps than the maximum allowed, Rmin. 340 * <p> 341 * There must be at least Rmin residues different than null in every 342 * alignment column. In case there is a column with more gaps it will be 343 * shrinked (moved to freePool). 344 * 345 * @return true if any columns has been shrinked and false otherwise 346 */ 347 private boolean checkGaps() { 348 349 boolean shrinkedAny = false; 350 351 List<List<Integer>> shrinkColumns = new ArrayList<>(); 352 // Loop for each Block 353 for (Block b : msa.getBlocks()) { 354 List<Integer> shrinkCol = new ArrayList<>(); 355 // Loop for each column in the Block 356 for (int res = 0; res < b.length(); res++) { 357 int gapCount = 0; 358 // count the gaps in the column 359 for (int su = 0; su < size; su++) { 360 if (b.getAlignRes().get(su).get(res) == null) 361 gapCount++; 362 } 363 if ((size - gapCount) < Rmin) { 364 // Add the column to the shrink list 365 shrinkCol.add(res); 366 } 367 } 368 shrinkColumns.add(shrinkCol); 369 } 370 // Shrink columns that have more gaps than allowed 371 for (int b = 0; b < blockNr; b++) { 372 for (int col = shrinkColumns.get(b).size() - 1; col >= 0; col--) { 373 for (int str = 0; str < size; str++) { 374 Block bk = msa.getBlock(b); 375 Integer residue = bk.getAlignRes().get(str) 376 .get(shrinkColumns.get(b).get(col)); 377 bk.getAlignRes().get(str) 378 .remove((int) shrinkColumns.get(b).get(col)); 379 if (residue != null) { 380 freePool.get(str).add(residue); 381 } 382 } 383 shrinkedAny = true; 384 } 385 } 386 return shrinkedAny; 387 } 388 389 /** 390 * Insert a gap in one of the structures in a random position of the 391 * alignment. 392 * <p> 393 * The distribution is not uniform, because positions with higher average 394 * distance to their aligned neighbors are more likely to be gapped. 395 * <p> 396 * A gap is a null in the Block position. 397 * 398 * @return true if the alignment has been changed, false otherwise. 399 */ 400 private boolean insertGap() { 401 402 // Select residue by maximum distance 403 Matrix residueDistances = MultipleAlignmentTools 404 .getAverageResidueDistances(msa); 405 double maxDist = Double.MIN_VALUE; 406 int structure = 0; 407 int block = 0; 408 int position = 0; 409 int column = 0; 410 for (int b = 0; b < blockNr; b++) { 411 for (int col = 0; col < msa.getBlock(b).length(); col++) { 412 for (int str = 0; str < size; str++) { 413 if (residueDistances.get(str, column) != -1) { 414 if (residueDistances.get(str, column) > maxDist) { 415 // Geometric distribution 416 if (rnd.nextDouble() > 0.5) { 417 structure = str; 418 block = b; 419 position = col; 420 maxDist = residueDistances.get(str, column); 421 } 422 } 423 } 424 } 425 column++; 426 } 427 } 428 Block bk = msa.getBlock(block); 429 if (bk.getCoreLength() <= Lmin) 430 return false; 431 432 // Insert the gap at the position 433 Integer residueL = bk.getAlignRes().get(structure).get(position); 434 if (residueL != null) { 435 freePool.get(structure).add(residueL); 436 } else 437 return false; // If there was a gap already in the position. 438 439 bk.getAlignRes().get(structure).set(position, null); 440 checkGaps(); 441 return true; 442 } 443 444 /** 445 * Move all the block residues of one subunit one position to the left or to 446 * the right and move the corresponding boundary residues from the freePool 447 * to the block. 448 * <p> 449 * The boundaries are determined by any irregularity (either a null or a 450 * discontinuity in the alignment). 451 * 452 * @return true if the alignment has been changed, false otherwise. 453 */ 454 private boolean shiftRow() { 455 456 int str = rnd.nextInt(size); // Select randomly the subunit 457 int rl = rnd.nextInt(2); // Select between moving right (0) or left (1) 458 int bk = rnd.nextInt(blockNr); // Select randomly the Block 459 int res = rnd.nextInt(msa.getBlock(bk).length()); 460 461 Block block = msa.getBlock(bk); 462 if (block.getCoreLength() <= Lmin) 463 return false; 464 465 // When the pivot residue is null try to add a residue from the freePool 466 if (block.getAlignRes().get(str).get(res) == null) { 467 // Residues not null at the right and left of the pivot null residue 468 int rightRes = res; 469 int leftRes = res; 470 // Find the boundary to the right abd left 471 while (block.getAlignRes().get(str).get(rightRes) == null 472 && rightRes < block.length() - 1) { 473 rightRes++; 474 } 475 while (block.getAlignRes().get(str).get(leftRes) == null 476 && leftRes > 0) { 477 leftRes--; 478 } 479 480 // If both are null return because the block is empty 481 if (block.getAlignRes().get(str).get(leftRes) == null 482 && block.getAlignRes().get(str).get(rightRes) == null) { 483 return false; 484 } else if (block.getAlignRes().get(str).get(leftRes) == null) { 485 // Choose the sequentially previous residue of the known one 486 Integer residue = block.getAlignRes().get(str).get(rightRes) - 1; 487 if (freePool.get(str).contains(residue)) { 488 block.getAlignRes().get(str).set(res, residue); 489 freePool.get(str).remove(residue); 490 } else 491 return false; 492 } else if (block.getAlignRes().get(str).get(rightRes) == null) { 493 // Choose the sequentially next residue of the known one 494 Integer residue = block.getAlignRes().get(str).get(leftRes) + 1; 495 if (freePool.get(str).contains(residue)) { 496 block.getAlignRes().get(str).set(res, residue); 497 freePool.get(str).remove(residue); 498 } else 499 return false; 500 } else { 501 // If boundaries are consecutive no residue can be added 502 if (block.getAlignRes().get(str).get(rightRes) == block 503 .getAlignRes().get(str).get(leftRes) + 1) { 504 return false; 505 } else { 506 // Choose randomly a residue in between left and right 507 Integer residue = rnd.nextInt(block.getAlignRes().get(str) 508 .get(rightRes) 509 - block.getAlignRes().get(str).get(leftRes) - 1) 510 + block.getAlignRes().get(str).get(leftRes) + 1; 511 512 if (freePool.get(str).contains(residue)) { 513 block.getAlignRes().get(str).set(res, residue); 514 freePool.get(str).remove(residue); 515 } 516 } 517 } 518 return true; 519 } 520 521 // When residue different than null shift the whole block 522 switch (rl) { 523 case 0: // Move to the right 524 525 // Find the nearest boundary to the left of the pivot 526 int leftBoundary = res - 1; 527 int leftPrevRes = res; 528 while (true) { 529 if (leftBoundary < 0) 530 break; 531 else { 532 if (block.getAlignRes().get(str).get(leftBoundary) == null) 533 break; // Break if there is a gap (this is the boundary) 534 else if (block.getAlignRes().get(str).get(leftPrevRes) > block 535 .getAlignRes().get(str).get(leftBoundary) + 1) 536 break; // Break if there is a discontinuity 537 } 538 leftPrevRes = leftBoundary; 539 leftBoundary--; 540 } 541 leftBoundary++; 542 543 // Find the nearest boundary to the right of the pivot 544 int rightBoundary = res + 1; 545 int rightPrevRes = res; 546 while (true) { 547 if (rightBoundary == block.length()) 548 break; 549 else { 550 if (block.getAlignRes().get(str).get(rightBoundary) == null) 551 break; // Break if there is a gap 552 else if (block.getAlignRes().get(str).get(rightPrevRes) + 1 < block 553 .getAlignRes().get(str).get(rightBoundary)) 554 break; // Discontinuity 555 } 556 rightPrevRes = rightBoundary; 557 rightBoundary++; 558 } 559 rightBoundary--; 560 561 // Residues at the boundary 562 Integer resR0 = block.getAlignRes().get(str).get(rightBoundary); 563 Integer resL0 = block.getAlignRes().get(str).get(leftBoundary); 564 565 // Remove the residue at the right of the block 566 block.getAlignRes().get(str).remove(rightBoundary); 567 if (resR0 != null) 568 freePool.get(str).add(resR0); 569 570 // Add the residue at the left of the block 571 if (resL0 != null) 572 resL0 -= 1; 573 574 if (freePool.get(str).contains(resL0)) { 575 block.getAlignRes().get(str).add(leftBoundary, resL0); 576 freePool.get(str).remove(resL0); 577 } else 578 block.getAlignRes().get(str).add(leftBoundary, null); 579 580 break; 581 582 case 1: // Move to the left 583 584 // Find the nearest boundary to the left of the pivot 585 int leftBoundary1 = res - 1; 586 int leftPrevRes1 = res; 587 while (true) { 588 if (leftBoundary1 < 0) 589 break; 590 else { 591 if (block.getAlignRes().get(str).get(leftBoundary1) == null) 592 break; // Break if there is a gap (this is the boundary) 593 else if (block.getAlignRes().get(str).get(leftPrevRes1) > block 594 .getAlignRes().get(str).get(leftBoundary1) + 1) 595 break; // Break if there is a discontinuity 596 } 597 leftPrevRes1 = leftBoundary1; 598 leftBoundary1--; 599 } 600 leftBoundary1++; 601 602 // Find the nearest boundary to the right of the pivot 603 int rightBoundary1 = res + 1; 604 int rightPrevRes1 = res; 605 while (true) { 606 if (rightBoundary1 == block.length()) 607 break; 608 else { 609 if (block.getAlignRes().get(str).get(rightBoundary1) == null) 610 break; // Break if there is a gap 611 else if (block.getAlignRes().get(str).get(rightPrevRes1) + 1 < block 612 .getAlignRes().get(str).get(rightBoundary1)) 613 break; // Discontinuity 614 } 615 rightPrevRes1 = rightBoundary1; 616 rightBoundary1++; 617 } 618 rightBoundary1--; 619 620 // Residues at the boundary 621 Integer resR1 = block.getAlignRes().get(str).get(rightBoundary1); 622 Integer resL1 = block.getAlignRes().get(str).get(leftBoundary1); 623 624 // Add the residue at the right of the block 625 if (resR1 != null) 626 resR1 += 1; 627 628 if (freePool.get(str).contains(resR1)) { 629 if (rightBoundary1 == block.length() - 1) { 630 block.getAlignRes().get(str).add(resR1); 631 } else 632 block.getAlignRes().get(str).add(rightBoundary1 + 1, resR1); 633 634 freePool.get(str).remove(resR1); 635 } else 636 block.getAlignRes().get(str).add(rightBoundary1 + 1, null); 637 638 // Remove the residue at the left of the block 639 block.getAlignRes().get(str).remove(leftBoundary1); 640 if (resL1 != null) 641 freePool.get(str).add(resL1); 642 643 break; 644 } 645 checkGaps(); 646 return true; 647 } 648 649 /** 650 * It extends the alignment one position to the right or to the left of a 651 * randomly selected position by moving the consecutive residues of each 652 * subunit (if enough) from the freePool to the block. 653 * <p> 654 * If there are not enough residues in the freePool it introduces gaps. 655 */ 656 private boolean expandBlock() { 657 658 int rl = rnd.nextInt(2); // Select expanding right (0) or left (1) 659 int bk = rnd.nextInt(blockNr); // Select randomly the Block 660 int res = rnd.nextInt(msa.getBlock(bk).length()); 661 662 Block block = msa.getBlock(bk); 663 int gaps = 0; // store the number of gaps in the expansion 664 665 switch (rl) { 666 case 0: 667 668 int rightBound = res; 669 int[] previousPos = new int[size]; 670 for (int str = 0; str < size; str++) 671 previousPos[str] = -1; 672 673 // Search t the right for >Rmin non consecutive residues 674 while (block.length() - 1 > rightBound) { 675 int noncontinuous = 0; 676 for (int str = 0; str < size; str++) { 677 if (block.getAlignRes().get(str).get(rightBound) == null) { 678 continue; 679 } else if (previousPos[str] == -1) { 680 previousPos[str] = block.getAlignRes().get(str) 681 .get(rightBound); 682 } else if (block.getAlignRes().get(str).get(rightBound) > previousPos[str] + 1) { 683 noncontinuous++; 684 } 685 } 686 if (noncontinuous < Rmin) 687 rightBound++; 688 else 689 break; 690 } 691 if (rightBound > 0) 692 rightBound--; 693 694 // Expand the block with the residues at the subunit boundaries 695 for (int str = 0; str < size; str++) { 696 Integer residueR = block.getAlignRes().get(str).get(rightBound); 697 if (residueR == null) { 698 if (rightBound == block.length() - 1) { 699 block.getAlignRes().get(str).add(null); 700 } else 701 block.getAlignRes().get(str).add(rightBound + 1, null); 702 gaps++; 703 } else if (freePool.get(str).contains(residueR + 1)) { 704 Integer residueAdd = residueR + 1; 705 if (rightBound == block.length() - 1) { 706 block.getAlignRes().get(str).add(residueAdd); 707 } else { 708 block.getAlignRes().get(str) 709 .add(rightBound + 1, residueAdd); 710 } 711 freePool.get(str).remove(residueAdd); 712 } else { 713 if (rightBound == block.length() - 1) 714 block.getAlignRes().get(str).add(null); 715 else 716 block.getAlignRes().get(str).add(rightBound + 1, null); 717 gaps++; 718 } 719 } 720 break; 721 722 case 1: 723 724 int leftBoundary = res; 725 int[] nextPos = new int[size]; 726 for (int str = 0; str < size; str++) 727 nextPos[str] = -1; 728 729 // Search position to the right with >Rmin non consecutive residues 730 while (leftBoundary > 0) { 731 int noncontinuous = 0; 732 for (int str = 0; str < size; str++) { 733 if (block.getAlignRes().get(str).get(leftBoundary) == null) 734 continue; 735 else if (nextPos[str] == -1) { 736 nextPos[str] = block.getAlignRes().get(str) 737 .get(leftBoundary); 738 } else if (block.getAlignRes().get(str).get(leftBoundary) < nextPos[str] - 1) { 739 noncontinuous++; 740 } 741 } 742 if (noncontinuous < Rmin) 743 leftBoundary--; 744 else 745 break; 746 } 747 748 // Expand the block with the residues at the subunit boundaries 749 for (int str = 0; str < size; str++) { 750 Integer residueL = block.getAlignRes().get(str) 751 .get(leftBoundary); 752 if (residueL == null) { 753 block.getAlignRes().get(str).add(leftBoundary, null); 754 gaps++; 755 } else if (freePool.get(str).contains(residueL - 1)) { 756 Integer residueAdd = residueL - 1; 757 block.getAlignRes().get(str).add(leftBoundary, residueAdd); 758 freePool.get(str).remove(residueAdd); 759 } else { 760 block.getAlignRes().get(str).add(leftBoundary, null); 761 gaps++; 762 } 763 } 764 break; 765 } 766 if (size - gaps >= Rmin) 767 return true; 768 else 769 checkGaps(); 770 return false; 771 } 772 773 /** 774 * Deletes an alignment column at a randomly selected position. 775 */ 776 private boolean shrinkBlock() { 777 778 // Select column by maximum distance 779 Matrix residueDistances = MultipleAlignmentTools 780 .getAverageResidueDistances(msa); 781 double[] colDistances = new double[msa.length()]; 782 double maxDist = Double.MIN_VALUE; 783 int position = 0; 784 int block = 0; 785 int column = 0; 786 for (int b = 0; b < msa.getBlocks().size(); b++) { 787 for (int col = 0; col < msa.getBlock(b).length(); col++) { 788 int normalize = 0; 789 for (int s = 0; s < size; s++) { 790 if (residueDistances.get(s, column) != -1) { 791 colDistances[column] += residueDistances.get(s, column); 792 normalize++; 793 } 794 } 795 colDistances[column] /= normalize; 796 if (colDistances[column] > maxDist) { 797 if (rnd.nextDouble() > 0.5) { 798 maxDist = colDistances[column]; 799 position = col; 800 block = b; 801 } 802 } 803 column++; 804 } 805 } 806 Block currentBlock = msa.getBlock(block); 807 if (currentBlock.getCoreLength() <= Lmin) 808 return false; 809 810 for (int str = 0; str < size; str++) { 811 Integer residue = currentBlock.getAlignRes().get(str).get(position); 812 813 currentBlock.getAlignRes().get(str).remove(position); 814 if (residue != null) 815 freePool.get(str).add(residue); 816 } 817 return true; 818 } 819 820 /** 821 * Calculates the probability of accepting a bad move given the iteration 822 * step and the score change. 823 * <p> 824 * Function: p=(C-AS)/(m*C) , slightly different from the CEMC algorithm. 825 * <p> 826 * Added a normalization factor so that the probability approaches 0 as the 827 * final of the optimization gets closer. 828 */ 829 private double probabilityFunction(double AS, int m, int maxIter) { 830 831 double prob = (C + AS) / (m * C); 832 double norm = (1 - (m * 1.0) / maxIter); // Normalization factor 833 return Math.min(Math.max(prob * norm, 0.0), 1.0); 834 } 835 836 /** 837 * Save the evolution of the optimization process as a csv file. 838 */ 839 private void saveHistory(String filePath) throws IOException { 840 841 FileWriter writer = new FileWriter(filePath); 842 writer.append("Step,Length,RMSD,Score\n"); 843 844 for (int i = 0; i < lengthHistory.size(); i++) { 845 writer.append(String.valueOf(i * 100)); 846 writer.append("," + lengthHistory.get(i)); 847 writer.append("," + rmsdHistory.get(i)); 848 writer.append("," + scoreHistory.get(i) + "\n"); 849 } 850 writer.flush(); 851 writer.close(); 852 } 853}