001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.multiple.mc; 022 023import java.io.FileWriter; 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.Collections; 027import java.util.List; 028import java.util.Random; 029import java.util.SortedSet; 030import java.util.TreeSet; 031import java.util.concurrent.Callable; 032 033import org.biojava.nbio.structure.Atom; 034import org.biojava.nbio.structure.StructureException; 035import org.biojava.nbio.structure.align.multiple.Block; 036import org.biojava.nbio.structure.align.multiple.BlockSet; 037import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 038import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble; 039import org.biojava.nbio.structure.align.multiple.util.CoreSuperimposer; 040import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer; 041import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentTools; 042import org.biojava.nbio.structure.align.multiple.util.MultipleSuperimposer; 043import org.biojava.nbio.structure.jama.Matrix; 044import org.slf4j.Logger; 045import org.slf4j.LoggerFactory; 046 047/** 048 * This class takes a MultipleAlignment seed previously generated and runs a 049 * Monte Carlo optimization in order to improve the overall score and highlight 050 * common structural motifs. 051 * <p> 052 * The seed alignment can be flexible, non-topological or include CP, but this 053 * optimization will not change the number of flexible parts {@link BlockSet}s 054 * or non-topological regions {@link Block}. Thus, the definition of those parts 055 * depend exclusively on the pairwise alignment (or user alignment) used to 056 * generate the seed multiple alignment. 057 * <p> 058 * This class implements Callable, because multiple instances of the 059 * optimization can be run in parallel. 060 * 061 * @author Aleix Lafita 062 * @since 4.1.0 063 * 064 */ 065public class MultipleMcOptimizer implements Callable<MultipleAlignment> { 066 067 private static final Logger logger = LoggerFactory 068 .getLogger(MultipleMcOptimizer.class); 069 070 private Random rnd; 071 private MultipleSuperimposer imposer; 072 073 // Optimization parameters 074 private int Rmin; // number of aligned structures without a gap 075 private int Lmin; // Minimum alignment length of a Block 076 private int convergenceSteps; // Steps without score change before stopping 077 private double C; // Probability function constant 078 079 // Score function parameters - they are defined by the user 080 private double Gopen; // Penalty for opening gap 081 private double Gextend; // Penalty for extending gaps 082 private double dCutoff; // max allowed residue distance 083 084 // Alignment Information 085 private MultipleAlignment msa; // Alignment to optimize 086 private List<SortedSet<Integer>> freePool; // unaligned residues 087 private List<Atom[]> atomArrays; 088 089 // Alignment Properties 090 private int size; // number of structures in the alignment 091 private int blockNr; // the number of Blocks in the alignment 092 private double mcScore; // Optimization score, objective function 093 094 // Variables that store the history of the optimization - slower if on 095 private static final boolean history = false; 096 private static final String pathToHistory = "McOptHistory.csv"; 097 private List<Integer> lengthHistory; 098 private List<Double> rmsdHistory; 099 private List<Double> scoreHistory; 100 101 /** 102 * Constructor. Sets the internal variables from the parameters. To run the 103 * optimization use the call (in a different thread) or optimize methods. 104 * 105 * @param seedAln 106 * MultipleAlignment to be optimized. 107 * @param params 108 * the parameter beam 109 * @param reference 110 * the index of the most similar structure to all others 111 * @throws StructureException 112 */ 113 public MultipleMcOptimizer(MultipleAlignment seedAln, 114 MultipleMcParameters params, int reference) { 115 116 MultipleAlignmentEnsemble e = seedAln.getEnsemble().clone(); 117 msa = e.getMultipleAlignment(0); 118 atomArrays = msa.getAtomArrays(); 119 size = seedAln.size(); 120 121 rnd = new Random(params.getRandomSeed()); 122 Gopen = params.getGapOpen(); 123 Gextend = params.getGapExtension(); 124 dCutoff = params.getDistanceCutoff(); 125 imposer = new CoreSuperimposer(reference); 126 127 if (params.getConvergenceSteps() == 0) { 128 List<Integer> lens = new ArrayList<Integer>(); 129 for (int i = 0; i < size; i++) 130 lens.add(atomArrays.get(i).length); 131 convergenceSteps = Collections.min(lens) * size; 132 } else 133 convergenceSteps = params.getConvergenceSteps(); 134 135 if (params.getMinAlignedStructures() == 0) { 136 Rmin = Math.max(size / 3, 2); // 33% of the structures aligned 137 } else { 138 Rmin = Math 139 .min(Math.max(params.getMinAlignedStructures(), 2), size); 140 } 141 C = 20 * size; 142 Lmin = params.getMinBlockLen(); 143 144 // Delete all shorter than Lmin blocks, and empty blocksets 145 List<Block> toDelete = new ArrayList<Block>(); 146 List<BlockSet> emptyBs = new ArrayList<BlockSet>(); 147 148 for (Block b : msa.getBlocks()) { 149 if (b.getCoreLength() < Lmin) { 150 toDelete.add(b); 151 logger.warn("Deleting a Block: coreLength < Lmin."); 152 } 153 } 154 for (Block b : toDelete) { 155 for (BlockSet bs : msa.getBlockSets()) { 156 bs.getBlocks().remove(b); 157 if (bs.getBlocks().size() == 0) 158 emptyBs.add(bs); 159 } 160 } 161 for (BlockSet bs : emptyBs) { 162 msa.getBlockSets().remove(bs); 163 } 164 165 blockNr = msa.getBlocks().size(); 166 if (blockNr < 1) { 167 throw new IllegalArgumentException( 168 "Optimization: empty seed alignment, no Blocks found."); 169 } 170 } 171 172 @Override 173 public MultipleAlignment call() throws Exception { 174 return optimize(); 175 } 176 177 /** 178 * Initialize the freePool and all the variables needed for the 179 * optimization. 180 * 181 * @throws StructureException 182 */ 183 private void initialize() throws StructureException { 184 185 // Initialize alignment variables 186 freePool = new ArrayList<SortedSet<Integer>>(); 187 List<List<Integer>> aligned = new ArrayList<List<Integer>>(); 188 189 // Generate freePool residues from the ones not aligned 190 for (int i = 0; i < size; i++) { 191 List<Integer> residues = new ArrayList<Integer>(); 192 for (BlockSet bs : msa.getBlockSets()) { 193 for (Block b : bs.getBlocks()) { 194 for (int l = 0; l < b.length(); l++) { 195 Integer residue = b.getAlignRes().get(i).get(l); 196 if (residue != null) 197 residues.add(residue); 198 } 199 } 200 } 201 aligned.add(residues); 202 freePool.add(new TreeSet<Integer>()); 203 } 204 205 // Add any residue not aligned to the free pool for every structure 206 for (int i = 0; i < size; i++) { 207 for (int k = 0; k < atomArrays.get(i).length; k++) { 208 if (!aligned.get(i).contains(k)) 209 freePool.get(i).add(k); 210 } 211 } 212 213 // Set the superposition and score for the seed aligment 214 checkGaps(); 215 msa.clear(); 216 imposer.superimpose(msa); 217 mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend, 218 dCutoff); 219 220 // Initialize the history variables 221 if (history) { 222 lengthHistory = new ArrayList<Integer>(); 223 rmsdHistory = new ArrayList<Double>(); 224 scoreHistory = new ArrayList<Double>(); 225 } 226 } 227 228 /** 229 * Optimization method based in a Monte-Carlo approach. Starting from the 230 * refined alignment uses 4 types of moves: 231 * <p> 232 * <ul> 233 * <li>Shift Row: if there are enough freePool residues available. 234 * <li>Expand Block: add another alignment column. 235 * <li>Shrink Block: move a block column to the freePool. 236 * <li>Insert gap: insert a gap in a random position of the alignment. 237 * </ul> 238 * </li> 239 */ 240 public MultipleAlignment optimize() throws StructureException { 241 242 initialize(); 243 244 int conv = 0; // Number of steps without an alignment improvement 245 int i = 1; 246 int maxIter = convergenceSteps * 100; 247 248 while (i < maxIter && conv < convergenceSteps) { 249 250 // Save the state of the system 251 MultipleAlignment lastMSA = msa.clone(); 252 List<SortedSet<Integer>> lastFreePool = new ArrayList<SortedSet<Integer>>(); 253 for (int k = 0; k < size; k++) { 254 SortedSet<Integer> p = new TreeSet<Integer>(); 255 for (Integer l : freePool.get(k)) 256 p.add(l); 257 lastFreePool.add(p); 258 } 259 double lastScore = mcScore; 260 261 boolean moved = false; 262 263 while (!moved) { 264 // Randomly select one of the steps to modify the alignment 265 double move = rnd.nextDouble(); 266 if (move < 0.4) { 267 moved = shiftRow(); 268 logger.debug("did shift"); 269 } else if (move < 0.7) { 270 moved = expandBlock(); 271 logger.debug("did expand"); 272 } else if (move < 0.85) { 273 moved = shrinkBlock(); 274 logger.debug("did shrink"); 275 } else { 276 moved = insertGap(); 277 logger.debug("did insert gap"); 278 } 279 } 280 281 // Get the score of the new alignment 282 msa.clear(); 283 imposer.superimpose(msa); 284 mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend, 285 dCutoff); 286 287 double AS = mcScore - lastScore; 288 double prob = 1.0; 289 290 if (AS < 0) { 291 292 // Probability of accepting the move 293 prob = probabilityFunction(AS, i, maxIter); 294 double p = rnd.nextDouble(); 295 // Reject the move 296 if (p > prob) { 297 msa = lastMSA; 298 freePool = lastFreePool; 299 mcScore = lastScore; 300 conv++; 301 302 } else 303 conv = 0; 304 305 } else 306 conv = 0; 307 308 logger.debug("Step: " + i + ": --prob: " + prob 309 + ", --score change: " + AS + ", --conv: " + conv); 310 311 if (history) { 312 if (i % 100 == 1) { 313 lengthHistory.add(msa.length()); 314 rmsdHistory.add(MultipleAlignmentScorer.getRMSD(msa)); 315 scoreHistory.add(mcScore); 316 } 317 } 318 319 i++; 320 } 321 322 // Return Multiple Alignment 323 imposer.superimpose(msa); 324 MultipleAlignmentScorer.calculateScores(msa); 325 msa.putScore(MultipleAlignmentScorer.MC_SCORE, mcScore); 326 327 if (history) { 328 try { 329 saveHistory(pathToHistory); 330 } catch (Exception e) { 331 logger.warn("Could not save history file: " + e.getMessage()); 332 } 333 } 334 335 return msa; 336 } 337 338 /** 339 * Method that loops through all the alignment columns and checks that there 340 * are no more gaps than the maximum allowed, Rmin. 341 * <p> 342 * There must be at least Rmin residues different than null in every 343 * alignment column. In case there is a column with more gaps it will be 344 * shrinked (moved to freePool). 345 * 346 * @return true if any columns has been shrinked and false otherwise 347 */ 348 private boolean checkGaps() { 349 350 boolean shrinkedAny = false; 351 352 List<List<Integer>> shrinkColumns = new ArrayList<List<Integer>>(); 353 // Loop for each Block 354 for (Block b : msa.getBlocks()) { 355 List<Integer> shrinkCol = new ArrayList<Integer>(); 356 // Loop for each column in the Block 357 for (int res = 0; res < b.length(); res++) { 358 int gapCount = 0; 359 // count the gaps in the column 360 for (int su = 0; su < size; su++) { 361 if (b.getAlignRes().get(su).get(res) == null) 362 gapCount++; 363 } 364 if ((size - gapCount) < Rmin) { 365 // Add the column to the shrink list 366 shrinkCol.add(res); 367 } 368 } 369 shrinkColumns.add(shrinkCol); 370 } 371 // Shrink columns that have more gaps than allowed 372 for (int b = 0; b < blockNr; b++) { 373 for (int col = shrinkColumns.get(b).size() - 1; col >= 0; col--) { 374 for (int str = 0; str < size; str++) { 375 Block bk = msa.getBlock(b); 376 Integer residue = bk.getAlignRes().get(str) 377 .get(shrinkColumns.get(b).get(col)); 378 bk.getAlignRes().get(str) 379 .remove((int) shrinkColumns.get(b).get(col)); 380 if (residue != null) { 381 freePool.get(str).add(residue); 382 } 383 } 384 shrinkedAny = true; 385 } 386 } 387 return shrinkedAny; 388 } 389 390 /** 391 * Insert a gap in one of the structures in a random position of the 392 * alignment. 393 * <p> 394 * The distribution is not uniform, because positions with higher average 395 * distance to their aligned neighbors are more likely to be gapped. 396 * <p> 397 * A gap is a null in the Block position. 398 * 399 * @return true if the alignment has been changed, false otherwise. 400 */ 401 private boolean insertGap() { 402 403 // Select residue by maximum distance 404 Matrix residueDistances = MultipleAlignmentTools 405 .getAverageResidueDistances(msa); 406 double maxDist = Double.MIN_VALUE; 407 int structure = 0; 408 int block = 0; 409 int position = 0; 410 int column = 0; 411 for (int b = 0; b < blockNr; b++) { 412 for (int col = 0; col < msa.getBlock(b).length(); col++) { 413 for (int str = 0; str < size; str++) { 414 if (residueDistances.get(str, column) != -1) { 415 if (residueDistances.get(str, column) > maxDist) { 416 // Geometric distribution 417 if (rnd.nextDouble() > 0.5) { 418 structure = str; 419 block = b; 420 position = col; 421 maxDist = residueDistances.get(str, column); 422 } 423 } 424 } 425 } 426 column++; 427 } 428 } 429 Block bk = msa.getBlock(block); 430 if (bk.getCoreLength() <= Lmin) 431 return false; 432 433 // Insert the gap at the position 434 Integer residueL = bk.getAlignRes().get(structure).get(position); 435 if (residueL != null) { 436 freePool.get(structure).add(residueL); 437 } else 438 return false; // If there was a gap already in the position. 439 440 bk.getAlignRes().get(structure).set(position, null); 441 checkGaps(); 442 return true; 443 } 444 445 /** 446 * Move all the block residues of one subunit one position to the left or to 447 * the right and move the corresponding boundary residues from the freePool 448 * to the block. 449 * <p> 450 * The boundaries are determined by any irregularity (either a null or a 451 * discontinuity in the alignment). 452 * 453 * @return true if the alignment has been changed, false otherwise. 454 */ 455 private boolean shiftRow() { 456 457 int str = rnd.nextInt(size); // Select randomly the subunit 458 int rl = rnd.nextInt(2); // Select between moving right (0) or left (1) 459 int bk = rnd.nextInt(blockNr); // Select randomly the Block 460 int res = rnd.nextInt(msa.getBlock(bk).length()); 461 462 Block block = msa.getBlock(bk); 463 if (block.getCoreLength() <= Lmin) 464 return false; 465 466 // When the pivot residue is null try to add a residue from the freePool 467 if (block.getAlignRes().get(str).get(res) == null) { 468 // Residues not null at the right and left of the pivot null residue 469 int rightRes = res; 470 int leftRes = res; 471 // Find the boundary to the right abd left 472 while (block.getAlignRes().get(str).get(rightRes) == null 473 && rightRes < block.length() - 1) { 474 rightRes++; 475 } 476 while (block.getAlignRes().get(str).get(leftRes) == null 477 && leftRes > 0) { 478 leftRes--; 479 } 480 481 // If both are null return because the block is empty 482 if (block.getAlignRes().get(str).get(leftRes) == null 483 && block.getAlignRes().get(str).get(rightRes) == null) { 484 return false; 485 } else if (block.getAlignRes().get(str).get(leftRes) == null) { 486 // Choose the sequentially previous residue of the known one 487 Integer residue = block.getAlignRes().get(str).get(rightRes) - 1; 488 if (freePool.get(str).contains(residue)) { 489 block.getAlignRes().get(str).set(res, residue); 490 freePool.get(str).remove(residue); 491 } else 492 return false; 493 } else if (block.getAlignRes().get(str).get(rightRes) == null) { 494 // Choose the sequentially next residue of the known one 495 Integer residue = block.getAlignRes().get(str).get(leftRes) + 1; 496 if (freePool.get(str).contains(residue)) { 497 block.getAlignRes().get(str).set(res, residue); 498 freePool.get(str).remove(residue); 499 } else 500 return false; 501 } else { 502 // If boundaries are consecutive no residue can be added 503 if (block.getAlignRes().get(str).get(rightRes) == block 504 .getAlignRes().get(str).get(leftRes) + 1) { 505 return false; 506 } else { 507 // Choose randomly a residue in between left and right 508 Integer residue = rnd.nextInt(block.getAlignRes().get(str) 509 .get(rightRes) 510 - block.getAlignRes().get(str).get(leftRes) - 1) 511 + block.getAlignRes().get(str).get(leftRes) + 1; 512 513 if (freePool.get(str).contains(residue)) { 514 block.getAlignRes().get(str).set(res, residue); 515 freePool.get(str).remove(residue); 516 } 517 } 518 } 519 return true; 520 } 521 522 // When residue different than null shift the whole block 523 switch (rl) { 524 case 0: // Move to the right 525 526 // Find the nearest boundary to the left of the pivot 527 int leftBoundary = res - 1; 528 int leftPrevRes = res; 529 while (true) { 530 if (leftBoundary < 0) 531 break; 532 else { 533 if (block.getAlignRes().get(str).get(leftBoundary) == null) 534 break; // Break if there is a gap (this is the boundary) 535 else if (block.getAlignRes().get(str).get(leftPrevRes) > block 536 .getAlignRes().get(str).get(leftBoundary) + 1) 537 break; // Break if there is a discontinuity 538 } 539 leftPrevRes = leftBoundary; 540 leftBoundary--; 541 } 542 leftBoundary++; 543 544 // Find the nearest boundary to the right of the pivot 545 int rightBoundary = res + 1; 546 int rightPrevRes = res; 547 while (true) { 548 if (rightBoundary == block.length()) 549 break; 550 else { 551 if (block.getAlignRes().get(str).get(rightBoundary) == null) 552 break; // Break if there is a gap 553 else if (block.getAlignRes().get(str).get(rightPrevRes) + 1 < block 554 .getAlignRes().get(str).get(rightBoundary)) 555 break; // Discontinuity 556 } 557 rightPrevRes = rightBoundary; 558 rightBoundary++; 559 } 560 rightBoundary--; 561 562 // Residues at the boundary 563 Integer resR0 = block.getAlignRes().get(str).get(rightBoundary); 564 Integer resL0 = block.getAlignRes().get(str).get(leftBoundary); 565 566 // Remove the residue at the right of the block 567 block.getAlignRes().get(str).remove(rightBoundary); 568 if (resR0 != null) 569 freePool.get(str).add(resR0); 570 571 // Add the residue at the left of the block 572 if (resL0 != null) 573 resL0 -= 1; 574 575 if (freePool.get(str).contains(resL0)) { 576 block.getAlignRes().get(str).add(leftBoundary, resL0); 577 freePool.get(str).remove(resL0); 578 } else 579 block.getAlignRes().get(str).add(leftBoundary, null); 580 581 break; 582 583 case 1: // Move to the left 584 585 // Find the nearest boundary to the left of the pivot 586 int leftBoundary1 = res - 1; 587 int leftPrevRes1 = res; 588 while (true) { 589 if (leftBoundary1 < 0) 590 break; 591 else { 592 if (block.getAlignRes().get(str).get(leftBoundary1) == null) 593 break; // Break if there is a gap (this is the boundary) 594 else if (block.getAlignRes().get(str).get(leftPrevRes1) > block 595 .getAlignRes().get(str).get(leftBoundary1) + 1) 596 break; // Break if there is a discontinuity 597 } 598 leftPrevRes1 = leftBoundary1; 599 leftBoundary1--; 600 } 601 leftBoundary1++; 602 603 // Find the nearest boundary to the right of the pivot 604 int rightBoundary1 = res + 1; 605 int rightPrevRes1 = res; 606 while (true) { 607 if (rightBoundary1 == block.length()) 608 break; 609 else { 610 if (block.getAlignRes().get(str).get(rightBoundary1) == null) 611 break; // Break if there is a gap 612 else if (block.getAlignRes().get(str).get(rightPrevRes1) + 1 < block 613 .getAlignRes().get(str).get(rightBoundary1)) 614 break; // Discontinuity 615 } 616 rightPrevRes1 = rightBoundary1; 617 rightBoundary1++; 618 } 619 rightBoundary1--; 620 621 // Residues at the boundary 622 Integer resR1 = block.getAlignRes().get(str).get(rightBoundary1); 623 Integer resL1 = block.getAlignRes().get(str).get(leftBoundary1); 624 625 // Add the residue at the right of the block 626 if (resR1 != null) 627 resR1 += 1; 628 629 if (freePool.get(str).contains(resR1)) { 630 if (rightBoundary1 == block.length() - 1) { 631 block.getAlignRes().get(str).add(resR1); 632 } else 633 block.getAlignRes().get(str).add(rightBoundary1 + 1, resR1); 634 635 freePool.get(str).remove(resR1); 636 } else 637 block.getAlignRes().get(str).add(rightBoundary1 + 1, null); 638 639 // Remove the residue at the left of the block 640 block.getAlignRes().get(str).remove(leftBoundary1); 641 if (resL1 != null) 642 freePool.get(str).add(resL1); 643 644 break; 645 } 646 checkGaps(); 647 return true; 648 } 649 650 /** 651 * It extends the alignment one position to the right or to the left of a 652 * randomly selected position by moving the consecutive residues of each 653 * subunit (if enough) from the freePool to the block. 654 * <p> 655 * If there are not enough residues in the freePool it introduces gaps. 656 */ 657 private boolean expandBlock() { 658 659 int rl = rnd.nextInt(2); // Select expanding right (0) or left (1) 660 int bk = rnd.nextInt(blockNr); // Select randomly the Block 661 int res = rnd.nextInt(msa.getBlock(bk).length()); 662 663 Block block = msa.getBlock(bk); 664 int gaps = 0; // store the number of gaps in the expansion 665 666 switch (rl) { 667 case 0: 668 669 int rightBound = res; 670 int[] previousPos = new int[size]; 671 for (int str = 0; str < size; str++) 672 previousPos[str] = -1; 673 674 // Search t the right for >Rmin non consecutive residues 675 while (block.length() - 1 > rightBound) { 676 int noncontinuous = 0; 677 for (int str = 0; str < size; str++) { 678 if (block.getAlignRes().get(str).get(rightBound) == null) { 679 continue; 680 } else if (previousPos[str] == -1) { 681 previousPos[str] = block.getAlignRes().get(str) 682 .get(rightBound); 683 } else if (block.getAlignRes().get(str).get(rightBound) > previousPos[str] + 1) { 684 noncontinuous++; 685 } 686 } 687 if (noncontinuous < Rmin) 688 rightBound++; 689 else 690 break; 691 } 692 if (rightBound > 0) 693 rightBound--; 694 695 // Expand the block with the residues at the subunit boundaries 696 for (int str = 0; str < size; str++) { 697 Integer residueR = block.getAlignRes().get(str).get(rightBound); 698 if (residueR == null) { 699 if (rightBound == block.length() - 1) { 700 block.getAlignRes().get(str).add(null); 701 } else 702 block.getAlignRes().get(str).add(rightBound + 1, null); 703 gaps++; 704 } else if (freePool.get(str).contains(residueR + 1)) { 705 Integer residueAdd = residueR + 1; 706 if (rightBound == block.length() - 1) { 707 block.getAlignRes().get(str).add(residueAdd); 708 } else { 709 block.getAlignRes().get(str) 710 .add(rightBound + 1, residueAdd); 711 } 712 freePool.get(str).remove(residueAdd); 713 } else { 714 if (rightBound == block.length() - 1) 715 block.getAlignRes().get(str).add(null); 716 else 717 block.getAlignRes().get(str).add(rightBound + 1, null); 718 gaps++; 719 } 720 } 721 break; 722 723 case 1: 724 725 int leftBoundary = res; 726 int[] nextPos = new int[size]; 727 for (int str = 0; str < size; str++) 728 nextPos[str] = -1; 729 730 // Search position to the right with >Rmin non consecutive residues 731 while (leftBoundary > 0) { 732 int noncontinuous = 0; 733 for (int str = 0; str < size; str++) { 734 if (block.getAlignRes().get(str).get(leftBoundary) == null) 735 continue; 736 else if (nextPos[str] == -1) { 737 nextPos[str] = block.getAlignRes().get(str) 738 .get(leftBoundary); 739 } else if (block.getAlignRes().get(str).get(leftBoundary) < nextPos[str] - 1) { 740 noncontinuous++; 741 } 742 } 743 if (noncontinuous < Rmin) 744 leftBoundary--; 745 else 746 break; 747 } 748 749 // Expand the block with the residues at the subunit boundaries 750 for (int str = 0; str < size; str++) { 751 Integer residueL = block.getAlignRes().get(str) 752 .get(leftBoundary); 753 if (residueL == null) { 754 block.getAlignRes().get(str).add(leftBoundary, null); 755 gaps++; 756 } else if (freePool.get(str).contains(residueL - 1)) { 757 Integer residueAdd = residueL - 1; 758 block.getAlignRes().get(str).add(leftBoundary, residueAdd); 759 freePool.get(str).remove(residueAdd); 760 } else { 761 block.getAlignRes().get(str).add(leftBoundary, null); 762 gaps++; 763 } 764 } 765 break; 766 } 767 if (size - gaps >= Rmin) 768 return true; 769 else 770 checkGaps(); 771 return false; 772 } 773 774 /** 775 * Deletes an alignment column at a randomly selected position. 776 */ 777 private boolean shrinkBlock() { 778 779 // Select column by maximum distance 780 Matrix residueDistances = MultipleAlignmentTools 781 .getAverageResidueDistances(msa); 782 double[] colDistances = new double[msa.length()]; 783 double maxDist = Double.MIN_VALUE; 784 int position = 0; 785 int block = 0; 786 int column = 0; 787 for (int b = 0; b < msa.getBlocks().size(); b++) { 788 for (int col = 0; col < msa.getBlock(b).length(); col++) { 789 int normalize = 0; 790 for (int s = 0; s < size; s++) { 791 if (residueDistances.get(s, column) != -1) { 792 colDistances[column] += residueDistances.get(s, column); 793 normalize++; 794 } 795 } 796 colDistances[column] /= normalize; 797 if (colDistances[column] > maxDist) { 798 if (rnd.nextDouble() > 0.5) { 799 maxDist = colDistances[column]; 800 position = col; 801 block = b; 802 } 803 } 804 column++; 805 } 806 } 807 Block currentBlock = msa.getBlock(block); 808 if (currentBlock.getCoreLength() <= Lmin) 809 return false; 810 811 for (int str = 0; str < size; str++) { 812 Integer residue = currentBlock.getAlignRes().get(str).get(position); 813 814 currentBlock.getAlignRes().get(str).remove(position); 815 if (residue != null) 816 freePool.get(str).add(residue); 817 } 818 return true; 819 } 820 821 /** 822 * Calculates the probability of accepting a bad move given the iteration 823 * step and the score change. 824 * <p> 825 * Function: p=(C-AS)/(m*C) , slightly different from the CEMC algorithm. 826 * <p> 827 * Added a normalization factor so that the probability approaches 0 as the 828 * final of the optimization gets closer. 829 */ 830 private double probabilityFunction(double AS, int m, int maxIter) { 831 832 double prob = (C + AS) / (m * C); 833 double norm = (1 - (m * 1.0) / maxIter); // Normalization factor 834 return Math.min(Math.max(prob * norm, 0.0), 1.0); 835 } 836 837 /** 838 * Save the evolution of the optimization process as a csv file. 839 */ 840 private void saveHistory(String filePath) throws IOException { 841 842 FileWriter writer = new FileWriter(filePath); 843 writer.append("Step,Length,RMSD,Score\n"); 844 845 for (int i = 0; i < lengthHistory.size(); i++) { 846 writer.append(String.valueOf(i * 100)); 847 writer.append("," + lengthHistory.get(i)); 848 writer.append("," + rmsdHistory.get(i)); 849 writer.append("," + scoreHistory.get(i) + "\n"); 850 } 851 writer.flush(); 852 writer.close(); 853 } 854}