001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.align.multiple.mc;
022
023import java.io.FileWriter;
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.Collections;
027import java.util.List;
028import java.util.Random;
029import java.util.SortedSet;
030import java.util.TreeSet;
031import java.util.concurrent.Callable;
032
033import org.biojava.nbio.structure.Atom;
034import org.biojava.nbio.structure.StructureException;
035import org.biojava.nbio.structure.align.multiple.Block;
036import org.biojava.nbio.structure.align.multiple.BlockSet;
037import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
038import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble;
039import org.biojava.nbio.structure.align.multiple.util.CoreSuperimposer;
040import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer;
041import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentTools;
042import org.biojava.nbio.structure.align.multiple.util.MultipleSuperimposer;
043import org.biojava.nbio.structure.jama.Matrix;
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047/**
048 * This class takes a MultipleAlignment seed previously generated and runs a
049 * Monte Carlo optimization in order to improve the overall score and highlight
050 * common structural motifs.
051 * <p>
052 * The seed alignment can be flexible, non-topological or include CP, but this
053 * optimization will not change the number of flexible parts {@link BlockSet}s
054 * or non-topological regions {@link Block}. Thus, the definition of those parts
055 * depend exclusively on the pairwise alignment (or user alignment) used to
056 * generate the seed multiple alignment.
057 * <p>
058 * This class implements Callable, because multiple instances of the
059 * optimization can be run in parallel.
060 *
061 * @author Aleix Lafita
062 * @since 4.1.0
063 *
064 */
065public class MultipleMcOptimizer implements Callable<MultipleAlignment> {
066
067        private static final Logger logger = LoggerFactory
068                        .getLogger(MultipleMcOptimizer.class);
069
070        private Random rnd;
071        private MultipleSuperimposer imposer;
072
073        // Optimization parameters
074        private int Rmin; // number of aligned structures without a gap
075        private int Lmin; // Minimum alignment length of a Block
076        private int convergenceSteps; // Steps without score change before stopping
077        private double C; // Probability function constant
078
079        // Score function parameters - they are defined by the user
080        private double Gopen; // Penalty for opening gap
081        private double Gextend; // Penalty for extending gaps
082        private double dCutoff; // max allowed residue distance
083
084        // Alignment Information
085        private MultipleAlignment msa; // Alignment to optimize
086        private List<SortedSet<Integer>> freePool; // unaligned residues
087        private List<Atom[]> atomArrays;
088
089        // Alignment Properties
090        private int size; // number of structures in the alignment
091        private int blockNr; // the number of Blocks in the alignment
092        private double mcScore; // Optimization score, objective function
093
094        // Variables that store the history of the optimization - slower if on
095        private static final boolean history = false;
096        private static final String pathToHistory = "McOptHistory.csv";
097        private List<Integer> lengthHistory;
098        private List<Double> rmsdHistory;
099        private List<Double> scoreHistory;
100
101        /**
102         * Constructor. Sets the internal variables from the parameters. To run the
103         * optimization use the call (in a different thread) or optimize methods.
104         *
105         * @param seedAln
106         *            MultipleAlignment to be optimized.
107         * @param params
108         *            the parameter beam
109         * @param reference
110         *            the index of the most similar structure to all others
111         * @throws StructureException
112         */
113        public MultipleMcOptimizer(MultipleAlignment seedAln,
114                        MultipleMcParameters params, int reference) {
115
116                MultipleAlignmentEnsemble e = seedAln.getEnsemble().clone();
117                msa = e.getMultipleAlignment(0);
118                atomArrays = msa.getAtomArrays();
119                size = seedAln.size();
120
121                rnd = new Random(params.getRandomSeed());
122                Gopen = params.getGapOpen();
123                Gextend = params.getGapExtension();
124                dCutoff = params.getDistanceCutoff();
125                imposer = new CoreSuperimposer(reference);
126
127                if (params.getConvergenceSteps() == 0) {
128                        List<Integer> lens = new ArrayList<Integer>();
129                        for (int i = 0; i < size; i++)
130                                lens.add(atomArrays.get(i).length);
131                        convergenceSteps = Collections.min(lens) * size;
132                } else
133                        convergenceSteps = params.getConvergenceSteps();
134
135                if (params.getMinAlignedStructures() == 0) {
136                        Rmin = Math.max(size / 3, 2); // 33% of the structures aligned
137                } else {
138                        Rmin = Math
139                                        .min(Math.max(params.getMinAlignedStructures(), 2), size);
140                }
141                C = 20 * size;
142                Lmin = params.getMinBlockLen();
143
144                // Delete all shorter than Lmin blocks, and empty blocksets
145                List<Block> toDelete = new ArrayList<Block>();
146                List<BlockSet> emptyBs = new ArrayList<BlockSet>();
147
148                for (Block b : msa.getBlocks()) {
149                        if (b.getCoreLength() < Lmin) {
150                                toDelete.add(b);
151                                logger.warn("Deleting a Block: coreLength < Lmin.");
152                        }
153                }
154                for (Block b : toDelete) {
155                        for (BlockSet bs : msa.getBlockSets()) {
156                                bs.getBlocks().remove(b);
157                                if (bs.getBlocks().size() == 0)
158                                        emptyBs.add(bs);
159                        }
160                }
161                for (BlockSet bs : emptyBs) {
162                        msa.getBlockSets().remove(bs);
163                }
164
165                blockNr = msa.getBlocks().size();
166                if (blockNr < 1) {
167                        throw new IllegalArgumentException(
168                                        "Optimization: empty seed alignment, no Blocks found.");
169                }
170        }
171
172        @Override
173        public MultipleAlignment call() throws Exception {
174                return optimize();
175        }
176
177        /**
178         * Initialize the freePool and all the variables needed for the
179         * optimization.
180         *
181         * @throws StructureException
182         */
183        private void initialize() throws StructureException {
184
185                // Initialize alignment variables
186                freePool = new ArrayList<SortedSet<Integer>>();
187                List<List<Integer>> aligned = new ArrayList<List<Integer>>();
188
189                // Generate freePool residues from the ones not aligned
190                for (int i = 0; i < size; i++) {
191                        List<Integer> residues = new ArrayList<Integer>();
192                        for (BlockSet bs : msa.getBlockSets()) {
193                                for (Block b : bs.getBlocks()) {
194                                        for (int l = 0; l < b.length(); l++) {
195                                                Integer residue = b.getAlignRes().get(i).get(l);
196                                                if (residue != null)
197                                                        residues.add(residue);
198                                        }
199                                }
200                        }
201                        aligned.add(residues);
202                        freePool.add(new TreeSet<Integer>());
203                }
204
205                // Add any residue not aligned to the free pool for every structure
206                for (int i = 0; i < size; i++) {
207                        for (int k = 0; k < atomArrays.get(i).length; k++) {
208                                if (!aligned.get(i).contains(k))
209                                        freePool.get(i).add(k);
210                        }
211                }
212
213                // Set the superposition and score for the seed aligment
214                checkGaps();
215                msa.clear();
216                imposer.superimpose(msa);
217                mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend,
218                                dCutoff);
219
220                // Initialize the history variables
221                if (history) {
222                        lengthHistory = new ArrayList<Integer>();
223                        rmsdHistory = new ArrayList<Double>();
224                        scoreHistory = new ArrayList<Double>();
225                }
226        }
227
228        /**
229         * Optimization method based in a Monte-Carlo approach. Starting from the
230         * refined alignment uses 4 types of moves:
231         * <p>
232         * <ul>
233         * <li>Shift Row: if there are enough freePool residues available.
234         * <li>Expand Block: add another alignment column.
235         * <li>Shrink Block: move a block column to the freePool.
236         * <li>Insert gap: insert a gap in a random position of the alignment.
237         * </ul>
238         * </li>
239         */
240        public MultipleAlignment optimize() throws StructureException {
241
242                initialize();
243
244                int conv = 0; // Number of steps without an alignment improvement
245                int i = 1;
246                int maxIter = convergenceSteps * 100;
247
248                while (i < maxIter && conv < convergenceSteps) {
249
250                        // Save the state of the system
251                        MultipleAlignment lastMSA = msa.clone();
252                        List<SortedSet<Integer>> lastFreePool = new ArrayList<SortedSet<Integer>>();
253                        for (int k = 0; k < size; k++) {
254                                SortedSet<Integer> p = new TreeSet<Integer>();
255                                for (Integer l : freePool.get(k))
256                                        p.add(l);
257                                lastFreePool.add(p);
258                        }
259                        double lastScore = mcScore;
260
261                        boolean moved = false;
262
263                        while (!moved) {
264                                // Randomly select one of the steps to modify the alignment
265                                double move = rnd.nextDouble();
266                                if (move < 0.4) {
267                                        moved = shiftRow();
268                                        logger.debug("did shift");
269                                } else if (move < 0.7) {
270                                        moved = expandBlock();
271                                        logger.debug("did expand");
272                                } else if (move < 0.85) {
273                                        moved = shrinkBlock();
274                                        logger.debug("did shrink");
275                                } else {
276                                        moved = insertGap();
277                                        logger.debug("did insert gap");
278                                }
279                        }
280
281                        // Get the score of the new alignment
282                        msa.clear();
283                        imposer.superimpose(msa);
284                        mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend,
285                                        dCutoff);
286
287                        double AS = mcScore - lastScore;
288                        double prob = 1.0;
289
290                        if (AS < 0) {
291
292                                // Probability of accepting the move
293                                prob = probabilityFunction(AS, i, maxIter);
294                                double p = rnd.nextDouble();
295                                // Reject the move
296                                if (p > prob) {
297                                        msa = lastMSA;
298                                        freePool = lastFreePool;
299                                        mcScore = lastScore;
300                                        conv++;
301
302                                } else
303                                        conv = 0;
304
305                        } else
306                                conv = 0;
307
308                        logger.debug("Step: " + i + ": --prob: " + prob
309                                        + ", --score change: " + AS + ", --conv: " + conv);
310
311                        if (history) {
312                                if (i % 100 == 1) {
313                                        lengthHistory.add(msa.length());
314                                        rmsdHistory.add(MultipleAlignmentScorer.getRMSD(msa));
315                                        scoreHistory.add(mcScore);
316                                }
317                        }
318
319                        i++;
320                }
321
322                // Return Multiple Alignment
323                imposer.superimpose(msa);
324                MultipleAlignmentScorer.calculateScores(msa);
325                msa.putScore(MultipleAlignmentScorer.MC_SCORE, mcScore);
326
327                if (history) {
328                        try {
329                                saveHistory(pathToHistory);
330                        } catch (Exception e) {
331                                logger.warn("Could not save history file: " + e.getMessage());
332                        }
333                }
334
335                return msa;
336        }
337
338        /**
339         * Method that loops through all the alignment columns and checks that there
340         * are no more gaps than the maximum allowed, Rmin.
341         * <p>
342         * There must be at least Rmin residues different than null in every
343         * alignment column. In case there is a column with more gaps it will be
344         * shrinked (moved to freePool).
345         *
346         * @return true if any columns has been shrinked and false otherwise
347         */
348        private boolean checkGaps() {
349
350                boolean shrinkedAny = false;
351
352                List<List<Integer>> shrinkColumns = new ArrayList<List<Integer>>();
353                // Loop for each Block
354                for (Block b : msa.getBlocks()) {
355                        List<Integer> shrinkCol = new ArrayList<Integer>();
356                        // Loop for each column in the Block
357                        for (int res = 0; res < b.length(); res++) {
358                                int gapCount = 0;
359                                // count the gaps in the column
360                                for (int su = 0; su < size; su++) {
361                                        if (b.getAlignRes().get(su).get(res) == null)
362                                                gapCount++;
363                                }
364                                if ((size - gapCount) < Rmin) {
365                                        // Add the column to the shrink list
366                                        shrinkCol.add(res);
367                                }
368                        }
369                        shrinkColumns.add(shrinkCol);
370                }
371                // Shrink columns that have more gaps than allowed
372                for (int b = 0; b < blockNr; b++) {
373                        for (int col = shrinkColumns.get(b).size() - 1; col >= 0; col--) {
374                                for (int str = 0; str < size; str++) {
375                                        Block bk = msa.getBlock(b);
376                                        Integer residue = bk.getAlignRes().get(str)
377                                                        .get(shrinkColumns.get(b).get(col));
378                                        bk.getAlignRes().get(str)
379                                                        .remove((int) shrinkColumns.get(b).get(col));
380                                        if (residue != null) {
381                                                freePool.get(str).add(residue);
382                                        }
383                                }
384                                shrinkedAny = true;
385                        }
386                }
387                return shrinkedAny;
388        }
389
390        /**
391         * Insert a gap in one of the structures in a random position of the
392         * alignment.
393         * <p>
394         * The distribution is not uniform, because positions with higher average
395         * distance to their aligned neighbors are more likely to be gapped.
396         * <p>
397         * A gap is a null in the Block position.
398         *
399         * @return true if the alignment has been changed, false otherwise.
400         */
401        private boolean insertGap() {
402
403                // Select residue by maximum distance
404                Matrix residueDistances = MultipleAlignmentTools
405                                .getAverageResidueDistances(msa);
406                double maxDist = Double.MIN_VALUE;
407                int structure = 0;
408                int block = 0;
409                int position = 0;
410                int column = 0;
411                for (int b = 0; b < blockNr; b++) {
412                        for (int col = 0; col < msa.getBlock(b).length(); col++) {
413                                for (int str = 0; str < size; str++) {
414                                        if (residueDistances.get(str, column) != -1) {
415                                                if (residueDistances.get(str, column) > maxDist) {
416                                                        // Geometric distribution
417                                                        if (rnd.nextDouble() > 0.5) {
418                                                                structure = str;
419                                                                block = b;
420                                                                position = col;
421                                                                maxDist = residueDistances.get(str, column);
422                                                        }
423                                                }
424                                        }
425                                }
426                                column++;
427                        }
428                }
429                Block bk = msa.getBlock(block);
430                if (bk.getCoreLength() <= Lmin)
431                        return false;
432
433                // Insert the gap at the position
434                Integer residueL = bk.getAlignRes().get(structure).get(position);
435                if (residueL != null) {
436                        freePool.get(structure).add(residueL);
437                } else
438                        return false; // If there was a gap already in the position.
439
440                bk.getAlignRes().get(structure).set(position, null);
441                checkGaps();
442                return true;
443        }
444
445        /**
446         * Move all the block residues of one subunit one position to the left or to
447         * the right and move the corresponding boundary residues from the freePool
448         * to the block.
449         * <p>
450         * The boundaries are determined by any irregularity (either a null or a
451         * discontinuity in the alignment).
452         *
453         * @return true if the alignment has been changed, false otherwise.
454         */
455        private boolean shiftRow() {
456
457                int str = rnd.nextInt(size); // Select randomly the subunit
458                int rl = rnd.nextInt(2); // Select between moving right (0) or left (1)
459                int bk = rnd.nextInt(blockNr); // Select randomly the Block
460                int res = rnd.nextInt(msa.getBlock(bk).length());
461
462                Block block = msa.getBlock(bk);
463                if (block.getCoreLength() <= Lmin)
464                        return false;
465
466                // When the pivot residue is null try to add a residue from the freePool
467                if (block.getAlignRes().get(str).get(res) == null) {
468                        // Residues not null at the right and left of the pivot null residue
469                        int rightRes = res;
470                        int leftRes = res;
471                        // Find the boundary to the right abd left
472                        while (block.getAlignRes().get(str).get(rightRes) == null
473                                        && rightRes < block.length() - 1) {
474                                rightRes++;
475                        }
476                        while (block.getAlignRes().get(str).get(leftRes) == null
477                                        && leftRes > 0) {
478                                leftRes--;
479                        }
480
481                        // If both are null return because the block is empty
482                        if (block.getAlignRes().get(str).get(leftRes) == null
483                                        && block.getAlignRes().get(str).get(rightRes) == null) {
484                                return false;
485                        } else if (block.getAlignRes().get(str).get(leftRes) == null) {
486                                // Choose the sequentially previous residue of the known one
487                                Integer residue = block.getAlignRes().get(str).get(rightRes) - 1;
488                                if (freePool.get(str).contains(residue)) {
489                                        block.getAlignRes().get(str).set(res, residue);
490                                        freePool.get(str).remove(residue);
491                                } else
492                                        return false;
493                        } else if (block.getAlignRes().get(str).get(rightRes) == null) {
494                                // Choose the sequentially next residue of the known one
495                                Integer residue = block.getAlignRes().get(str).get(leftRes) + 1;
496                                if (freePool.get(str).contains(residue)) {
497                                        block.getAlignRes().get(str).set(res, residue);
498                                        freePool.get(str).remove(residue);
499                                } else
500                                        return false;
501                        } else {
502                                // If boundaries are consecutive no residue can be added
503                                if (block.getAlignRes().get(str).get(rightRes) == block
504                                                .getAlignRes().get(str).get(leftRes) + 1) {
505                                        return false;
506                                } else {
507                                        // Choose randomly a residue in between left and right
508                                        Integer residue = rnd.nextInt(block.getAlignRes().get(str)
509                                                        .get(rightRes)
510                                                        - block.getAlignRes().get(str).get(leftRes) - 1)
511                                                        + block.getAlignRes().get(str).get(leftRes) + 1;
512
513                                        if (freePool.get(str).contains(residue)) {
514                                                block.getAlignRes().get(str).set(res, residue);
515                                                freePool.get(str).remove(residue);
516                                        }
517                                }
518                        }
519                        return true;
520                }
521
522                // When residue different than null shift the whole block
523                switch (rl) {
524                case 0: // Move to the right
525
526                        // Find the nearest boundary to the left of the pivot
527                        int leftBoundary = res - 1;
528                        int leftPrevRes = res;
529                        while (true) {
530                                if (leftBoundary < 0)
531                                        break;
532                                else {
533                                        if (block.getAlignRes().get(str).get(leftBoundary) == null)
534                                                break; // Break if there is a gap (this is the boundary)
535                                        else if (block.getAlignRes().get(str).get(leftPrevRes) > block
536                                                        .getAlignRes().get(str).get(leftBoundary) + 1)
537                                                break; // Break if there is a discontinuity
538                                }
539                                leftPrevRes = leftBoundary;
540                                leftBoundary--;
541                        }
542                        leftBoundary++;
543
544                        // Find the nearest boundary to the right of the pivot
545                        int rightBoundary = res + 1;
546                        int rightPrevRes = res;
547                        while (true) {
548                                if (rightBoundary == block.length())
549                                        break;
550                                else {
551                                        if (block.getAlignRes().get(str).get(rightBoundary) == null)
552                                                break; // Break if there is a gap
553                                        else if (block.getAlignRes().get(str).get(rightPrevRes) + 1 < block
554                                                        .getAlignRes().get(str).get(rightBoundary))
555                                                break; // Discontinuity
556                                }
557                                rightPrevRes = rightBoundary;
558                                rightBoundary++;
559                        }
560                        rightBoundary--;
561
562                        // Residues at the boundary
563                        Integer resR0 = block.getAlignRes().get(str).get(rightBoundary);
564                        Integer resL0 = block.getAlignRes().get(str).get(leftBoundary);
565
566                        // Remove the residue at the right of the block
567                        block.getAlignRes().get(str).remove(rightBoundary);
568                        if (resR0 != null)
569                                freePool.get(str).add(resR0);
570
571                        // Add the residue at the left of the block
572                        if (resL0 != null)
573                                resL0 -= 1;
574
575                        if (freePool.get(str).contains(resL0)) {
576                                block.getAlignRes().get(str).add(leftBoundary, resL0);
577                                freePool.get(str).remove(resL0);
578                        } else
579                                block.getAlignRes().get(str).add(leftBoundary, null);
580
581                        break;
582
583                case 1: // Move to the left
584
585                        // Find the nearest boundary to the left of the pivot
586                        int leftBoundary1 = res - 1;
587                        int leftPrevRes1 = res;
588                        while (true) {
589                                if (leftBoundary1 < 0)
590                                        break;
591                                else {
592                                        if (block.getAlignRes().get(str).get(leftBoundary1) == null)
593                                                break; // Break if there is a gap (this is the boundary)
594                                        else if (block.getAlignRes().get(str).get(leftPrevRes1) > block
595                                                        .getAlignRes().get(str).get(leftBoundary1) + 1)
596                                                break; // Break if there is a discontinuity
597                                }
598                                leftPrevRes1 = leftBoundary1;
599                                leftBoundary1--;
600                        }
601                        leftBoundary1++;
602
603                        // Find the nearest boundary to the right of the pivot
604                        int rightBoundary1 = res + 1;
605                        int rightPrevRes1 = res;
606                        while (true) {
607                                if (rightBoundary1 == block.length())
608                                        break;
609                                else {
610                                        if (block.getAlignRes().get(str).get(rightBoundary1) == null)
611                                                break; // Break if there is a gap
612                                        else if (block.getAlignRes().get(str).get(rightPrevRes1) + 1 < block
613                                                        .getAlignRes().get(str).get(rightBoundary1))
614                                                break; // Discontinuity
615                                }
616                                rightPrevRes1 = rightBoundary1;
617                                rightBoundary1++;
618                        }
619                        rightBoundary1--;
620
621                        // Residues at the boundary
622                        Integer resR1 = block.getAlignRes().get(str).get(rightBoundary1);
623                        Integer resL1 = block.getAlignRes().get(str).get(leftBoundary1);
624
625                        // Add the residue at the right of the block
626                        if (resR1 != null)
627                                resR1 += 1;
628
629                        if (freePool.get(str).contains(resR1)) {
630                                if (rightBoundary1 == block.length() - 1) {
631                                        block.getAlignRes().get(str).add(resR1);
632                                } else
633                                        block.getAlignRes().get(str).add(rightBoundary1 + 1, resR1);
634
635                                freePool.get(str).remove(resR1);
636                        } else
637                                block.getAlignRes().get(str).add(rightBoundary1 + 1, null);
638
639                        // Remove the residue at the left of the block
640                        block.getAlignRes().get(str).remove(leftBoundary1);
641                        if (resL1 != null)
642                                freePool.get(str).add(resL1);
643
644                        break;
645                }
646                checkGaps();
647                return true;
648        }
649
650        /**
651         * It extends the alignment one position to the right or to the left of a
652         * randomly selected position by moving the consecutive residues of each
653         * subunit (if enough) from the freePool to the block.
654         * <p>
655         * If there are not enough residues in the freePool it introduces gaps.
656         */
657        private boolean expandBlock() {
658
659                int rl = rnd.nextInt(2); // Select expanding right (0) or left (1)
660                int bk = rnd.nextInt(blockNr); // Select randomly the Block
661                int res = rnd.nextInt(msa.getBlock(bk).length());
662
663                Block block = msa.getBlock(bk);
664                int gaps = 0; // store the number of gaps in the expansion
665
666                switch (rl) {
667                case 0:
668
669                        int rightBound = res;
670                        int[] previousPos = new int[size];
671                        for (int str = 0; str < size; str++)
672                                previousPos[str] = -1;
673
674                        // Search t the right for >Rmin non consecutive residues
675                        while (block.length() - 1 > rightBound) {
676                                int noncontinuous = 0;
677                                for (int str = 0; str < size; str++) {
678                                        if (block.getAlignRes().get(str).get(rightBound) == null) {
679                                                continue;
680                                        } else if (previousPos[str] == -1) {
681                                                previousPos[str] = block.getAlignRes().get(str)
682                                                                .get(rightBound);
683                                        } else if (block.getAlignRes().get(str).get(rightBound) > previousPos[str] + 1) {
684                                                noncontinuous++;
685                                        }
686                                }
687                                if (noncontinuous < Rmin)
688                                        rightBound++;
689                                else
690                                        break;
691                        }
692                        if (rightBound > 0)
693                                rightBound--;
694
695                        // Expand the block with the residues at the subunit boundaries
696                        for (int str = 0; str < size; str++) {
697                                Integer residueR = block.getAlignRes().get(str).get(rightBound);
698                                if (residueR == null) {
699                                        if (rightBound == block.length() - 1) {
700                                                block.getAlignRes().get(str).add(null);
701                                        } else
702                                                block.getAlignRes().get(str).add(rightBound + 1, null);
703                                        gaps++;
704                                } else if (freePool.get(str).contains(residueR + 1)) {
705                                        Integer residueAdd = residueR + 1;
706                                        if (rightBound == block.length() - 1) {
707                                                block.getAlignRes().get(str).add(residueAdd);
708                                        } else {
709                                                block.getAlignRes().get(str)
710                                                                .add(rightBound + 1, residueAdd);
711                                        }
712                                        freePool.get(str).remove(residueAdd);
713                                } else {
714                                        if (rightBound == block.length() - 1)
715                                                block.getAlignRes().get(str).add(null);
716                                        else
717                                                block.getAlignRes().get(str).add(rightBound + 1, null);
718                                        gaps++;
719                                }
720                        }
721                        break;
722
723                case 1:
724
725                        int leftBoundary = res;
726                        int[] nextPos = new int[size];
727                        for (int str = 0; str < size; str++)
728                                nextPos[str] = -1;
729
730                        // Search position to the right with >Rmin non consecutive residues
731                        while (leftBoundary > 0) {
732                                int noncontinuous = 0;
733                                for (int str = 0; str < size; str++) {
734                                        if (block.getAlignRes().get(str).get(leftBoundary) == null)
735                                                continue;
736                                        else if (nextPos[str] == -1) {
737                                                nextPos[str] = block.getAlignRes().get(str)
738                                                                .get(leftBoundary);
739                                        } else if (block.getAlignRes().get(str).get(leftBoundary) < nextPos[str] - 1) {
740                                                noncontinuous++;
741                                        }
742                                }
743                                if (noncontinuous < Rmin)
744                                        leftBoundary--;
745                                else
746                                        break;
747                        }
748
749                        // Expand the block with the residues at the subunit boundaries
750                        for (int str = 0; str < size; str++) {
751                                Integer residueL = block.getAlignRes().get(str)
752                                                .get(leftBoundary);
753                                if (residueL == null) {
754                                        block.getAlignRes().get(str).add(leftBoundary, null);
755                                        gaps++;
756                                } else if (freePool.get(str).contains(residueL - 1)) {
757                                        Integer residueAdd = residueL - 1;
758                                        block.getAlignRes().get(str).add(leftBoundary, residueAdd);
759                                        freePool.get(str).remove(residueAdd);
760                                } else {
761                                        block.getAlignRes().get(str).add(leftBoundary, null);
762                                        gaps++;
763                                }
764                        }
765                        break;
766                }
767                if (size - gaps >= Rmin)
768                        return true;
769                else
770                        checkGaps();
771                return false;
772        }
773
774        /**
775         * Deletes an alignment column at a randomly selected position.
776         */
777        private boolean shrinkBlock() {
778
779                // Select column by maximum distance
780                Matrix residueDistances = MultipleAlignmentTools
781                                .getAverageResidueDistances(msa);
782                double[] colDistances = new double[msa.length()];
783                double maxDist = Double.MIN_VALUE;
784                int position = 0;
785                int block = 0;
786                int column = 0;
787                for (int b = 0; b < msa.getBlocks().size(); b++) {
788                        for (int col = 0; col < msa.getBlock(b).length(); col++) {
789                                int normalize = 0;
790                                for (int s = 0; s < size; s++) {
791                                        if (residueDistances.get(s, column) != -1) {
792                                                colDistances[column] += residueDistances.get(s, column);
793                                                normalize++;
794                                        }
795                                }
796                                colDistances[column] /= normalize;
797                                if (colDistances[column] > maxDist) {
798                                        if (rnd.nextDouble() > 0.5) {
799                                                maxDist = colDistances[column];
800                                                position = col;
801                                                block = b;
802                                        }
803                                }
804                                column++;
805                        }
806                }
807                Block currentBlock = msa.getBlock(block);
808                if (currentBlock.getCoreLength() <= Lmin)
809                        return false;
810
811                for (int str = 0; str < size; str++) {
812                        Integer residue = currentBlock.getAlignRes().get(str).get(position);
813
814                        currentBlock.getAlignRes().get(str).remove(position);
815                        if (residue != null)
816                                freePool.get(str).add(residue);
817                }
818                return true;
819        }
820
821        /**
822         * Calculates the probability of accepting a bad move given the iteration
823         * step and the score change.
824         * <p>
825         * Function: p=(C-AS)/(m*C) , slightly different from the CEMC algorithm.
826         * <p>
827         * Added a normalization factor so that the probability approaches 0 as the
828         * final of the optimization gets closer.
829         */
830        private double probabilityFunction(double AS, int m, int maxIter) {
831
832                double prob = (C + AS) / (m * C);
833                double norm = (1 - (m * 1.0) / maxIter); // Normalization factor
834                return Math.min(Math.max(prob * norm, 0.0), 1.0);
835        }
836
837        /**
838         * Save the evolution of the optimization process as a csv file.
839         */
840        private void saveHistory(String filePath) throws IOException {
841
842                FileWriter writer = new FileWriter(filePath);
843                writer.append("Step,Length,RMSD,Score\n");
844
845                for (int i = 0; i < lengthHistory.size(); i++) {
846                        writer.append(String.valueOf(i * 100));
847                        writer.append("," + lengthHistory.get(i));
848                        writer.append("," + rmsdHistory.get(i));
849                        writer.append("," + scoreHistory.get(i) + "\n");
850                }
851                writer.flush();
852                writer.close();
853        }
854}