001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.align.multiple.mc;
022
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.List;
026import java.util.SortedSet;
027import java.util.TreeSet;
028import java.util.concurrent.Callable;
029import java.util.concurrent.ExecutionException;
030import java.util.concurrent.ExecutorService;
031import java.util.concurrent.Executors;
032import java.util.concurrent.Future;
033
034import org.biojava.nbio.structure.Atom;
035import org.biojava.nbio.structure.StructureException;
036import org.biojava.nbio.structure.align.CallableStructureAlignment;
037import org.biojava.nbio.structure.align.MultipleStructureAligner;
038import org.biojava.nbio.structure.align.StructureAlignment;
039import org.biojava.nbio.structure.align.ce.CeCPMain;
040import org.biojava.nbio.structure.align.ce.ConfigStrucAligParams;
041import org.biojava.nbio.structure.align.model.AFPChain;
042import org.biojava.nbio.structure.align.multiple.Block;
043import org.biojava.nbio.structure.align.multiple.BlockImpl;
044import org.biojava.nbio.structure.align.multiple.BlockSet;
045import org.biojava.nbio.structure.align.multiple.BlockSetImpl;
046import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
047import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble;
048import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsembleImpl;
049import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052
053/**
054 * Main class of the Java implementation of the Combinatorial Extension -
055 * Monte Carlo (CEMC) Algorithm,
056 * as it was originally described by C.Guda, E.D.Scheeff, P.E. Bourne and
057 * I.N. Shindyalov (2001).
058 * The original CEMC paper is available from
059 * <a href="http://psb.stanford.edu/psb-online/proceedings/psb01/guda.pdf">
060 * here</a>.
061 * <p>
062 * This implementation is a generalized version that allows any pairwise
063 * structure alignment algorithm as input, thus supporting any non-topological
064 * or flexible alignment. The seed can also directly be the input for the
065 * optimization. For that, look at {@link MultipleMcOptimizer}.
066 * <p>
067 * A Demo on how to use the algorithm can be found in the demo package.
068 *
069 * @author Aleix Lafita
070 * @since 4.1.0
071 *
072 */
073public class MultipleMcMain implements MultipleStructureAligner {
074
075        private static final Logger logger =
076                        LoggerFactory.getLogger(MultipleMcMain.class);
077
078        /**
079         *  Version history:<p>
080         *  1.0 - Initial code implementation from CEMC article.<p>
081         *  1.1 - Support CP, non-topological and flexible seed alignments.<p>
082         */
083        public static final String version = "1.1";
084        public static final String algorithmName = "jMultipleMC";
085
086        private MultipleMcParameters params;
087        private MultipleAlignmentEnsemble ensemble;
088        private StructureAlignment pairwise;
089        private int reference = 0;
090
091        /**
092         * Default constructor.
093         * Default parameters are used.
094         * @param pairwiseAlgo the pairwise structure alignment used to generate the
095         *                      multiple alignment seed.
096         */
097        public MultipleMcMain(StructureAlignment pairwiseAlgo){
098                ensemble = null;
099                params = new MultipleMcParameters();
100                pairwise = pairwiseAlgo;
101                if (pairwise == null) pairwise = new CeCPMain();
102        }
103
104        /**
105         * Creates a MultipleAlignment seed for MC optimization from the
106         * representative Atoms of the structures. If there are N structures:
107         * <ul><li>Generate (N*(N-1))/2 all-to-all alignments in parallel using
108         * the Java API.
109         * <li>Choose the closest structure to all others as the reference.
110         * <li>Generate a MultipleAlignment by combining all the alignments to
111         * the reference, that will be used as a seed for the MC optimization.
112         * </ul>
113         *
114         * @param atomArrays List of Atoms to align of the structures
115         * @return MultipleAlignment seed alignment
116         * @throws ExecutionException
117         * @throws InterruptedException
118         * @throws StructureException
119         */
120        private MultipleAlignment generateSeed(List<Atom[]> atomArrays)
121                        throws InterruptedException,
122                        ExecutionException, StructureException {
123
124                int size = atomArrays.size();
125
126                //List to store the all-to-all alignments
127                List<List<AFPChain>> afpAlignments = new ArrayList<>();
128                for (int i=0; i<size; i++){
129                        afpAlignments.add(new ArrayList<AFPChain>());
130                        for (int j=0; j<size; j++)
131                                afpAlignments.get(i).add(null);
132                }
133
134                int threads = params.getNrThreads();
135                ExecutorService executor = Executors.newFixedThreadPool(threads);
136                List<Future<AFPChain>> afpFuture = new ArrayList<>();
137
138                //Create all the possible protein pairwise combinations
139                //(N*(N-1)/2) and call the pairwise alignment algorithm
140                for (int i=0; i<size; i++){
141                        for (int j=i+1; j<size; j++){
142
143                                Callable<AFPChain> worker = new CallableStructureAlignment(
144                                                atomArrays.get(i), atomArrays.get(j),
145                                                pairwise.getAlgorithmName(), pairwise.getParameters());
146
147                                Future<AFPChain> submit = executor.submit(worker);
148                                afpFuture.add(submit);
149                        }
150                }
151
152                //Store the resulting AFPChains in the 2D List
153                int index = 0;  //the alignment index
154                for (int i=0; i<size; i++){
155                        for (int j=i; j<size; j++){
156                                if (i!=j){
157                                        afpAlignments.get(i).add(j,afpFuture.get(index).get());
158                                        afpAlignments.get(j).add(i,afpFuture.get(index).get());
159                                        index++;
160                                }
161                        }
162                }
163                executor.shutdown();
164
165                reference = chooseReferenceRMSD(afpAlignments);
166                boolean flexible = false;
167                if (pairwise.getAlgorithmName().contains("flexible"))
168                        flexible = true;
169
170                return combineReferenceAlignments(afpAlignments.get(reference),
171                                atomArrays, reference, flexible);
172        }
173
174        /**
175         * This method takes the all-to-all pairwise alignments Matrix (as a
176         * double List of AFPChain) and calculates the structure with the
177         * lowest average RMSD against all others.
178         * The index of this structure is returned.
179         *
180         * @param afpAlignments List double containing all-to-all pairwise alignments
181         * @return int reference index
182         */
183        private static int chooseReferenceRMSD(List<List<AFPChain>> afpAlignments){
184
185                int size = afpAlignments.size();
186
187                List<Double> RMSDs = new ArrayList<>();
188                for (int i=0; i<afpAlignments.size(); i++){
189                        double rmsd=0.0;
190                        for (int j=0; j<size; j++){
191                                if (i!=j)
192                                        rmsd += afpAlignments.get(i).get(j).getTotalRmsdOpt();
193                        }
194                        RMSDs.add(rmsd);
195                }
196                int reference = 0;
197                for (int i=1; i<size; i++){
198                        if (RMSDs.get(i) < RMSDs.get(reference)) reference = i;
199                }
200                logger.info("Reference structure is "+reference);
201                return reference;
202        }
203
204        /**
205         * This method takes a list of pairwise alignments to the reference
206         * structure and calculates a MultipleAlignment resulting from combining
207         * their residue equivalencies.
208         * <p>
209         * It uses the blocks in AFPChain as {@link Block}s in the
210         * MultipleAlignment, so considers non-topological
211         * alignments, if the alignment is rigid. If the alignment is flexible,
212         * it considers the blocks as {@link BlockSet}s.
213         *
214         * @param afpList the list of pairwise alignments to the reference
215         * @param atomArrays List of Atoms of the structures
216         * @param ref index of the reference structure
217         * @param flexible uses BlockSets if true, uses Blocks otherwise
218         * @return MultipleAlignment seed alignment
219         * @throws StructureException
220         */
221        private static MultipleAlignment combineReferenceAlignments(
222                        List<AFPChain> afpList, List<Atom[]> atomArrays,
223                        int ref, boolean flexible) throws StructureException {
224
225                int size = atomArrays.size();
226                int length = 0;  //the number of residues of the reference structure
227                if (ref==0) length = afpList.get(1).getCa1Length();
228                else length = afpList.get(0).getCa2Length();
229                SortedSet<Integer> flexibleBoundaries = new TreeSet<>();
230
231                //Stores the equivalencies of all the structures as a double List
232                List<List<Integer>> equivalencies = new ArrayList<>();
233                for (int str=0; str<size; str++){
234                        equivalencies.add(new ArrayList<Integer>());
235                        for (int res=0; res<length; res++){
236                                if (str==ref) equivalencies.get(str).add(res);  //identity
237                                else equivalencies.get(str).add(null);
238                        }
239                }
240
241                //Now we parse the AFPChains adding the residue equivalencies
242                for (int str=0; str<size; str++){
243                        if (str==ref) continue;  //avoid self-comparison
244                        for (int bk=0; bk<afpList.get(str).getBlockNum(); bk++){
245                                for (int i=0; i<afpList.get(str).getOptLen()[bk]; i++){
246                                        int res1 = 0;  //reference index
247                                        int res2 = 0;
248                                        //The low index is always in the first chain (0)
249                                        if(str>ref){
250                                                res1 = afpList.get(str).getOptAln()[bk][0][i];
251                                                res2 = afpList.get(str).getOptAln()[bk][1][i];
252                                        }
253                                        else if (str<ref){
254                                                res1 = afpList.get(str).getOptAln()[bk][1][i];
255                                                res2 = afpList.get(str).getOptAln()[bk][0][i];
256                                        }
257                                        equivalencies.get(str).set(res1,res2);
258
259                                        //Add the flexible boundaries if flexible
260                                        if(flexible && i==0) flexibleBoundaries.add(res1);
261                                }
262                        }
263                }
264
265                //We have translated the equivalencies, we create the MultipleAlignment
266                MultipleAlignment seed = new MultipleAlignmentImpl();
267                seed.getEnsemble().setAtomArrays(atomArrays);
268                BlockSet blockSet = new BlockSetImpl(seed);
269                new BlockImpl(blockSet);
270
271                //Store last positions in the block different than null to detect CP
272                int[] lastResidues = new int[size];
273                Arrays.fill(lastResidues, -1);
274
275                //We loop through all the equivalencies checking for CP
276                for (int pos=0; pos<length; pos++){
277                        //Start a new BlockSet if the position means a boundary
278                        if (flexibleBoundaries.contains(pos) &&
279                                        blockSet.getBlocks().get(blockSet.getBlocks().size()-1).
280                                        getAlignRes() != null){
281
282                                blockSet = new BlockSetImpl(seed);
283                                new BlockImpl(blockSet);
284                        }
285
286                        boolean cp = false;
287                        for (int str=0; str<size; str++){
288                                if (equivalencies.get(str).get(pos) == null){
289                                        continue;  //there is a gap, ignore position
290                                } else if (equivalencies.get(str).get(pos)<lastResidues[str]){
291                                        cp = true;  //current residue is lower than the last
292                                        break;
293                                } else lastResidues[str] = equivalencies.get(str).get(pos);
294                        }
295                        if (cp){  //if there is a CP create a new Block
296                                new BlockImpl(blockSet);
297                                Arrays.fill(lastResidues,-1);
298                        }
299
300                        //Now add the equivalent residues into the Block AlignRes variable
301                        for (int str=0; str<size; str++){
302                                Block lastB = blockSet.getBlocks().get(
303                                                blockSet.getBlocks().size()-1);
304
305                                if (lastB.getAlignRes() == null){
306                                        //Initialize the aligned residues list
307                                        List<List<Integer>> alnRes =
308                                                        new ArrayList<>(size);
309
310                                        for (int k=0; k<size; k++) {
311                                                alnRes.add(new ArrayList<Integer>());
312                                        }
313                                        lastB.setAlignRes(alnRes);
314                                }
315                                lastB.getAlignRes().get(str).add(
316                                                equivalencies.get(str).get(pos));
317                        }
318                }
319                logger.info("Seed alignment has "+seed.getBlocks()+" Blocks.");
320                return seed;
321        }
322
323        @Override
324        public MultipleAlignment align(List<Atom[]> atomArrays, Object parameters)
325                        throws StructureException {
326
327                MultipleAlignment result = null;
328                ensemble = new MultipleAlignmentEnsembleImpl();
329                ensemble.setAtomArrays(atomArrays);
330                ensemble.setAlgorithmName(algorithmName);
331                ensemble.setVersion(version);
332                ensemble.setIoTime(System.currentTimeMillis());
333                setParameters((ConfigStrucAligParams) parameters);
334
335                //Generate the seed alignment and optimize it
336                try {
337                        result = generateSeed(atomArrays);
338                } catch (InterruptedException e) {
339                        logger.warn("Seed generation failed.",e);
340                } catch (ExecutionException e) {
341                        logger.warn("Seed generation failed.",e);
342                }
343
344                //Repeat the optimization in parallel - DISALLOWED
345                /*int threads = params.getNrThreads();
346                        ExecutorService executor = Executors.newFixedThreadPool(threads);
347                        List<Future<MultipleAlignment>> msaFuture =
348                                        new ArrayList<Future<MultipleAlignment>>();
349
350                        for (int i=0; i<params.getNrThreads(); i++){
351                                //Change the random seed for each parallelization
352                                MultipleMcParameters paramsMC = (MultipleMcParameters) params;
353                                paramsMC.setRandomSeed(paramsMC.getRandomSeed()+i);
354
355                                Callable<MultipleAlignment> worker =
356                                                new MultipleMcOptimizer(
357                                                                result, paramsMC, reference);
358
359                                Future<MultipleAlignment> submit = executor.submit(worker);
360                                msaFuture.add(submit);
361                        }
362
363                        double maxScore = Double.NEGATIVE_INFINITY;
364                        //Take the one with the best result (best MC-Score)
365                        for (int i=0; i<msaFuture.size(); i++){
366                                MultipleAlignment align = msaFuture.get(i).get();
367                                double s = align.getScore(MultipleAlignmentScorer.MC_SCORE);
368                                if (s > maxScore){
369                                        result = align;
370                                        maxScore = s;
371                                }
372                        }
373                        Long runtime = System.currentTimeMillis()-ensemble.getIoTime();
374                        ensemble.setCalculationTime(runtime);
375
376                        result.setEnsemble(ensemble);
377                        ensemble.addMultipleAlignment(result);
378                        executor.shutdown();*/
379
380                MultipleMcOptimizer optimizer = new MultipleMcOptimizer(
381                                result, params, reference);
382
383                Long runtime = System.currentTimeMillis()-ensemble.getIoTime();
384                ensemble.setCalculationTime(runtime);
385
386                result = optimizer.optimize();
387                result.setEnsemble(ensemble);
388                ensemble.addMultipleAlignment(result);
389
390                return result;
391
392        }
393
394        @Override
395        public MultipleAlignment align(List<Atom[]> atomArrays)
396                        throws StructureException {
397
398                if (params == null) {
399                        logger.info("Using DEFAULT MultipleMc Parameters");
400                        params = new MultipleMcParameters();
401                }
402                return align(atomArrays,params);
403        }
404
405        @Override
406        public ConfigStrucAligParams getParameters() {
407                return params;
408        }
409
410        @Override
411        public void setParameters(ConfigStrucAligParams parameters) {
412                if (!(parameters instanceof MultipleMcParameters)){
413                        throw new IllegalArgumentException(
414                                        "Provided parameter object is not of type MultipleMC");
415                }
416                this.params = (MultipleMcParameters) parameters;
417        }
418
419        @Override
420        public String getAlgorithmName() {
421                return algorithmName;
422        }
423
424        @Override
425        public String getVersion() {
426                return version;
427        }
428}