001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Mar 9, 2010
021 * Author: Spencer Bliven
022 *
023 */
024
025package org.biojava.nbio.structure.align.ce;
026
027
028import org.biojava.nbio.structure.Atom;
029import org.biojava.nbio.structure.StructureException;
030import org.biojava.nbio.structure.StructureTools;
031import org.biojava.nbio.structure.align.model.AFPChain;
032import org.biojava.nbio.structure.align.util.AFPChainScorer;
033import org.biojava.nbio.structure.align.util.AtomCache;
034import org.biojava.nbio.structure.jama.Matrix;
035
036import java.lang.reflect.InvocationTargetException;
037import java.util.ArrayList;
038import java.util.Collections;
039import java.util.List;
040import java.util.Scanner;
041
042/**
043 * A wrapper for {@link CeMain} which sets default parameters to be appropriate for finding
044 * circular permutations.
045 * <p>
046 * A circular permutation consists of a single cleavage point and rearrangement
047 * between two structures, for example:
048 * <pre>
049 * ABCDEFG
050 * DEFGABC
051 * </pre>
052 * @author Spencer Bliven.
053 *
054 */
055public class OptimalCECPMain extends CeMain {
056        private static final boolean debug = true;
057
058
059        public static final String algorithmName = "jCE Optimal Circular Permutation";
060
061        /**
062         *  version history:
063         *  1.0 - Initial version
064         */
065        public static final String version = "1.0";
066
067        protected OptimalCECPParameters params;
068
069        /**
070         *
071         */
072        public OptimalCECPMain() {
073                super();
074                params = new OptimalCECPParameters();
075        }
076
077        @Override
078        public String getAlgorithmName() {
079                return OptimalCECPMain.algorithmName;
080        }
081
082        @Override
083        public String getVersion() {
084                return OptimalCECPMain.version;
085        }
086
087        /**
088         * @return an {@link OptimalCECPParameters} object
089         */
090        @Override
091        public ConfigStrucAligParams getParameters() {
092                return params;
093        }
094
095        /**
096         * @param params Should be an {@link OptimalCECPParameters} object specifying alignment options
097         */
098        @Override
099        public void setParameters(ConfigStrucAligParams params){
100                if (! (params instanceof OptimalCECPParameters )){
101                        throw new IllegalArgumentException("provided parameter object is not of type CeParameter");
102                }
103                this.params = (OptimalCECPParameters) params;
104        }
105
106        /**
107         * Circularly permutes arr in place.
108         *
109         * <p>Similar to {@link Collections#rotate(List, int)} but with reversed
110         * direction. Perhaps it would be more efficient to use the Collections version?
111         * @param <T>
112         * @param arr The array to be permuted
113         * @param cp The number of residues to shift leftward, or equivalently, the index of
114         *  the first element after the permutation point.
115         */
116        private static <T> void permuteArray(T[] arr, int cp) {
117                // Allow negative cp points for convenience.
118                if(cp == 0) {
119                        return;
120                }
121                if(cp < 0) {
122                        cp = arr.length+cp;
123                }
124                if(cp < 0 || cp >= arr.length) {
125                        throw new ArrayIndexOutOfBoundsException(
126                                        "Permutation point ("+cp+") must be between -ca2.length and ca2.length-1" );
127                }
128
129                List<T> temp = new ArrayList<T>(cp);
130
131                // shift residues left
132                for(int i=0;i<cp;i++) {
133                        temp.add(arr[i]);
134                }
135                for(int j=cp;j<arr.length;j++) {
136                        arr[j-cp]=arr[j];
137                }
138                for(int i=0;i<cp;i++) {
139                        arr[arr.length-cp+i] = temp.get(i);
140                }
141        }
142
143        /**
144         * Circularly permutes arr in place.
145         *
146         * <p>Similar to {@link Collections#rotate(List, int)} but with reversed
147         * direction. Perhaps it would be more efficient to use the Collections version?
148         * @param <T>
149         * @param arr The array to be permuted
150         * @param cp The number of residues to shift leftward, or equivalently, the index of
151         *  the first element after the permutation point.
152         *
153        private static void permuteArray(int[] arr, int cp) {
154                // Allow negative cp points for convenience.
155                if(cp == 0) {
156                        return;
157                }
158                if(cp < 0) {
159                        cp = arr.length+cp;
160                }
161                if(cp < 0 || cp >= arr.length) {
162                        throw new ArrayIndexOutOfBoundsException(
163                                        "Permutation point ("+cp+") must be between -ca2.length and ca2.length-1" );
164                }
165
166                List<Integer> temp = new ArrayList<Integer>(cp);
167
168                // shift residues left
169                for(int i=0;i<cp;i++) {
170                        temp.add(arr[i]);
171                }
172                for(int j=cp;j<arr.length;j++) {
173                        arr[j-cp]=arr[j];
174                }
175                for(int i=0;i<cp;i++) {
176                        arr[arr.length-cp+i] = temp.get(i);
177                }
178        }
179        */
180
181        /**
182         * Aligns ca1 with ca2 permuted by <i>cp</i> residues.
183         * <p><strong>WARNING:</strong> Modifies ca2 during the permutation. Be sure
184         * to make a copy before calling this method.
185         *
186         * @param ca1
187         * @param ca2
188         * @param param
189         * @param cp
190         * @return
191         * @throws StructureException
192         */
193        public AFPChain alignPermuted(Atom[] ca1, Atom[] ca2, Object param, int cp) throws StructureException {
194                // initial permutation
195                permuteArray(ca2,cp);
196
197                // perform alignment
198                AFPChain afpChain = super.align(ca1, ca2, param);
199
200                // un-permute alignment
201                permuteAFPChain(afpChain, -cp);
202
203                if(afpChain.getName2() != null) {
204                        afpChain.setName2(afpChain.getName2()+" CP="+cp);
205                }
206
207                // Specify the permuted
208                return afpChain;
209        }
210
211        /**
212         * Permute the second protein of afpChain by the specified number of residues.
213         * @param afpChain Input alignment
214         * @param cp Amount leftwards (or rightward, if negative) to shift the
215         * @return A new alignment equivalent to afpChain after the permutations
216         */
217        private static void permuteAFPChain(AFPChain afpChain, int cp) {
218
219                int ca2len = afpChain.getCa2Length();
220
221                //fix up cp to be positive
222                if(cp == 0) {
223                        return;
224                }
225                if(cp < 0) {
226                        cp = ca2len+cp;
227                }
228                if(cp < 0 || cp >= ca2len) {
229                        throw new ArrayIndexOutOfBoundsException(
230                                        "Permutation point ("+cp+") must be between -ca2.length and ca2.length-1" );
231                }
232
233                // Fix up optAln
234                permuteOptAln(afpChain,cp);
235
236                if(afpChain.getBlockNum() > 1)
237                        afpChain.setSequentialAlignment(false);
238                // fix up matrices
239                // ca1 corresponds to row indices, while ca2 corresponds to column indices.
240                afpChain.setDistanceMatrix(permuteMatrix(afpChain.getDistanceMatrix(),0,-cp));
241                // this is square, so permute both
242                afpChain.setDisTable2(permuteMatrix(afpChain.getDisTable2(),-cp,-cp));
243
244                //TODO fix up other AFP parameters?
245
246        }
247
248        /**
249         * Permutes <i>mat</i> by moving the rows of the matrix upwards by <i>cp</i>
250         * rows.
251         * @param mat The original matrix
252         * @param cpRows Number of rows upward to move entries
253         * @param cpCols Number of columns leftward to move entries
254         * @return The permuted matrix
255         */
256        private static Matrix permuteMatrix(Matrix mat, int cpRows, int cpCols) {
257                //fix up cp to be positive
258                if(cpRows == 0 && cpCols == 0) {
259                        return mat.copy();
260                }
261                if(cpRows < 0) {
262                        cpRows = mat.getRowDimension()+cpRows;
263                }
264                if(cpRows < 0 || cpRows >= mat.getRowDimension()) {
265                        throw new ArrayIndexOutOfBoundsException( String.format(
266                                        "Can't permute rows by %d: only %d rows.",
267                                        cpRows, mat.getRowDimension() )
268                        );
269                }
270
271                if(cpCols < 0) {
272                        cpCols = mat.getColumnDimension()+cpCols;
273                }
274                if(cpCols < 0 || cpCols >= mat.getColumnDimension()) {
275                        throw new ArrayIndexOutOfBoundsException( String.format(
276                                        "Can't permute cols by %d: only %d rows.",
277                                        cpCols, mat.getColumnDimension() )
278                        );
279                }
280
281                int[] rows = new int[mat.getRowDimension()];
282                for(int i=0;i<rows.length;i++) {
283                        rows[i] = (i+cpRows)%rows.length;
284                }
285                int[] cols = new int[mat.getColumnDimension()];
286                for(int i=0;i<cols.length;i++) {
287                        cols[i] = (i+cpCols)%cols.length;
288                }
289
290                Matrix newMat = mat.getMatrix(rows, cols);
291                assert(newMat.getRowDimension() == mat.getRowDimension());
292                assert(newMat.getColumnDimension() == mat.getColumnDimension());
293                assert(newMat.get(0, 0) ==
294                        mat.get(cpRows%mat.getRowDimension(), cpCols%mat.getColumnDimension()));
295
296
297                return newMat;
298        }
299
300        /**
301         * Modifies the {@link AFPChain#setOptAln(int[][][]) optAln} of an AFPChain
302         * by permuting the second protein.
303         *
304         * Sets residue numbers in the second protein to <i>(i-cp)%len</i>
305         *
306         * @param afpChain
307         * @param cp Amount leftwards (or rightward, if negative) to shift the
308         */
309        private static void permuteOptAln(AFPChain afpChain, int cp)
310        {
311                int ca2len = afpChain.getCa2Length();
312
313                if( ca2len <= 0) {
314                        throw new IllegalArgumentException("No Ca2Length specified in "+afpChain);
315                }
316
317                // Allow negative cp points for convenience.
318                if(cp == 0) {
319                        return;
320                }
321                if(cp <= -ca2len || cp >= ca2len) {
322                        // could just take cp%ca2len, but probably its a bug if abs(cp)>=ca2len
323                        throw new ArrayIndexOutOfBoundsException( String.format(
324                                        "Permutation point %d must be between %d and %d for %s",
325                                        cp, 1-ca2len,ca2len-1, afpChain.getName2() ) );
326                }
327                if(cp < 0) {
328                        cp = cp + ca2len;
329                }
330
331                // the unprocessed alignment
332                int[][][] optAln = afpChain.getOptAln();
333                int[] optLen = afpChain.getOptLen();
334
335                // the processed alignment
336                List<List<List<Integer>>> blocks = new ArrayList<List<List<Integer>>>(afpChain.getBlockNum()*2);
337
338                //Update residue indices
339                // newi = (oldi-cp) % N
340                for(int block = 0; block < afpChain.getBlockNum(); block++) {
341                        if(optLen[block]<1)
342                                continue;
343
344                        // set up storage for the current block
345                        List<List<Integer>> currBlock = new ArrayList<List<Integer>>(2);
346                        currBlock.add( new ArrayList<Integer>());
347                        currBlock.add( new ArrayList<Integer>());
348                        blocks.add(currBlock);
349
350                        // pos = 0 case
351                        currBlock.get(0).add( optAln[block][0][0] );
352                        currBlock.get(1).add( (optAln[block][1][0]+cp ) % ca2len);
353
354                        for(int pos = 1; pos < optLen[block]; pos++) {
355                                //check if we need to start a new block
356                                //this happens when the new alignment crosses the protein terminus
357                                if( optAln[block][1][pos-1]+cp<ca2len &&
358                                                optAln[block][1][pos]+cp >= ca2len) {
359                                        currBlock = new ArrayList<List<Integer>>(2);
360                                        currBlock.add( new ArrayList<Integer>());
361                                        currBlock.add( new ArrayList<Integer>());
362                                        blocks.add(currBlock);
363                                }
364                                currBlock.get(0).add( optAln[block][0][pos] );
365                                currBlock.get(1).add( (optAln[block][1][pos]+cp ) % ca2len);
366                        }
367                }
368
369                // save permuted blocks to afpChain
370                assignOptAln(afpChain,blocks);
371        }
372
373        /**
374         * Sometimes it's convenient to store an alignment using java collections,
375         * where <tt>blocks.get(blockNum).get(0).get(pos)</tt> specifies the aligned
376         * residue at position <i>pos</i> of block <i>blockNum</i> of the first
377         * protein.
378         *
379         * This method takes such a collection and stores it into the afpChain's
380         * {@link AFPChain#setOptAln(int[][][]) optAln}, setting the associated
381         * length variables as well.
382         *
383         * @param afpChain
384         * @param blocks
385         */
386        private static void assignOptAln(AFPChain afpChain, List<List<List<Integer>>> blocks)
387        {
388
389                int[][][] optAln = new int[blocks.size()][][];
390                int[] optLen = new int[blocks.size()];
391                int optLength = 0;
392                int numBlocks = blocks.size();
393
394                for(int block = 0; block < numBlocks; block++) {
395                        // block should be 2xN rectangular
396                        assert(blocks.get(block).size() == 2);
397                        assert( blocks.get(block).get(0).size() == blocks.get(block).get(1).size());
398
399                        optLen[block] = blocks.get(block).get(0).size();
400                        optLength+=optLen[block];
401
402                        optAln[block] = new int[][] {
403                                        new int[optLen[block]],
404                                        new int[optLen[block]]
405                        };
406                        for(int pos = 0; pos < optLen[block]; pos++) {
407                                optAln[block][0][pos] = blocks.get(block).get(0).get(pos);
408                                optAln[block][1][pos] = blocks.get(block).get(1).get(pos);
409                        }
410                }
411
412
413                afpChain.setBlockNum(numBlocks);
414                afpChain.setOptAln(optAln);
415                afpChain.setOptLen(optLen);
416                afpChain.setOptLength(optLength);
417
418                // TODO I don't know what these do. Should they be set?
419                //afpChain.setBlockSize(blockSize);
420                //afpChain.setBlockResList(blockResList);
421                //afpChain.setChainLen(chainLen);
422
423        }
424
425        /**
426         * Finds the optimal alignment between two proteins allowing for a circular
427         * permutation (CP).
428         *
429         * The precise algorithm is controlled by the
430         * {@link OptimalCECPParameters parameters}. If the parameter
431         * {@link OptimalCECPParameters#isTryAllCPs() tryAllCPs} is true, all possible
432         * CP sites are tried and the optimal site is returned. Otherwise, the
433         * {@link OptimalCECPParameters#getCPPoint() cpPoint} parameter is used to
434         * determine the CP point, greatly reducing the computation required.
435         *
436         * @param ca1 CA atoms of the first protein
437         * @param ca2 CA atoms of the second protein
438         * @param param {@link CeParameters} object
439         * @return The best-scoring alignment
440         * @throws StructureException
441         *
442         * @see #alignOptimal(Atom[], Atom[], Object, AFPChain[])
443         */
444        @Override
445        public AFPChain align(Atom[] ca1, Atom[] ca2, Object param)
446        throws StructureException
447        {
448                if(params.isTryAllCPs()) {
449                        return alignOptimal(ca1,ca2,param,null);
450                } else {
451                        int cpPoint = params.getCPPoint();
452                        return alignPermuted(ca1, ca2, param, cpPoint);
453                }
454        }
455
456        /**
457         * Finds the optimal alignment between two proteins allowing for a circular
458         * permutation (CP).
459         *
460         * This algorithm performs a CE alignment for each possible CP site. This is
461         * quite slow. Use {@link #alignHeuristic(Atom[], Atom[], Object)} for a
462         * faster algorithm.
463         *
464         * @param ca1 CA atoms of the first protein
465         * @param ca2 CA atoms of the second protein
466         * @param param {@link CeParameters} object
467         * @param alignments If not null, should be an empty array of the same length as
468         *  ca2. This will be filled with the alignments from permuting ca2 by
469         *  0 to n-1 residues.
470         * @return The best-scoring alignment
471         * @throws StructureException
472         */
473        public AFPChain alignOptimal(Atom[] ca1, Atom[] ca2, Object param, AFPChain[] alignments)
474        throws StructureException
475        {
476                long startTime = System.currentTimeMillis();
477
478                if(alignments != null && alignments.length != ca2.length) {
479                        throw new IllegalArgumentException("scores param should have same length as ca2");
480                }
481
482                AFPChain unaligned = super.align(ca1, ca2, param);
483                AFPChain bestAlignment = unaligned;
484
485                if(debug) {
486                        // print progress bar header
487                        System.out.print("|");
488                        for(int cp=1;cp<ca2.length-1;cp++) {
489                                System.out.print("=");
490                        }
491                        System.out.println("|");
492                        System.out.print(".");
493                }
494
495                if(alignments != null) {
496                        alignments[0] = unaligned;
497                }
498
499                for(int cp=1;cp<ca2.length;cp++) {
500                        // clone ca2 to prevent side effects from propegating
501                        Atom[] ca2p = StructureTools.cloneAtomArray(ca2);
502
503                        //permute one each time. Alters ca2p as a side effect
504                        AFPChain currentAlignment = alignPermuted(ca1,ca2p,param,cp);
505
506                        // increment progress bar
507                        if(debug) System.out.print(".");
508
509                        // fix up names, since cloning ca2 wipes it
510
511                        if (ca2.length!=0 && ca2[0].getGroup().getChain()!=null && ca2[0].getGroup().getChain().getStructure()!=null) {
512                                currentAlignment.setName2(ca2[0].getGroup().getChain().getStructure().getName()+" CP="+cp);
513                        }
514
515                        double currentScore = currentAlignment.getAlignScore();
516
517                        if(alignments != null) {
518                                alignments[cp] = currentAlignment;
519                        }
520
521                        if(currentScore>bestAlignment.getAlignScore()) {
522                                bestAlignment = currentAlignment;
523                        }
524                }
525                if(debug) {
526                        long elapsedTime = System.currentTimeMillis()-startTime;
527                        System.out.println();
528                        System.out.format("%d alignments took %.4f s (%.1f ms avg)\n",
529                                        ca2.length, elapsedTime/1000., (double)elapsedTime/ca2.length);
530                }
531
532
533                return bestAlignment;
534
535        }
536
537
538
539
540        public static void main(String[] args){
541                try {
542                        String name1, name2;
543
544                        //int[] cps= new int[] {};
545
546                        //Concanavalin
547                        //name1 = "2pel.A";
548                        //name2 = "3cna";
549                        //cps = new int[] {122,0,3};
550
551                        //small case
552                        name1 = "d1qdmA1";
553                        //name1 = "1QDM.A";
554                        name2 = "d1nklA_";
555                        /*cps = new int[] {
556                                        //41, // CECP optimum
557                                        19,59, // unpermuted local minima in TM-score
558                                        //39, // TM-score optimum
559                                        0,
560                        };*/
561
562                        //1itb selfsymmetry
563                        //name1 = "1ITB.A";
564                        //name2 = "1ITB.A";
565                        //cps = new int[] {92};
566
567                        name1=name2 = "2LSQ";
568
569                        OptimalCECPMain ce = new OptimalCECPMain();
570                        CeParameters params = (CeParameters) ce.getParameters();
571                        ce.setParameters(params);
572
573                        AtomCache cache = new AtomCache();
574
575                        Atom[] ca1 = cache.getAtoms(name1);
576                        Atom[] ca2 = cache.getAtoms(name2);
577
578                        AFPChain afpChain;
579
580
581
582                        // find optimal solution
583                        AFPChain[] alignments = new AFPChain[ca2.length];
584                        afpChain = ce.alignOptimal(ca1, ca2, params, alignments);
585                        System.out.format("Optimal Score: %.2f\n", afpChain.getAlignScore());
586
587                        System.out.println("Pos\tScore\tTMScore\tLen\tRMSD\tBlocks");
588                        for(int i = 0; i< alignments.length; i++) {
589                                double tm = AFPChainScorer.getTMScore(alignments[i], ca1, ca2);
590                                System.out.format("%d\t%.2f\t%.2f\t%d\t%.2f\t%d\n",
591                                                i,
592                                                alignments[i].getAlignScore(),
593                                                tm,
594                                                alignments[i].getOptLength(),
595                                                alignments[i].getTotalRmsdOpt(),
596                                                alignments[i].getBlockNum()
597                                );
598                        }
599
600                        //displayAlignment(afpChain,ca1,ca2);
601
602                        // permuted alignment
603                        //for(int cp : cps) {
604                                // new copy of ca2, since alignPermuted has side effects
605                                //Atom[] ca2clone = cache.getAtoms(name2);
606                                //afpChain = ce.alignPermuted(ca1, ca2clone, params, cp);
607                                //displayAlignment(afpChain, ca1, ca2);
608
609                                //displayAlignment(alignments[cp],ca1,ca2);
610                        //}
611
612                        // CECP alignment
613                        CeCPMain cecp = new CeCPMain();
614                        afpChain = cecp.align(ca1, ca2);
615                        displayAlignment(afpChain,ca1,ca2);
616
617                        System.out.println("Inspect additional alignments?");
618                        Scanner scanner = new Scanner(System.in);
619                        System.out.print("CP location [0,"+ca2.length+"): ");
620                        while(scanner.hasNext()) {
621                                if(scanner.hasNextInt()) {
622                                        int cp = scanner.nextInt();
623
624                                        if(0<=cp && cp<ca2.length) {
625                                                alignments[cp].setName1(name1);
626                                                alignments[cp].setName2(name2+"@"+cp);
627                                                displayAlignment(alignments[cp],ca1,ca2);
628                                                Thread.sleep(1000);
629                                        }
630                                } else {
631                                        //String next = scanner.nextLine();
632                                }
633                                System.out.print("CP location [0,"+ca2.length+"): ");
634                        }
635                        scanner.close();
636
637                } catch (Exception e) {
638                        e.printStackTrace();
639                }
640        }
641
642        /**
643         * Try showing a the afpChain in a GUI.
644         *
645         * <p>requires additional dependencies biojava-structure-gui and JmolApplet
646         *
647         * @param afpChain
648         * @param ca1
649         * @param ca2
650         * @throws ClassNotFoundException
651         * @throws NoSuchMethodException
652         * @throws InvocationTargetException
653         * @throws IllegalAccessException
654         * @throws StructureException
655         */
656        private static void displayAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, IllegalAccessException, StructureException {
657                Atom[] ca1clone = StructureTools.cloneAtomArray(ca1);
658                Atom[] ca2clone = StructureTools.cloneAtomArray(ca2);
659                if (! GuiWrapper.isGuiModuleInstalled()) {
660                        System.err.println("The biojava-structure-gui and/or JmolApplet modules are not installed. Please install!");
661                        // display alignment in console
662                        System.out.println(afpChain.toCE(ca1clone, ca2clone));
663                } else {
664                        Object jmol = GuiWrapper.display(afpChain,ca1clone,ca2clone);
665                        GuiWrapper.showAlignmentImage(afpChain, ca1clone,ca2clone,jmol);
666                }
667        }
668}