001/* This class is based on the original FATCAT implementation by
002 * <pre>
003 * Yuzhen Ye & Adam Godzik (2003)
004 * Flexible structure alignment by chaining aligned fragment pairs allowing twists.
005 * Bioinformatics vol.19 suppl. 2. ii246-ii255.
006 * https://www.ncbi.nlm.nih.gov/pubmed/14534198
007 * </pre>
008 *
009 * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code.
010 *
011 * This code may be freely distributed and modified under the
012 * terms of the GNU Lesser General Public Licence.  This should
013 * be distributed with the code.  If you do not have a copy,
014 * see:
015 *
016 *      http://www.gnu.org/copyleft/lesser.html
017 *
018 * Copyright for this code is held jointly by the individual
019 * authors.  These should be listed in @author doc comments.
020 *
021 *
022 * Created on Jun 17, 2009
023 * Created by Andreas Prlic - RCSB PDB
024 *
025 */
026
027package org.biojava.nbio.structure.align.fatcat.calc;
028
029
030import javax.vecmath.Matrix4d;
031
032import org.biojava.nbio.structure.*;
033import org.biojava.nbio.structure.geometry.SuperPositions;
034
035
036
037public class StructureAlignmentOptimizer
038{
039
040        //private static final boolean showAlig = false;
041
042        int pro1Len;
043        int pro2Len;
044        int maxLen;
045        Atom[] cod1;
046        Atom[] cod2;
047
048        int[][] equSet;
049        int equLen;
050        int equLen0;
051        double[][]sij;
052
053        int maxKeepStep;
054        int keepStep;
055
056        double  Dc;   //the criteria for structural equivalent residues, eg. 3.0 (CE), 6.0(ProSup)
057        double  rmsdCut;//the criteria for stoping optimization
058        double  increase;
059        double  stopLenPer;
060        double  stopRmsdPer;
061        double  stopRmsd;
062
063        double  gapIni;
064        double  gapExt;
065
066        double rmsd;
067
068        private static final boolean debug = FatCatAligner.debug;
069
070        /**
071         * optimize the structural alignment by update the equivalent residues
072         * and then run dynamic programming
073         * input: len1 the length of structure 1; c1: the structure information of 1
074         *        len2 the length of structure 2; c2: the structure information of 2
075         *        iniLen and iniSet is the length and list of initial equivalent residues
076         */
077
078        public StructureAlignmentOptimizer(int b1, int end1, Atom[] c1, int b2, int end2, Atom[] c2,
079                        int iniLen, int[][] iniSet) throws StructureException{
080
081                //System.err.println("optimizing range:" + b1 + "-" + end1 + "("+ (end1-b1) + ") b2:  " + b2 + "-" + end2+ "("+ (end2-b2) + ") iniLen " + iniLen);
082                //System.out.println("ca1: " + c1.length + " ca2: " + c2.length);
083
084                int len1 = end1-b1;
085                int len2 = end2-b2;
086
087                //System.err.println("len1: " + len1 + " len2:" + len2);
088
089                pro1Len = len1;
090                pro2Len = len2;
091
092                cod1 = new Atom[len1];
093                cod2 = new Atom[len2];
094
095                for(int i = 0; i < len1; i ++)      {
096                        Atom a = c1[i+b1];
097                        //cod1[i] = (Atom)a.clone();
098                        Group parent = (Group)a.getGroup().clone();
099                        //cod1[i].setParent(parent);
100                        cod1[i] = parent.getAtom(a.getName());
101                        //cod1[i] = c1[i];
102                }
103                for(int i = 0; i < len2; i ++)      {
104                        Atom a = c2[i+b2];
105                        //cod2[i]= (Atom)a.clone();
106                        Group parent = (Group)a.getGroup().clone();
107                        //cod2[i].setParent(parent);
108                        cod2[i] = parent.getAtom(a.getName());
109                        //cod2[i] = c2[i];
110                }
111
112
113                //initial equivalent sets
114                maxLen = (len1 < len2)?len1:len2;
115
116                equSet = new int[2][maxLen];
117                for(int i = 0; i < iniLen; i ++)    {
118
119                        equSet[0][i] = iniSet[0][i];
120                        equSet[1][i] = iniSet[1][i];
121                        if(iniSet[0][i] > len1 || iniSet[1][i] > len2)  {
122                                throw new RuntimeException(String.format("StructureAlignmentOptimizer: focus exceeds the protein 1 or 2 length!"));
123                        }
124                }
125
126                equLen    = iniLen;
127                equLen0   = equLen;
128
129                setParameters();
130
131                sij =  new double[pro1Len][pro2Len];
132
133//      if (showAlig)
134//         showCurrentAlignment(iniLen, equSet, "initial alignment");
135
136        }
137
138//   private void showCurrentAlignment(int len, int[][]set, String title){
139//      BiojavaJmol jmol = new BiojavaJmol();
140//      jmol.setTitle(title);
141//
142//      Chain c1 = new ChainImpl();
143//      c1.setName("A");
144//
145//      Chain c2 = new ChainImpl();
146//      c2.setName("B");
147//      for(int i = 0; i < len; i ++)    {
148//         Atom a1 = cod1[set[0][i]];
149//         Atom a2 = cod2[set[1][i]];
150//
151//         Group g1 = a1.getParent();
152//         Group g2 = a2.getParent();
153//
154//         try {
155//            Group n1 = new AminoAcidImpl();
156//            n1.setPDBCode(g1.getPDBCode());
157//            n1.setPDBName(g1.getPDBName());
158//            n1.addAtom(a1);
159//
160//            Group n2 = new AminoAcidImpl();
161//            n2.setPDBCode(g2.getPDBCode());
162//            n2.setPDBName(g2.getPDBName());
163//            n2.addAtom(a2);
164//
165//
166//            c1.addGroup(n1);
167//            c2.addGroup(n2);
168//         } catch (Exception e){
169//            //
170//         }
171//
172//      }
173//
174//      Structure s = new StructureImpl();
175//      s.setPDBCode(title);
176//      List<Chain> model1 = new ArrayList<Chain>();
177//      model1.add(c1);
178//      List<Chain> model2 = new ArrayList<Chain>();
179//      model2.add(c2);
180//      s.addModel(model1);
181//      s.addModel(model2);
182//      s.setNmr(true);
183//      jmol.setStructure(s);
184//      jmol.evalString("select *; backbone 0.4; wireframe off; spacefill off; " +
185//      "select not protein and not solvent; spacefill on;");
186//      jmol.evalString("select */1 ; color red; model 1; ");
187//
188//      // now show both models again.
189//      jmol.evalString("model 0;");
190//
191//   }
192
193
194        /** run the optimization
195         *
196         * @param maxi maximum nr. of iterations
197         * @throws StructureException
198         */
199        public void runOptimization(int maxi) throws StructureException{
200                superimposeBySet();
201                if ( debug)
202                        System.err.println("   initial rmsd " + rmsd);
203
204//      if (showAlig)
205//         showCurrentAlignment(equLen, equSet, "after initial superimposeBySet Len:" +equLen + " rmsd:" +rmsd);
206
207                maxKeepStep = 4;
208                keepStep = 0;
209
210                optimize(maxi);
211        }
212
213        /**
214         * refer CE, similarity = Dc - dij, Dc is increased by 0.5 each cycle,
215         * optimization continues until either
216         * i)alignment length is less than 95% of alignment length before optimization
217         * ii)rmsd is less than 110% of rmsd at the cycle when condition i) was first satisfied
218         */
219        private void setParameters()
220        {
221                Dc = 3.0; //Dc = 2.0
222                increase = 0.5;
223                stopLenPer = 0.95;
224                stopRmsdPer = 1.1;
225                stopRmsd = -1.0;
226                rmsdCut = 3.0;
227                gapIni = 5.0;
228                gapExt = 0.5;
229        }
230
231
232        /**
233         * superimpose two structures according to the equivalent residues
234         */
235        private void superimposeBySet ()
236        throws StructureException
237        {
238
239                //extract the coordinations of equivalent residues
240                Atom[] tmp1 = new Atom[equLen];
241                Atom[] tmp2 = new Atom[equLen];
242                int     i,  r1, r2;
243
244
245                for(i = 0; i < equLen; i ++)    {
246                        r1 = equSet[0][i];
247                        r2 = equSet[1][i];
248
249                        tmp1[i] =       cod1[ r1 ];
250                        tmp2[i] = (Atom)cod2[ r2 ].clone(); // have to be cloned!
251                        //tmp2[i] = cod2[ r2 ];
252
253
254                        /*try {
255                                System.out.println("before superimpos: " + equSet[0][i]+"-"+ equSet[1][i]+ " dist:" + Calc.getDistance(tmp1[i], cod2[equSet[1][i]]));
256                        } catch (Exception e){
257                                e.printStackTrace();
258                        }*/
259                }
260
261                //superimpose the equivalent residues
262                Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(tmp1), 
263                                Calc.atomsToPoints(tmp2));
264
265                Calc.transform(tmp2, trans);
266
267                // weird, why does it take the RMSD before the rotation?
268                // the rmsd is only for the subset contained in the tmp arrays.
269                rmsd = Calc.rmsd(tmp1,tmp2);
270
271                //System.err.println("rmsd after superimpose by set: " + rmsd);
272
273                //transform structure 2 according to the superimposition of the equivalent residues
274                Calc.transform(cod2, trans);
275
276//      for(i = 0; i < equLen; i ++)    {
277//         try {
278//            System.err.println("after superimpos: " + equSet[0][i]+"-"+ equSet[1][i]+ " dist:" + Calc.getDistance(tmp1[i], cod2[equSet[1][i]]));
279//         } catch (Exception e){
280//            e.printStackTrace();
281//         }
282//      }
283                
284        }
285
286
287        private void optimize(int maxi) throws StructureException
288        {
289                long optStart = System.currentTimeMillis();
290                if ( debug)
291                        System.out.println("Optimizing up to " + maxi + " iterations.. ");
292                boolean ifstop = true;;
293                int     i, alnLen;
294                alnLen = 0;
295
296                int[][]     alnList =  new int[2][maxLen];
297                for(i = 0; i < maxi; i ++)      {
298
299                        //if ( debug){
300                        //   System.out.println("optimize iteration: " + i);
301                        //}
302
303                        calMatrix();
304
305                        FCAlignHelper aln = new FCAlignHelper(sij,pro1Len,pro2Len,gapIni, gapExt);
306
307                        //ALIGN0 *aln = new ALIGN0(sij, pro1Len, pro2Len, gapIni, gapExt);
308                        alnLen = aln.getAlignPos(alnList);
309                        if(alnLen < 3)  ifstop = true; //very rare, mark by Y.Y on 5/1/03
310                        else    ifstop = defineEquPos(alnLen, alnList);
311
312                        if(ifstop)      break;
313                        Dc += increase;
314
315//         if (showAlig)
316//            if ( i == 0 )
317//               showCurrentAlignment(alnLen, alnList,  "optimizing alignment - after " + i + " iterations alnLen:" + alnLen + " rmsd " + rmsd);
318                }
319
320                if  (debug){
321                        if(i < maxi)    {
322                                System.out.println(String.format("   optimize converged at %d iterations\n", i));
323                        }
324                        else    System.out.println("   optimize stop without convergence\n");
325                        System.out.println("optimization time: " + (System.currentTimeMillis() - optStart) + " ms.");
326                }
327
328//      if (showAlig)
329//         showCurrentAlignment(alnLen, alnList,  "optimizing alignment - after " + i + " iterations alnLen:" + alnLen + " rmsd " + rmsd);
330        }
331
332        //--------------------------------------------------------------------------------------------------------
333        //the definition of matrix between residues:
334        //              Sij = Dc^2 - Dij^2 if Dij <= Dc
335        //                    0            else
336        //--------------------------------------------------------------------------------------------------------
337        private void calMatrix() throws StructureException
338        {
339                int     i, j;
340                double  dis;
341                for(i = 0; i < pro1Len; i ++)   {
342                        for(j = 0; j < pro2Len; j ++)   {
343                                dis = Calc.getDistance(cod1[i],cod2[j]);
344
345                                if(dis < Dc) {
346                                        sij[i][j] = Dc - dis;
347                                }
348                                else  {
349                                        sij[i][j] = 0;
350                                }
351                        }
352                }
353        }
354
355        /**
356         * the equivalent residues: residues where Dij &lt;= Dc and i,j is an aligned pair
357         * use the previous superimposing
358         */
359        private boolean defineEquPos(int alnLen, int[][] alnList)
360        throws StructureException
361        {
362                int     i, r1, r2;
363                int     equLenOld = equLen;
364                int[][]    equSetOld = new int[2][equLenOld];
365                for(i = 0; i < equLen; i ++)    {
366                        equSetOld[0][i] = equSet[0][i];
367                        equSetOld[1][i] = equSet[1][i];
368                }
369                double  rmsdOld = rmsd;
370                double  dis;
371                equLen = 0;
372                //if (debug)
373                //   System.out.println(String.format(" OPT: Dc %f, equLenOld %d, rmsdOld %f, alnLen %d", Dc, equLenOld, rmsdOld, alnLen));
374                for(i = 0; i < alnLen; i ++)    {
375                        r1 = alnList[0][i];
376                        r2 = alnList[1][i];
377                        dis = Calc.getDistance(cod1[r1],cod2[r2]);
378                        if(dis <= Dc)   {
379                                //System.out.println(r1 + "-"  + r2 + " d:" + dis);
380                                equSet[0][equLen] = r1;
381                                equSet[1][equLen] = r2;
382                                equLen ++;
383                        }
384                }
385
386                superimposeBySet();
387
388                //if (debug)
389                //   System.out.println(String.format(" OPT: new equLen %d rmsd %f", equLen, rmsd));
390
391                boolean     ifstop = false;
392
393//      if (debug) {
394//         System.out.print(" OPT: rmsd diff: " + Math.abs(rmsd - rmsdOld) + " equLens: " + equLenOld + ":"+ equLen);
395//         if ( Math.abs(rmsd - rmsdOld) < 1e-10)
396//            System.out.println(" NO DIFF!");
397//         else
398//            System.out.println(" DIFF!");
399//      }
400
401                if((Math.abs(rmsd - rmsdOld) < 1e-10 ) && (equLenOld == equLen)) keepStep ++;
402                else    keepStep = 0;
403
404                if(keepStep > maxKeepStep)      {
405                        ifstop = true; //converge
406                } //allowing up to maxKeepStep instead of 1 is essential for some special cases
407                else if(stopRmsd < 0) {
408                        ifstop = false; //condition 1, continue
409                }
410                else if((rmsd <= stopRmsd * stopRmsdPer) || (rmsd < rmsdCut)) {
411                        ifstop = false; //condition 2, continue
412                } //rmsdCut is adopted or not? to be tuned
413                else    {
414                        ifstop = true; //get worse
415                }
416
417
418                if((stopRmsd <0) && (equLen >= stopLenPer * equLen0))    {
419                        //System.err.println("stopRmsd: " + stopRmsd + " setting to rmsd:" + rmsd);
420                        stopRmsd = rmsd; //condition 1
421                }
422
423                return ifstop;
424        }
425
426
427        public double optimizeResult(int[] optLen, int optLenpos, int[][] list)
428        {
429                optLen[optLenpos] = equLen;
430
431                for(int i = 0; i < equLen; i ++)        {
432                        list[0][i] = equSet[0][i];
433                        list[1][i] = equSet[1][i];
434                }
435                return rmsd;
436        }
437
438}