001/* This class is based on the original FATCAT implementation by
002 * <pre>
003 * Yuzhen Ye & Adam Godzik (2003)
004 * Flexible structure alignment by chaining aligned fragment pairs allowing twists.
005 * Bioinformatics vol.19 suppl. 2. ii246-ii255.
006 * https://www.ncbi.nlm.nih.gov/pubmed/14534198
007 * </pre>
008 *
009 * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code.
010 *
011 * This code may be freely distributed and modified under the
012 * terms of the GNU Lesser General Public Licence.  This should
013 * be distributed with the code.  If you do not have a copy,
014 * see:
015 *
016 *      http://www.gnu.org/copyleft/lesser.html
017 *
018 * Copyright for this code is held jointly by the individual
019 * authors.  These should be listed in @author doc comments.
020 *
021 *
022 * Created on Jun 17, 2009
023 * Created by Andreas Prlic - RCSB PDB
024 *
025 */
026
027package org.biojava.nbio.structure.align.fatcat.calc;
028
029
030import org.biojava.nbio.structure.*;
031import org.biojava.nbio.structure.jama.Matrix;
032
033
034
035public class StructureAlignmentOptimizer
036{
037
038        //private static final boolean showAlig = false;
039
040        int pro1Len;
041        int pro2Len;
042        int maxLen;
043        Atom[] cod1;
044        Atom[] cod2;
045
046        int[][] equSet;
047        int equLen;
048        int equLen0;
049        double[][]sij;
050
051        int maxKeepStep;
052        int keepStep;
053
054        double  Dc;   //the criteria for structural equivalent residues, eg. 3.0 (CE), 6.0(ProSup)
055        double  rmsdCut;//the criteria for stoping optimization
056        double  increase;
057        double  stopLenPer;
058        double  stopRmsdPer;
059        double  stopRmsd;
060
061        double  gapIni;
062        double  gapExt;
063
064        double rmsd;
065
066        private static final boolean debug = FatCatAligner.debug;
067
068        /**
069         * optimize the structural alignment by update the equivalent residues
070         * and then run dynamic programming
071         * input: len1 the length of structure 1; c1: the structure information of 1
072         *        len2 the length of structure 2; c2: the structure information of 2
073         *        iniLen and iniSet is the length and list of initial equivalent residues
074         */
075
076        public StructureAlignmentOptimizer(int b1, int end1, Atom[] c1, int b2, int end2, Atom[] c2,
077                        int iniLen, int[][] iniSet) throws StructureException{
078
079                //System.err.println("optimizing range:" + b1 + "-" + end1 + "("+ (end1-b1) + ") b2:  " + b2 + "-" + end2+ "("+ (end2-b2) + ") iniLen " + iniLen);
080                //System.out.println("ca1: " + c1.length + " ca2: " + c2.length);
081
082                int len1 = end1-b1;
083                int len2 = end2-b2;
084
085                //System.err.println("len1: " + len1 + " len2:" + len2);
086
087                pro1Len = len1;
088                pro2Len = len2;
089
090                cod1 = new Atom[len1];
091                cod2 = new Atom[len2];
092
093                for(int i = 0; i < len1; i ++)      {
094                        Atom a = c1[i+b1];
095                        //cod1[i] = (Atom)a.clone();
096                        Group parent = (Group)a.getGroup().clone();
097                        //cod1[i].setParent(parent);
098                        cod1[i] = parent.getAtom(a.getName());
099                        //cod1[i] = c1[i];
100                }
101                for(int i = 0; i < len2; i ++)      {
102                        Atom a = c2[i+b2];
103                        //cod2[i]= (Atom)a.clone();
104                        Group parent = (Group)a.getGroup().clone();
105                        //cod2[i].setParent(parent);
106                        cod2[i] = parent.getAtom(a.getName());
107                        //cod2[i] = c2[i];
108                }
109
110
111                //initial equivalent sets
112                maxLen = (len1 < len2)?len1:len2;
113
114                equSet = new int[2][maxLen];
115                for(int i = 0; i < iniLen; i ++)    {
116
117                        equSet[0][i] = iniSet[0][i];
118                        equSet[1][i] = iniSet[1][i];
119                        if(iniSet[0][i] > len1 || iniSet[1][i] > len2)  {
120                                throw new RuntimeException(String.format("StructureAlignmentOptimizer: focus exceeds the protein 1 or 2 length!"));
121                        }
122                }
123
124                equLen    = iniLen;
125                equLen0   = equLen;
126
127                setParameters();
128
129                sij =  new double[pro1Len][pro2Len];
130
131//      if (showAlig)
132//         showCurrentAlignment(iniLen, equSet, "initial alignment");
133
134        }
135
136//   private void showCurrentAlignment(int len, int[][]set, String title){
137//      BiojavaJmol jmol = new BiojavaJmol();
138//      jmol.setTitle(title);
139//
140//      Chain c1 = new ChainImpl();
141//      c1.setName("A");
142//
143//      Chain c2 = new ChainImpl();
144//      c2.setName("B");
145//      for(int i = 0; i < len; i ++)    {
146//         Atom a1 = cod1[set[0][i]];
147//         Atom a2 = cod2[set[1][i]];
148//
149//         Group g1 = a1.getParent();
150//         Group g2 = a2.getParent();
151//
152//         try {
153//            Group n1 = new AminoAcidImpl();
154//            n1.setPDBCode(g1.getPDBCode());
155//            n1.setPDBName(g1.getPDBName());
156//            n1.addAtom(a1);
157//
158//            Group n2 = new AminoAcidImpl();
159//            n2.setPDBCode(g2.getPDBCode());
160//            n2.setPDBName(g2.getPDBName());
161//            n2.addAtom(a2);
162//
163//
164//            c1.addGroup(n1);
165//            c2.addGroup(n2);
166//         } catch (Exception e){
167//            //
168//         }
169//
170//      }
171//
172//      Structure s = new StructureImpl();
173//      s.setPDBCode(title);
174//      List<Chain> model1 = new ArrayList<Chain>();
175//      model1.add(c1);
176//      List<Chain> model2 = new ArrayList<Chain>();
177//      model2.add(c2);
178//      s.addModel(model1);
179//      s.addModel(model2);
180//      s.setNmr(true);
181//      jmol.setStructure(s);
182//      jmol.evalString("select *; backbone 0.4; wireframe off; spacefill off; " +
183//      "select not protein and not solvent; spacefill on;");
184//      jmol.evalString("select */1 ; color red; model 1; ");
185//
186//      // now show both models again.
187//      jmol.evalString("model 0;");
188//
189//   }
190
191
192        /** run the optimization
193         *
194         * @param maxi maximum nr. of iterations
195         * @throws StructureException
196         */
197        public void runOptimization(int maxi) throws StructureException{
198                superimposeBySet();
199                if ( debug)
200                        System.err.println("   initial rmsd " + rmsd);
201
202//      if (showAlig)
203//         showCurrentAlignment(equLen, equSet, "after initial superimposeBySet Len:" +equLen + " rmsd:" +rmsd);
204
205                maxKeepStep = 4;
206                keepStep = 0;
207
208                optimize(maxi);
209        }
210
211        /**
212         * refer CE, similarity = Dc - dij, Dc is increased by 0.5 each cycle,
213         * optimization continues until either
214         * i)alignment length is less than 95% of alignment length before optimization
215         * ii)rmsd is less than 110% of rmsd at the cycle when condition i) was first satisfied
216         */
217        private void setParameters()
218        {
219                Dc = 3.0; //Dc = 2.0
220                increase = 0.5;
221                stopLenPer = 0.95;
222                stopRmsdPer = 1.1;
223                stopRmsd = -1.0;
224                rmsdCut = 3.0;
225                gapIni = 5.0;
226                gapExt = 0.5;
227        }
228
229
230        /**
231         * superimpose two structures according to the equivalent residues
232         */
233        private void superimposeBySet ()
234        throws StructureException
235        {
236
237                //extract the coordinations of equivalent residues
238                Atom[] tmp1 = new Atom[equLen];
239                Atom[] tmp2 = new Atom[equLen];
240                int     i,  r1, r2;
241
242
243                for(i = 0; i < equLen; i ++)    {
244                        r1 = equSet[0][i];
245                        r2 = equSet[1][i];
246
247                        tmp1[i] =       cod1[ r1 ];
248                        tmp2[i] = (Atom)cod2[ r2 ].clone(); // have to be cloned!
249                        //tmp2[i] = cod2[ r2 ];
250
251
252                        /*try {
253                                System.out.println("before superimpos: " + equSet[0][i]+"-"+ equSet[1][i]+ " dist:" + Calc.getDistance(tmp1[i], cod2[equSet[1][i]]));
254                        } catch (Exception e){
255                                e.printStackTrace();
256                        }*/
257                }
258
259                //superimpose the equivalent residues
260                SVDSuperimposer svd = new SVDSuperimposer(tmp1, tmp2);
261
262                Matrix    m = svd.getRotation();
263                Atom      t = svd.getTranslation();
264
265                for (Atom a: tmp2) {
266
267                        Calc.rotate(a,m);
268                        Calc.shift(a,t);
269
270                }
271
272                // weird, why does it take the RMSD before the rotation?
273                // the rmsd is only for the subset contained in the tmp arrays.
274                rmsd = SVDSuperimposer.getRMS(tmp1,tmp2);
275
276                //System.err.println("rmsd after superimpose by set: " + rmsd);
277
278                //transform structure 2 according to the superimposition of the equivalent residues
279                for (i = 0 ; i< cod2.length; i++) {
280                        Atom a = cod2[i];
281                        Calc.rotate(a,m);
282                        Calc.shift(a,t);
283
284                }
285
286
287
288//      for(i = 0; i < equLen; i ++)    {
289//         try {
290//            System.err.println("after superimpos: " + equSet[0][i]+"-"+ equSet[1][i]+ " dist:" + Calc.getDistance(tmp1[i], cod2[equSet[1][i]]));
291//         } catch (Exception e){
292//            e.printStackTrace();
293//         }
294//      }
295
296
297
298        }
299
300
301        private void optimize(int maxi) throws StructureException
302        {
303                long optStart = System.currentTimeMillis();
304                if ( debug)
305                        System.out.println("Optimizing up to " + maxi + " iterations.. ");
306                boolean ifstop = true;;
307                int     i, alnLen;
308                alnLen = 0;
309
310                int[][]     alnList =  new int[2][maxLen];
311                for(i = 0; i < maxi; i ++)      {
312
313                        //if ( debug){
314                        //   System.out.println("optimize iteration: " + i);
315                        //}
316
317                        calMatrix();
318
319                        FCAlignHelper aln = new FCAlignHelper(sij,pro1Len,pro2Len,gapIni, gapExt);
320
321                        //ALIGN0 *aln = new ALIGN0(sij, pro1Len, pro2Len, gapIni, gapExt);
322                        alnLen = aln.getAlignPos(alnList);
323                        if(alnLen < 3)  ifstop = true; //very rare, mark by Y.Y on 5/1/03
324                        else    ifstop = defineEquPos(alnLen, alnList);
325
326                        if(ifstop)      break;
327                        Dc += increase;
328
329//         if (showAlig)
330//            if ( i == 0 )
331//               showCurrentAlignment(alnLen, alnList,  "optimizing alignment - after " + i + " iterations alnLen:" + alnLen + " rmsd " + rmsd);
332                }
333
334                if  (debug){
335                        if(i < maxi)    {
336                                System.out.println(String.format("   optimize converged at %d iterations\n", i));
337                        }
338                        else    System.out.println("   optimize stop without convergence\n");
339                        System.out.println("optimization time: " + (System.currentTimeMillis() - optStart) + " ms.");
340                }
341
342//      if (showAlig)
343//         showCurrentAlignment(alnLen, alnList,  "optimizing alignment - after " + i + " iterations alnLen:" + alnLen + " rmsd " + rmsd);
344        }
345
346        //--------------------------------------------------------------------------------------------------------
347        //the definition of matrix between residues:
348        //              Sij = Dc^2 - Dij^2 if Dij <= Dc
349        //                    0            else
350        //--------------------------------------------------------------------------------------------------------
351        private void calMatrix() throws StructureException
352        {
353                int     i, j;
354                double  dis;
355                for(i = 0; i < pro1Len; i ++)   {
356                        for(j = 0; j < pro2Len; j ++)   {
357                                dis = Calc.getDistance(cod1[i],cod2[j]);
358
359                                if(dis < Dc) {
360                                        sij[i][j] = Dc - dis;
361                                }
362                                else  {
363                                        sij[i][j] = 0;
364                                }
365                        }
366                }
367        }
368
369        /**
370         * the equivalent residues: residues where Dij &lt;= Dc and i,j is an aligned pair
371         * use the previous superimposing
372         */
373        private boolean defineEquPos(int alnLen, int[][] alnList)
374        throws StructureException
375        {
376                int     i, r1, r2;
377                int     equLenOld = equLen;
378                int[][]    equSetOld = new int[2][equLenOld];
379                for(i = 0; i < equLen; i ++)    {
380                        equSetOld[0][i] = equSet[0][i];
381                        equSetOld[1][i] = equSet[1][i];
382                }
383                double  rmsdOld = rmsd;
384                double  dis;
385                equLen = 0;
386                //if (debug)
387                //   System.out.println(String.format(" OPT: Dc %f, equLenOld %d, rmsdOld %f, alnLen %d", Dc, equLenOld, rmsdOld, alnLen));
388                for(i = 0; i < alnLen; i ++)    {
389                        r1 = alnList[0][i];
390                        r2 = alnList[1][i];
391                        dis = Calc.getDistance(cod1[r1],cod2[r2]);
392                        if(dis <= Dc)   {
393                                //System.out.println(r1 + "-"  + r2 + " d:" + dis);
394                                equSet[0][equLen] = r1;
395                                equSet[1][equLen] = r2;
396                                equLen ++;
397                        }
398                }
399
400                superimposeBySet();
401
402                //if (debug)
403                //   System.out.println(String.format(" OPT: new equLen %d rmsd %f", equLen, rmsd));
404
405                boolean     ifstop = false;
406
407//      if (debug) {
408//         System.out.print(" OPT: rmsd diff: " + Math.abs(rmsd - rmsdOld) + " equLens: " + equLenOld + ":"+ equLen);
409//         if ( Math.abs(rmsd - rmsdOld) < 1e-10)
410//            System.out.println(" NO DIFF!");
411//         else
412//            System.out.println(" DIFF!");
413//      }
414
415                if((Math.abs(rmsd - rmsdOld) < 1e-10 ) && (equLenOld == equLen)) keepStep ++;
416                else    keepStep = 0;
417
418                if(keepStep > maxKeepStep)      {
419                        ifstop = true; //converge
420                } //allowing up to maxKeepStep instead of 1 is essential for some special cases
421                else if(stopRmsd < 0) {
422                        ifstop = false; //condition 1, continue
423                }
424                else if((rmsd <= stopRmsd * stopRmsdPer) || (rmsd < rmsdCut)) {
425                        ifstop = false; //condition 2, continue
426                } //rmsdCut is adopted or not? to be tuned
427                else    {
428                        ifstop = true; //get worse
429                }
430
431
432                if((stopRmsd <0) && (equLen >= stopLenPer * equLen0))    {
433                        //System.err.println("stopRmsd: " + stopRmsd + " setting to rmsd:" + rmsd);
434                        stopRmsd = rmsd; //condition 1
435                }
436
437                return ifstop;
438        }
439
440
441        public double optimizeResult(int[] optLen, int optLenpos, int[][] list)
442        {
443                optLen[optLenpos] = equLen;
444
445                for(int i = 0; i < equLen; i ++)        {
446                        list[0][i] = equSet[0][i];
447                        list[1][i] = equSet[1][i];
448                }
449                return rmsd;
450        }
451
452}