001/* This class is based on the original FATCAT implementation by
002 * <pre>
003 * Yuzhen Ye & Adam Godzik (2003)
004 * Flexible structure alignment by chaining aligned fragment pairs allowing twists.
005 * Bioinformatics vol.19 suppl. 2. ii246-ii255.
006 * https://www.ncbi.nlm.nih.gov/pubmed/14534198
007 * </pre>
008 *
009 * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code.
010 *
011 * This code may be freely distributed and modified under the
012 * terms of the GNU Lesser General Public Licence.  This should
013 * be distributed with the code.  If you do not have a copy,
014 * see:
015 *
016 *      http://www.gnu.org/copyleft/lesser.html
017 *
018 * Copyright for this code is held jointly by the individual
019 * authors.  These should be listed in @author doc comments.
020 *
021 *
022 * Created on Jun 17, 2009
023 * Created by Andreas Prlic - RCSB PDB
024 *
025 */
026
027package org.biojava.nbio.structure.align.fatcat.calc;
028
029import org.biojava.nbio.structure.align.model.AFPChain;
030
031
032/* CDF: cumultive density function, SF: survial function for EVD distribution, Gumbel maximum function*/
033
034/* EVD (extreme value distribution)
035 * Gumbel maximum distribution (longer right tail) (Type II)
036 * pdf (probability density function) f(x) = 1/b exp(-(x-u)/b)) * exp(-exp(-(x-u)/b)))
037 * cdf (cumulative distribution function)  F(x) = exp(-exp(-x))
038 * sf (survial function)              S(x) = 1 - F(x) = 1 - exp(-exp(-x))
039 * Gumbel minimum distribution (longer left tail) (Type I)
040 * pdf (probability density function) f(x) = 1/b exp((x-u)/b)) * exp(-exp((x-u)/b)))
041 * cdf (cumulative distribution function)  F(x) = 1 - exp(-exp(x))
042 * sf (survial function)              S(x) = 1 - F(x) = exp(-exp(x))
043 * the FATCAT follows Gumbel maximum distribution
044 * The survival function is the probability that the variate takes a value greater than x.
045 */
046public class SigEva
047{
048
049
050        double  mu;
051        double  beta;
052        double  mu_a;
053        double  beta_a;
054        double  mu_b;
055        double  beta_b;
056
057        public SigEva(){
058                getPara(3,0);
059        }
060
061
062        public void getPara(int set, int len)
063        {
064                if(set == 1)    {
065                        mu_a = 0.2461;
066                        mu_b = 17.1530;
067                        beta_a = 0.1284;
068                        beta_b = 1.3756;
069                } //fitting, based on the old benchmark (the fold.list is redundant), old parameters
070                else if(set == 2)       {
071                        mu_a = 1.1137;
072                        mu_b = 6.5574;
073                        beta_a = 0.6448;
074                        beta_b = -12.2793;
075                } //score * sqrt(optLen / RMSD), the new best parameters for rigid FATCAT, "NS3"
076                else if(set == 3)       {
077                        mu_a = 0.8440;
078                        mu_b = 30.2160;
079                        beta_a = 0.3525;
080                        beta_b = 18.6652;
081                } //score * sqrt(optLen / (rmsd * (twist + 1))), the new best parameters for flexible FATCAT, "FNS8"
082                else if(set == 4)       {
083                        mu_a = 0.4708;
084                        mu_b = 38.9863;
085                        beta_a = 0.2511;
086                        beta_b = 12.9228;
087                } //score * sqrt(optLen / RMSD), the new best parameters for rigid FATCAT, sparse-sampling=2, modified scoring
088                else if(set == 5)       {
089                        mu_a = 1.3794;
090                        mu_b = -14.4778;
091                        beta_a = 0.7465;
092                        beta_b = -22.9452;
093                } //score * sqrt(optLen / RMSD), the new best parameters for flexible FATCAT, sparse-sampling=2, modified scoring 3
094                else if(set == 6)       {
095                        mu_a = 0.6036;
096                        mu_b = 35.4783;
097                        beta_a = 0.3136;
098                        beta_b = 13.3922;
099                } //score * sqrt(optLen / RMSD), the new best parameters for rigid FATCAT, sparse-sampling=1
100                else if(set == 7)       {
101                        mu_a = 0.7183;
102                        mu_b = 27.9647;
103                        beta_a = 0.4688;
104                        beta_b = -1.3293;
105                } //score * sqrt(optLen / RMSD), the new best parameters for flexible FATCAT, sparse-sampling=1
106                else if(set == 8)       {
107                        mu_a = 0.4813;
108                        mu_b = 34.2051;
109                        beta_a = 0.2618;
110                        beta_b = 12.4581;
111                } //score * sqrt(optLen / RMSD), the new best parameters for rigid FATCAT, sparse-sampling=3, modified scoring
112                else if(set == 9)       {
113                        mu_a = 0.6672;
114                        mu_b = 26.5767;
115                        beta_a = 0.4373;
116                        beta_b = -1.4017;
117                } //score * sqrt(optLen / RMSD), the new best parameters for flexible FATCAT, sparse-sampling=3, modified scoring 2
118                else    {
119                        System.err.println("no corresponding parameter set found!");
120                }
121
122                mu = beta = .0;
123                calMu(len);
124                calBeta(len);
125        }
126
127
128        private void calMu(int len)
129        {
130                mu = mu_a * len + mu_b;
131                //printf("mu = %.4f\n", mu);
132        }
133
134        private void calBeta(int len)
135        {
136                beta = beta_a * len + beta_b;
137                //printf("beta = %.4f\n", beta);
138        }
139
140
141        private int aveLen(int len1, int len2)
142        {
143                //int   len = (int(sqrt(len1 * len2)));
144                //int   len = len1 < len2?len1:len2; //use the minimum length, bad discriment
145                int     len = (int)(0.5 * (len1 + len2));
146                return len;
147        }
148
149        public double calSigAll(FatCatParameters params, AFPChain afpChain){
150
151                int twist = params.getMaxTra();
152                int sparse = params.getSparse();
153                int len1 = afpChain.getCa1Length();
154                int len2 = afpChain.getCa2Length();
155                double score = afpChain.getAlignScore();
156                double rmsd = afpChain.getTotalRmsdOpt();
157                int optLen = afpChain.getOptLength();
158                int r = afpChain.getBlockNum() -1;
159
160                return calSigAll(twist,sparse,len1, len2,score,rmsd,optLen,r);
161
162        }
163
164        private double calSigAll(int twist, int sparse, int len1, int len2, double score, double rmsd, int optLen, int r)
165        {
166                int     len = aveLen(len1, len2);
167                if(sparse == 2) { //use sparse sampling = 2
168                        if(twist == 0)  getPara(4, len); //rigid-FATCAT
169                        else    getPara(5, len);        //flexible-FATCAT
170                }
171                else if(sparse == 3)    { //sparse sampling = 3
172                        if(twist == 0)  getPara(8, len); //rigid-FATCAT
173                        else    getPara(8, len); //flexible-FATCAT
174                }
175                else if(sparse == 1)    { //sparse sampling = 1
176                        if(twist == 0)  getPara(6, len); //rigid-FATCAT
177                        else    getPara(7, len); //flexible-FATCAT
178                }
179                else    { //no sparse sampling or the corresponding sparse parameters are not fitted
180                        if(twist == 0)  getPara(2, len); //rigid-FATCAT
181                        else    getPara(3, len);        //flexible-FATCAT
182                }
183
184                double  mods = normScore(score, rmsd, optLen, r);
185
186                double  t = (mods - mu) / beta;
187
188                double  sf = sF(t);
189                return sf;
190        }
191
192        private double sF(double t){
193                return (1- cDF(t));
194        }
195
196        private double cDF(double t){
197                return (Math.exp(-Math.exp(-t)));
198        }
199
200        //the chaining score is normalized by rmsd, twist and optimal alignment length
201        private double  normScore(double score, double rmsd, int optLen, int r)
202        {
203                //double        score1 = modScore(score, r);
204                double  score1 = score;
205                if(r > 0)       score1 /= Math.sqrt(r + 1);
206                //it is tested that flexible score is more linear relevant to 1/r2 than 1/r
207                if(rmsd < 0.5)  score1 *= Math.sqrt((optLen) / 0.5);
208                else    score1 *= Math.sqrt((optLen) / rmsd);
209                return score1;
210        }
211
212
213        public double calNS(FatCatParameters params, AFPChain afpChain){
214                int len1 = afpChain.getCa1Length();
215                int len2 = afpChain.getCa2Length();
216                double score =afpChain.getAlignScore();
217                double rmsd = afpChain.getTotalRmsdOpt();
218                int optLen  = afpChain.getOptLength();
219                int r = afpChain.getBlockNum() -1;
220                return calNS(len1, len2,score,rmsd,optLen,r);
221        }
222        private double calNS(int len1, int len2, double score, double rmsd, int optlen, int r)
223        {
224                int     len = aveLen(len1, len2);
225                getPara(3, len);
226                double  ns0 = normScore(score, rmsd, optlen, r);
227                double  ns1 = (ns0 - mu) / beta;
228                return ns1;
229        }
230}