001/* This class is based on the original FATCAT implementation by 002 * <pre> 003 * Yuzhen Ye & Adam Godzik (2003) 004 * Flexible structure alignment by chaining aligned fragment pairs allowing twists. 005 * Bioinformatics vol.19 suppl. 2. ii246-ii255. 006 * https://www.ncbi.nlm.nih.gov/pubmed/14534198 007 * </pre> 008 * 009 * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. 010 * 011 * This code may be freely distributed and modified under the 012 * terms of the GNU Lesser General Public Licence. This should 013 * be distributed with the code. If you do not have a copy, 014 * see: 015 * 016 * http://www.gnu.org/copyleft/lesser.html 017 * 018 * Copyright for this code is held jointly by the individual 019 * authors. These should be listed in @author doc comments. 020 * 021 * 022 * Created on Jun 17, 2009 023 * Created by Andreas Prlic - RCSB PDB 024 * 025 */ 026 027package org.biojava.nbio.structure.align.fatcat.calc; 028 029import org.biojava.nbio.structure.align.model.AFPChain; 030 031 032/* CDF: cumultive density function, SF: survial function for EVD distribution, Gumbel maximum function*/ 033 034/* EVD (extreme value distribution) 035 * Gumbel maximum distribution (longer right tail) (Type II) 036 * pdf (probability density function) f(x) = 1/b exp(-(x-u)/b)) * exp(-exp(-(x-u)/b))) 037 * cdf (cumulative distribution function) F(x) = exp(-exp(-x)) 038 * sf (survial function) S(x) = 1 - F(x) = 1 - exp(-exp(-x)) 039 * Gumbel minimum distribution (longer left tail) (Type I) 040 * pdf (probability density function) f(x) = 1/b exp((x-u)/b)) * exp(-exp((x-u)/b))) 041 * cdf (cumulative distribution function) F(x) = 1 - exp(-exp(x)) 042 * sf (survial function) S(x) = 1 - F(x) = exp(-exp(x)) 043 * the FATCAT follows Gumbel maximum distribution 044 * The survival function is the probability that the variate takes a value greater than x. 045 */ 046public class SigEva 047{ 048 049 050 double mu; 051 double beta; 052 double mu_a; 053 double beta_a; 054 double mu_b; 055 double beta_b; 056 057 public SigEva(){ 058 getPara(3,0); 059 } 060 061 062 public void getPara(int set, int len) 063 { 064 if(set == 1) { 065 mu_a = 0.2461; 066 mu_b = 17.1530; 067 beta_a = 0.1284; 068 beta_b = 1.3756; 069 } //fitting, based on the old benchmark (the fold.list is redundant), old parameters 070 else if(set == 2) { 071 mu_a = 1.1137; 072 mu_b = 6.5574; 073 beta_a = 0.6448; 074 beta_b = -12.2793; 075 } //score * sqrt(optLen / RMSD), the new best parameters for rigid FATCAT, "NS3" 076 else if(set == 3) { 077 mu_a = 0.8440; 078 mu_b = 30.2160; 079 beta_a = 0.3525; 080 beta_b = 18.6652; 081 } //score * sqrt(optLen / (rmsd * (twist + 1))), the new best parameters for flexible FATCAT, "FNS8" 082 else if(set == 4) { 083 mu_a = 0.4708; 084 mu_b = 38.9863; 085 beta_a = 0.2511; 086 beta_b = 12.9228; 087 } //score * sqrt(optLen / RMSD), the new best parameters for rigid FATCAT, sparse-sampling=2, modified scoring 088 else if(set == 5) { 089 mu_a = 1.3794; 090 mu_b = -14.4778; 091 beta_a = 0.7465; 092 beta_b = -22.9452; 093 } //score * sqrt(optLen / RMSD), the new best parameters for flexible FATCAT, sparse-sampling=2, modified scoring 3 094 else if(set == 6) { 095 mu_a = 0.6036; 096 mu_b = 35.4783; 097 beta_a = 0.3136; 098 beta_b = 13.3922; 099 } //score * sqrt(optLen / RMSD), the new best parameters for rigid FATCAT, sparse-sampling=1 100 else if(set == 7) { 101 mu_a = 0.7183; 102 mu_b = 27.9647; 103 beta_a = 0.4688; 104 beta_b = -1.3293; 105 } //score * sqrt(optLen / RMSD), the new best parameters for flexible FATCAT, sparse-sampling=1 106 else if(set == 8) { 107 mu_a = 0.4813; 108 mu_b = 34.2051; 109 beta_a = 0.2618; 110 beta_b = 12.4581; 111 } //score * sqrt(optLen / RMSD), the new best parameters for rigid FATCAT, sparse-sampling=3, modified scoring 112 else if(set == 9) { 113 mu_a = 0.6672; 114 mu_b = 26.5767; 115 beta_a = 0.4373; 116 beta_b = -1.4017; 117 } //score * sqrt(optLen / RMSD), the new best parameters for flexible FATCAT, sparse-sampling=3, modified scoring 2 118 else { 119 System.err.println("no corresponding parameter set found!"); 120 } 121 122 mu = beta = .0; 123 calMu(len); 124 calBeta(len); 125 } 126 127 128 private void calMu(int len) 129 { 130 mu = mu_a * len + mu_b; 131 //printf("mu = %.4f\n", mu); 132 } 133 134 private void calBeta(int len) 135 { 136 beta = beta_a * len + beta_b; 137 //printf("beta = %.4f\n", beta); 138 } 139 140 141 private int aveLen(int len1, int len2) 142 { 143 //int len = (int(sqrt(len1 * len2))); 144 //int len = len1 < len2?len1:len2; //use the minimum length, bad discriment 145 int len = (int)(0.5 * (len1 + len2)); 146 return len; 147 } 148 149 public double calSigAll(FatCatParameters params, AFPChain afpChain){ 150 151 int twist = params.getMaxTra(); 152 int sparse = params.getSparse(); 153 int len1 = afpChain.getCa1Length(); 154 int len2 = afpChain.getCa2Length(); 155 double score = afpChain.getAlignScore(); 156 double rmsd = afpChain.getTotalRmsdOpt(); 157 int optLen = afpChain.getOptLength(); 158 int r = afpChain.getBlockNum() -1; 159 160 return calSigAll(twist,sparse,len1, len2,score,rmsd,optLen,r); 161 162 } 163 164 private double calSigAll(int twist, int sparse, int len1, int len2, double score, double rmsd, int optLen, int r) 165 { 166 int len = aveLen(len1, len2); 167 if(sparse == 2) { //use sparse sampling = 2 168 if(twist == 0) getPara(4, len); //rigid-FATCAT 169 else getPara(5, len); //flexible-FATCAT 170 } 171 else if(sparse == 3) { //sparse sampling = 3 172 if(twist == 0) getPara(8, len); //rigid-FATCAT 173 else getPara(8, len); //flexible-FATCAT 174 } 175 else if(sparse == 1) { //sparse sampling = 1 176 if(twist == 0) getPara(6, len); //rigid-FATCAT 177 else getPara(7, len); //flexible-FATCAT 178 } 179 else { //no sparse sampling or the corresponding sparse parameters are not fitted 180 if(twist == 0) getPara(2, len); //rigid-FATCAT 181 else getPara(3, len); //flexible-FATCAT 182 } 183 184 double mods = normScore(score, rmsd, optLen, r); 185 186 double t = (mods - mu) / beta; 187 188 double sf = sF(t); 189 return sf; 190 } 191 192 private double sF(double t){ 193 return (1- cDF(t)); 194 } 195 196 private double cDF(double t){ 197 return (Math.exp(-Math.exp(-t))); 198 } 199 200 //the chaining score is normalized by rmsd, twist and optimal alignment length 201 private double normScore(double score, double rmsd, int optLen, int r) 202 { 203 //double score1 = modScore(score, r); 204 double score1 = score; 205 if(r > 0) score1 /= Math.sqrt(r + 1); 206 //it is tested that flexible score is more linear relevant to 1/r2 than 1/r 207 if(rmsd < 0.5) score1 *= Math.sqrt((optLen) / 0.5); 208 else score1 *= Math.sqrt((optLen) / rmsd); 209 return score1; 210 } 211 212 213 public double calNS(FatCatParameters params, AFPChain afpChain){ 214 int len1 = afpChain.getCa1Length(); 215 int len2 = afpChain.getCa2Length(); 216 double score =afpChain.getAlignScore(); 217 double rmsd = afpChain.getTotalRmsdOpt(); 218 int optLen = afpChain.getOptLength(); 219 int r = afpChain.getBlockNum() -1; 220 return calNS(len1, len2,score,rmsd,optLen,r); 221 } 222 private double calNS(int len1, int len2, double score, double rmsd, int optlen, int r) 223 { 224 int len = aveLen(len1, len2); 225 getPara(3, len); 226 double ns0 = normScore(score, rmsd, optlen, r); 227 double ns1 = (ns0 - mu) / beta; 228 return ns1; 229 } 230}