001/* This class is based on the original FATCAT implementation by 002 * <pre> 003 * Yuzhen Ye & Adam Godzik (2003) 004 * Flexible structure alignment by chaining aligned fragment pairs allowing twists. 005 * Bioinformatics vol.19 suppl. 2. ii246-ii255. 006 * https://www.ncbi.nlm.nih.gov/pubmed/14534198 007 * </pre> 008 * 009 * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. 010 * 011 * This code may be freely distributed and modified under the 012 * terms of the GNU Lesser General Public Licence. This should 013 * be distributed with the code. If you do not have a copy, 014 * see: 015 * 016 * http://www.gnu.org/copyleft/lesser.html 017 * 018 * Copyright for this code is held jointly by the individual 019 * authors. These should be listed in @author doc comments. 020 * 021 * 022 * Created on Jun 17, 2009 023 * Created by Andreas Prlic - RCSB PDB 024 * 025 */ 026 027package org.biojava.nbio.structure.align.fatcat.calc; 028 029import org.biojava.nbio.structure.*; 030import org.biojava.nbio.structure.align.model.AFP; 031import org.biojava.nbio.structure.align.model.AFPChain; 032import org.biojava.nbio.structure.geometry.SuperPositions; 033import org.biojava.nbio.structure.jama.Matrix; 034 035import java.util.ArrayList; 036import java.util.List; 037 038/** a class that performs calculations on AFPCHains 039 * 040 * @author Andreas Prlic 041 * 042 */ 043public class AFPCalculator 044{ 045 public static final boolean debug = FatCatAligner.debug; 046 047 048 public static final void extractAFPChains(FatCatParameters params, AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException { 049 050 051 052 List<AFP> afpSet = new ArrayList<AFP>(); 053 afpChain.setAfpSet(afpSet); 054 055 if ( debug ) 056 System.err.println("nr of atoms ca1: " + ca1.length + " ca2: " + ca2.length); 057 058 059 060 int p1, p2; 061 @SuppressWarnings("unused") 062 int n0, n, n1, n2; 063 double filter1; 064 double rmsd = 0; 065 066 Matrix r = new Matrix(3,3); 067 Atom t = new AtomImpl(); 068 069 070 int sparse = params.getSparse(); 071 int maxTra = params.getMaxTra(); 072 int fragLen = params.getFragLen(); 073 double disFilter = params.getDisFilter(); 074 double rmsdCut = params.getRmsdCut(); 075 double badRmsd = params.getBadRmsd(); 076 double fragScore = params.getFragScore(); 077 078 int add = sparse + 1; //if add > 1, use sparse sampling 079 n0 = n = n1 = n2 = 0; 080 081 int minLen = 0; 082 083 int prot1Length = ca1.length; 084 int prot2Length = ca2.length; 085 086 if(prot1Length < prot2Length) 087 minLen = prot1Length; 088 else 089 minLen = prot2Length; 090 afpChain.setMinLen(minLen); 091 092 afpChain.setBlockResList(new int[maxTra+1][2][minLen]); 093 afpChain.setFocusRes1(new int[minLen]); 094 afpChain.setFocusRes2(new int[minLen]); 095 096 for(p1 = 0; p1 < prot1Length - fragLen; p1 += add ) { 097 for(p2 = 0; p2 < prot2Length - fragLen; p2 += add) { 098 n0 ++; 099 filter1 = getEnd2EndDistance(ca1, ca2, p1, p1 + fragLen - 1, p2, p2 + fragLen - 1); 100 //difference bewteen end-to-end distances 101 if(filter1 > disFilter) { n1 ++; continue; } 102 boolean filter2 = filterTerminal(ca1,ca2, p1, p1 + fragLen - 1, p2, p2 + fragLen - 1, fragLen, minLen); 103 if(filter2) { 104 n2 ++; 105 continue; 106 107 } //be cautious to use this filter !! 108 109 // here FATCAT does a a jacobi transformation 110 //rmsd = kearsay(fragLen, ca1[p1], ca2[p2], r, t); 111 // we use the BioJava SVD instead... 112 113 // 114 rmsd = getRmsd(ca1,ca2,fragLen, p1,p2,r,t); 115 116 if(rmsd < rmsdCut) { 117 AFP afptmp = new AFP(); 118 afptmp.setP1(p1); 119 afptmp.setP2(p2); 120 afptmp.setFragLen(fragLen); 121 afptmp.setRmsd(rmsd); 122 afptmp.setM(r); 123 afptmp.setT(t.getCoords()); 124 afptmp.setScore(scoreAfp(afptmp,badRmsd,fragScore)); 125 afpSet.add(afptmp); 126 n ++; 127 } 128 } 129 } 130 131 int afpNum = afpSet.size(); 132 133 if(debug) { 134 String msg = String.format("possible AFP-pairs %d, remain %d after filter 1 remove %d; filter 2 remove %d\n", 135 n0, afpNum, n1, n2); 136 System.err.println(msg); 137 } 138 139 140 } 141 142 /** 143 * filter 1 for AFP extration: the distance of end-to-end 144 * @param p1b 145 * @param p1e 146 * @param p2b 147 * @param p2e 148 * @return 149 */ 150 private static final double getEnd2EndDistance(Atom[] ca1, Atom[] ca2, int p1b, int p1e, int p2b, int p2e) 151 { 152 153 double min = 99; 154 double dist1 = Calc.getDistance(ca1[p1b], ca1[p1e]); 155 double dist2 = Calc.getDistance(ca2[p2b], ca2[p2e]); 156 min = dist1 - dist2; 157 158 return Math.abs(min); 159 } 160 161 /** 162 * filter 2 for AFP extration: the context 163 * @param p1b 164 * @param p1e 165 * @param p2b 166 * @param p2e 167 * @return 168 */ 169 170 private static final boolean filterTerminal(Atom[] ca1, Atom[] ca2, int p1b, int p1e, int p2b, int p2e, int fragLen, int minLen) 171 { 172 int d1 = (p1b < p2b)?p1b:p2b; 173 int d2 = (ca1.length - p1e) < (ca2.length - p2e)?(ca1.length - p1e):(ca2.length - p2e); 174 int d3 = d1 + d2 + fragLen; //maximum alignment length from current AFP 175 176 177 /// DO NOT DO Math.round() this will give different results to FATCAT.... 178 int d4 = (int)(0.3 * minLen); 179 180 return d3 < d4; 181 182 } 183 184 private static final double getRmsd(Atom[] ca1, Atom[] ca2, int fragLen, 185 int p1, int p2, Matrix m, Atom t) throws StructureException { 186 187 188 double rmsd = 99.9; 189 Atom[] catmp1 = getFragment(ca1, p1, fragLen,false); 190 Atom[] catmp2 = getFragment(ca2, p2, fragLen,false); 191 192 if ( catmp1 == null) { 193 System.err.println("could not get fragment for ca1 " + p1 + " " + fragLen ); 194 return rmsd; 195 } 196 197 if ( catmp2 == null) { 198 System.err.println("could not get fragment for ca2 " + p2 + " " + fragLen ); 199 return rmsd; 200 } 201 202 return SuperPositions.getRmsd(Calc.atomsToPoints(catmp1), 203 Calc.atomsToPoints(catmp2)); 204 } 205 206 /** get a continue subset of Atoms based by the starting position and the length 207 * 208 * @param caall 209 * @param pos ... the start position 210 * @param fragmentLength .. the length of the subset to extract. 211 * @param clone: returns a copy of the atom (in case the coordinate get manipulated...) 212 * @return an Atom[] array 213 */ 214 private static final Atom[] getFragment(Atom[] caall, int pos, int fragmentLength , 215 boolean clone){ 216 217 if ( pos+fragmentLength > caall.length) 218 return null; 219 220 Atom[] tmp = new Atom[fragmentLength]; 221 222 for (int i=0;i< fragmentLength;i++){ 223 if (clone){ 224 tmp[i] = (Atom)caall[i+pos].clone(); 225 } else { 226 tmp[i] = caall[i+pos]; 227 } 228 } 229 return tmp; 230 231 } 232 233 234 /** 235 * Assign score to each AFP 236 */ 237 238 private static final double scoreAfp(AFP afp, double badRmsd, double fragScore) 239 { 240 //longer AFP with low rmsd is better 241 double s, w; 242 //s = (rmsdCut - afptmp.rmsd) * afptmp.len; //the same scroing strategy as that in the post-processing 243 w = afp.getRmsd() / badRmsd; 244 w = w * w; 245 s = fragScore * (1.0 - w); 246 return s; 247 } 248 249 //------------------------------------------------------------------ 250 //Sort the AFPs in increase of their diagonals(i,j) 251 //------------------------------------------------------------------ 252 public static final void sortAfps(AFPChain afpChain, Atom[] ca1, Atom[] ca2) 253 { 254 255 256 List<AFP> afpSet = afpChain.getAfpSet(); 257 258 if ( debug) 259 System.err.println("entering sortAfps"); 260 261 int pro1Len = ca1.length; 262 int pro2Len = ca2.length; 263 264 afpChain.setAfpIndex( new int[pro1Len][pro2Len]); //the index of (i,j) pair in AFP list, otherwise -1 265 afpChain.setAfpAftIndex( new int[pro1Len][pro2Len]); //the index of AFP (i,j*) nearest to (i,j), j*<j. if a AFP exits for (i,j), it equals to afpIndex 266 afpChain.setAfpBefIndex( new int[pro1Len][pro2Len]); //the index of AFP (i,j*) nearest to (i,j), j*>j. if a AFP exits for (i,j), it equals to afpIndex 267 268 int[][] afpIndex = afpChain.getAfpIndex(); 269 int[][] afpAftIndex = afpChain.getAfpAftIndex(); 270 int[][] afpBefIndex = afpChain.getAfpBefIndex(); 271 272 for(int i = 0; i < pro1Len; i ++) { 273 for(int j = 0; j < pro2Len; j ++) { 274 275 afpIndex[i][j] = afpAftIndex[i][j] = afpBefIndex[i][j] = -1; 276 } 277 } 278 279 //index the AFP for easy extraction of compatible AFPs 280 int afpNum = afpSet.size(); 281 282 int b0 = 0; 283 for(int a = 0; a < afpNum; a ++) { 284 if(a == afpNum - 1 || afpSet.get(a).getP1() != afpSet.get(a+1).getP1()) { 285 int i = afpSet.get(a).getP1(); 286 for(int b = b0; b <= a; b ++) { 287 int j = afpSet.get(b).getP2(); 288 afpIndex[i][j]=b ; 289 afpBefIndex[i][j]=b; 290 afpAftIndex[i][j]=b; 291 if(afpSet.get(b).getP1() != i) { 292 System.err.println(String.format("Warning: wrong afp index %d %d\n", i, afpSet.get(b).getP1())); 293 return; 294 } 295 } 296 for(int k = 1; k < pro2Len; k ++) { 297 if( afpBefIndex[i][k] == -1){ 298 afpBefIndex[i][k] = afpBefIndex[i][k-1]; 299 } 300 } 301 for(int k = pro2Len - 2; k >= 0; k --) { 302 if(afpAftIndex[i][k] == -1) { 303 afpAftIndex[i][k] = afpAftIndex[i][k+1]; 304 } 305 } 306 b0 = a + 1; 307 } 308 } 309 310 if ( debug) 311 System.err.println("done sortAfps"); 312 313 314 } 315}