001/* This class is based on the original FATCAT implementation by 002 * <pre> 003 * Yuzhen Ye & Adam Godzik (2003) 004 * Flexible structure alignment by chaining aligned fragment pairs allowing twists. 005 * Bioinformatics vol.19 suppl. 2. ii246-ii255. 006 * https://www.ncbi.nlm.nih.gov/pubmed/14534198 007 * </pre> 008 * 009 * Thanks to Yuzhen Ye and A. Godzik for granting permission to freely use and redistribute this code. 010 * 011 * This code may be freely distributed and modified under the 012 * terms of the GNU Lesser General Public Licence. This should 013 * be distributed with the code. If you do not have a copy, 014 * see: 015 * 016 * http://www.gnu.org/copyleft/lesser.html 017 * 018 * Copyright for this code is held jointly by the individual 019 * authors. These should be listed in @author doc comments. 020 * 021 * 022 * Created on Jun 17, 2009 023 * Created by Andreas Prlic - RCSB PDB 024 * 025 */ 026 027package org.biojava.nbio.structure.align.fatcat.calc; 028 029import org.biojava.nbio.structure.*; 030import org.biojava.nbio.structure.align.model.AFP; 031import org.biojava.nbio.structure.align.model.AFPChain; 032import org.biojava.nbio.structure.geometry.SuperPositions; 033import org.biojava.nbio.structure.jama.Matrix; 034 035import java.util.ArrayList; 036import java.util.List; 037 038/** 039 * A class that performs calculations on AFPChains 040 * 041 * @author Andreas Prlic 042 * 043 */ 044public class AFPCalculator 045{ 046 public static final boolean debug = FatCatAligner.debug; 047 048 049 public static void extractAFPChains(FatCatParameters params, AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException { 050 051 List<AFP> afpSet = new ArrayList<AFP>(); 052 afpChain.setAfpSet(afpSet); 053 054 if ( debug ) 055 System.err.println("nr of atoms ca1: " + ca1.length + " ca2: " + ca2.length); 056 057 058 059 int p1, p2; 060 @SuppressWarnings("unused") 061 int n0, n, n1, n2; 062 double filter1; 063 double rmsd = 0; 064 065 Matrix r = new Matrix(3,3); 066 Atom t = new AtomImpl(); 067 068 069 int sparse = params.getSparse(); 070 int maxTra = params.getMaxTra(); 071 int fragLen = params.getFragLen(); 072 double disFilter = params.getDisFilter(); 073 double rmsdCut = params.getRmsdCut(); 074 double badRmsd = params.getBadRmsd(); 075 double fragScore = params.getFragScore(); 076 077 int add = sparse + 1; //if add > 1, use sparse sampling 078 n0 = n = n1 = n2 = 0; 079 080 int minLen = 0; 081 082 int prot1Length = ca1.length; 083 int prot2Length = ca2.length; 084 085 if(prot1Length < prot2Length) 086 minLen = prot1Length; 087 else 088 minLen = prot2Length; 089 afpChain.setMinLen(minLen); 090 091 afpChain.setBlockResList(new int[maxTra+1][2][minLen]); 092 afpChain.setFocusRes1(new int[minLen]); 093 afpChain.setFocusRes2(new int[minLen]); 094 095 for(p1 = 0; p1 < prot1Length - fragLen; p1 += add ) { 096 for(p2 = 0; p2 < prot2Length - fragLen; p2 += add) { 097 n0 ++; 098 filter1 = getEnd2EndDistance(ca1, ca2, p1, p1 + fragLen - 1, p2, p2 + fragLen - 1); 099 //difference bewteen end-to-end distances 100 if(filter1 > disFilter) { n1 ++; continue; } 101 boolean filter2 = filterTerminal(ca1,ca2, p1, p1 + fragLen - 1, p2, p2 + fragLen - 1, fragLen, minLen); 102 if(filter2) { 103 n2 ++; 104 continue; 105 106 } //be cautious to use this filter !! 107 108 // here FATCAT does a a jacobi transformation 109 //rmsd = kearsay(fragLen, ca1[p1], ca2[p2], r, t); 110 // we use the BioJava SVD instead... 111 112 // 113 rmsd = getRmsd(ca1,ca2,fragLen, p1,p2,r,t); 114 115 if(rmsd < rmsdCut) { 116 AFP afptmp = new AFP(); 117 afptmp.setP1(p1); 118 afptmp.setP2(p2); 119 afptmp.setFragLen(fragLen); 120 afptmp.setRmsd(rmsd); 121 afptmp.setM(r); 122 afptmp.setT(t.getCoords()); 123 afptmp.setScore(scoreAfp(afptmp,badRmsd,fragScore)); 124 afpSet.add(afptmp); 125 n ++; 126 } 127 } 128 } 129 130 int afpNum = afpSet.size(); 131 132 if(debug) { 133 String msg = String.format("possible AFP-pairs %d, remain %d after filter 1 remove %d; filter 2 remove %d\n", 134 n0, afpNum, n1, n2); 135 System.err.println(msg); 136 } 137 138 139 } 140 141 /** 142 * filter 1 for AFP extration: the distance of end-to-end 143 * @param p1b 144 * @param p1e 145 * @param p2b 146 * @param p2e 147 * @return 148 */ 149 private static final double getEnd2EndDistance(Atom[] ca1, Atom[] ca2, int p1b, int p1e, int p2b, int p2e) 150 { 151 152 double min = 99; 153 double dist1 = Calc.getDistance(ca1[p1b], ca1[p1e]); 154 double dist2 = Calc.getDistance(ca2[p2b], ca2[p2e]); 155 min = dist1 - dist2; 156 157 return Math.abs(min); 158 } 159 160 /** 161 * filter 2 for AFP extration: the context 162 * @param p1b 163 * @param p1e 164 * @param p2b 165 * @param p2e 166 * @return 167 */ 168 169 private static final boolean filterTerminal(Atom[] ca1, Atom[] ca2, int p1b, int p1e, int p2b, int p2e, int fragLen, int minLen) 170 { 171 int d1 = (p1b < p2b)?p1b:p2b; 172 int d2 = (ca1.length - p1e) < (ca2.length - p2e)?(ca1.length - p1e):(ca2.length - p2e); 173 int d3 = d1 + d2 + fragLen; //maximum alignment length from current AFP 174 175 176 /// DO NOT DO Math.round() this will give different results to FATCAT.... 177 int d4 = (int)(0.3 * minLen); 178 179 return d3 < d4; 180 181 } 182 183 private static final double getRmsd(Atom[] ca1, Atom[] ca2, int fragLen, 184 int p1, int p2, Matrix m, Atom t) throws StructureException { 185 186 187 double rmsd = 99.9; 188 Atom[] catmp1 = getFragment(ca1, p1, fragLen,false); 189 Atom[] catmp2 = getFragment(ca2, p2, fragLen,false); 190 191 if ( catmp1 == null) { 192 System.err.println("could not get fragment for ca1 " + p1 + " " + fragLen ); 193 return rmsd; 194 } 195 196 if ( catmp2 == null) { 197 System.err.println("could not get fragment for ca2 " + p2 + " " + fragLen ); 198 return rmsd; 199 } 200 201 return SuperPositions.getRmsd(Calc.atomsToPoints(catmp1), 202 Calc.atomsToPoints(catmp2)); 203 } 204 205 /** get a continue subset of Atoms based by the starting position and the length 206 * 207 * @param caall 208 * @param pos ... the start position 209 * @param fragmentLength .. the length of the subset to extract. 210 * @param clone: returns a copy of the atom (in case the coordinate get manipulated...) 211 * @return an Atom[] array 212 */ 213 private static final Atom[] getFragment(Atom[] caall, int pos, int fragmentLength , 214 boolean clone){ 215 216 if ( pos+fragmentLength > caall.length) 217 return null; 218 219 Atom[] tmp = new Atom[fragmentLength]; 220 221 for (int i=0;i< fragmentLength;i++){ 222 if (clone){ 223 tmp[i] = (Atom)caall[i+pos].clone(); 224 } else { 225 tmp[i] = caall[i+pos]; 226 } 227 } 228 return tmp; 229 230 } 231 232 233 /** 234 * Assign score to each AFP 235 */ 236 237 private static final double scoreAfp(AFP afp, double badRmsd, double fragScore) 238 { 239 //longer AFP with low rmsd is better 240 double s, w; 241 //s = (rmsdCut - afptmp.rmsd) * afptmp.len; //the same scroing strategy as that in the post-processing 242 w = afp.getRmsd() / badRmsd; 243 w = w * w; 244 s = fragScore * (1.0 - w); 245 return s; 246 } 247 248 //------------------------------------------------------------------ 249 //Sort the AFPs in increase of their diagonals(i,j) 250 //------------------------------------------------------------------ 251 public static final void sortAfps(AFPChain afpChain, Atom[] ca1, Atom[] ca2) 252 { 253 254 255 List<AFP> afpSet = afpChain.getAfpSet(); 256 257 if ( debug) 258 System.err.println("entering sortAfps"); 259 260 int pro1Len = ca1.length; 261 int pro2Len = ca2.length; 262 263 afpChain.setAfpIndex( new int[pro1Len][pro2Len]); //the index of (i,j) pair in AFP list, otherwise -1 264 afpChain.setAfpAftIndex( new int[pro1Len][pro2Len]); //the index of AFP (i,j*) nearest to (i,j), j*<j. if a AFP exits for (i,j), it equals to afpIndex 265 afpChain.setAfpBefIndex( new int[pro1Len][pro2Len]); //the index of AFP (i,j*) nearest to (i,j), j*>j. if a AFP exits for (i,j), it equals to afpIndex 266 267 int[][] afpIndex = afpChain.getAfpIndex(); 268 int[][] afpAftIndex = afpChain.getAfpAftIndex(); 269 int[][] afpBefIndex = afpChain.getAfpBefIndex(); 270 271 for(int i = 0; i < pro1Len; i ++) { 272 for(int j = 0; j < pro2Len; j ++) { 273 274 afpIndex[i][j] = afpAftIndex[i][j] = afpBefIndex[i][j] = -1; 275 } 276 } 277 278 //index the AFP for easy extraction of compatible AFPs 279 int afpNum = afpSet.size(); 280 281 int b0 = 0; 282 for(int a = 0; a < afpNum; a ++) { 283 if(a == afpNum - 1 || afpSet.get(a).getP1() != afpSet.get(a+1).getP1()) { 284 int i = afpSet.get(a).getP1(); 285 for(int b = b0; b <= a; b ++) { 286 int j = afpSet.get(b).getP2(); 287 afpIndex[i][j]=b ; 288 afpBefIndex[i][j]=b; 289 afpAftIndex[i][j]=b; 290 if(afpSet.get(b).getP1() != i) { 291 System.err.println(String.format("Warning: wrong afp index %d %d\n", i, afpSet.get(b).getP1())); 292 return; 293 } 294 } 295 for(int k = 1; k < pro2Len; k ++) { 296 if( afpBefIndex[i][k] == -1){ 297 afpBefIndex[i][k] = afpBefIndex[i][k-1]; 298 } 299 } 300 for(int k = pro2Len - 2; k >= 0; k --) { 301 if(afpAftIndex[i][k] == -1) { 302 afpAftIndex[i][k] = afpAftIndex[i][k+1]; 303 } 304 } 305 b0 = a + 1; 306 } 307 } 308 309 if ( debug) 310 System.err.println("done sortAfps"); 311 312 313 } 314}