001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.multiple.util; 022 023import java.util.ArrayList; 024import java.util.Arrays; 025import java.util.List; 026 027import org.biojava.nbio.structure.Atom; 028import org.biojava.nbio.structure.Calc; 029import org.biojava.nbio.structure.StructureException; 030import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 031import org.biojava.nbio.structure.jama.Matrix; 032 033/** 034 * Utility class for calculating common scores of {@link MultipleAlignment}s. 035 * 036 * @author Spencer Bliven 037 * @author Aleix Lafita 038 * @since 4.1.0 039 * 040 */ 041public class MultipleAlignmentScorer { 042 043 // Names for commonly used scores 044 public static final String PROBABILITY = "Probability"; 045 public static final String CE_SCORE = "CE-score"; 046 public static final String RMSD = "RMSD"; 047 public static final String AVGTM_SCORE = "AvgTM-score"; 048 public static final String MC_SCORE = "MC-score"; 049 public static final String REF_RMSD = "Ref-RMSD"; 050 public static final String REFTM_SCORE = "RefTM-score"; 051 052 /** 053 * Calculates and puts the RMSD and the average TM-Score of the 054 * MultipleAlignment. 055 * 056 * @param alignment 057 * @throws StructureException 058 * @see #getAvgTMScore(MultipleAlignment) 059 * @see #getRMSD(MultipleAlignment) 060 */ 061 public static void calculateScores(MultipleAlignment alignment) 062 throws StructureException { 063 064 // Put RMSD 065 List<Atom[]> trans = MultipleAlignmentTools.transformAtoms(alignment); 066 alignment.putScore(RMSD, getRMSD(trans)); 067 068 // Put AvgTM-Score 069 List<Integer> lengths = new ArrayList<Integer>(alignment.size()); 070 for (Atom[] atoms : alignment.getAtomArrays()) { 071 lengths.add(atoms.length); 072 } 073 alignment.putScore(AVGTM_SCORE, getAvgTMScore(trans, lengths)); 074 } 075 076 /** 077 * Calculates the RMSD of all-to-all structure comparisons (distances) of 078 * the given MultipleAlignment. 079 * <p> 080 * Complexity: T(n,l) = O(l*n^2), if n=number of structures and l=alignment 081 * length. 082 * <p> 083 * The formula used is just the sqroot of the average distance of all 084 * possible pairs of atoms. Thus, for R structures aligned over C columns 085 * without gaps, we have 086 * 087 * <pre> 088 * RMSD = \sqrt{1/(C*(R*(R-1)/2)) * \sum_{r1=1}^{R-1} 089 * \sum_{r2=r1+1}^{R-1} \sum_{j=0}^{C-1} (atom[r1][c]-atom[r2][c])^2} 090 * </pre> 091 * 092 * @param alignment 093 * @return double RMSD 094 */ 095 public static double getRMSD(MultipleAlignment alignment) { 096 List<Atom[]> trans = MultipleAlignmentTools.transformAtoms(alignment); 097 return getRMSD(trans); 098 } 099 100 /** 101 * Calculates the RMSD of all-to-all structure comparisons (distances), 102 * given a set of superimposed atoms. 103 * 104 * @param transformed 105 * @return double RMSD 106 * @see #getRMSD(MultipleAlignment) 107 */ 108 public static double getRMSD(List<Atom[]> transformed) { 109 110 double sumSqDist = 0; 111 int comparisons = 0; 112 113 for (int r1 = 0; r1 < transformed.size(); r1++) { 114 for (int c = 0; c < transformed.get(r1).length; c++) { 115 Atom refAtom = transformed.get(r1)[c]; 116 if (refAtom == null) 117 continue; 118 119 double nonNullSqDist = 0; 120 int nonNullLength = 0; 121 for (int r2 = r1 + 1; r2 < transformed.size(); r2++) { 122 Atom atom = transformed.get(r2)[c]; 123 if (atom != null) { 124 nonNullSqDist += Calc.getDistanceFast(refAtom, atom); 125 nonNullLength++; 126 } 127 } 128 129 if (nonNullLength > 0) { 130 comparisons++; 131 sumSqDist += nonNullSqDist / nonNullLength; 132 } 133 } 134 } 135 return Math.sqrt(sumSqDist / comparisons); 136 } 137 138 /** 139 * /** Calculates the average RMSD from all structures to a reference s 140 * tructure, given a set of superimposed atoms. 141 * <p> 142 * Complexity: T(n,l) = O(l*n), if n=number of structures and l=alignment 143 * length. 144 * <p> 145 * For ungapped alignments, this is just the sqroot of the average distance 146 * from an atom to the aligned atom from the reference. Thus, for R 147 * structures aligned over C columns (with structure 0 as the reference), we 148 * have: 149 * 150 * <pre> 151 * RefRMSD = \sqrt{ 1/(C*(R-1)) * \sum_{r=1}^{R-1} \sum_{j=0}^{C-1} 152 * (atom[1][c]-atom[r][c])^2 } 153 * </pre> 154 * <p> 155 * For gapped alignments, null atoms are omitted from consideration, so that 156 * the RMSD is the average over all columns with non-null reference of the 157 * average RMSD within the non-null elements of the column. 158 * 159 * @param alignment 160 * MultipleAlignment 161 * @param ref 162 * reference structure index 163 * @return 164 */ 165 public static double getRefRMSD(MultipleAlignment alignment, int ref) { 166 List<Atom[]> trans = MultipleAlignmentTools.transformAtoms(alignment); 167 return getRefRMSD(trans, ref); 168 } 169 170 /** 171 * Calculates the average RMSD from all structures to a reference s 172 * tructure, given a set of superimposed atoms. 173 * <p> 174 * Complexity: T(n,l) = O(l*n), if n=number of structures and l=alignment 175 * length. 176 * <p> 177 * For ungapped alignments, this is just the sqroot of the average distance 178 * from an atom to the aligned atom from the reference. Thus, for R 179 * structures aligned over C columns (with structure 0 as the reference), we 180 * have: 181 * 182 * <pre> 183 * RefRMSD = \sqrt{ 1/(C*(R-1)) * \sum_{r=1}^{R-1} \sum_{j=0}^{C-1} 184 * (atom[1][c]-atom[r][c])^2 } 185 * </pre> 186 * <p> 187 * For gapped alignments, null atoms are omitted from consideration, so that 188 * the RMSD is the average over all columns with non-null reference of the 189 * average RMSD within the non-null elements of the column. 190 * 191 * @param transformed 192 * @param reference 193 * @return 194 */ 195 public static double getRefRMSD(List<Atom[]> transformed, int reference) { 196 197 double sumSqDist = 0; 198 int totalLength = 0; 199 for (int c = 0; c < transformed.get(reference).length; c++) { 200 Atom refAtom = transformed.get(reference)[c]; 201 if (refAtom == null) 202 continue; 203 204 double nonNullSqDist = 0; 205 int nonNullLength = 0; 206 for (int r = 0; r < transformed.size(); r++) { 207 if (r == reference) 208 continue; 209 Atom atom = transformed.get(r)[c]; 210 if (atom != null) { 211 nonNullSqDist += Calc.getDistanceFast(refAtom, atom); 212 nonNullLength++; 213 } 214 } 215 216 if (nonNullLength > 0) { 217 totalLength++; 218 sumSqDist += nonNullSqDist / nonNullLength; 219 } 220 } 221 return Math.sqrt(sumSqDist / totalLength); 222 } 223 224 /** 225 * Calculates the average TMScore of all the possible pairwise structure 226 * comparisons of the given alignment. 227 * <p> 228 * Complexity: T(n,l) = O(l*n^2), if n=number of structures and l=alignment 229 * length. 230 * 231 * @param alignment 232 * @return double Average TMscore 233 * @throws StructureException 234 */ 235 public static double getAvgTMScore(MultipleAlignment alignment) 236 throws StructureException { 237 238 List<Atom[]> trans = MultipleAlignmentTools.transformAtoms(alignment); 239 240 List<Integer> lengths = new ArrayList<Integer>(alignment.size()); 241 for (Atom[] atoms : alignment.getAtomArrays()) { 242 lengths.add(atoms.length); 243 } 244 return getAvgTMScore(trans, lengths); 245 } 246 247 /** 248 * Calculates the average TMScore all the possible pairwise structure 249 * comparisons of the given a set of superimposed Atoms and the original 250 * structure lengths. 251 * <p> 252 * Complexity: T(n,l) = O(l*n^2), if n=number of structures and l=alignment 253 * length. 254 * 255 * @param transformed 256 * aligned Atoms transformed 257 * @param lengths 258 * lengths of the structures in residue number 259 * @return double Average TMscore 260 * @throws StructureException 261 */ 262 public static double getAvgTMScore(List<Atom[]> transformed, 263 List<Integer> lengths) throws StructureException { 264 265 if (transformed.size() != lengths.size()) 266 throw new IllegalArgumentException("Input sizes differ."); 267 268 double sumTM = 0; 269 int comparisons = 0; 270 271 for (int r1 = 0; r1 < transformed.size(); r1++) { 272 for (int r2 = r1 + 1; r2 < transformed.size(); r2++) { 273 int len = transformed.get(r1).length; 274 // Remove nulls from both arrays 275 Atom[] ref = new Atom[len]; 276 Atom[] aln = new Atom[len]; 277 int nonNullLen = 0; 278 for (int c = 0; c < len; c++) { 279 if (transformed.get(r1)[c] != null 280 && transformed.get(r2)[c] != null) { 281 ref[nonNullLen] = transformed.get(r1)[c]; 282 aln[nonNullLen] = transformed.get(r2)[c]; 283 nonNullLen++; 284 } 285 } 286 // Truncate nulls 287 if (nonNullLen < len) { 288 ref = Arrays.copyOf(ref, nonNullLen); 289 aln = Arrays.copyOf(aln, nonNullLen); 290 } 291 sumTM += Calc.getTMScore(ref, aln, lengths.get(r1), 292 lengths.get(r2)); 293 comparisons++; 294 } 295 } 296 return sumTM / comparisons; 297 } 298 299 /** 300 * Calculates the average TMScore from all structures to a reference 301 * structure, given a set of superimposed atoms. 302 * <p> 303 * Complexity: T(n,l) = O(l*n), if n=number of structures and l=alignment 304 * length. 305 * 306 * @param alignment 307 * @param reference 308 * Index of the reference structure 309 * @return 310 * @throws StructureException 311 */ 312 public static double getRefTMScore(MultipleAlignment alignment, int ref) 313 throws StructureException { 314 315 List<Atom[]> trans = MultipleAlignmentTools.transformAtoms(alignment); 316 317 List<Integer> lengths = new ArrayList<Integer>(alignment.size()); 318 for (Atom[] atoms : alignment.getAtomArrays()) { 319 lengths.add(atoms.length); 320 } 321 return getRefTMScore(trans, lengths, ref); 322 } 323 324 /** 325 * Calculates the average TMScore from all structures to a reference 326 * structure, given a set of superimposed atoms. 327 * <p> 328 * Complexity: T(n,l) = O(l*n^2), if n=number of structures and l=alignment 329 * length. 330 * 331 * @param transformed 332 * Arrays of aligned atoms, after superposition 333 * @param lengths 334 * lengths of the full input structures 335 * @param reference 336 * Index of the reference structure 337 * @return 338 * @throws StructureException 339 */ 340 public static double getRefTMScore(List<Atom[]> transformed, 341 List<Integer> lengths, int reference) throws StructureException { 342 343 if (transformed.size() != lengths.size()) 344 throw new IllegalArgumentException("Input sizes differ"); 345 346 double sumTM = 0; 347 int comparisons = 0; 348 349 int len = transformed.get(reference).length; 350 for (int r = 0; r < transformed.size(); r++) { 351 if (r == reference) 352 continue; 353 // remove nulls from both arrays 354 Atom[] ref = new Atom[len]; 355 Atom[] aln = new Atom[len]; 356 int nonNullLen = 0; 357 for (int c = 0; c < len; c++) { 358 if (transformed.get(reference)[c] != null 359 && transformed.get(r)[c] != null) { 360 ref[nonNullLen] = transformed.get(reference)[c]; 361 aln[nonNullLen] = transformed.get(r)[c]; 362 nonNullLen++; 363 } 364 } 365 // truncate nulls 366 if (nonNullLen < len) { 367 ref = Arrays.copyOf(ref, nonNullLen); 368 aln = Arrays.copyOf(aln, nonNullLen); 369 } 370 sumTM += Calc.getTMScore(ref, aln, 371 lengths.get(reference), lengths.get(r)); 372 comparisons++; 373 } 374 return sumTM / comparisons; 375 } 376 377 /** 378 * Calculates the MC score, specific for the MultipleAlignment algorithm. 379 * The score function is modified from the original CEMC paper, making it 380 * continuous and differentiable. 381 * <p> 382 * The maximum score of a match is 20, and the penalties for gaps are part 383 * of the input. The half-score distance, d0, is chosen as in the TM-score. 384 * <p> 385 * Complexity: T(n,l) = O(l*n^2), if n=number of structures and l=alignment 386 * length. 387 * 388 * @param alignment 389 * @param gapOpen 390 * penalty for gap opening (reasonable values are in the range 391 * (1.0-20.0) 392 * @param gapExtension 393 * penalty for extending a gap (reasonable values are in the 394 * range (0.5-10.0) 395 * @param dCutoff 396 * the distance cutoff 397 * @return the value of the score 398 * @throws StructureException 399 */ 400 public static double getMCScore(MultipleAlignment alignment, 401 double gapOpen, double gapExtension, double dCutoff) 402 throws StructureException { 403 404 List<Atom[]> trans = MultipleAlignmentTools.transformAtoms(alignment); 405 406 // Calculate d0: same as the one in TM score 407 int minLen = Integer.MAX_VALUE; 408 for (Atom[] atoms : alignment.getAtomArrays()) 409 if (atoms.length < minLen) 410 minLen = atoms.length; 411 double d0 = 1.24 * Math.cbrt((minLen) - 15.) - 1.8; 412 413 // Calculate the distance cutoff penalization 414 double A = 20.0 / (1 + (dCutoff * dCutoff) / (d0 * d0)); 415 416 return getMCScore(trans, d0, gapOpen, gapExtension, A); 417 } 418 419 /** 420 * Calculates the MC score, specific for the MultipleAlignment algorithm. 421 * The score function is modified from the original CEMC paper, making it 422 * continuous and differentiable. 423 * <p> 424 * The maximum score of a match is 20, and the penalties for gaps are part 425 * of the input. 426 * <p> 427 * Complexity: T(n,l) = O(l*n^2), if n=number of structures and l=alignment 428 * length. 429 * 430 * @param transformed 431 * List of transformed Atom arrays 432 * @param d0 433 * parameter for the half-score distance 434 * @param gapOpen 435 * penalty for gap opening (reasonable values are in the range 436 * (1.0-20.0) 437 * @param gapExtension 438 * penalty for extending a gap (reasonable values are in the 439 * range (0.5-10.0) 440 * @param A 441 * the distance cutoff penalization 442 * @return the value of the score 443 * @throws StructureException 444 */ 445 private static double getMCScore(List<Atom[]> trans, double d0, 446 double gapOpen, double gapExtension, double A) 447 throws StructureException { 448 449 int size = trans.size(); 450 int length = trans.get(0).length; 451 Matrix residueDistances = new Matrix(size, length, -1); 452 double scoreMC = 0.0; 453 int openGaps = 0; 454 int extensionGaps = 0; 455 456 // Calculate the average residue distances 457 for (int r1 = 0; r1 < size; r1++) { 458 boolean gapped = false; 459 for (int c = 0; c < trans.get(r1).length; c++) { 460 Atom refAtom = trans.get(r1)[c]; 461 // Calculate the gap extension and opening on the fly 462 if (refAtom == null) { 463 if (gapped) 464 extensionGaps++; 465 else { 466 gapped = true; 467 openGaps++; 468 } 469 continue; 470 } else 471 gapped = false; 472 473 for (int r2 = r1 + 1; r2 < size; r2++) { 474 Atom atom = trans.get(r2)[c]; 475 if (atom != null) { 476 double distance = Calc.getDistance(refAtom, atom); 477 if (residueDistances.get(r1, c) == -1) { 478 residueDistances.set(r1, c, 1 + distance); 479 } else { 480 residueDistances.set(r1, c, 481 residueDistances.get(r1, c) + distance); 482 } 483 if (residueDistances.get(r2, c) == -1) { 484 residueDistances.set(r2, c, 1 + distance); 485 } else { 486 residueDistances.set(r2, c, 487 residueDistances.get(r2, c) + distance); 488 } 489 } 490 } 491 } 492 } 493 494 for (int c = 0; c < length; c++) { 495 int nonNullRes = 0; 496 for (int r = 0; r < size; r++) { 497 if (residueDistances.get(r, c) != -1) 498 nonNullRes++; 499 } 500 for (int r = 0; r < size; r++) { 501 if (residueDistances.get(r, c) != -1) { 502 residueDistances.set(r, c, residueDistances.get(r, c) 503 / nonNullRes); 504 } 505 } 506 } 507 508 // Sum all the aligned residue scores 509 for (int row = 0; row < size; row++) { 510 for (int col = 0; col < length; col++) { 511 if (residueDistances.get(row, col) == -1) 512 continue; 513 double d1 = residueDistances.get(row, col); 514 double resScore = 20.0 / (1 + (d1 * d1) / (d0 * d0)); 515 scoreMC += resScore - A; 516 } 517 } 518 519 // Apply the Gap penalty and return 520 return scoreMC - (openGaps * gapOpen + extensionGaps * gapExtension); 521 } 522}