001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.cluster; 022 023import org.biojava.nbio.structure.align.ce.CeMain; 024 025import java.io.Serializable; 026 027/** 028 * The SubunitClustererParameters specifies the options used for the clustering 029 * of the subunits in structures using the {@link SubunitClusterer}. 030 * 031 * @author Peter Rose 032 * @author Aleix Lafita 033 * @since 5.0.0 034 * 035 */ 036public class SubunitClustererParameters implements Serializable { 037 038 private static final long serialVersionUID = 1L; 039 040 private int minimumSequenceLength = 20; 041 private int absoluteMinimumSequenceLength = 5; 042 private double minimumSequenceLengthFraction = 0.75; 043 044 private boolean useGlobalMetrics; 045 private double sequenceIdentityThreshold; 046 private double sequenceCoverageThreshold = 0.75; 047 048 private double rmsdThreshold = 3.0; 049 private double structureCoverageThreshold = 0.75; 050 private double tmThreshold = 0.5; 051 052 private SubunitClustererMethod clustererMethod = SubunitClustererMethod.SEQUENCE_STRUCTURE; 053 054 private String superpositionAlgorithm = CeMain.algorithmName; 055 private boolean optimizeAlignment = true; 056 057 private boolean useSequenceCoverage; 058 private boolean useRMSD; 059 private boolean useStructureCoverage; 060 private boolean useTMScore; 061 062 private boolean internalSymmetry = false; 063 064 /** 065 * Subunits aligned with these or better scores will be considered "identical". 066 */ 067 private static final double hcSequenceIdentityLocal = 0.95; 068 private static final double hcSequenceCoverageLocal = 0.75; 069 private static final double hcSequenceIdentityGlobal = 0.85; 070 071 /** 072 * "Local" metrics are scoring 073 * SubunitClustererMethod.SEQUENCE: sequence identity of a local alignment 074 * (normalised by the number of aligned residues) 075 * sequence coverage of the alignment 076 * (normalised by the length of the longer sequence) 077 * SubunitClustererMethod.STRUCTURE: RMSD of the aligned substructures 078 * and structure coverage of the alignment 079 * (normalised by the length of the larger structure) 080 * Two thresholds for each method are required. 081 * 082 * "Global" metrics are scoring 083 * SubunitClustererMethod.SEQUENCE: sequence identity of a global alignment 084 * (normalised by the length of the alignment) 085 * SubunitClustererMethod.STRUCTURE: TMScore of the aligned structures 086 * (normalised by the length of the larger structure) 087 * One threshold for each method is required. 088 * 089 */ 090 public SubunitClustererParameters(boolean useGlobalMetrics) { 091 this.useGlobalMetrics = useGlobalMetrics; 092 093 if (useGlobalMetrics) { 094 sequenceIdentityThreshold = hcSequenceIdentityGlobal; 095 useSequenceCoverage = false; 096 useRMSD = false; 097 useStructureCoverage = false; 098 useTMScore = true; 099 } else { 100 sequenceIdentityThreshold = hcSequenceIdentityLocal; 101 useSequenceCoverage = true; 102 useRMSD = true; 103 useStructureCoverage = true; 104 useTMScore = false; 105 } 106 } 107 108 /** 109 * Initialize with "local" metrics by default. 110 */ 111 public SubunitClustererParameters() { 112 this(false); 113 } 114 115 /** 116 * Get the minimum number of residues of a subunits to be considered in the 117 * clusters. 118 * 119 * @return minimumSequenceLength 120 */ 121 public int getMinimumSequenceLength() { 122 return minimumSequenceLength; 123 } 124 125 /** 126 * Set the minimum number of residues of a subunits to be considered in the 127 * clusters. 128 * 129 * @param minimumSequenceLength 130 */ 131 public void setMinimumSequenceLength(int minimumSequenceLength) { 132 this.minimumSequenceLength = minimumSequenceLength; 133 } 134 135 /** 136 * If the shortest subunit sequence length is higher or equal the 137 * minimumSequenceLengthFraction times the median subunit sequence length, 138 * then the minimumSequenceLength is set to shortest subunit sequence 139 * length, but not shorter than the absoluteMinimumSequenceLength. 140 * <p> 141 * This adaptive feature allows the consideration of structures mainly 142 * constructed by very short chains, such as collagen (1A3I) 143 * 144 * @return the absoluteMinimumSequenceLength 145 */ 146 public int getAbsoluteMinimumSequenceLength() { 147 return absoluteMinimumSequenceLength; 148 } 149 150 /** 151 * If the shortest subunit sequence length is higher or equal the 152 * minimumSequenceLengthFraction times the median subunit sequence length, 153 * then the minimumSequenceLength is set to shortest subunit sequence 154 * length, but not shorter than the absoluteMinimumSequenceLength. 155 * <p> 156 * This adaptive feature allows the consideration of structures mainly 157 * constructed by very short chains, such as collagen (1A3I) 158 * 159 * @param absoluteMinimumSequenceLength 160 */ 161 public void setAbsoluteMinimumSequenceLength( 162 int absoluteMinimumSequenceLength) { 163 this.absoluteMinimumSequenceLength = absoluteMinimumSequenceLength; 164 } 165 166 /** 167 * If the shortest subunit sequence length is higher or equal the 168 * minimumSequenceLengthFraction times the median subunit sequence length, 169 * then the minimumSequenceLength is set to shortest subunit sequence 170 * length, but not shorter than the absoluteMinimumSequenceLength. 171 * <p> 172 * This adaptive feature allows the consideration of structures mainly 173 * constructed by very short chains, such as collagen (1A3I) 174 * 175 * @return the minimumSequenceLengthFraction 176 */ 177 public double getMinimumSequenceLengthFraction() { 178 return minimumSequenceLengthFraction; 179 } 180 181 /** 182 * If the shortest subunit sequence length is higher or equal the 183 * minimumSequenceLengthFraction times the median subunit sequence length, 184 * then the minimumSequenceLength is set to shortest subunit sequence 185 * length, but not shorter than the absoluteMinimumSequenceLength. 186 * <p> 187 * This adaptive feature allows the consideration of structures mainly 188 * constructed by very short chains, such as collagen (1A3I) 189 * 190 * @param minimumSequenceLengthFraction 191 */ 192 public void setMinimumSequenceLengthFraction( 193 double minimumSequenceLengthFraction) { 194 this.minimumSequenceLengthFraction = minimumSequenceLengthFraction; 195 } 196 197 /** 198 * Sequence identity threshold to consider for the subunits clustering. 199 * <p> 200 * Two subunits with sequence identity equal or higher than the threshold 201 * will be clustered together. 202 * 203 * @return sequenceIdentityThreshold 204 */ 205 public double getSequenceIdentityThreshold() { 206 return sequenceIdentityThreshold; 207 } 208 209 /** 210 * Sequence identity threshold to consider for the sequence subunit 211 * clustering. 212 * <p> 213 * Two subunits with sequence identity equal or higher than the threshold 214 * will be clustered together. 215 * 216 * @param sequenceIdentityThreshold 217 */ 218 public void setSequenceIdentityThreshold(double sequenceIdentityThreshold) { 219 this.sequenceIdentityThreshold = sequenceIdentityThreshold; 220 } 221 222 /** 223 * The minimum coverage of the sequence alignment between two subunits to be 224 * clustered together. 225 * 226 * @return sequenceCoverageThreshold 227 */ 228 public double getSequenceCoverageThreshold() { 229 return sequenceCoverageThreshold; 230 } 231 232 /** 233 * The minimum coverage of the sequence alignment between two subunits to be 234 * clustered together. 235 * 236 * @param sequenceCoverageThreshold 237 */ 238 public void setSequenceCoverageThreshold(double sequenceCoverageThreshold) { 239 this.sequenceCoverageThreshold = sequenceCoverageThreshold; 240 } 241 242 /** 243 * Structure similarity threshold (measured with RMSD) to consider for the 244 * structural subunit clustering. 245 * 246 * @return rmsdThreshold 247 */ 248 public double getRMSDThreshold() { 249 return rmsdThreshold; 250 } 251 252 /** 253 * Structure similarity threshold (measured with RMSD) to consider for the 254 * structural subunit clustering. 255 * 256 * @param rmsdThreshold 257 */ 258 public void setRMSDThreshold(double rmsdThreshold) { 259 this.rmsdThreshold = rmsdThreshold; 260 } 261 262 /** 263 * Structure similarity threshold (measured with TMScore) to consider for the 264 * structural subunit clustering. 265 * 266 * @return tmThreshold 267 */ 268 public double getTMThreshold() { 269 return tmThreshold; 270 } 271 272 /** 273 * Structure similarity threshold (measured with TMScore) to consider for the 274 * structural subunit clustering. 275 * 276 * @param tmThreshold 277 */ 278 public void setTMThreshold(double tmThreshold) { 279 this.tmThreshold = tmThreshold; 280 } 281 282 /** 283 * The minimum coverage of the structure alignment between two subunits to be 284 * clustered together. 285 * 286 * @return structureCoverageThreshold 287 */ 288 public double getStructureCoverageThreshold() { 289 return structureCoverageThreshold; 290 } 291 292 /** 293 * The minimum coverage of the structure alignment between two subunits to be 294 * clustered together. 295 * 296 * @param structureCoverageThreshold 297 */ 298 public void setStructureCoverageThreshold(double structureCoverageThreshold) { 299 this.structureCoverageThreshold = structureCoverageThreshold; 300 } 301 302 /** 303 * Method to cluster subunits. 304 * 305 * @return clustererMethod 306 */ 307 public SubunitClustererMethod getClustererMethod() { 308 return clustererMethod; 309 } 310 311 /** 312 * Method to cluster subunits. 313 * 314 * @param method 315 */ 316 public void setClustererMethod(SubunitClustererMethod method) { 317 this.clustererMethod = method; 318 } 319 320 /** 321 * The internal symmetry option divides each {@link Subunit} of each 322 * {@link SubunitCluster} into its internally symmetric repeats. 323 * <p> 324 * The {@link SubunitClustererMethod#STRUCTURE} must be chosen to consider 325 * internal symmetry, otherwise this parameter will be ignored. 326 * 327 * @return true if internal symmetry is considered, false otherwise 328 */ 329 public boolean isInternalSymmetry() { 330 return internalSymmetry; 331 } 332 333 /** 334 * The internal symmetry option divides each {@link Subunit} of each 335 * {@link SubunitCluster} into its internally symmetric repeats. 336 * <p> 337 * The {@link SubunitClustererMethod#STRUCTURE} must be chosen to consider 338 * internal symmetry, otherwise this parameter will be ignored. 339 * 340 * @param internalSymmetry 341 * true if internal symmetry is considered, false otherwise 342 */ 343 public void setInternalSymmetry(boolean internalSymmetry) { 344 this.internalSymmetry = internalSymmetry; 345 } 346 347 @Override 348 public String toString() { 349 return "SubunitClustererParameters [minimumSequenceLength=" 350 + minimumSequenceLength + ", absoluteMinimumSequenceLength=" 351 + absoluteMinimumSequenceLength 352 + ", minimumSequenceLengthFraction=" 353 + minimumSequenceLengthFraction 354 + ", sequenceIdentityThreshold=" + sequenceIdentityThreshold 355 + ", rmsdThreshold=" + rmsdThreshold + ", coverageThreshold=" 356 + sequenceCoverageThreshold + ", clustererMethod=" + clustererMethod 357 + ", internalSymmetry=" + internalSymmetry + "]"; 358 } 359 360 /** 361 * Method to superpose subunits (i.e., structural aligner). 362 * 363 * @return superpositionAlgorithm 364 */ 365 public String getSuperpositionAlgorithm() { 366 return superpositionAlgorithm; 367 } 368 369 /** 370 * Method to cluster subunits. 371 * 372 * @param superpositionAlgorithm 373 */ 374 public void setSuperpositionAlgorithm(String superpositionAlgorithm) { 375 this.superpositionAlgorithm = superpositionAlgorithm; 376 } 377 378 /** 379 * Whether the alignment algorithm should try its best to optimize the alignment, 380 * or we are happy with a quick and dirty result. Effect depends on implementation 381 * of the specific algorithm's method. * 382 * 383 * @return optimizeAlignment 384 */ 385 public boolean isOptimizeAlignment() { 386 return optimizeAlignment; 387 } 388 389 /** 390 * Whether the alignment algorithm should try its best to optimize the alignment, 391 * or we are happy with a quick and dirty result. Effect depends on implementation 392 * of the specific algorithm's method. * 393 * 394 * @param optimizeAlignment 395 */ 396 public void setOptimizeAlignment(boolean optimizeAlignment) { 397 this.optimizeAlignment = optimizeAlignment; 398 } 399 400 /** 401 * Use RMSD for evaluating structure similarity 402 * 403 * @return useRMSD 404 */ 405 public boolean isUseRMSD() { return useRMSD; } 406 407 /** 408 * Use RMSD for evaluating structure similarity 409 * 410 * @param useRMSD 411 */ 412 public void setUseRMSD(boolean useRMSD) { 413 this.useRMSD = useRMSD; 414 } 415 416 /** 417 * Use TMScore for evaluating structure similarity 418 * 419 * @return useTMScore 420 */ 421 public boolean isUseTMScore() { 422 return useTMScore; 423 } 424 425 /** 426 * Use TMScore for evaluating structure similarity 427 * 428 * @param useTMScore 429 */ 430 public void setUseTMScore(boolean useTMScore) { 431 this.useTMScore = useTMScore; 432 } 433 434 /** 435 * Use sequence coverage for evaluating sequence similarity 436 * 437 * @return useSequenceCoverage 438 */ 439 public boolean isUseSequenceCoverage() { 440 return useSequenceCoverage; 441 } 442 443 /** 444 * Use sequence coverage for evaluating sequence similarity 445 * 446 * @param useSequenceCoverage 447 */ 448 public void setUseSequenceCoverage(boolean useSequenceCoverage) { 449 this.useSequenceCoverage = useSequenceCoverage; 450 } 451 452 /** 453 * Use structure coverage for evaluating sequence similarity 454 * 455 * @return useStructureCoverage 456 */ 457 public boolean isUseStructureCoverage() { 458 return useStructureCoverage; 459 } 460 461 /** 462 * Use structure coverage for evaluating sequence similarity 463 * 464 * @param useStructureCoverage 465 */ 466 public void setUseStructureCoverage(boolean useStructureCoverage) { 467 this.useStructureCoverage = useStructureCoverage; 468 } 469 470 /** 471 * Use metrics calculated relative to the whole sequence or structure, 472 * rather than the aligned part only 473 * 474 * @return useGlobalMetrics 475 */ 476 public boolean isUseGlobalMetrics() { 477 return useGlobalMetrics; 478 } 479 480 /** 481 * Use metrics calculated relative to the whole sequence or structure, 482 * rather than the aligned part only 483 * 484 * @param useGlobalMetrics 485 */ 486 public void setUseGlobalMetrics(boolean useGlobalMetrics) { 487 this.useGlobalMetrics = useGlobalMetrics; 488 } 489 490 /** 491 * Whether the subunits can be considered "identical" by sequence alignment. 492 * For local sequence alignment (normalized by the number of aligned pairs) 493 * this means 0.95 or higher identity and 0.75 or higher coverage. 494 * For global sequence alignment (normalised by the alignment length) 495 * this means 0.85 or higher sequence identity. 496 * 497 * @param sequenceIdentity 498 * @param sequenceCoverage 499 * @return true if the sequence alignment scores are equal to 500 * or better than the "high confidence" scores, false otherwise. 501 */ 502 public boolean isHighConfidenceScores(double sequenceIdentity, double sequenceCoverage) { 503 if (useGlobalMetrics) 504 return sequenceIdentity>=hcSequenceIdentityGlobal; 505 else 506 return sequenceIdentity>=hcSequenceIdentityLocal && sequenceCoverage >= hcSequenceCoverageLocal; 507 } 508 509 510}