/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence. This should
 * be distributed with the code. If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors. These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.cluster;

import org.biojava.nbio.structure.align.ce.CeMain;

import java.io.Serializable;

/**
 * Bundle of options consumed by the {@link SubunitClusterer} when it groups
 * the subunits of a structure into clusters.
 * <p>
 * The options cover three areas: which subunits are long enough to take part
 * (sequence-length settings), how similar two subunits have to be (sequence
 * identity / coverage, RMSD, structure coverage and TMScore thresholds plus
 * the switches that enable each metric), and how the comparison is carried
 * out (clusterer method, superposition algorithm, alignment optimisation,
 * internal symmetry).
 *
 * @author Peter Rose
 * @author Aleix Lafita
 * @since 5.0.0
 *
 */
public class SubunitClustererParameters implements Serializable {

    private static final long serialVersionUID = 1L;

    /*
     * "High confidence" cut-offs: subunits aligned with these or better
     * scores will be considered "identical". The local variant needs both an
     * identity and a coverage cut-off, the global variant identity only.
     */
    private static final double HC_SEQUENCE_IDENTITY_LOCAL = 0.95;
    private static final double HC_SEQUENCE_COVERAGE_LOCAL = 0.75;
    private static final double HC_SEQUENCE_IDENTITY_GLOBAL = 0.85;

    // NOTE: instance field names are deliberately left untouched; with Java's
    // default serialization they are part of the serialized form.

    // --- subunit length filtering -------------------------------------
    private int minimumSequenceLength = 20;
    private int absoluteMinimumSequenceLength = 5;
    private double minimumSequenceLengthFraction = 0.75;

    // --- sequence based comparison ------------------------------------
    private boolean useGlobalMetrics;
    private double sequenceIdentityThreshold;
    private double sequenceCoverageThreshold = 0.75;

    private boolean useEntityIdForSeqIdentityDetermination = false;

    // --- structure based comparison -----------------------------------
    private double rmsdThreshold = 3.0;
    private double structureCoverageThreshold = 0.75;
    private double tmThreshold = 0.5;

    // --- how the clustering is performed ------------------------------
    private SubunitClustererMethod clustererMethod = SubunitClustererMethod.SEQUENCE_STRUCTURE;

    private String superpositionAlgorithm = CeMain.algorithmName;
    private boolean optimizeAlignment = true;

    // --- metric switches, initialised by the constructor --------------
    private boolean useSequenceCoverage;
    private boolean useRMSD;
    private boolean useStructureCoverage;
    private boolean useTMScore;

    private boolean internalSymmetry = false;

    /**
     * Creates a parameter set pre-configured for either "local" or "global"
     * scoring.
     * <p>
     * With <b>local</b> metrics
     * <ul>
     * <li>{@link SubunitClustererMethod#SEQUENCE} scores the sequence identity
     * of a local alignment (normalised by the number of aligned residues)
     * together with the sequence coverage of the alignment (normalised by the
     * length of the longer sequence);</li>
     * <li>{@link SubunitClustererMethod#STRUCTURE} scores the RMSD of the
     * aligned substructures together with the structure coverage of the
     * alignment (normalised by the length of the larger structure).</li>
     * </ul>
     * Each method therefore needs two thresholds. The sequence identity
     * threshold starts at 0.95 and sequence coverage, RMSD and structure
     * coverage are switched on, TMScore off.
     * <p>
     * With <b>global</b> metrics
     * <ul>
     * <li>{@link SubunitClustererMethod#SEQUENCE} scores the sequence identity
     * of a global alignment (normalised by the length of the alignment);</li>
     * <li>{@link SubunitClustererMethod#STRUCTURE} scores the TMScore of the
     * aligned structures (normalised by the length of the larger
     * structure).</li>
     * </ul>
     * Each method needs a single threshold. The sequence identity threshold
     * starts at 0.85 and only TMScore is switched on.
     *
     * @param useGlobalMetrics
     *            true to configure global metrics, false for local metrics
     */
    public SubunitClustererParameters(boolean useGlobalMetrics) {
        this.useGlobalMetrics = useGlobalMetrics;

        // The three "local" switches are always the opposite of the TMScore
        // switch, so derive all of them from the single flag.
        final boolean localMetrics = !useGlobalMetrics;

        sequenceIdentityThreshold = useGlobalMetrics
                ? HC_SEQUENCE_IDENTITY_GLOBAL
                : HC_SEQUENCE_IDENTITY_LOCAL;
        useSequenceCoverage = localMetrics;
        useRMSD = localMetrics;
        useStructureCoverage = localMetrics;
        useTMScore = useGlobalMetrics;
    }

    /**
     * Creates a parameter set configured for "local" metrics, i.e. the same
     * as {@code new SubunitClustererParameters(false)}.
     */
    public SubunitClustererParameters() {
        this(false);
    }

    /**
     * Returns the minimum number of residues a subunit must have to be
     * considered in the clusters.
     *
     * @return minimumSequenceLength
     */
    public int getMinimumSequenceLength() {
        return minimumSequenceLength;
    }

    /**
     * Sets the minimum number of residues a subunit must have to be
     * considered in the clusters.
     *
     * @param minimumSequenceLength
     *            the minimum subunit length, in residues
     */
    public void setMinimumSequenceLength(int minimumSequenceLength) {
        this.minimumSequenceLength = minimumSequenceLength;
    }

    /**
     * Returns the hard lower bound of the adaptive minimum sequence length.
     * <p>
     * If the shortest subunit sequence length is higher or equal to the
     * minimumSequenceLengthFraction times the median subunit sequence length,
     * then the minimumSequenceLength is set to the shortest subunit sequence
     * length, but never to less than the absoluteMinimumSequenceLength.
     * <p>
     * This adaptive feature allows the consideration of structures mainly
     * constructed by very short chains, such as collagen (1A3I).
     *
     * @return the absoluteMinimumSequenceLength
     */
    public int getAbsoluteMinimumSequenceLength() {
        return absoluteMinimumSequenceLength;
    }

    /**
     * Sets the hard lower bound of the adaptive minimum sequence length.
     * <p>
     * If the shortest subunit sequence length is higher or equal to the
     * minimumSequenceLengthFraction times the median subunit sequence length,
     * then the minimumSequenceLength is set to the shortest subunit sequence
     * length, but never to less than the absoluteMinimumSequenceLength.
     * <p>
     * This adaptive feature allows the consideration of structures mainly
     * constructed by very short chains, such as collagen (1A3I).
     *
     * @param absoluteMinimumSequenceLength
     *            the smallest value the adaptive minimum may take, in residues
     */
    public void setAbsoluteMinimumSequenceLength(int absoluteMinimumSequenceLength) {
        this.absoluteMinimumSequenceLength = absoluteMinimumSequenceLength;
    }

    /**
     * Returns the fraction of the median subunit length that triggers the
     * adaptive minimum sequence length.
     * <p>
     * If the shortest subunit sequence length is higher or equal to the
     * minimumSequenceLengthFraction times the median subunit sequence length,
     * then the minimumSequenceLength is set to the shortest subunit sequence
     * length, but never to less than the absoluteMinimumSequenceLength.
     * <p>
     * This adaptive feature allows the consideration of structures mainly
     * constructed by very short chains, such as collagen (1A3I).
     *
     * @return the minimumSequenceLengthFraction
     */
    public double getMinimumSequenceLengthFraction() {
        return minimumSequenceLengthFraction;
    }

    /**
     * Sets the fraction of the median subunit length that triggers the
     * adaptive minimum sequence length.
     * <p>
     * If the shortest subunit sequence length is higher or equal to the
     * minimumSequenceLengthFraction times the median subunit sequence length,
     * then the minimumSequenceLength is set to the shortest subunit sequence
     * length, but never to less than the absoluteMinimumSequenceLength.
     * <p>
     * This adaptive feature allows the consideration of structures mainly
     * constructed by very short chains, such as collagen (1A3I).
     *
     * @param minimumSequenceLengthFraction
     *            the fraction of the median subunit sequence length
     */
    public void setMinimumSequenceLengthFraction(double minimumSequenceLengthFraction) {
        this.minimumSequenceLengthFraction = minimumSequenceLengthFraction;
    }

    /**
     * Returns the sequence identity threshold used for the subunit
     * clustering.
     * <p>
     * Two subunits with sequence identity equal or higher than the threshold
     * will be clustered together.
     *
     * @return sequenceIdentityThreshold
     */
    public double getSequenceIdentityThreshold() {
        return sequenceIdentityThreshold;
    }

    /**
     * Sets the sequence identity threshold used for the sequence subunit
     * clustering.
     * <p>
     * Two subunits with sequence identity equal or higher than the threshold
     * will be clustered together.
     *
     * @param sequenceIdentityThreshold
     *            the minimum sequence identity
     */
    public void setSequenceIdentityThreshold(double sequenceIdentityThreshold) {
        this.sequenceIdentityThreshold = sequenceIdentityThreshold;
    }

    /**
     * Returns the minimum coverage of the sequence alignment between two
     * subunits for them to be clustered together.
     *
     * @return sequenceCoverageThreshold
     */
    public double getSequenceCoverageThreshold() {
        return sequenceCoverageThreshold;
    }

    /**
     * Sets the minimum coverage of the sequence alignment between two
     * subunits for them to be clustered together.
     *
     * @param sequenceCoverageThreshold
     *            the minimum sequence alignment coverage
     */
    public void setSequenceCoverageThreshold(double sequenceCoverageThreshold) {
        this.sequenceCoverageThreshold = sequenceCoverageThreshold;
    }

    /**
     * Returns the structure similarity threshold, measured as RMSD, used for
     * the structural subunit clustering.
     *
     * @return rmsdThreshold
     */
    public double getRMSDThreshold() {
        return rmsdThreshold;
    }

    /**
     * Sets the structure similarity threshold, measured as RMSD, used for the
     * structural subunit clustering.
     *
     * @param rmsdThreshold
     *            the RMSD threshold
     */
    public void setRMSDThreshold(double rmsdThreshold) {
        this.rmsdThreshold = rmsdThreshold;
    }

    /**
     * Returns the structure similarity threshold, measured as TMScore, used
     * for the structural subunit clustering.
     *
     * @return tmThreshold
     */
    public double getTMThreshold() {
        return tmThreshold;
    }

    /**
     * Sets the structure similarity threshold, measured as TMScore, used for
     * the structural subunit clustering.
     *
     * @param tmThreshold
     *            the TMScore threshold
     */
    public void setTMThreshold(double tmThreshold) {
        this.tmThreshold = tmThreshold;
    }

    /**
     * Returns the minimum coverage of the structure alignment between two
     * subunits for them to be clustered together.
     *
     * @return structureCoverageThreshold
     */
    public double getStructureCoverageThreshold() {
        return structureCoverageThreshold;
    }

    /**
     * Sets the minimum coverage of the structure alignment between two
     * subunits for them to be clustered together.
     *
     * @param structureCoverageThreshold
     *            the minimum structure alignment coverage
     */
    public void setStructureCoverageThreshold(double structureCoverageThreshold) {
        this.structureCoverageThreshold = structureCoverageThreshold;
    }

    /**
     * Returns the method used to cluster subunits.
     *
     * @return clustererMethod
     */
    public SubunitClustererMethod getClustererMethod() {
        return clustererMethod;
    }

    /**
     * Sets the method used to cluster subunits.
     *
     * @param method
     *            the clustering method
     */
    public void setClustererMethod(SubunitClustererMethod method) {
        this.clustererMethod = method;
    }

    /**
     * Tells whether internal symmetry is considered. The internal symmetry
     * option divides each {@link Subunit} of each {@link SubunitCluster} into
     * its internally symmetric repeats.
     * <p>
     * The {@link SubunitClustererMethod#STRUCTURE} must be chosen to consider
     * internal symmetry, otherwise this parameter will be ignored.
     *
     * @return true if internal symmetry is considered, false otherwise
     */
    public boolean isInternalSymmetry() {
        return internalSymmetry;
    }

    /**
     * Switches the internal symmetry option on or off. The option divides
     * each {@link Subunit} of each {@link SubunitCluster} into its internally
     * symmetric repeats.
     * <p>
     * The {@link SubunitClustererMethod#STRUCTURE} must be chosen to consider
     * internal symmetry, otherwise this parameter will be ignored.
     *
     * @param internalSymmetry
     *            true if internal symmetry is considered, false otherwise
     */
    public void setInternalSymmetry(boolean internalSymmetry) {
        this.internalSymmetry = internalSymmetry;
    }

    /**
     * Returns the name of the algorithm used to superpose subunits (i.e., the
     * structural aligner).
     *
     * @return superpositionAlgorithm
     */
    public String getSuperpositionAlgorithm() {
        return superpositionAlgorithm;
    }

    /**
     * Sets the name of the algorithm used to superpose subunits (i.e., the
     * structural aligner).
     *
     * @param superpositionAlgorithm
     *            the name of the structural alignment algorithm
     */
    public void setSuperpositionAlgorithm(String superpositionAlgorithm) {
        this.superpositionAlgorithm = superpositionAlgorithm;
    }

    /**
     * Tells whether the alignment algorithm should try its best to optimize
     * the alignment, or whether a quick and dirty result is acceptable. The
     * effect depends on the implementation of the specific algorithm.
     *
     * @return optimizeAlignment
     */
    public boolean isOptimizeAlignment() {
        return optimizeAlignment;
    }

    /**
     * Chooses whether the alignment algorithm should try its best to optimize
     * the alignment, or whether a quick and dirty result is acceptable. The
     * effect depends on the implementation of the specific algorithm.
     *
     * @param optimizeAlignment
     *            true to request an optimized alignment
     */
    public void setOptimizeAlignment(boolean optimizeAlignment) {
        this.optimizeAlignment = optimizeAlignment;
    }

    /**
     * Tells whether RMSD is used for evaluating structure similarity.
     *
     * @return useRMSD
     */
    public boolean isUseRMSD() {
        return useRMSD;
    }

    /**
     * Chooses whether RMSD is used for evaluating structure similarity.
     *
     * @param useRMSD
     *            true to use RMSD
     */
    public void setUseRMSD(boolean useRMSD) {
        this.useRMSD = useRMSD;
    }

    /**
     * Tells whether TMScore is used for evaluating structure similarity.
     *
     * @return useTMScore
     */
    public boolean isUseTMScore() {
        return useTMScore;
    }

    /**
     * Chooses whether TMScore is used for evaluating structure similarity.
     *
     * @param useTMScore
     *            true to use TMScore
     */
    public void setUseTMScore(boolean useTMScore) {
        this.useTMScore = useTMScore;
    }

    /**
     * Tells whether sequence coverage is used for evaluating sequence
     * similarity.
     *
     * @return useSequenceCoverage
     */
    public boolean isUseSequenceCoverage() {
        return useSequenceCoverage;
    }

    /**
     * Chooses whether sequence coverage is used for evaluating sequence
     * similarity.
     *
     * @param useSequenceCoverage
     *            true to use sequence coverage
     */
    public void setUseSequenceCoverage(boolean useSequenceCoverage) {
        this.useSequenceCoverage = useSequenceCoverage;
    }

    /**
     * Tells whether structure coverage is used for evaluating structure
     * similarity.
     *
     * @return useStructureCoverage
     */
    public boolean isUseStructureCoverage() {
        return useStructureCoverage;
    }

    /**
     * Chooses whether structure coverage is used for evaluating structure
     * similarity.
     *
     * @param useStructureCoverage
     *            true to use structure coverage
     */
    public void setUseStructureCoverage(boolean useStructureCoverage) {
        this.useStructureCoverage = useStructureCoverage;
    }

    /**
     * Tells whether metrics are calculated relative to the whole sequence or
     * structure, rather than the aligned part only.
     *
     * @return useGlobalMetrics
     */
    public boolean isUseGlobalMetrics() {
        return useGlobalMetrics;
    }

    /**
     * Chooses whether metrics are calculated relative to the whole sequence
     * or structure, rather than the aligned part only.
     * <p>
     * Only the flag itself is changed here: the sequence identity threshold
     * and the individual metric switches picked by the constructor keep their
     * current values.
     *
     * @param useGlobalMetrics
     *            true to use global metrics
     */
    public void setUseGlobalMetrics(boolean useGlobalMetrics) {
        this.useGlobalMetrics = useGlobalMetrics;
    }

    /**
     * Decides whether two subunits can be considered "identical" by sequence
     * alignment.
     * <p>
     * For local sequence alignment (normalised by the number of aligned
     * pairs) this means 0.95 or higher identity and 0.75 or higher coverage.
     * For global sequence alignment (normalised by the alignment length) this
     * means 0.85 or higher sequence identity; the coverage is not looked at.
     *
     * @param sequenceIdentity
     *            sequence identity of the alignment
     * @param sequenceCoverage
     *            sequence coverage of the alignment
     * @return true if the sequence alignment scores are equal to or better
     *         than the "high confidence" scores, false otherwise.
     */
    public boolean isHighConfidenceScores(double sequenceIdentity, double sequenceCoverage) {
        if (useGlobalMetrics) {
            return sequenceIdentity >= HC_SEQUENCE_IDENTITY_GLOBAL;
        }
        final boolean identityHighEnough = sequenceIdentity >= HC_SEQUENCE_IDENTITY_LOCAL;
        final boolean coverageHighEnough = sequenceCoverage >= HC_SEQUENCE_COVERAGE_LOCAL;
        return identityHighEnough && coverageHighEnough;
    }

    /**
     * Tells whether the entity id of subunits is used to infer that sequences
     * are identical. Only applies if the {@link SubunitClustererMethod} is a
     * sequence based one.
     *
     * @return the flag
     * @since 5.4.0
     */
    public boolean isUseEntityIdForSeqIdentityDetermination() {
        return useEntityIdForSeqIdentityDetermination;
    }

    /**
     * Chooses whether the entity id of subunits is used to infer that
     * sequences are identical. Only applies if the
     * {@link SubunitClustererMethod} is a sequence based one. Note this
     * requires
     * {@link org.biojava.nbio.structure.io.FileParsingParameters#setAlignSeqRes(boolean)}
     * to be set to true.
     *
     * @param useEntityIdForSeqIdentityDetermination
     *            the flag to be set
     * @since 5.4.0
     */
    public void setUseEntityIdForSeqIdentityDetermination(boolean useEntityIdForSeqIdentityDetermination) {
        this.useEntityIdForSeqIdentityDetermination = useEntityIdForSeqIdentityDetermination;
    }

    /**
     * Returns a one-line summary of the length settings, the sequence
     * identity, RMSD and sequence coverage thresholds, the clusterer method
     * and the internal symmetry flag. The remaining parameters are not part
     * of the summary.
     */
    @Override
    public String toString() {
        final StringBuilder summary = new StringBuilder("SubunitClustererParameters [");
        summary.append("minimumSequenceLength=").append(minimumSequenceLength);
        summary.append(", absoluteMinimumSequenceLength=").append(absoluteMinimumSequenceLength);
        summary.append(", minimumSequenceLengthFraction=").append(minimumSequenceLengthFraction);
        summary.append(", sequenceIdentityThreshold=").append(sequenceIdentityThreshold);
        summary.append(", rmsdThreshold=").append(rmsdThreshold);
        // The entry labelled "coverageThreshold" reports the *sequence*
        // coverage threshold.
        summary.append(", coverageThreshold=").append(sequenceCoverageThreshold);
        summary.append(", clustererMethod=").append(clustererMethod);
        summary.append(", internalSymmetry=").append(internalSymmetry);
        return summary.append("]").toString();
    }
}