/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on Sep 15, 2009
 * Author: Andreas Prlic
 *
 */

package org.biojava.nbio.structure.align.ce;

import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.structure.align.util.CliTools;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;

import java.util.ArrayList;
import java.util.List;


/**
 * Contains the parameters that can be sent to CE.
 *
 * <p>A new instance starts out with the default values assigned by
 * {@link #reset()}. The four {@code getUserConfig*} methods return parallel
 * lists: the entry at a given index in each list describes the same
 * user-configurable parameter.
 *
 * @author Andreas Prlic
 *
 */
public class CeParameters implements ConfigStrucAligParams {

	/** Fragment size m of Aligned Fragment Pairs (AFPs). */
	protected int winSize;
	/** RMSD threshold. */
	protected double rmsdThr;
	/** RMSD threshold for joining of AFPs. */
	protected double rmsdThrJoin;
	/** Maximum RMSD at which to stop alignment optimization (99 = unlimited). */
	protected double maxOptRMSD;

	/**
	 * The scoring functions that can be selected. {@link #toString()} yields
	 * the human-readable label of the constant rather than its identifier.
	 */
	public enum ScoringStrategy {
		CA_SCORING("CA only"),
		SIDE_CHAIN_SCORING("Sidechain orientation"),
		SIDE_CHAIN_ANGLE_SCORING("Angle between sidechains"),
		CA_AND_SIDE_CHAIN_ANGLE_SCORING("CA distance+Angle between sidechains"),
		SEQUENCE_CONSERVATION("Sequence Conservation");

		// NOTE(review): intentionally left non-final so that existing code
		// assigning to it keeps compiling; consider making it final.
		public static ScoringStrategy DEFAULT_SCORING_STRATEGY = CA_SCORING;

		/** Human-readable label; assigned once in the constructor. */
		private final String name;

		private ScoringStrategy(String name) {
			this.name = name;
		}

		@Override
		public String toString() {
			return name;
		}
	}

	protected ScoringStrategy scoringStrategy;
	//String[] alignmentAtoms;
	protected int maxGapSize;

	protected boolean showAFPRanges;
	protected int sideChainScoringType;

	/**
	 * Whether the CE algorithm should extend the best found trace with dynamic programming,
	 * while keeping RMSD at about the same level. (Shindyalov and Bourne, 1998)
	 * This is useful for edge cases with remote homology, but can be slow for large structures.
	 */
	private boolean optimizeAlignment;

	protected static final double DEFAULT_GAP_OPEN = 5.0;
	protected static final double DEFAULT_GAP_EXTENSION = 0.5;
	protected static final double DISTANCE_INCREMENT = 0.5;
	protected static final double DEFAULT_oRmsdThr = 2.0;
	protected static final String DEFAULT_SUBSTITUTION_MATRIX = "PRLA000101";

	/** Gap opening penalty during alignment optimization. */
	protected double gapOpen;
	/** Gap extension penalty during alignment optimization. */
	protected double gapExtension;
	protected double distanceIncrement;
	/** Original RMSD threshold from which the alignment optimization is started. */
	protected double oRmsdThr;

	/** Maximum number of iterations of the (slow) alignment optimization. */
	protected int maxNrIterationsForOptimization;

	/** Lazily initialised by {@link #getSubstitutionMatrix()} when {@code null}. */
	protected SubstitutionMatrix<AminoAcidCompound> substitutionMatrix;
	/** Weight of the sequence alignment score (0 = no contribution). */
	protected double seqWeight;

	/**
	 * Creates a parameter set holding the default values (see {@link #reset()}).
	 */
	public CeParameters(){
		reset();
	}

	@Override
	public String toString() {
		return "CeParameters [scoringStrategy=" + scoringStrategy
				+ ", maxGapSize=" + maxGapSize
				+ ", rmsdThr=" + rmsdThr
				+ ", rmsdThrJoin="+ rmsdThrJoin
				+ ", winSize=" + winSize
				+ ", showAFPRanges=" + showAFPRanges
				+ ", maxOptRMSD=" + maxOptRMSD
				+ ", seqWeight=" + seqWeight
				+ "]";
	}


	/**
	 * Restores every tunable of this class to its default value.
	 */
	@Override
	public void reset(){
		winSize = 8;
		rmsdThr = 3.0;
		rmsdThrJoin = 4.0;
		scoringStrategy = ScoringStrategy.DEFAULT_SCORING_STRATEGY;
		maxGapSize = 30;
		showAFPRanges = false;
		maxOptRMSD = 99;

		gapOpen = DEFAULT_GAP_OPEN;
		gapExtension = DEFAULT_GAP_EXTENSION;
		distanceIncrement = DISTANCE_INCREMENT;
		oRmsdThr = DEFAULT_oRmsdThr;

		maxNrIterationsForOptimization = Integer.MAX_VALUE;
		seqWeight = 0;
		optimizeAlignment = true;

		// Forget any custom matrix: getSubstitutionMatrix() reloads the
		// default (DEFAULT_SUBSTITUTION_MATRIX) on demand when the field is null.
		substitutionMatrix = null;
	}

	/** The window size to look at
	 *
	 * @return window size
	 */
	public Integer getWinSize() {
		return winSize;
	}

	/** Set the window size to look at.
	 *
	 * @param winSize window size; must not be {@code null} (it is unboxed
	 *        into a primitive field, so {@code null} raises a NullPointerException)
	 */
	public void setWinSize(Integer winSize) {
		this.winSize = winSize;
	}

	/** RMSD Threshold
	 *
	 * @return RMSD threshold
	 */
	public Double getRmsdThr() {
		return rmsdThr;
	}

	/** Set the RMSD threshold.
	 *
	 * @param rmsdThr RMSD threshold; must not be {@code null}
	 */
	public void setRmsdThr(Double rmsdThr) {
		this.rmsdThr = rmsdThr;
	}

	/** RMSD threshold for joining of AFPs
	 *
	 * @return rmsd threshold
	 */
	public Double getRmsdThrJoin() {
		return rmsdThrJoin;
	}

	/** Set the RMSD threshold for joining of AFPs.
	 *
	 * @param rmsdThrJoin rmsd threshold; must not be {@code null}
	 */
	public void setRmsdThrJoin(Double rmsdThrJoin) {
		this.rmsdThrJoin = rmsdThrJoin;
	}

	/** The scoring strategy in use.
	 *
	 * @return the scoring strategy
	 */
	public ScoringStrategy getScoringStrategy()
	{
		return scoringStrategy;
	}


	/** Set the scoring strategy to use. The default is
	 * {@link ScoringStrategy#DEFAULT_SCORING_STRATEGY}.
	 *
	 * @param scoringStrategy the scoring strategy
	 */
	public void setScoringStrategy(ScoringStrategy scoringStrategy)
	{
		this.scoringStrategy = scoringStrategy;
	}



	/** Set the Max gap size parameter. Default 30. For unlimited gaps set to -1
	 *
	 * <p>NOTE(review): the text returned by {@link #getUserConfigHelp()} says
	 * "Set to 0 for no limit" - confirm which sentinel the consumer of this
	 * parameter actually honours.
	 *
	 * @param maxGapSize max gap size; must not be {@code null}
	 */
	public void setMaxGapSize(Integer maxGapSize){
		this.maxGapSize = maxGapSize;
	}

	/** the Max gap size parameter G . default is 30, which was
	 * described to have been obtained empirically in the CE paper.
	 * the larger the max gap size, the longer the compute time,
	 * but in some cases drastically improved results. Set to -1 for unlimited gap size.
	 *
	 * @return max gap size parameter
	 */
	public Integer getMaxGapSize() {
		return maxGapSize;
	}


	/**
	 * Help texts for the user-configurable parameters, in the same order as
	 * {@link #getUserConfigParameters()}.
	 *
	 * @return a newly created, modifiable list of help texts
	 */
	@Override
	public List<String> getUserConfigHelp() {
		List<String> params =new ArrayList<>();
		String helpMaxGap = "This parameter configures the maximum gap size G, that is applied during the AFP extension. The larger the value, the longer the calculation time can become, Default value is 30. Set to 0 for no limit. " ;
		//String helpRmsdThr = "This configures the RMSD threshold applied during the trace of the fragment matrix.";
		String helpWinSize = "This configures the fragment size m of Aligned Fragment Pairs (AFPs).";

		params.add(helpMaxGap);
		//params.add(helpRmsdThr);
		params.add(helpWinSize);
		params.add("Which scoring function to use: "+CliTools.getEnumValuesAsString(ScoringStrategy.class) );
		params.add("The maximum RMSD at which to stop alignment optimization. (default: unlimited=99)");
		params.add("Gap opening penalty during alignment optimization [default: "+DEFAULT_GAP_OPEN+"].");
		params.add("Gap extension penalty during alignment optimization [default: "+DEFAULT_GAP_EXTENSION+"].");
		return params;
	}

	/**
	 * Property names of the user-configurable parameters; each matches the
	 * suffix of a getter/setter pair of this class (e.g. {@code MaxGapSize}).
	 *
	 * @return a newly created, modifiable list of property names
	 */
	@Override
	public List<String> getUserConfigParameters() {
		List<String> params = new ArrayList<>();
		params.add("MaxGapSize");
		//params.add("RmsdThr");
		params.add("WinSize");
		params.add("ScoringStrategy");
		params.add("MaxOptRMSD");
		params.add("GapOpen");
		params.add("GapExtension");

		return params;
	}

	/**
	 * Display labels for the user-configurable parameters, in the same order
	 * as {@link #getUserConfigParameters()}.
	 *
	 * @return a newly created, modifiable list of labels
	 */
	@Override
	public List<String> getUserConfigParameterNames(){
		List<String> params = new ArrayList<>();
		params.add("max. gap size G (during AFP extension).");
		//params.add("RMSD threshold during trace of the fragment matrix.");
		params.add("fragment size m");
		params.add("Which scoring function to use");
		params.add("RMSD threshold for alignment.");
		params.add("Gap open");
		params.add("Gap extension");
		return params;
	}

	/**
	 * Value types of the user-configurable parameters, in the same order as
	 * {@link #getUserConfigParameters()}.
	 *
	 * @return a newly created, modifiable list of types
	 */
	@Override
	@SuppressWarnings("rawtypes")
	public List<Class> getUserConfigTypes() {
		List<Class> params = new ArrayList<>();
		params.add(Integer.class);
		//params.add(Double.class);
		params.add(Integer.class);
		params.add(ScoringStrategy.class);
		params.add(Double.class);
		params.add(Double.class);
		params.add(Double.class);
		return params;
	}



	/**
	 * @return whether information about AFPs should be printed
	 */
	public boolean isShowAFPRanges()
	{
		return showAFPRanges;
	}

	/**
	 * @param showAFPRanges whether information about AFPs should be printed
	 */
	public void setShowAFPRanges(boolean showAFPRanges)
	{
		this.showAFPRanges = showAFPRanges;
	}





	/** set the maximum RMSD cutoff to be applied during alignment optimization. (default: 99 = unlimited)
	 *
	 * @param param maxOptRMSD; {@code null} is replaced by the default of 99
	 */
	public void setMaxOptRMSD(Double param){
		if ( param == null)
			param = 99d;
		maxOptRMSD = param;
	}

	/** Returns the maximum RMSD cutoff to be applied during alignment optimization (default: 99 = unlimited)
	 *
	 * @return maxOptRMSD
	 */
	public Double getMaxOptRMSD()
	{
		return maxOptRMSD;
	}



	/** Gap opening penalty during alignment optimization.
	 *
	 * @return gap opening penalty
	 */
	public Double getGapOpen()
	{
		return gapOpen;
	}



	/** Set the gap opening penalty during alignment optimization.
	 *
	 * @param gapOpen gap opening penalty; must not be {@code null}
	 */
	public void setGapOpen(Double gapOpen)
	{
		this.gapOpen = gapOpen;
	}



	/** Gap extension penalty during alignment optimization.
	 *
	 * @return gap extension penalty
	 */
	public Double getGapExtension()
	{
		return gapExtension;
	}



	/** Set the gap extension penalty during alignment optimization.
	 *
	 * @param gapExtension gap extension penalty; must not be {@code null}
	 */
	public void setGapExtension(Double gapExtension)
	{
		this.gapExtension = gapExtension;
	}



	/**
	 * @return the distance increment
	 */
	public Double getDistanceIncrement()
	{
		return distanceIncrement;
	}



	/**
	 * @param distanceIncrement the distance increment; must not be {@code null}
	 */
	public void setDistanceIncrement(Double distanceIncrement)
	{
		this.distanceIncrement = distanceIncrement;
	}



	/**
	 * Get the Original RMSD threshold from which the alignment optimization is started
	 *
	 * @return oRMSDThreshold
	 */
	public Double getORmsdThr()
	{
		return oRmsdThr;
	}



	/**
	 * Set the Original RMSD threshold from which the alignment optimization is started
	 *
	 * @param oRmsdThr the threshold; must not be {@code null}
	 */
	public void setORmsdThr(Double oRmsdThr)
	{
		this.oRmsdThr = oRmsdThr;
	}


	/**
	 * Get the maximum nr of times the (slow) optimization of alignment should iterate. Default: unlimited
	 *
	 * @return the maximum number of optimization iterations
	 */
	public int getMaxNrIterationsForOptimization() {
		return maxNrIterationsForOptimization;
	}


	/**
	 * Set the maximum nr of times the (slow) optimization of alignment should iterate. Default: unlimited
	 *
	 * @param maxNrIterationsForOptimization the maximum number of optimization iterations
	 */
	public void setMaxNrIterationsForOptimization(int maxNrIterationsForOptimization) {
		this.maxNrIterationsForOptimization = maxNrIterationsForOptimization;
	}


	/**
	 * Should sequence conservation be considered as part of the alignment? If yes, this weight factor allows to determine how much.
	 * By default this is set to 0, meaning no contribution of the sequence alignment score.
	 *
	 * @return seqWeight the weight factor (default 0)
	 */
	public double getSeqWeight() {
		return seqWeight;
	}


	/**
	 * Should sequence conservation be considered as part of the alignment? If yes, this weight factor allows to determine how much.
	 * By default this is set to 0, meaning no contribution of the sequence alignment score.
	 *
	 * @param seqWeight the weight factor (default 0)
	 */
	public void setSeqWeight(double seqWeight) {
		this.seqWeight = seqWeight;
	}


	/** Gets the substitution matrix to be used for influencing the alignment with sequence conservation information.
	 * If none has been set, the default is loaded on first access and cached.
	 * Default: SDM matrix (Prlic et al 2000)
	 * @return substitutionMatrix
	 */
	public SubstitutionMatrix<AminoAcidCompound> getSubstitutionMatrix() {
		if ( substitutionMatrix == null){
			substitutionMatrix = SubstitutionMatrixHelper.getMatrixFromAAINDEX(DEFAULT_SUBSTITUTION_MATRIX);

		}
		return substitutionMatrix;
	}


	/** Sets the substitution matrix to be used for influencing the alignment with sequence conservation information.
	 * Default: SDM matrix (Prlic et al 2000)
	 * @param substitutionMatrix the matrix to use; {@code null} makes
	 *        {@link #getSubstitutionMatrix()} fall back to the default
	 */
	public void setSubstitutionMatrix(
			SubstitutionMatrix<AminoAcidCompound> substitutionMatrix) {
		this.substitutionMatrix = substitutionMatrix;
	}


	/**
	 * Whether the CE algorithm should extend the best found trace with dynamic programming,
	 * while keeping RMSD at about the same level. This is useful for edge cases with remote homology,
	 * but can be slow for large structures.
	 *
	 * @return optimizeAlignment
	 */
	public boolean isOptimizeAlignment() {
		return optimizeAlignment;
	}

	/**
	 * Whether the CE algorithm should extend the best found trace with dynamic programming,
	 * while keeping RMSD at about the same level. This is useful for edge cases with remote homology,
	 * but can be slow for large structures.
	 *
	 * @param optimizeAlignment whether to run the optimization step
	 */
	public void setOptimizeAlignment(boolean optimizeAlignment) {
		this.optimizeAlignment = optimizeAlignment;
	}

}