001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.cluster;
022
023import org.biojava.nbio.structure.align.ce.CeMain;
024
025import java.io.Serializable;
026
027/**
028 * The SubunitClustererParameters specifies the options used for the clustering
029 * of the subunits in structures using the {@link SubunitClusterer}.
030 *
031 * @author Peter Rose
032 * @author Aleix Lafita
033 * @since 5.0.0
034 *
035 */
036public class SubunitClustererParameters implements Serializable {
037
038        private static final long serialVersionUID = 1L;
039
040        private int minimumSequenceLength = 20;
041        private int absoluteMinimumSequenceLength = 5;
042        private double minimumSequenceLengthFraction = 0.75;
043
044        private boolean useGlobalMetrics;
045        private double sequenceIdentityThreshold;
046        private double sequenceCoverageThreshold = 0.75;
047
048        private boolean useEntityIdForSeqIdentityDetermination = false;
049
050        private double rmsdThreshold = 3.0;
051        private double structureCoverageThreshold = 0.75;
052        private double tmThreshold = 0.5;
053
054        private SubunitClustererMethod clustererMethod = SubunitClustererMethod.SEQUENCE_STRUCTURE;
055
056        private String superpositionAlgorithm = CeMain.algorithmName;
057        private boolean optimizeAlignment = true;
058
059        private boolean useSequenceCoverage;
060        private boolean useRMSD;
061        private boolean useStructureCoverage;
062        private boolean useTMScore;
063
064        private boolean internalSymmetry = false;
065
066        /**
067         * Subunits aligned with these or better scores will be considered "identical".
068         */
069        private static final double hcSequenceIdentityLocal = 0.95;
070        private static final double hcSequenceCoverageLocal = 0.75;
071        private static final double hcSequenceIdentityGlobal = 0.85;
072
073        /**
074         * "Local" metrics are scoring
075         * SubunitClustererMethod.SEQUENCE: sequence identity of a local alignment
076         *                                  (normalised by the number of aligned residues)
077         *                                  sequence coverage of the alignment
078         *                                  (normalised by the length of the longer sequence)
079         * SubunitClustererMethod.STRUCTURE: RMSD of the aligned substructures
080         *                                   and structure coverage of the alignment
081         *                                   (normalised by the length of the larger structure)
082         * Two thresholds for each method are required.
083         *
084         * "Global" metrics are scoring
085         * SubunitClustererMethod.SEQUENCE: sequence identity of a global alignment
086         *                                  (normalised by the length of the alignment)
087         * SubunitClustererMethod.STRUCTURE: TMScore of the aligned structures
088         *                                  (normalised by the length of the larger structure)
089         * One threshold for each method is required.
090         *
091         */
092        public SubunitClustererParameters(boolean useGlobalMetrics) {
093                this.useGlobalMetrics = useGlobalMetrics;
094
095                if (useGlobalMetrics) {
096                        sequenceIdentityThreshold = hcSequenceIdentityGlobal;
097                        useSequenceCoverage = false;
098                        useRMSD = false;
099                        useStructureCoverage = false;
100                        useTMScore = true;
101                } else {
102                        sequenceIdentityThreshold = hcSequenceIdentityLocal;
103                        useSequenceCoverage = true;
104                        useRMSD = true;
105                        useStructureCoverage = true;
106                        useTMScore = false;
107                }
108        }
109
110        /**
111         * Initialize with "local" metrics by default.
112         */
113        public SubunitClustererParameters() {
114                this(false);
115        }
116
117        /**
118         * Get the minimum number of residues of a subunits to be considered in the
119         * clusters.
120         *
121         * @return minimumSequenceLength
122         */
123        public int getMinimumSequenceLength() {
124                return minimumSequenceLength;
125        }
126
127        /**
128         * Set the minimum number of residues of a subunits to be considered in the
129         * clusters.
130         *
131         * @param minimumSequenceLength
132         */
133        public void setMinimumSequenceLength(int minimumSequenceLength) {
134                this.minimumSequenceLength = minimumSequenceLength;
135        }
136
137        /**
138         * If the shortest subunit sequence length is higher or equal the
139         * minimumSequenceLengthFraction times the median subunit sequence length,
140         * then the minimumSequenceLength is set to shortest subunit sequence
141         * length, but not shorter than the absoluteMinimumSequenceLength.
142         * <p>
143         * This adaptive feature allows the consideration of structures mainly
144         * constructed by very short chains, such as collagen (1A3I)
145         *
146         * @return the absoluteMinimumSequenceLength
147         */
148        public int getAbsoluteMinimumSequenceLength() {
149                return absoluteMinimumSequenceLength;
150        }
151
152        /**
153         * If the shortest subunit sequence length is higher or equal the
154         * minimumSequenceLengthFraction times the median subunit sequence length,
155         * then the minimumSequenceLength is set to shortest subunit sequence
156         * length, but not shorter than the absoluteMinimumSequenceLength.
157         * <p>
158         * This adaptive feature allows the consideration of structures mainly
159         * constructed by very short chains, such as collagen (1A3I)
160         *
161         * @param absoluteMinimumSequenceLength
162         */
163        public void setAbsoluteMinimumSequenceLength(
164                        int absoluteMinimumSequenceLength) {
165                this.absoluteMinimumSequenceLength = absoluteMinimumSequenceLength;
166        }
167
168        /**
169         * If the shortest subunit sequence length is higher or equal the
170         * minimumSequenceLengthFraction times the median subunit sequence length,
171         * then the minimumSequenceLength is set to shortest subunit sequence
172         * length, but not shorter than the absoluteMinimumSequenceLength.
173         * <p>
174         * This adaptive feature allows the consideration of structures mainly
175         * constructed by very short chains, such as collagen (1A3I)
176         *
177         * @return the minimumSequenceLengthFraction
178         */
179        public double getMinimumSequenceLengthFraction() {
180                return minimumSequenceLengthFraction;
181        }
182
183        /**
184         * If the shortest subunit sequence length is higher or equal the
185         * minimumSequenceLengthFraction times the median subunit sequence length,
186         * then the minimumSequenceLength is set to shortest subunit sequence
187         * length, but not shorter than the absoluteMinimumSequenceLength.
188         * <p>
189         * This adaptive feature allows the consideration of structures mainly
190         * constructed by very short chains, such as collagen (1A3I)
191         *
192         * @param minimumSequenceLengthFraction
193         */
194        public void setMinimumSequenceLengthFraction(
195                        double minimumSequenceLengthFraction) {
196                this.minimumSequenceLengthFraction = minimumSequenceLengthFraction;
197        }
198
199        /**
200         * Sequence identity threshold to consider for the subunits clustering.
201         * <p>
202         * Two subunits with sequence identity equal or higher than the threshold
203         * will be clustered together.
204         *
205         * @return sequenceIdentityThreshold
206         */
207        public double getSequenceIdentityThreshold() {
208                return sequenceIdentityThreshold;
209        }
210
211        /**
212         * Sequence identity threshold to consider for the sequence subunit
213         * clustering.
214         * <p>
215         * Two subunits with sequence identity equal or higher than the threshold
216         * will be clustered together.
217         *
218         * @param sequenceIdentityThreshold
219         */
220        public void setSequenceIdentityThreshold(double sequenceIdentityThreshold) {
221                this.sequenceIdentityThreshold = sequenceIdentityThreshold;
222        }
223
224        /**
225         * The minimum coverage of the sequence alignment between two subunits to be
226         * clustered together.
227         *
228         * @return sequenceCoverageThreshold
229         */
230        public double getSequenceCoverageThreshold() {
231                return sequenceCoverageThreshold;
232        }
233
234        /**
235         * The minimum coverage of the sequence alignment between two subunits to be
236         * clustered together.
237         *
238         * @param sequenceCoverageThreshold
239         */
240        public void setSequenceCoverageThreshold(double sequenceCoverageThreshold) {
241                this.sequenceCoverageThreshold = sequenceCoverageThreshold;
242        }
243
244        /**
245         * Structure similarity threshold (measured with RMSD) to consider for the
246         * structural subunit clustering.
247         *
248         * @return rmsdThreshold
249         */
250        public double getRMSDThreshold() {
251                return rmsdThreshold;
252        }
253
254        /**
255         * Structure similarity threshold (measured with RMSD) to consider for the
256         * structural subunit clustering.
257         *
258         * @param rmsdThreshold
259         */
260        public void setRMSDThreshold(double rmsdThreshold) {
261                this.rmsdThreshold = rmsdThreshold;
262        }
263
264        /**
265         * Structure similarity threshold (measured with TMScore) to consider for the
266         * structural subunit clustering.
267         *
268         * @return tmThreshold
269         */
270        public double getTMThreshold() {
271                return tmThreshold;
272        }
273
274        /**
275         * Structure similarity threshold (measured with TMScore) to consider for the
276         * structural subunit clustering.
277         *
278         * @param tmThreshold
279         */
280        public void setTMThreshold(double tmThreshold) {
281                this.tmThreshold = tmThreshold;
282        }
283
284        /**
285         * The minimum coverage of the structure alignment between two subunits to be
286         * clustered together.
287         *
288         * @return structureCoverageThreshold
289         */
290        public double getStructureCoverageThreshold() {
291                return structureCoverageThreshold;
292        }
293
294        /**
295         * The minimum coverage of the structure alignment between two subunits to be
296         * clustered together.
297         *
298         * @param structureCoverageThreshold
299         */
300        public void setStructureCoverageThreshold(double structureCoverageThreshold) {
301                this.structureCoverageThreshold = structureCoverageThreshold;
302        }
303
304        /**
305         * Method to cluster subunits.
306         *
307         * @return clustererMethod
308         */
309        public SubunitClustererMethod getClustererMethod() {
310                return clustererMethod;
311        }
312
313        /**
314         * Method to cluster subunits.
315         *
316         * @param method
317         */
318        public void setClustererMethod(SubunitClustererMethod method) {
319                this.clustererMethod = method;
320        }
321
322        /**
323         * The internal symmetry option divides each {@link Subunit} of each
324         * {@link SubunitCluster} into its internally symmetric repeats.
325         * <p>
326         * The {@link SubunitClustererMethod#STRUCTURE} must be chosen to consider
327         * internal symmetry, otherwise this parameter will be ignored.
328         *
329         * @return true if internal symmetry is considered, false otherwise
330         */
331        public boolean isInternalSymmetry() {
332                return internalSymmetry;
333        }
334
335        /**
336         * The internal symmetry option divides each {@link Subunit} of each
337         * {@link SubunitCluster} into its internally symmetric repeats.
338         * <p>
339         * The {@link SubunitClustererMethod#STRUCTURE} must be chosen to consider
340         * internal symmetry, otherwise this parameter will be ignored.
341         *
342         * @param internalSymmetry
343         *            true if internal symmetry is considered, false otherwise
344         */
345        public void setInternalSymmetry(boolean internalSymmetry) {
346                this.internalSymmetry = internalSymmetry;
347        }
348
349        @Override
350        public String toString() {
351                return "SubunitClustererParameters [minimumSequenceLength="
352                                + minimumSequenceLength + ", absoluteMinimumSequenceLength="
353                                + absoluteMinimumSequenceLength
354                                + ", minimumSequenceLengthFraction="
355                                + minimumSequenceLengthFraction
356                                + ", sequenceIdentityThreshold=" + sequenceIdentityThreshold
357                                + ", rmsdThreshold=" + rmsdThreshold + ", coverageThreshold="
358                                + sequenceCoverageThreshold + ", clustererMethod=" + clustererMethod
359                                + ", internalSymmetry=" + internalSymmetry + "]";
360        }
361
362        /**
363         * Method to superpose subunits (i.e., structural aligner).
364         *
365         * @return superpositionAlgorithm
366         */
367        public String getSuperpositionAlgorithm() {
368                return superpositionAlgorithm;
369        }
370
371        /**
372         * Method to cluster subunits.
373         *
374         * @param superpositionAlgorithm
375         */
376        public void setSuperpositionAlgorithm(String superpositionAlgorithm) {
377                this.superpositionAlgorithm = superpositionAlgorithm;
378        }
379
380        /**
381         * Whether the alignment algorithm should try its best to optimize the alignment,
382         * or we are happy with a quick and dirty result. Effect depends on implementation
383         * of the specific algorithm's method.   *
384         *
385         * @return optimizeAlignment
386         */
387        public boolean isOptimizeAlignment() {
388                return optimizeAlignment;
389        }
390
391        /**
392         * Whether the alignment algorithm should try its best to optimize the alignment,
393         * or we are happy with a quick and dirty result. Effect depends on implementation
394         * of the specific algorithm's method.   *
395         *
396         * @param optimizeAlignment
397         */
398        public void setOptimizeAlignment(boolean optimizeAlignment) {
399                this.optimizeAlignment = optimizeAlignment;
400        }
401
402        /**
403         * Use RMSD for evaluating structure similarity
404         *
405         * @return useRMSD
406         */
407        public boolean isUseRMSD() { return useRMSD; }
408
409        /**
410         * Use RMSD for evaluating structure similarity
411         *
412         * @param useRMSD
413         */
414        public void setUseRMSD(boolean useRMSD) {
415                this.useRMSD = useRMSD;
416        }
417
418        /**
419         * Use TMScore for evaluating structure similarity
420         *
421         * @return useTMScore
422         */
423        public boolean isUseTMScore() {
424                return useTMScore;
425        }
426
427        /**
428         * Use TMScore for evaluating structure similarity
429         *
430         * @param useTMScore
431         */
432        public void setUseTMScore(boolean useTMScore) {
433                this.useTMScore = useTMScore;
434        }
435
436        /**
437         * Use sequence coverage for evaluating sequence similarity
438         *
439         * @return useSequenceCoverage
440         */
441        public boolean isUseSequenceCoverage() {
442                return useSequenceCoverage;
443        }
444
445        /**
446         * Use sequence coverage for evaluating sequence similarity
447         *
448         * @param useSequenceCoverage
449         */
450        public void setUseSequenceCoverage(boolean useSequenceCoverage) {
451                this.useSequenceCoverage = useSequenceCoverage;
452        }
453
454        /**
455         * Use structure coverage for evaluating sequence similarity
456         *
457         * @return useStructureCoverage
458         */
459        public boolean isUseStructureCoverage() {
460                return useStructureCoverage;
461        }
462
463        /**
464         * Use structure coverage for evaluating sequence similarity
465         *
466         * @param useStructureCoverage
467         */
468        public void setUseStructureCoverage(boolean useStructureCoverage) {
469                this.useStructureCoverage = useStructureCoverage;
470        }
471
472        /**
473         * Use metrics calculated relative to the whole sequence or structure,
474         * rather than the aligned part only
475         *
476         * @return useGlobalMetrics
477         */
478        public boolean isUseGlobalMetrics() {
479                return useGlobalMetrics;
480        }
481
482        /**
483         * Use metrics calculated relative to the whole sequence or structure,
484         * rather than the aligned part only
485         *
486         * @param useGlobalMetrics
487         */
488        public void setUseGlobalMetrics(boolean useGlobalMetrics) {
489                this.useGlobalMetrics = useGlobalMetrics;
490        }
491
492        /**
493         * Whether the subunits can be considered "identical" by sequence alignment.
494         * For local sequence alignment (normalized by the number of aligned pairs)
495         * this means 0.95 or higher identity and 0.75 or higher coverage.
496         * For global sequence alignment (normalised by the alignment length)
497         * this means 0.85 or higher sequence identity.
498         *
499         * @param sequenceIdentity
500         * @param sequenceCoverage
501         * @return true if the sequence alignment scores are equal to
502         * or better than the "high confidence" scores, false otherwise.
503         */
504        public boolean isHighConfidenceScores(double sequenceIdentity, double sequenceCoverage) {
505                if (useGlobalMetrics)
506                        return sequenceIdentity>=hcSequenceIdentityGlobal;
507                else
508                        return sequenceIdentity>=hcSequenceIdentityLocal && sequenceCoverage >= hcSequenceCoverageLocal;
509        }
510
511        /**
512         * Whether to use the entity id of subunits to infer that sequences are identical.
513         * Only applies if the {@link SubunitClustererMethod} is a sequence based one.
514         * @return the flag
515         * @since 5.4.0
516         */
517        public boolean isUseEntityIdForSeqIdentityDetermination() {
518                return useEntityIdForSeqIdentityDetermination;
519        }
520
521        /**
522         * Whether to use the entity id of subunits to infer that sequences are identical.
523         * Only applies if the {@link SubunitClustererMethod} is a sequence based one.
524         * Note this requires {@link org.biojava.nbio.structure.io.FileParsingParameters#setAlignSeqRes(boolean)} to be
525         * set to true.
526         * @param useEntityIdForSeqIdentityDetermination the flag to be set
527         * @since 5.4.0
528         */
529        public void setUseEntityIdForSeqIdentityDetermination(boolean useEntityIdForSeqIdentityDetermination) {
530                this.useEntityIdForSeqIdentityDetermination = useEntityIdForSeqIdentityDetermination;
531        }
532}