001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.cluster;
022
023import org.biojava.nbio.structure.align.ce.CeMain;
024
025import java.io.Serializable;
026
027/**
028 * The SubunitClustererParameters specifies the options used for the clustering
029 * of the subunits in structures using the {@link SubunitClusterer}.
030 *
031 * @author Peter Rose
032 * @author Aleix Lafita
033 * @since 5.0.0
034 *
035 */
036public class SubunitClustererParameters implements Serializable {
037
038        private static final long serialVersionUID = 1L;
039
040        private int minimumSequenceLength = 20;
041        private int absoluteMinimumSequenceLength = 5;
042        private double minimumSequenceLengthFraction = 0.75;
043
044        private boolean useGlobalMetrics;
045        private double sequenceIdentityThreshold;
046        private double sequenceCoverageThreshold = 0.75;
047
048        private double rmsdThreshold = 3.0;
049        private double structureCoverageThreshold = 0.75;
050        private double tmThreshold = 0.5;
051
052        private SubunitClustererMethod clustererMethod = SubunitClustererMethod.SEQUENCE_STRUCTURE;
053
054        private String superpositionAlgorithm = CeMain.algorithmName;
055        private boolean optimizeAlignment = true;
056
057        private boolean useSequenceCoverage;
058        private boolean useRMSD;
059        private boolean useStructureCoverage;
060        private boolean useTMScore;
061
062        private boolean internalSymmetry = false;
063
064        /**
065         * Subunits aligned with these or better scores will be considered "identical".
066         */
067        private static final double hcSequenceIdentityLocal = 0.95;
068        private static final double hcSequenceCoverageLocal = 0.75;
069        private static final double hcSequenceIdentityGlobal = 0.85;
070
071        /**
072         * "Local" metrics are scoring
073         * SubunitClustererMethod.SEQUENCE: sequence identity of a local alignment
074         *                                  (normalised by the number of aligned residues)
075         *                                  sequence coverage of the alignment
076         *                                  (normalised by the length of the longer sequence)
077         * SubunitClustererMethod.STRUCTURE: RMSD of the aligned substructures
078         *                                   and structure coverage of the alignment
079         *                                   (normalised by the length of the larger structure)
080         * Two thresholds for each method are required.
081         *
082         * "Global" metrics are scoring
083         * SubunitClustererMethod.SEQUENCE: sequence identity of a global alignment
084         *                                  (normalised by the length of the alignment)
085         * SubunitClustererMethod.STRUCTURE: TMScore of the aligned structures
086         *                                  (normalised by the length of the larger structure)
087         * One threshold for each method is required.
088         *
089         */
090        public SubunitClustererParameters(boolean useGlobalMetrics) {
091                this.useGlobalMetrics = useGlobalMetrics;
092
093                if (useGlobalMetrics) {
094                        sequenceIdentityThreshold = hcSequenceIdentityGlobal;
095                        useSequenceCoverage = false;
096                        useRMSD = false;
097                        useStructureCoverage = false;
098                        useTMScore = true;
099                } else {
100                        sequenceIdentityThreshold = hcSequenceIdentityLocal;
101                        useSequenceCoverage = true;
102                        useRMSD = true;
103                        useStructureCoverage = true;
104                        useTMScore = false;
105                }
106        }
107
108        /**
109         * Initialize with "local" metrics by default.
110         */
111        public SubunitClustererParameters() {
112                this(false);
113        }
114
115        /**
116         * Get the minimum number of residues of a subunits to be considered in the
117         * clusters.
118         *
119         * @return minimumSequenceLength
120         */
121        public int getMinimumSequenceLength() {
122                return minimumSequenceLength;
123        }
124
125        /**
126         * Set the minimum number of residues of a subunits to be considered in the
127         * clusters.
128         *
129         * @param minimumSequenceLength
130         */
131        public void setMinimumSequenceLength(int minimumSequenceLength) {
132                this.minimumSequenceLength = minimumSequenceLength;
133        }
134
135        /**
136         * If the shortest subunit sequence length is higher or equal the
137         * minimumSequenceLengthFraction times the median subunit sequence length,
138         * then the minimumSequenceLength is set to shortest subunit sequence
139         * length, but not shorter than the absoluteMinimumSequenceLength.
140         * <p>
141         * This adaptive feature allows the consideration of structures mainly
142         * constructed by very short chains, such as collagen (1A3I)
143         *
144         * @return the absoluteMinimumSequenceLength
145         */
146        public int getAbsoluteMinimumSequenceLength() {
147                return absoluteMinimumSequenceLength;
148        }
149
150        /**
151         * If the shortest subunit sequence length is higher or equal the
152         * minimumSequenceLengthFraction times the median subunit sequence length,
153         * then the minimumSequenceLength is set to shortest subunit sequence
154         * length, but not shorter than the absoluteMinimumSequenceLength.
155         * <p>
156         * This adaptive feature allows the consideration of structures mainly
157         * constructed by very short chains, such as collagen (1A3I)
158         *
159         * @param absoluteMinimumSequenceLength
160         */
161        public void setAbsoluteMinimumSequenceLength(
162                        int absoluteMinimumSequenceLength) {
163                this.absoluteMinimumSequenceLength = absoluteMinimumSequenceLength;
164        }
165
166        /**
167         * If the shortest subunit sequence length is higher or equal the
168         * minimumSequenceLengthFraction times the median subunit sequence length,
169         * then the minimumSequenceLength is set to shortest subunit sequence
170         * length, but not shorter than the absoluteMinimumSequenceLength.
171         * <p>
172         * This adaptive feature allows the consideration of structures mainly
173         * constructed by very short chains, such as collagen (1A3I)
174         *
175         * @return the minimumSequenceLengthFraction
176         */
177        public double getMinimumSequenceLengthFraction() {
178                return minimumSequenceLengthFraction;
179        }
180
181        /**
182         * If the shortest subunit sequence length is higher or equal the
183         * minimumSequenceLengthFraction times the median subunit sequence length,
184         * then the minimumSequenceLength is set to shortest subunit sequence
185         * length, but not shorter than the absoluteMinimumSequenceLength.
186         * <p>
187         * This adaptive feature allows the consideration of structures mainly
188         * constructed by very short chains, such as collagen (1A3I)
189         *
190         * @param minimumSequenceLengthFraction
191         */
192        public void setMinimumSequenceLengthFraction(
193                        double minimumSequenceLengthFraction) {
194                this.minimumSequenceLengthFraction = minimumSequenceLengthFraction;
195        }
196
197        /**
198         * Sequence identity threshold to consider for the subunits clustering.
199         * <p>
200         * Two subunits with sequence identity equal or higher than the threshold
201         * will be clustered together.
202         *
203         * @return sequenceIdentityThreshold
204         */
205        public double getSequenceIdentityThreshold() {
206                return sequenceIdentityThreshold;
207        }
208
209        /**
210         * Sequence identity threshold to consider for the sequence subunit
211         * clustering.
212         * <p>
213         * Two subunits with sequence identity equal or higher than the threshold
214         * will be clustered together.
215         *
216         * @param sequenceIdentityThreshold
217         */
218        public void setSequenceIdentityThreshold(double sequenceIdentityThreshold) {
219                this.sequenceIdentityThreshold = sequenceIdentityThreshold;
220        }
221
222        /**
223         * The minimum coverage of the sequence alignment between two subunits to be
224         * clustered together.
225         *
226         * @return sequenceCoverageThreshold
227         */
228        public double getSequenceCoverageThreshold() {
229                return sequenceCoverageThreshold;
230        }
231
232        /**
233         * The minimum coverage of the sequence alignment between two subunits to be
234         * clustered together.
235         *
236         * @param sequenceCoverageThreshold
237         */
238        public void setSequenceCoverageThreshold(double sequenceCoverageThreshold) {
239                this.sequenceCoverageThreshold = sequenceCoverageThreshold;
240        }
241
242        /**
243         * Structure similarity threshold (measured with RMSD) to consider for the
244         * structural subunit clustering.
245         *
246         * @return rmsdThreshold
247         */
248        public double getRMSDThreshold() {
249                return rmsdThreshold;
250        }
251
252        /**
253         * Structure similarity threshold (measured with RMSD) to consider for the
254         * structural subunit clustering.
255         *
256         * @param rmsdThreshold
257         */
258        public void setRMSDThreshold(double rmsdThreshold) {
259                this.rmsdThreshold = rmsdThreshold;
260        }
261
262        /**
263         * Structure similarity threshold (measured with TMScore) to consider for the
264         * structural subunit clustering.
265         *
266         * @return tmThreshold
267         */
268        public double getTMThreshold() {
269                return tmThreshold;
270        }
271
272        /**
273         * Structure similarity threshold (measured with TMScore) to consider for the
274         * structural subunit clustering.
275         *
276         * @param tmThreshold
277         */
278        public void setTMThreshold(double tmThreshold) {
279                this.tmThreshold = tmThreshold;
280        }
281
282        /**
283         * The minimum coverage of the structure alignment between two subunits to be
284         * clustered together.
285         *
286         * @return structureCoverageThreshold
287         */
288        public double getStructureCoverageThreshold() {
289                return structureCoverageThreshold;
290        }
291
292        /**
293         * The minimum coverage of the structure alignment between two subunits to be
294         * clustered together.
295         *
296         * @param structureCoverageThreshold
297         */
298        public void setStructureCoverageThreshold(double structureCoverageThreshold) {
299                this.structureCoverageThreshold = structureCoverageThreshold;
300        }
301
302        /**
303         * Method to cluster subunits.
304         *
305         * @return clustererMethod
306         */
307        public SubunitClustererMethod getClustererMethod() {
308                return clustererMethod;
309        }
310
311        /**
312         * Method to cluster subunits.
313         *
314         * @param method
315         */
316        public void setClustererMethod(SubunitClustererMethod method) {
317                this.clustererMethod = method;
318        }
319
320        /**
321         * The internal symmetry option divides each {@link Subunit} of each
322         * {@link SubunitCluster} into its internally symmetric repeats.
323         * <p>
324         * The {@link SubunitClustererMethod#STRUCTURE} must be chosen to consider
325         * internal symmetry, otherwise this parameter will be ignored.
326         *
327         * @return true if internal symmetry is considered, false otherwise
328         */
329        public boolean isInternalSymmetry() {
330                return internalSymmetry;
331        }
332
333        /**
334         * The internal symmetry option divides each {@link Subunit} of each
335         * {@link SubunitCluster} into its internally symmetric repeats.
336         * <p>
337         * The {@link SubunitClustererMethod#STRUCTURE} must be chosen to consider
338         * internal symmetry, otherwise this parameter will be ignored.
339         *
340         * @param internalSymmetry
341         *            true if internal symmetry is considered, false otherwise
342         */
343        public void setInternalSymmetry(boolean internalSymmetry) {
344                this.internalSymmetry = internalSymmetry;
345        }
346
347        @Override
348        public String toString() {
349                return "SubunitClustererParameters [minimumSequenceLength="
350                                + minimumSequenceLength + ", absoluteMinimumSequenceLength="
351                                + absoluteMinimumSequenceLength
352                                + ", minimumSequenceLengthFraction="
353                                + minimumSequenceLengthFraction
354                                + ", sequenceIdentityThreshold=" + sequenceIdentityThreshold
355                                + ", rmsdThreshold=" + rmsdThreshold + ", coverageThreshold="
356                                + sequenceCoverageThreshold + ", clustererMethod=" + clustererMethod
357                                + ", internalSymmetry=" + internalSymmetry + "]";
358        }
359
360        /**
361         * Method to superpose subunits (i.e., structural aligner).
362         *
363         * @return superpositionAlgorithm
364         */
365        public String getSuperpositionAlgorithm() {
366                return superpositionAlgorithm;
367        }
368
369        /**
370         * Method to cluster subunits.
371         *
372         * @param superpositionAlgorithm
373         */
374        public void setSuperpositionAlgorithm(String superpositionAlgorithm) {
375                this.superpositionAlgorithm = superpositionAlgorithm;
376        }
377
378        /**
379         * Whether the alignment algorithm should try its best to optimize the alignment,
380         * or we are happy with a quick and dirty result. Effect depends on implementation
381         * of the specific algorithm's method.   *
382         *
383         * @return optimizeAlignment
384         */
385        public boolean isOptimizeAlignment() {
386                return optimizeAlignment;
387        }
388
389        /**
390         * Whether the alignment algorithm should try its best to optimize the alignment,
391         * or we are happy with a quick and dirty result. Effect depends on implementation
392         * of the specific algorithm's method.   *
393         *
394         * @param optimizeAlignment
395         */
396        public void setOptimizeAlignment(boolean optimizeAlignment) {
397                this.optimizeAlignment = optimizeAlignment;
398        }
399
400        /**
401         * Use RMSD for evaluating structure similarity
402         *
403         * @return useRMSD
404         */
405        public boolean isUseRMSD() { return useRMSD; }
406
407        /**
408         * Use RMSD for evaluating structure similarity
409         *
410         * @param useRMSD
411         */
412        public void setUseRMSD(boolean useRMSD) {
413                this.useRMSD = useRMSD;
414        }
415
416        /**
417         * Use TMScore for evaluating structure similarity
418         *
419         * @return useTMScore
420         */
421        public boolean isUseTMScore() {
422                return useTMScore;
423        }
424
425        /**
426         * Use TMScore for evaluating structure similarity
427         *
428         * @param useTMScore
429         */
430        public void setUseTMScore(boolean useTMScore) {
431                this.useTMScore = useTMScore;
432        }
433
434        /**
435         * Use sequence coverage for evaluating sequence similarity
436         *
437         * @return useSequenceCoverage
438         */
439        public boolean isUseSequenceCoverage() {
440                return useSequenceCoverage;
441        }
442
443        /**
444         * Use sequence coverage for evaluating sequence similarity
445         *
446         * @param useSequenceCoverage
447         */
448        public void setUseSequenceCoverage(boolean useSequenceCoverage) {
449                this.useSequenceCoverage = useSequenceCoverage;
450        }
451
452        /**
453         * Use structure coverage for evaluating sequence similarity
454         *
455         * @return useStructureCoverage
456         */
457        public boolean isUseStructureCoverage() {
458                return useStructureCoverage;
459        }
460
461        /**
462         * Use structure coverage for evaluating sequence similarity
463         *
464         * @param useStructureCoverage
465         */
466        public void setUseStructureCoverage(boolean useStructureCoverage) {
467                this.useStructureCoverage = useStructureCoverage;
468        }
469
470        /**
471         * Use metrics calculated relative to the whole sequence or structure,
472         * rather than the aligned part only
473         *
474         * @return useGlobalMetrics
475         */
476        public boolean isUseGlobalMetrics() {
477                return useGlobalMetrics;
478        }
479
480        /**
481         * Use metrics calculated relative to the whole sequence or structure,
482         * rather than the aligned part only
483         *
484         * @param useGlobalMetrics
485         */
486        public void setUseGlobalMetrics(boolean useGlobalMetrics) {
487                this.useGlobalMetrics = useGlobalMetrics;
488        }
489
490        /**
491         * Whether the subunits can be considered "identical" by sequence alignment.
492         * For local sequence alignment (normalized by the number of aligned pairs)
493         * this means 0.95 or higher identity and 0.75 or higher coverage.
494         * For global sequence alignment (normalised by the alignment length)
495         * this means 0.85 or higher sequence identity.
496         *
497         * @param sequenceIdentity
498         * @param sequenceCoverage
499         * @return true if the sequence alignment scores are equal to
500         * or better than the "high confidence" scores, false otherwise.
501         */
502        public boolean isHighConfidenceScores(double sequenceIdentity, double sequenceCoverage) {
503                if (useGlobalMetrics)
504                        return sequenceIdentity>=hcSequenceIdentityGlobal;
505                else
506                        return sequenceIdentity>=hcSequenceIdentityLocal && sequenceCoverage >= hcSequenceCoverageLocal;
507        }
508
509
510}