001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Sep 15, 2009
021 * Author: Andreas Prlic
022 *
023 */
024
025package org.biojava.nbio.structure.align.ce;
026
027import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
028import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
029import org.biojava.nbio.structure.align.util.CliTools;
030import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
031
032import java.util.ArrayList;
033import java.util.List;
034
035
036/**
037 * Contains the parameters that can be sent to CE
038 *
039 * @author Andreas Prlic
040 *
041 */
042public class CeParameters implements ConfigStrucAligParams  {
043
044        protected int winSize;
045        protected double rmsdThr;
046        protected double rmsdThrJoin;
047        protected double maxOptRMSD;
048
049        public static enum ScoringStrategy {
050                CA_SCORING("CA only"),
051                SIDE_CHAIN_SCORING("Sidechain orientation"),
052                SIDE_CHAIN_ANGLE_SCORING("Angle between sidechains"),
053                CA_AND_SIDE_CHAIN_ANGLE_SCORING("CA distance+Angle between sidechains"),
054                SEQUENCE_CONSERVATION("Sequence Conservation");
055                public static ScoringStrategy DEFAULT_SCORING_STRATEGY = CA_SCORING;
056
057                private String name;
058                private ScoringStrategy(String name) {
059                        this.name = name;
060                }
061                @Override
062                public String toString() {
063                        return name;
064                }
065        }
066
067        protected ScoringStrategy scoringStrategy;
068        //String[] alignmentAtoms;
069        protected int maxGapSize;
070
071        protected boolean showAFPRanges;
072        protected int  sideChainScoringType;
073
074        /**
075         * Whether the CE algorithm should extend the best found trace with dynamic programming,
076         * while keeping RMSD at about the same level. (Shindyalov and Bourne, 1998)
077         * This is useful for edge cases with remote homology, but can be slow for large structures.
078         */
079        private boolean optimizeAlignment;
080
081        protected static final double DEFAULT_GAP_OPEN = 5.0;
082        protected static final double DEFAULT_GAP_EXTENSION = 0.5;
083        protected static final double DISTANCE_INCREMENT = 0.5;
084        protected static final double DEFAULT_oRmsdThr = 2.0;
085        protected static final String DEFAULT_SUBSTITUTION_MATRIX = "PRLA000101";
086
087        protected double gapOpen;
088        protected double gapExtension;
089        protected double distanceIncrement;
090        protected double oRmsdThr;
091
092        protected int maxNrIterationsForOptimization;
093
094        protected SubstitutionMatrix<AminoAcidCompound> substitutionMatrix;
095        protected double seqWeight;
096
097        public CeParameters(){
098                reset();
099        }
100
101        @Override
102        public String toString() {
103                return "CeParameters [scoringStrategy=" + scoringStrategy
104                + ", maxGapSize=" + maxGapSize
105                + ", rmsdThr=" + rmsdThr
106                + ", rmsdThrJoin="+ rmsdThrJoin
107                + ", winSize=" + winSize
108                + ", showAFPRanges=" + showAFPRanges
109                + ", maxOptRMSD=" + maxOptRMSD
110                + ", seqWeight=" + seqWeight
111                + "]";
112        }
113
114
115        @Override
116        public void reset(){
117                winSize = 8;
118                rmsdThr = 3.0;
119                rmsdThrJoin = 4.0;
120                scoringStrategy = ScoringStrategy.DEFAULT_SCORING_STRATEGY;
121                maxGapSize = 30;
122                showAFPRanges = false;
123                maxOptRMSD = 99;
124
125                gapOpen = DEFAULT_GAP_OPEN;
126                gapExtension = DEFAULT_GAP_EXTENSION;
127                distanceIncrement = DISTANCE_INCREMENT;
128                oRmsdThr = DEFAULT_oRmsdThr;
129
130                maxNrIterationsForOptimization = Integer.MAX_VALUE;
131                seqWeight = 0;
132                optimizeAlignment = true;
133        }
134
135        /** The window size to look at
136         *
137         * @return window size
138         */
139        public Integer getWinSize() {
140                return winSize;
141        }
142        public void setWinSize(Integer winSize) {
143                this.winSize = winSize;
144        }
145
146        /** RMSD Threshold
147         *
148         * @return RMSD threshold
149         */
150        public Double getRmsdThr() {
151                return rmsdThr;
152        }
153        public void setRmsdThr(Double rmsdThr) {
154                this.rmsdThr = rmsdThr;
155        }
156
157        /** RMSD threshold for joining of AFPs
158         *
159         * @return rmsd threshold
160         */
161        public Double getRmsdThrJoin() {
162                return rmsdThrJoin;
163        }
164        public void setRmsdThrJoin(Double rmsdThrJoin) {
165                this.rmsdThrJoin = rmsdThrJoin;
166        }
167
168        public ScoringStrategy getScoringStrategy()
169        {
170                return scoringStrategy;
171        }
172
173
174        /** Set the scoring strategy to use. 0 is default CE scoring scheme. 1 uses
175         * Side chain orientation.
176         *
177         * @param scoringStrategy
178         */
179        public void setScoringStrategy(ScoringStrategy scoringStrategy)
180        {
181                this.scoringStrategy = scoringStrategy;
182        }
183
184
185
186        /** Set the Max gap size parameter. Default 30. For unlimited gaps set to -1
187         *
188         * @param maxGapSize
189         */
190        public void setMaxGapSize(Integer maxGapSize){
191                this.maxGapSize = maxGapSize;
192        }
193
194        /** the Max gap size parameter G . default is 30, which was
195         * described to obtained empirically in the CE paper.
196         * the larger the max gap size, the longer the compute time,
197         * but in same cases drastically improved results. Set to -1 for unlimited gap size.
198         *
199         * @return max gap size parameter
200         */
201        public Integer getMaxGapSize() {
202                return maxGapSize;
203        }
204
205
206        @Override
207        public List<String> getUserConfigHelp() {
208                List<String> params =new ArrayList<String>();
209                String helpMaxGap = "This parameter configures the maximum gap size G, that is applied during the AFP extension. The larger the value, the longer the calculation time can become, Default value is 30. Set to 0 for no limit. " ;
210                //String helpRmsdThr = "This configures the RMSD threshold applied during the trace of the fragment matrix.";
211                String helpWinSize = "This configures the fragment size m of Aligned Fragment Pairs (AFPs).";
212
213                params.add(helpMaxGap);
214                //params.add(helpRmsdThr);
215                params.add(helpWinSize);
216                params.add("Which scoring function to use: "+CliTools.getEnumValuesAsString(ScoringStrategy.class) );
217                params.add("The maximum RMSD at which to stop alignment optimization. (default: unlimited=99)");
218                params.add("Gap opening penalty during alignment optimization [default: "+DEFAULT_GAP_OPEN+"].");
219                params.add("Gap extension penalty during alignment optimization [default: "+DEFAULT_GAP_EXTENSION+"].");
220                return params;
221        }
222
223        @Override
224        public List<String> getUserConfigParameters() {
225                List<String> params = new ArrayList<String>();
226                params.add("MaxGapSize");
227                //params.add("RmsdThr");
228                params.add("WinSize");
229                params.add("ScoringStrategy");
230                params.add("MaxOptRMSD");
231                params.add("GapOpen");
232                params.add("GapExtension");
233
234                return params;
235        }
236
237        @Override
238        public List<String> getUserConfigParameterNames(){
239                List<String> params = new ArrayList<String>();
240                params.add("max. gap size G (during AFP extension).");
241                //params.add("RMSD threshold during trace of the fragment matrix.");
242                params.add("fragment size m");
243                params.add("Which scoring function to use");
244                params.add("RMSD threshold for alignment.");
245                params.add("Gap open");
246                params.add("Gap extension");
247                return params;
248        }
249
250        @Override
251        @SuppressWarnings("rawtypes")
252        public List<Class> getUserConfigTypes() {
253                List<Class> params = new ArrayList<Class>();
254                params.add(Integer.class);
255                //params.add(Double.class);
256                params.add(Integer.class);
257                params.add(ScoringStrategy.class);
258                params.add(Double.class);
259                params.add(Double.class);
260                params.add(Double.class);
261                return params;
262        }
263
264
265
266        /**
267         * @return whether information about AFPs should be printed
268         */
269        public boolean isShowAFPRanges()
270        {
271                return showAFPRanges;
272        }
273        public void setShowAFPRanges(boolean showAFPRanges)
274        {
275                this.showAFPRanges = showAFPRanges;
276        }
277
278
279
280
281
282        /** set the maximum RMSD cutoff to be applied during alignment optimization. (default: 99 = unlimited)
283         *
284         * @param param maxOptRMSD
285         */
286        public void setMaxOptRMSD(Double param){
287                if ( param == null)
288                        param = 99d;
289                maxOptRMSD = param;
290        }
291
292        /** Returns the maximum RMSD cutoff to be applied during alignment optimization (default: 99 = unlimited)
293         *
294         * @return maxOptRMSD
295         */
296        public Double getMaxOptRMSD()
297        {
298                return maxOptRMSD;
299        }
300
301
302
303        public Double getGapOpen()
304        {
305                return gapOpen;
306        }
307
308
309
310        public void setGapOpen(Double gapOpen)
311        {
312                this.gapOpen = gapOpen;
313        }
314
315
316
317        public Double getGapExtension()
318        {
319                return gapExtension;
320        }
321
322
323
324        public void setGapExtension(Double gapExtension)
325        {
326                this.gapExtension = gapExtension;
327        }
328
329
330
331        public Double getDistanceIncrement()
332        {
333                return distanceIncrement;
334        }
335
336
337
338        public void setDistanceIncrement(Double distanceIncrement)
339        {
340                this.distanceIncrement = distanceIncrement;
341        }
342
343
344
345        /** Get the Original RMSD threshold from which the alignment optimization is started
346         *
347         * @return oRMSDThreshold
348         */
349        public Double getORmsdThr()
350        {
351                return oRmsdThr;
352        }
353
354
355
356        /** Set the Original RMSD threshold from which the alignment optimization is started
357         *
358         * @param oRmsdThr the threshold
359         */
360        public void setORmsdThr(Double oRmsdThr)
361        {
362                this.oRmsdThr = oRmsdThr;
363        }
364
365
366        /** Get the maximum nr of times the (slow) optimiziation of alignment should iterate. Default: unlimited
367         *
368         * @param maxNrIterationsForOptimization
369         */
370        public int getMaxNrIterationsForOptimization() {
371                return maxNrIterationsForOptimization;
372        }
373
374
375        /** Set the maximum nr of times the (slow) optimiziation of alignment should iterate. Default: unlimited
376         *
377         * @param maxNrIterationsForOptimization
378         */
379        public void setMaxNrIterationsForOptimization(int maxNrIterationsForOptimization) {
380                this.maxNrIterationsForOptimization = maxNrIterationsForOptimization;
381        }
382
383
384        /** Should sequence conservation be considered as part of the alignment? If yes, this weight factor allows to determine how much.
385         *  By default this is set to 0, meaning no contribution of the sequence alignment score.
386         *
387         * @return seqWeight the weight factor (default 0)
388         */
389
390        public double getSeqWeight() {
391                return seqWeight;
392        }
393
394
395        /** Should sequence conservation be considered as part of the alignment? If yes, this weight factor allows to determine how much.
396         *  By default this is set to 0, meaning no contribution of the sequence alignment score.
397         *
398         * @param seqWeight the weight factor (default 0)
399         */
400        public void setSeqWeight(double seqWeight) {
401                this.seqWeight = seqWeight;
402        }
403
404
405        /** Sets the  substitution matrix to be used for influencing the alignment with sequence conservation information.
406         * Default: SDM matrix (Prlic et al 2000)
407         * @return substitutionMatrix
408         */
409        public SubstitutionMatrix<AminoAcidCompound> getSubstitutionMatrix() {
410                if ( substitutionMatrix == null){
411                        substitutionMatrix = SubstitutionMatrixHelper.getMatrixFromAAINDEX(DEFAULT_SUBSTITUTION_MATRIX);
412
413                }
414                return substitutionMatrix;
415        }
416
417
418        /** Sets the  substitution matrix to be used for influencing the alignment with sequence conservation information.
419         * Default: SDM matrix (Prlic et al 2000)
420         * @param substitutionMatrix
421         */
422        public void setSubstitutionMatrix(
423                        SubstitutionMatrix<AminoAcidCompound> substitutionMatrix) {
424                this.substitutionMatrix = substitutionMatrix;
425        }
426
427
428        /**
429         * Whether the CE algorithm should extend the best found trace with dynamic programming,
430         * while keeping RMSD at about the same level. This is useful for edge cases with remote homology,
431         * but can be slow for large structures.
432         *
433         * @return optimizeAlignment
434         */
435        public boolean isOptimizeAlignment() {
436                return optimizeAlignment;
437        }
438
439        /**
440         * Whether the CE algorithm should extend the best found trace with dynamic programming,
441         * while keeping RMSD at about the same level. This is useful for edge cases with remote homology,
442         * but can be slow for large structures.
443         *
444         * @param optimizeAlignment
445         */
446        public void setOptimizeAlignment(boolean optimizeAlignment) {
447                this.optimizeAlignment = optimizeAlignment;
448        }
449
450}