001/*
002 * @(#)RonnConstraint.java      1.0 June 2010
003 *
004 * Copyright (c) 2010 Peter Troshin
005 *
006 * JRONN version: 3.1
007 *
008 *        BioJava development code
009 *
010 * This code may be freely distributed and modified under the
011 * terms of the GNU Lesser General Public Licence.  This should
012 * be distributed with the code.  If you do not have a copy,
013 * see:
014 *
015 *      http://www.gnu.org/copyleft/lesser.html
016 *
017 * Copyright for this code is held jointly by the individual
018 * authors.  These should be listed in @author doc comments.
019 *
020 * For more information on the BioJava project and its aims,
021 * or to join the biojava-l mailing list, visit the home page
022 * at:
023 *
024 *      http://www.biojava.org/
025 *
026 */
027
028package org.biojava.nbio.ronn;
029
030
/**
 * A collection of various constraint values used by RONN: default
 * order/disorder levels, the residue index table, the substitution matrix,
 * per-model thresholds and the command line help text.
 * <p>
 * All array constants are shared instances and must be treated as read-only
 * by callers.
 *
 * @author Peter Troshin
 * @version 1.0
 * @since 3.0.2
 */
public final class RonnConstraint {

    /** Default disorder value. */
    public static final float DEFAULT_DISORDER = 0.53f;

    /** Default order value. */
    public static final float DEFAULT_ORDER = 0.47f;

    /** Default probability threshold for ranges. */
    public static final float DEFAULT_RANGE_PROBABILITY_THRESHOLD = 0.50f;

    // A b C D E F G H I j K L M N o P Q R S T u V W x Y
    //
    // 0 0 1 2 3 4 5 6 7 0 8 9 10 11 0 12 13 14 15 16 0 17 18 0 19

    /**
     * Maps the 25 letters 'A'..'Y' (by alphabet position) to an index in the
     * range 0..19. The letters shown in lower case in the table above
     * (B, J, O, U, X) have no index of their own and share index 0 with 'A'.
     * There is no entry for 'Z'.
     */
    public static final short[] INDEX = { 0, 0, 1, 2, 3, 4, 5, 6, 7, 0, 8, 9,
            10, 11, 0, 12, 13, 14, 15, 16, 0, 17, 18, 0, 19 };

    /**
     * 20 x 20 substitution score table; presumably rows and columns are
     * addressed with the values from {@link #INDEX} (confirm against the
     * callers).
     * <p>
     * NOTE(review): the table is not symmetric (for example
     * {@code [0][17] == -2} while {@code [17][0] == 0}). The values are kept
     * exactly as they were; presumably they mirror the data of the original
     * RONN implementation, so verify before "correcting" them.
     */
    public static final short[][] Blosum62 = {
        { 4, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -2, -3, -2 },
        { 0, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2 },
        { -2, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, 1, -3, -4, -3 },
        { -1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, 0, -3, -3, -2 },
        { -2, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, 3 },
        { 0, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, -2, -2, -2, -2, 0, 1, 0, -2, -3 },
        { -2, -3, 1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, 0, -2, -2, 2 },
        { -1, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -2, 1, -3, -1 },
        { -1, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, 0, -3, -3, -2 },
        { -1, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -2, 3, -2, -1 },
        { -1, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, -2, -1, -1 },
        { -2, -3, 1, 0, -3, 0, -1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -2 },
        { -1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -1, 7, -1, -2, -1, 1, -2, -4, -3 },
        { -1, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, 0, -2, -2, -1 },
        { -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -2 },
        { 1, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, -2 },
        { -1, -1, 1, 0, -2, 1, 0, -2, 0, -2, -1, 0, 1, 0, -1, 1, 4, -2, -3, -2 },
        { 0, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, -2, 4, -3, -1 },
        { -3, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -3, -3, 11, 2 },
        { -2, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, 7 } };

    // Threshold values, one array of four floats per numbered entry 0..9.
    // They are exposed to callers through the Threshold enum below.

    static final float[] THRESHOLD0 = { 0.09847548204866169f,
            0.5537946867723033f, 0.052493213903229766f, 0.07714031903493762f };

    static final float[] THRESHOLD1 = { 0.09032956077766974f,
            0.5167594539472075f, 0.04596823441915963f, 0.06455503987769765f };

    static final float[] THRESHOLD2 = { 0.09266796710382286f,
            0.5127732233896729f, 0.04963484289158484f, 0.061048745226114226f };

    static final float[] THRESHOLD3 = { 0.10562230953899814f,
            0.4488757690530404f, 0.04922765471815812f, 0.0824807293665649f };

    static final float[] THRESHOLD4 = { 0.1163716006651586f,
            0.5315238539228951f, 0.0556565226094971f, 0.0714892726762588f };

    static final float[] THRESHOLD5 = { 0.09358976618303182f,
            0.49296410198137725f, 0.054219917228374236f, 0.06593535778132877f };

    static final float[] THRESHOLD6 = { 0.10526844980518248f,
            0.4842710501752991f, 0.05215080306266067f, 0.06518758643119664f };

    static final float[] THRESHOLD7 = { 0.08434396215650031f,
            0.6007148113473553f, 0.05107636795876212f, 0.10341362611675203f };

    static final float[] THRESHOLD8 = { 0.1309148603226209f,
            0.5122245658772394f, 0.06436561753520677f, 0.06639259175313134f };

    static final float[] THRESHOLD9 = { 0.18610705811017647f,
            0.5279244438321989f, 0.0721466513318003f, 0.07983168408322228f };

    /**
     * The ten threshold sets, each pairing a number (0..9) with its array of
     * four threshold values.
     */
    public enum Threshold {
        T0(0, RonnConstraint.THRESHOLD0),
        T1(1, RonnConstraint.THRESHOLD1),
        T2(2, RonnConstraint.THRESHOLD2),
        T3(3, RonnConstraint.THRESHOLD3),
        T4(4, RonnConstraint.THRESHOLD4),
        T5(5, RonnConstraint.THRESHOLD5),
        T6(6, RonnConstraint.THRESHOLD6),
        T7(7, RonnConstraint.THRESHOLD7),
        T8(8, RonnConstraint.THRESHOLD8),
        T9(9, RonnConstraint.THRESHOLD9);

        private final int tnum;
        private final float[] values;

        private Threshold(final int tnum, final float[] values) {
            this.tnum = tnum;
            this.values = values;
        }

        /**
         * Returns the threshold values of this entry.
         *
         * @return the shared (not copied) array of values; callers must not
         *         modify it
         */
        public float[] getValues() {
            return values;
        }

        /**
         * Looks up the threshold values by number. The misspelt method name
         * is kept for backward compatibility.
         *
         * @param number the threshold number, 0 to 9 inclusive
         * @return the shared (not copied) array of values for that number;
         *         callers must not modify it
         * @throws IllegalArgumentException if there is no threshold with the
         *         given number
         */
        public static float[] getTreshold(final int number) {
            for (final Threshold t : Threshold.values()) {
                if (t.tnum == number) {
                    return t.values;
                }
            }
            // An assert is not enough for a public method: assertions are
            // normally disabled, which used to let a null escape to the
            // caller and fail far away from the actual mistake.
            throw new IllegalArgumentException(
                    "Threshold number must be between 0 and 9, but was: "
                            + number);
        }

    }

    /** Minimum sequence length. */
    static final byte MIN_SEQUENCE_LENGTH = 19;

    /** Command line usage text; lines are separated with CR LF. */
    static final String HELP_MESSAGE = " \r\n" +
            "JRONN version 3.1b usage 1 August 2011:\r\n" +
            "java -jar JRONN_JAR_NAME -i=inputfile <OPTIONS>\r\n" +
            "\r\n" +
            "Where -i=input file \r\n" +
            "       Input file can contain one or more FASTA formatted sequences.\r\n" +
            "\r\n" +
            "All OPTIONS are optional\r\n" +
            "Supported OPTIONS are: \r\n" +
            "       -o=output file\r\n" +
            "       -d=disorder value\r\n" +
            "       -f=V or H \r\n" +
            "       -s=statistics file\r\n" +
            "       -n=number of threads to use\r\n" +
            "OPTION DETAILED DESCRIPTION:\r\n" +
            "       -o full path to the output file, if not specified \r\n" +
            "       standard out is used\r\n" +
            "\r\n" +
            "       -d the value of disorder, defaults to 0.5\r\n" +
            "\r\n" +
            "       -f output format, V for vertical, where the letters \r\n" +
            "       of the sequence and corresponding disorder values are \r\n" +
            "       output in two column layout. H for horizontal, where the\r\n" +
            "       disorder values are provided under the letters of the \r\n" +
            "       sequence. Letters and values separated by tabulation in\r\n" +
            "       this case. Defaults to V.\r\n" +
            "\r\n" +
            "       -s the file name to write execution statistics to.\r\n" +
            "\r\n" +
            "       -n the number of threads to use. Defaults to the number of \r\n" +
            "       cores available on the computer. n=1 means sequential \r\n" +
            "       processing. Valid values are 1 <= n < (2 x num_of_cores)\r\n" +
            "       Default value will give the best performance.\r\n" +
            "       \r\n" +
            "EXAMPLES: \r\n" +
            "\r\n" +
            "       Predict disorder values for sequences from input file /home/input.fasta\r\n" +
            "       output the results to the standard out. Use default disorder value\r\n" +
            "       and utilise all cpus available on the computer.\r\n" +
            "\r\n" +
            "       java -jar JRONN.JAR -i=/home/input.fasta\r\n" +
            "       \r\n" +
            "       Predict disorder values for sequences from input file /home/input.fasta\r\n" +
            "       output the results in horizontal layout to the /home/jronn.out, collect \r\n" +
            "       execution statistics to /home/jronn.stat.txt file and limit the number \r\n" +
            "       of threads to two. \r\n" +
            "       \r\n" +
            "       java -jar JRONN.JAR -i=/home/input.fasta -o=/home/jronn.out -s=/home/jronn.stat.txt -d=0.6 -n=2 -f=H\r\n" +
            "        \r\n" +
            "       The arguments can be provided in any order.\r\n" +
            "\r\n" +
            "ABOUT THE PROGRAM:     \r\n" +
            "       \r\n" +
            "       JRONN is a Java implementation of RONN. JRONN is based on RONN and uses the \r\n" +
            "       same model data, therefore gives the same predictions. Main motivation \r\n" +
            "       behind JRONN development was providing an implementation of RONN more \r\n" +
            "       suitable to use by the automated analysis pipelines and web services.  \r\n" +
            "       \r\n" +
            "       Original version of RONN is described in Yang,Z.R., Thomson,R., \r\n" +
            "       McNeil,P. and Esnouf,R.M. (2005) RONN: the bio-basis function neural network\r\n" +
            "       technique applied to the detection of natively disordered regions in proteins  \r\n" +
            "       Bioinformatics 21: 3369-3376\r\n" +
            "       See also http://www.strubi.ox.ac.uk/RONN\r\n" +
            "       \r\n" +
            "       Author: Peter Troshin \r\n" +
            "       email: to.petr AT gmail DOT com\r\n" +
            "       \r\n" +
            "       This is a free software which comes with no guarantees.\r\n" +
            "       JRONN is distributed under Apache Licence version 2. The full version of \r\n" +
            "       licence can be obtained from http://www.apache.org/licenses/LICENSE-2.0\r\n" +
            "       ";

    /**
     * 700 - maximum number of lines (with sequence values) in the single model
     * file.
     */
    static final int maxD = 700;
}