001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.aaproperties;
022
023import org.biojava.nbio.aaproperties.PeptideProperties.SingleLetterAACode;
024import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
025import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
026
027import java.util.HashMap;
028import java.util.Map;
029
030/**
031 * This class is used to support the implementation of properties stated in IPeptideProperties.
032 * It initializes several values that would be needed for the computation of properties such as
033 * <p/>
034 * Molecular weight<br/>
035 * Instability index<br/>
036 * Hydropathy value<br/>
037 * pKa<br/>
038 *
039 * @author kohchuanhock
040 * @version 2011.05.21
041 * @see IPeptideProperties
042 */
043public class Constraints {
044        private static AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
045        //A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V
046        public static AminoAcidCompound A = aaSet.getCompoundForString("A");
047        public static AminoAcidCompound R = aaSet.getCompoundForString("R");
048        public static AminoAcidCompound N = aaSet.getCompoundForString("N");
049        public static AminoAcidCompound D = aaSet.getCompoundForString("D");
050        public static AminoAcidCompound C = aaSet.getCompoundForString("C");
051        public static AminoAcidCompound E = aaSet.getCompoundForString("E");
052        public static AminoAcidCompound Q = aaSet.getCompoundForString("Q");
053        public static AminoAcidCompound G = aaSet.getCompoundForString("G");
054        public static AminoAcidCompound H = aaSet.getCompoundForString("H");
055        public static AminoAcidCompound I = aaSet.getCompoundForString("I");
056        public static AminoAcidCompound L = aaSet.getCompoundForString("L");
057        public static AminoAcidCompound K = aaSet.getCompoundForString("K");
058        public static AminoAcidCompound M = aaSet.getCompoundForString("M");
059        public static AminoAcidCompound F = aaSet.getCompoundForString("F");
060        public static AminoAcidCompound P = aaSet.getCompoundForString("P");
061        public static AminoAcidCompound S = aaSet.getCompoundForString("S");
062        public static AminoAcidCompound T = aaSet.getCompoundForString("T");
063        public static AminoAcidCompound W = aaSet.getCompoundForString("W");
064        public static AminoAcidCompound Y = aaSet.getCompoundForString("Y");
065        public static AminoAcidCompound V = aaSet.getCompoundForString("V");
066
067        public static Map<AminoAcidCompound, Double> aa2ExtinctionCoefficient = new HashMap<AminoAcidCompound, Double>();
068        public static Map<AminoAcidCompound, Double> aa2MolecularWeight = new HashMap<AminoAcidCompound, Double>();
069        public static Map<AminoAcidCompound, Double> aa2Hydrophathicity = new HashMap<AminoAcidCompound, Double>();
070        public static Map<AminoAcidCompound, Double> aa2PKa = new HashMap<AminoAcidCompound, Double>();
071        public static Map<String, Double> diAA2Instability = new HashMap<String, Double>();
072
073        public static Map<AminoAcidCompound, Double> aa2NTerminalPka = new HashMap<AminoAcidCompound, Double>();
074        public static Map<AminoAcidCompound, Double> aa2CTerminalPka = new HashMap<AminoAcidCompound, Double>();
075
076        static{
077                initMolecularWeight();
078                initHydropathicity();
079                initPKa();
080                initInstability();
081                initExtinctionCoefficient();
082        }
083
084        /**
085         * Does the initialization of molecular weights based on http://au.expasy.org/tools/findmod/findmod_masses.html#AA
086         */
087        public static void initMolecularWeight(){
088                //              Alanine (A)     71.03711        71.0788
089                aa2MolecularWeight.put(A, 71.0788);
090                //              Arginine (R)    156.10111       156.1875
091                aa2MolecularWeight.put(R, 156.1875);
092                //              Asparagine (N)  114.04293       114.1038
093                aa2MolecularWeight.put(N, 114.1038);
094                //              Aspartic acid (D)       115.02694       115.0886
095                aa2MolecularWeight.put(D, 115.0886);
096                //              Cysteine (C)    103.00919       103.1388
097                aa2MolecularWeight.put(C, 103.1388);
098                //              Glutamic acid (E)       129.04259       129.1155
099                aa2MolecularWeight.put(E, 129.1155);
100                //              Glutamine (Q)   128.05858       128.1307
101                aa2MolecularWeight.put(Q, 128.1307);
102                //              Glycine (G)     57.02146        57.0519
103                aa2MolecularWeight.put(G, 57.0519);
104                //              Histidine (H)   137.05891       137.1411
105                aa2MolecularWeight.put(H, 137.1411);
106                //              Isoleucine (I)  113.08406       113.1594
107                aa2MolecularWeight.put(I, 113.1594);
108                //              Leucine (L)     113.08406       113.1594
109                aa2MolecularWeight.put(L, 113.1594);
110                //              Lysine (K)      128.09496       128.1741
111                aa2MolecularWeight.put(K, 128.1741);
112                //              Methionine (M)  131.04049       131.1926
113                aa2MolecularWeight.put(M, 131.1926);
114                //              Phenylalanine (F)       147.06841       147.1766
115                aa2MolecularWeight.put(F, 147.1766);
116                //              Proline (P)     97.05276        97.1167
117                aa2MolecularWeight.put(P, 97.1167);
118                //              Serine (S)      87.03203        87.0782
119                aa2MolecularWeight.put(S, 87.0782);
120                //              Threonine (T)   101.04768       101.1051
121                aa2MolecularWeight.put(T, 101.1051);
122                //              Tryptophan (W)  186.07931       186.2132
123                aa2MolecularWeight.put(W, 186.2132);
124                //              Tyrosine (Y)    163.06333       163.1760
125                aa2MolecularWeight.put(Y, 163.1760);
126                //              Valine (V)      99.06841        99.1326
127                aa2MolecularWeight.put(V, 99.1326);
128        }
129
130        /**
131         * Does the initialization of hydropathicity based on http://web.expasy.org/protscale/pscale/Hphob.Doolittle.html
132         */
133        private static void initHydropathicity(){
134                //              Ala(A):  1.800
135                aa2Hydrophathicity.put(A, 1.800);
136                //              Arg(R): -4.500
137                aa2Hydrophathicity.put(R, -4.500);
138                //              Asn(N): -3.500
139                aa2Hydrophathicity.put(N, -3.500);
140                //              Asp(D): -3.500
141                aa2Hydrophathicity.put(D, -3.500);
142                //              Cys(C):  2.500
143                aa2Hydrophathicity.put(C, 2.500);
144                //              Gln(E): -3.500
145                aa2Hydrophathicity.put(E, -3.500);
146                //              Glu(Q): -3.500
147                aa2Hydrophathicity.put(Q, -3.500);
148                //              Gly(G): -0.400
149                aa2Hydrophathicity.put(G, -0.400);
150                //              His(H): -3.200
151                aa2Hydrophathicity.put(H, -3.200);
152                //              Ile(I):  4.500
153                aa2Hydrophathicity.put(I, 4.500);
154                //              Leu(L):  3.800
155                aa2Hydrophathicity.put(L, 3.800);
156                //              Lys(K): -3.900
157                aa2Hydrophathicity.put(K, -3.900);
158                //              Met(M):  1.900
159                aa2Hydrophathicity.put(M, 1.900);
160                //              Phe(F):  2.800
161                aa2Hydrophathicity.put(F, 2.800);
162                //              Pro(P): -1.600
163                aa2Hydrophathicity.put(P, -1.600);
164                //              Ser(S): -0.800
165                aa2Hydrophathicity.put(S, -0.800);
166                //              Thr(T): -0.700
167                aa2Hydrophathicity.put(T, -0.700);
168                //              Trp(W): -0.900
169                aa2Hydrophathicity.put(W, -0.900);
170                //              Tyr(Y): -1.300
171                aa2Hydrophathicity.put(Y, -1.300);
172                //              Val(V):  4.200
173                aa2Hydrophathicity.put(V, 4.200);
174        }
175
176        /**
177         * Does the initialization of PKa based on
178         * http://www.innovagen.se/custom-peptide-synthesis/peptide-property-calculator/peptide-property-calculator-notes.asp#NetCharge
179         */
180        private static void initPKaInnovagen(){
181                /*
182                 * A.Lehninger, Principles of Biochemistry, 4th Edition (2005), Chapter 3, page78, Table 3-1.
183                 */
184                //(NH2-)        9.69    (-COOH) 2.34
185                aa2CTerminalPka.put(G, 2.34);
186                aa2CTerminalPka.put(A, 2.34);
187                aa2CTerminalPka.put(P, 1.99);
188                aa2CTerminalPka.put(V, 2.32);
189                aa2CTerminalPka.put(L, 2.36);
190                aa2CTerminalPka.put(I, 2.36);
191                aa2CTerminalPka.put(M, 2.28);
192
193                aa2CTerminalPka.put(F, 1.83);
194                aa2CTerminalPka.put(Y, 2.20);
195                aa2CTerminalPka.put(W, 2.38);
196
197                aa2CTerminalPka.put(S, 2.21);
198                aa2CTerminalPka.put(T, 2.11);
199                aa2CTerminalPka.put(C, 1.96);
200                aa2CTerminalPka.put(N, 2.02);
201                aa2CTerminalPka.put(Q, 2.17);
202
203                aa2CTerminalPka.put(K, 2.18);
204                aa2CTerminalPka.put(H, 1.82);
205                aa2CTerminalPka.put(R, 2.17);
206
207                aa2CTerminalPka.put(D, 1.88);
208                aa2CTerminalPka.put(E, 2.19);
209
210                aa2NTerminalPka.put(G, 9.60);
211                aa2NTerminalPka.put(A, 9.69);
212                aa2NTerminalPka.put(P, 10.96);
213                aa2NTerminalPka.put(V, 9.62);
214                aa2NTerminalPka.put(L, 9.60);
215                aa2NTerminalPka.put(I, 9.68);
216                aa2NTerminalPka.put(M, 9.21);
217
218                aa2NTerminalPka.put(F, 9.13);
219                aa2NTerminalPka.put(Y, 9.11);
220                aa2NTerminalPka.put(W, 9.39);
221
222                aa2NTerminalPka.put(S, 9.15);
223                aa2NTerminalPka.put(T, 9.62);
224                aa2NTerminalPka.put(C, 10.28);
225                aa2NTerminalPka.put(N, 8.80);
226                aa2NTerminalPka.put(Q, 9.13);
227
228                aa2NTerminalPka.put(K, 8.95);
229                aa2NTerminalPka.put(H, 9.17);
230                aa2NTerminalPka.put(R, 9.04);
231
232                aa2NTerminalPka.put(D, 9.60);
233                aa2NTerminalPka.put(E, 9.67);
234
235                //              K, Lys  10.53
236                aa2PKa.put(K, 10.53);
237                //              D, Asp  3.65
238                aa2PKa.put(D, 3.65);
239                //              R, Arg  12.48
240                aa2PKa.put(R, 12.48);
241                //              E, Glu  4.25
242                aa2PKa.put(E, 4.25);
243                //              H, His  6.00
244                aa2PKa.put(H, 6.00);
245                //              C, Cys  8.18
246                aa2PKa.put(C, 8.18);
247                //              Y, Tyr  10.07
248                aa2PKa.put(Y, 10.07);
249        }
250
251        private static void initPKa(){
252                initPKaInnovagen();
253        }
254
255        /**
256         * Does the initialization of dipeptide instability index based on the following paper
257         *
258         * Guruprasad, K., Reddy, B.V.B. and Pandit, M.W. (1990)
259         * Correlation between stability of a protein and its dipeptide composition: a novel approach for predicting in vivo stability of a protein from its primary sequence.
260         * Protein Eng. 4,155-161. Table III.
261         */
262        private static void initInstability(){
263                double[][] instability = {
264                                //W             C               M               H               Y               F               Q               N               I               R               D               P               T               K               E               V               S               G               A               L
265                                {1.0,   1.0,    24.68,  24.68,  1.0,    1.0,    1.0,    13.34,  1.0,    1.0,    1.0,    1.0,    -14.03, 1.0,    1.0,    -7.49,  1.0,    -9.37,  -14.03, 13.34},
266                                {24.68, 1.0,    33.6,   33.6,   1.0,    1.0,    -6.54,  1.0,    1.0,    1.0,    20.26,  20.26,  33.6,   1.0,    1.0,    -6.54,  1.0,    1.0,    1.0,    20.26},
267                                {1.0,   1.0,    -1.88,  58.28,  24.68,  1.0,    -6.54,  1.0,    1.0,    -6.54,  1.0,    44.94,  -1.88,  1.0,    1.0,    1.0,    44.94,  1.0,    13.34,  1.0},
268                                {-1.88, 1.0,    1.0,    1.0,    44.94,  -9.37,  1.0,    24.68,  44.94,  1.0,    1.0,    -1.88,  -6.54,  24.68,  1.0,    1.0,    1.0,    -9.37,  1.0,    1.0},
269                                {-9.37, 1.0,    44.94,  13.34,  13.34,  1.0,    1.0,    1.0,    1.0,    -15.91, 24.68,  13.34,  -7.49,  1.0,    -6.54,  1.0,    1.0,    -7.49,  24.68,  1.0},
270                                {1.0,   1.0,    1.0,    1.0,    33.6,   1.0,    1.0,    1.0,    1.0,    1.0,    13.34,  20.26,  1.0,    -14.03, 1.0,    1.0,    1.0,    1.0,    1.0,    1.0},
271                                {1.0,   -6.54,  1.0,    1.0,    -6.54,  -6.54,  20.26,  1.0,    1.0,    1.0,    20.26,  20.26,  1.0,    1.0,    20.26,  -6.54,  44.94,  1.0,    1.0,    1.0},
272                                {-9.37, -1.88,  1.0,    1.0,    1.0,    -14.03, -6.54,  1.0,    44.94,  1.0,    1.0,    -1.88,  -7.49,  24.68,  1.0,    1.0,    1.0,    -14.03, 1.0,    1.0},
273                                {1.0,   1.0,    1.0,    13.34,  1.0,    1.0,    1.0,    1.0,    1.0,    1.0,    1.0,    -1.88,  1.0,    -7.49,  44.94,  -7.49,  1.0,    1.0,    1.0,    20.26},
274                                {58.28, 1.0,    1.0,    20.26,  -6.54,  1.0,    20.26,  13.34,  1.0,    58.28,  1.0,    20.26,  1.0,    1.0,    1.0,    1.0,    44.94,  -7.49,  1.0,    1.0},
275                                {1.0,   1.0,    1.0,    1.0,    1.0,    -6.54,  1.0,    1.0,    1.0,    -6.54,  1.0,    1.0,    -14.03, -7.49,  1.0,    1.0,    20.26,  1.0,    1.0,    1.0},
276                                {-1.88, -6.54,  -6.54,  1.0,    1.0,    20.26,  20.26,  1.0,    1.0,    -6.54,  -6.54,  20.26,  1.0,    1.0,    18.38,  20.26,  20.26,  1.0,    20.26,  1.0},
277                                {-14.03,1.0,    1.0,    1.0,    1.0,    13.34,  -6.54,  -14.03, 1.0,    1.0,    1.0,    1.0,    1.0,    1.0,    20.26,  1.0,    1.0,    -7.49,  1.0,    1.0},
278                                {1.0,   1.0,    33.6,   1.0,    1.0,    1.0,    24.68,  1.0,    -7.49,  33.6,   1.0,    -6.54,  1.0,    1.0,    1.0,    -7.49,  1.0,    -7.49,  1.0,    -7.49},
279                                {-14.03,44.94,  1.0,    -6.54,  1.0,    1.0,    20.26,  1.0,    20.26,  1.0,    20.26,  20.26,  1.0,    1.0,    33.6,   1.0,    20.26,  1.0,    1.0,    1.0},
280                                {1.0,   1.0,    1.0,    1.0,    -6.54,  1.0,    1.0,    1.0,    1.0,    1.0,    -14.03, 20.26,  -7.49,  -1.88,  1.0,    1.0,    1.0,    -7.49,  1.0,    1.0},
281                                {1.0,   33.6,   1.0,    1.0,    1.0,    1.0,    20.26,  1.0,    1.0,    20.26,  1.0,    44.94,  1.0,    1.0,    20.26,  1.0,    20.26,  1.0,    1.0,    1.0},
282                                {13.34, 1.0,    1.0,    1.0,    -7.49,  1.0,    1.0,    -7.49,  -7.49,  1.0,    1.0,    1.0,    -7.49,  -7.49,  -6.54,  1.0,    1.0,    13.34,  -7.49,  1.0},
283                                {1.0,   44.94,  1.0,    -7.49,  1.0,    1.0,    1.0,    1.0,    1.0,    1.0,    -7.49,  20.26,  1.0,    1.0,    1.0,    1.0,    1.0,    1.0,    1.0,    1.0},
284                                {24.68, 1.0,    1.0,    1.0,    1.0,    1.0,    33.6,   1.0,    1.0,    20.26,  1.0,    20.26,  1.0,    -7.49,  1.0,    1.0,    1.0,    1.0,    1.0,    1.0}
285                };
286
287                SingleLetterAACode[] aa = SingleLetterAACode.values();
288                for(int i = 0; i < aa.length; i++){
289                        for(int j = 0; j < aa.length; j++){
290                                diAA2Instability.put("" + aa[i] + aa[j], instability[i][j]);
291                        }
292                }
293        }
294
295        /**
296         * Does the initialization of extinction coefficient based on
297         * http://au.expasy.org/tools/protparam-doc.html
298         */
299        public static void initExtinctionCoefficient(){
300                aa2ExtinctionCoefficient.put(Y, 1490.0);
301                aa2ExtinctionCoefficient.put(W, 5500.0);
302                aa2ExtinctionCoefficient.put(C, 125.0);
303        }
304}