001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.proteomics;
023
024import java.util.HashMap;
025import java.util.Iterator;
026import java.util.Map;
027
028import org.biojava.bio.BioException;
029import org.biojava.bio.seq.ProteinTools;
030import org.biojava.bio.symbol.IllegalAlphabetException;
031import org.biojava.bio.symbol.IllegalSymbolException;
032import org.biojava.bio.symbol.Symbol;
033import org.biojava.bio.symbol.SymbolList;
034import org.biojava.bio.symbol.SymbolPropertyTable;
035import org.biojava.utils.math.BinarySearch;
036import org.biojava.utils.math.ComputeObject;
037
038/** Class that computes isoelectric point for denaturated proteins. These pIs 
039 * are useful for predicting the position of a protein on a 2D gel.<p>
040 * The pK values are taken from Bjellqvist B. et al., "Reference 
041 * points for comparisons of two-dimensional maps of proteins from different
042 * human cell types defined in a pH scale where isoelectric points correlate
043 * with polypeptide compositions", Electrophoresis 1994, 15, 529-539.<p>
044 *
045 * @author David Huen
046 * @author George Waldon
047 * @since 1.22
048 *
049 */
050public class IsoelectricPointCalc {
051    
052   /** minimum pH value */
053     private static double PH_MIN = 0.0;
054    
055    /** maximum pH value */
056    private static double PH_MAX = 14.0;
057    
058    /** desired precision */
059    private static double EPSI = 0.0001;
060
061    private static Map pK_NtermCache = new HashMap();
062    private static Map pKCache = new HashMap();
063    private static Map pK_CtermCache = new HashMap();
064
065    public IsoelectricPointCalc() {
066        // recover pK and pK_NTerm tables and cache only relevant residues
067        SymbolPropertyTable PK_NtermTable = ProteinTools.getSymbolPropertyTable(SymbolPropertyTable.PK_Nterm);
068        SymbolPropertyTable pKTable = ProteinTools.getSymbolPropertyTable(SymbolPropertyTable.PK);
069        SymbolPropertyTable PK_CtermTable = ProteinTools.getSymbolPropertyTable(SymbolPropertyTable.PK_Cterm);
070        
071        Iterator aaSyms = ProteinTools.getAlphabet().iterator();
072        
073        // iterate thru' all AA symbols and cache the non-zero pKs
074        while (aaSyms.hasNext()) {
075            Symbol sym = (Symbol) aaSyms.next();
076            
077            // only cache symbols that have a non-zero pK
078            try {
079                double pK = PK_NtermTable.getDoubleValue(sym);
080                if (Math.abs(pK) > 0.01) {
081                    pK_NtermCache.put(sym, new Double(pK));
082                }
083                
084                pK = pKTable.getDoubleValue(sym);
085                if (Math.abs(pK) > 0.01) {
086                    pKCache.put(sym, new Double(pK));
087                }
088                
089                pK = PK_CtermTable.getDoubleValue(sym);
090                if (Math.abs(pK) > 0.01) {
091                    pK_CtermCache.put(sym, new Double(pK));
092                }
093                
094            } catch (IllegalSymbolException ise) {
095                // SimpleSymbolPropertyTable throws this if there is no value for the symbol
096                // just ignore.
097            }
098        }
099    }
100
101    public class ChargeCalculator
102            implements ComputeObject {
103        Map counts = null;
104        Symbol Nterm = null;
105        Symbol Cterm = null;
106        boolean hasFreeNTerm = true;
107        boolean hasFreeCTerm = true;
108
109        private ChargeCalculator(SymbolList peptide, boolean hasFreeNTerm, boolean hasFreeCTerm) {
110            counts = residueCount(peptide);
111            this.hasFreeNTerm = hasFreeNTerm;
112            this.hasFreeCTerm = hasFreeCTerm;
113        }
114
115        /**
116         * counts up number of times a relevant AA appears in protein.
117         */
118        private Map residueCount(SymbolList peptide) {
119            // iterate thru' peptide collating number of relevant residues
120            Iterator residues = peptide.iterator();
121            
122            Map symbolCounts = new HashMap();
123            
124            while (residues.hasNext()) {
125                Symbol sym = (Symbol) residues.next();
126                if(Nterm==null) Nterm = sym;
127                if(!residues.hasNext()) Cterm = sym;
128                if (pKCache.containsKey(sym)) {
129                    // count the residues
130                    Integer currCount = (Integer) symbolCounts.get(sym);
131                    if (currCount != null) {
132                        int currCountAsInt = currCount.intValue();
133                        symbolCounts.put(sym, new Integer(++currCountAsInt));
134                    } else {
135                        symbolCounts.put(sym, new Integer(1));
136                    }
137                }
138            }
139            
140            return symbolCounts;
141        }
142        
143        /**
144         * computes charge at given pH
145         */
146        public double compute(double pH) {
147            double charge = 0.0;
148            
149            // iterate thru' all counts computing the partial contribution to charge
150            Iterator aaI = counts.keySet().iterator();
151            
152            //by convention negative pK values in ResidueProperties.xml are for
153            //acids and inversely for bases.
154            while (aaI.hasNext()) {
155                // get back the symbol
156                Symbol sym = (Symbol) aaI.next();
157                
158                // retrieve the pK and count
159                Double value = (Double) pKCache.get(sym);
160                if (value != null) {
161                    double pK = value.doubleValue();
162                    double count = ((Integer) counts.get(sym)).intValue();
163                    boolean isAcid = pK<0;
164                    if(isAcid==true) {
165                        pK = -pK;
166                        double cr = Math.pow(10.0, pK - pH);
167                        charge -= count/(cr + 1.0); // -0.5 per aa at pH = pK
168                    } else {
169                        double cr = Math.pow(10.0, pH - pK);
170                        charge += count/(cr + 1.0); // +0.5 per aa at pH = pK
171                    }
172                }
173            }
174            
175            // N-terminal end charges
176            if (hasFreeNTerm) {
177                Double value = (Double) pK_NtermCache.get(Nterm);
178                if (value != null) {
179                    double pK = + value.doubleValue();
180                    double cr = Math.pow(10.0, pH - pK);
181                    charge += 1/(cr + 1.0);
182                }
183            }
184            
185            // C-terminal end charges
186            if(hasFreeCTerm) {
187                Double value = (Double) pK_CtermCache.get(Cterm);
188                if (value != null) {
189                    double pK = - value.doubleValue();
190                    double cr = Math.pow(10.0, pK - pH);
191                    charge -= 1/(cr + 1.0); // -0.5 per aa at pH = pK
192                }
193            }
194
195            return charge;
196        }
197    }
198
199    /**
200     * Computes isoelectric point of specified peptide.
201     *
202     * @param peptide peptide of which pI is required.
203     * @param hasFreeNTerm has free N-terminal amino group.
204     * @param hasFreeCTerm has free C-terminal carboxyl group.
205     */
206    public double getPI(SymbolList peptide, boolean hasFreeNTerm, boolean hasFreeCTerm)
207        throws IllegalAlphabetException, BioException
208    {
209        // verify that the peptide is really a peptide
210        if ( (peptide.getAlphabet() == ProteinTools.getTAlphabet())
211        || (peptide.getAlphabet() == ProteinTools.getAlphabet()) ) {
212            
213            // create object to handle the peptide
214            ComputeObject computeObj = new ChargeCalculator(peptide, hasFreeNTerm, hasFreeCTerm);
215            
216            // solve the charge equation
217            double pI = 0.0;
218            try {
219                pI = BinarySearch.solve(PH_MIN, PH_MAX, EPSI, computeObj);
220            } catch( BioException ex) {
221                BioException ex2 =  new BioException("Error, the peptide probably contains only positive or negative charges");
222                ex2.initCause(ex);
223                throw ex2;
224            }
225            return pI;
226        } else {
227            // not a peptide
228            throw new IllegalAlphabetException();
229        }
230    }
231    
232    private static IsoelectricPointCalc calculator;
233    
234    /** Static public method to compute the pI for a polypeptide in
235     * denaturating and reduced conditions with both free ends. 
236     * Various ambiguity symbols, symbols for which pK data are not available, or 
237     * illegal symbols are not contributing to the calculated pI.<p>
238     * This method returns the same values as ExPASy's Compute pI/Mw program
239     *
240     * @param peptide peptide of which pI is required.
241     * @return the calculated pI
242     * @since 1.5
243     */
244    public static double getIsoelectricPoint(SymbolList peptide) 
245    throws IllegalAlphabetException, BioException {
246        if(calculator==null) {
247            calculator = new IsoelectricPointCalc();
248        }
249        double pi =  calculator.getPI(peptide,true,true);
250        return (Math.round(pi*100))/100.0;
251    }
252}
253