001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.proteomics; 023 024import java.util.HashMap; 025import java.util.Iterator; 026import java.util.Map; 027 028import org.biojava.bio.BioException; 029import org.biojava.bio.seq.ProteinTools; 030import org.biojava.bio.symbol.IllegalAlphabetException; 031import org.biojava.bio.symbol.IllegalSymbolException; 032import org.biojava.bio.symbol.Symbol; 033import org.biojava.bio.symbol.SymbolList; 034import org.biojava.bio.symbol.SymbolPropertyTable; 035import org.biojava.utils.math.BinarySearch; 036import org.biojava.utils.math.ComputeObject; 037 038/** Class that computes isoelectric point for denaturated proteins. These pIs 039 * are useful for predicting the position of a protein on a 2D gel.<p> 040 * The pK values are taken from Bjellqvist B. et al., "Reference 041 * points for comparisons of two-dimensional maps of proteins from different 042 * human cell types defined in a pH scale where isoelectric points correlate 043 * with polypeptide compositions", Electrophoresis 1994, 15, 529-539.<p> 044 * 045 * @author David Huen 046 * @author George Waldon 047 * @since 1.22 048 * 049 */ 050public class IsoelectricPointCalc { 051 052 /** minimum pH value */ 053 private static double PH_MIN = 0.0; 054 055 /** maximum pH value */ 056 private static double PH_MAX = 14.0; 057 058 /** desired precision */ 059 private static double EPSI = 0.0001; 060 061 private static Map pK_NtermCache = new HashMap(); 062 private static Map pKCache = new HashMap(); 063 private static Map pK_CtermCache = new HashMap(); 064 065 public IsoelectricPointCalc() { 066 // recover pK and pK_NTerm tables and cache only relevant residues 067 SymbolPropertyTable PK_NtermTable = ProteinTools.getSymbolPropertyTable(SymbolPropertyTable.PK_Nterm); 068 SymbolPropertyTable pKTable = ProteinTools.getSymbolPropertyTable(SymbolPropertyTable.PK); 069 SymbolPropertyTable PK_CtermTable = ProteinTools.getSymbolPropertyTable(SymbolPropertyTable.PK_Cterm); 070 071 Iterator aaSyms = ProteinTools.getAlphabet().iterator(); 072 073 // iterate thru' all AA symbols and cache the non-zero pKs 074 while (aaSyms.hasNext()) { 075 Symbol sym = (Symbol) aaSyms.next(); 076 077 // only cache symbols that have a non-zero pK 078 try { 079 double pK = PK_NtermTable.getDoubleValue(sym); 080 if (Math.abs(pK) > 0.01) { 081 pK_NtermCache.put(sym, new Double(pK)); 082 } 083 084 pK = pKTable.getDoubleValue(sym); 085 if (Math.abs(pK) > 0.01) { 086 pKCache.put(sym, new Double(pK)); 087 } 088 089 pK = PK_CtermTable.getDoubleValue(sym); 090 if (Math.abs(pK) > 0.01) { 091 pK_CtermCache.put(sym, new Double(pK)); 092 } 093 094 } catch (IllegalSymbolException ise) { 095 // SimpleSymbolPropertyTable throws this if there is no value for the symbol 096 // just ignore. 097 } 098 } 099 } 100 101 public class ChargeCalculator 102 implements ComputeObject { 103 Map counts = null; 104 Symbol Nterm = null; 105 Symbol Cterm = null; 106 boolean hasFreeNTerm = true; 107 boolean hasFreeCTerm = true; 108 109 private ChargeCalculator(SymbolList peptide, boolean hasFreeNTerm, boolean hasFreeCTerm) { 110 counts = residueCount(peptide); 111 this.hasFreeNTerm = hasFreeNTerm; 112 this.hasFreeCTerm = hasFreeCTerm; 113 } 114 115 /** 116 * counts up number of times a relevant AA appears in protein. 117 */ 118 private Map residueCount(SymbolList peptide) { 119 // iterate thru' peptide collating number of relevant residues 120 Iterator residues = peptide.iterator(); 121 122 Map symbolCounts = new HashMap(); 123 124 while (residues.hasNext()) { 125 Symbol sym = (Symbol) residues.next(); 126 if(Nterm==null) Nterm = sym; 127 if(!residues.hasNext()) Cterm = sym; 128 if (pKCache.containsKey(sym)) { 129 // count the residues 130 Integer currCount = (Integer) symbolCounts.get(sym); 131 if (currCount != null) { 132 int currCountAsInt = currCount.intValue(); 133 symbolCounts.put(sym, new Integer(++currCountAsInt)); 134 } else { 135 symbolCounts.put(sym, new Integer(1)); 136 } 137 } 138 } 139 140 return symbolCounts; 141 } 142 143 /** 144 * computes charge at given pH 145 */ 146 public double compute(double pH) { 147 double charge = 0.0; 148 149 // iterate thru' all counts computing the partial contribution to charge 150 Iterator aaI = counts.keySet().iterator(); 151 152 //by convention negative pK values in ResidueProperties.xml are for 153 //acids and inversely for bases. 154 while (aaI.hasNext()) { 155 // get back the symbol 156 Symbol sym = (Symbol) aaI.next(); 157 158 // retrieve the pK and count 159 Double value = (Double) pKCache.get(sym); 160 if (value != null) { 161 double pK = value.doubleValue(); 162 double count = ((Integer) counts.get(sym)).intValue(); 163 boolean isAcid = pK<0; 164 if(isAcid==true) { 165 pK = -pK; 166 double cr = Math.pow(10.0, pK - pH); 167 charge -= count/(cr + 1.0); // -0.5 per aa at pH = pK 168 } else { 169 double cr = Math.pow(10.0, pH - pK); 170 charge += count/(cr + 1.0); // +0.5 per aa at pH = pK 171 } 172 } 173 } 174 175 // N-terminal end charges 176 if (hasFreeNTerm) { 177 Double value = (Double) pK_NtermCache.get(Nterm); 178 if (value != null) { 179 double pK = + value.doubleValue(); 180 double cr = Math.pow(10.0, pH - pK); 181 charge += 1/(cr + 1.0); 182 } 183 } 184 185 // C-terminal end charges 186 if(hasFreeCTerm) { 187 Double value = (Double) pK_CtermCache.get(Cterm); 188 if (value != null) { 189 double pK = - value.doubleValue(); 190 double cr = Math.pow(10.0, pK - pH); 191 charge -= 1/(cr + 1.0); // -0.5 per aa at pH = pK 192 } 193 } 194 195 return charge; 196 } 197 } 198 199 /** 200 * Computes isoelectric point of specified peptide. 201 * 202 * @param peptide peptide of which pI is required. 203 * @param hasFreeNTerm has free N-terminal amino group. 204 * @param hasFreeCTerm has free C-terminal carboxyl group. 205 */ 206 public double getPI(SymbolList peptide, boolean hasFreeNTerm, boolean hasFreeCTerm) 207 throws IllegalAlphabetException, BioException 208 { 209 // verify that the peptide is really a peptide 210 if ( (peptide.getAlphabet() == ProteinTools.getTAlphabet()) 211 || (peptide.getAlphabet() == ProteinTools.getAlphabet()) ) { 212 213 // create object to handle the peptide 214 ComputeObject computeObj = new ChargeCalculator(peptide, hasFreeNTerm, hasFreeCTerm); 215 216 // solve the charge equation 217 double pI = 0.0; 218 try { 219 pI = BinarySearch.solve(PH_MIN, PH_MAX, EPSI, computeObj); 220 } catch( BioException ex) { 221 BioException ex2 = new BioException("Error, the peptide probably contains only positive or negative charges"); 222 ex2.initCause(ex); 223 throw ex2; 224 } 225 return pI; 226 } else { 227 // not a peptide 228 throw new IllegalAlphabetException(); 229 } 230 } 231 232 private static IsoelectricPointCalc calculator; 233 234 /** Static public method to compute the pI for a polypeptide in 235 * denaturating and reduced conditions with both free ends. 236 * Various ambiguity symbols, symbols for which pK data are not available, or 237 * illegal symbols are not contributing to the calculated pI.<p> 238 * This method returns the same values as ExPASy's Compute pI/Mw program 239 * 240 * @param peptide peptide of which pI is required. 241 * @return the calculated pI 242 * @since 1.5 243 */ 244 public static double getIsoelectricPoint(SymbolList peptide) 245 throws IllegalAlphabetException, BioException { 246 if(calculator==null) { 247 calculator = new IsoelectricPointCalc(); 248 } 249 double pi = calculator.getPI(peptide,true,true); 250 return (Math.round(pi*100))/100.0; 251 } 252} 253