001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.aaproperties; 022 023import org.biojava.nbio.aaproperties.xml.AminoAcidCompositionTable; 024import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 025import org.biojava.nbio.core.sequence.ProteinSequence; 026import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 027import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 028import org.slf4j.Logger; 029import org.slf4j.LoggerFactory; 030 031import javax.xml.bind.JAXBException; 032import java.io.File; 033import java.io.FileNotFoundException; 034import java.util.HashMap; 035import java.util.HashSet; 036import java.util.Map; 037import java.util.Set; 038 039/** 040 * This is an adaptor class which enable the ease of generating protein properties. 041 * At least one adaptor method is written for each available properties provided in IPeptideProperties. 042 * 043 * @author kohchuanhock 044 * @version 2011.08.22 045 * @since 3.0.2 046 * @see IPeptideProperties 047 * @see PeptidePropertiesImpl 048 */ 049public class PeptideProperties { 050 051 private final static Logger logger = LoggerFactory.getLogger(PeptideProperties.class); 052 053 /** 054 * Enumeration of 20 standard amino acid code 055 */ 056 public enum SingleLetterAACode { W, C, M, H, Y, F, Q, N, I, R, D, P, T, K, E, V, S, G, A, L} 057 058 /** 059 * Contains the 20 standard AA code in a set 060 */ 061 public static Set<Character> standardAASet; 062 063 /** 064 * To initialize the standardAASet 065 */ 066 static{ 067 standardAASet = new HashSet<Character>(); 068 for(SingleLetterAACode c:SingleLetterAACode.values()) standardAASet.add(c.toString().charAt(0)); 069 } 070 071 /** 072 * An adaptor method to return the molecular weight of sequence. 073 * The sequence argument must be a protein sequence consisting of only non-ambiguous characters. 074 * This method will sum the molecular weight of each amino acid in the 075 * sequence. Molecular weights are based on <a href="http://web.expasy.org/findmod/findmod_masses.html">here</a>. 076 * 077 * @param sequence 078 * a protein sequence consisting of non-ambiguous characters only 079 * @return the total molecular weight of sequence + weight of water molecule 080 */ 081 public static final double getMolecularWeight(String sequence){ 082 sequence = Utils.checkSequence(sequence); 083 ProteinSequence pSequence = null; 084 try { 085 pSequence = new ProteinSequence(sequence); 086 } catch (CompoundNotFoundException e) { 087 // the sequence was checked with Utils.checkSequence, this shouldn't happen 088 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 089 } 090 IPeptideProperties pp = new PeptidePropertiesImpl(); 091 return pp.getMolecularWeight(pSequence); 092 } 093 094 /** 095 * An adaptor method to return the molecular weight of sequence. 096 * The sequence argument must be a protein sequence consisting of only non-ambiguous characters. 097 * This method will sum the molecular weight of each amino acid in the 098 * sequence. Molecular weights are based on the input xml file. 099 * 100 * @param sequence 101 * a protein sequence consisting of non-ambiguous characters only 102 * @param elementMassFile 103 * xml file that details the mass of each elements and isotopes 104 * @param aminoAcidCompositionFile 105 * xml file that details the composition of amino acids 106 * @return the total molecular weight of sequence + weight of water molecule 107 * @throws FileNotFoundException 108 * thrown if either elementMassFile or aminoAcidCompositionFile are not found 109 * @throws JAXBException 110 * thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile 111 */ 112 public static final double getMolecularWeight(String sequence, File elementMassFile, File aminoAcidCompositionFile) 113 throws FileNotFoundException, JAXBException{ 114 sequence = Utils.checkSequence(sequence); 115 ProteinSequence pSequence = null; 116 try { 117 pSequence = new ProteinSequence(sequence); 118 } catch (CompoundNotFoundException e) { 119 // the sequence was checked with Utils.checkSequence, this shouldn't happen 120 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 121 } 122 IPeptideProperties pp = new PeptidePropertiesImpl(); 123 return pp.getMolecularWeight(pSequence, elementMassFile, aminoAcidCompositionFile); 124 } 125 126 /** 127 * An adaptor method to return the molecular weight of sequence. The sequence argument must be a protein sequence consisting of only non-ambiguous characters. 128 * This method will sum the molecular weight of each amino acid in the 129 * sequence. Molecular weights are based on the input files. These input files must be XML using the defined schema. 130 * Note that it assumes that ElementMass.xml file can be found in default location. 131 * 132 * @param sequence 133 * a protein sequence consisting of non-ambiguous characters only 134 * xml file that details the mass of each elements and isotopes 135 * @param aminoAcidCompositionFile 136 * xml file that details the composition of amino acids 137 * @return the total molecular weight of sequence + weight of water molecule 138 * @throws JAXBException 139 * thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile 140 * @throws FileNotFoundException 141 * thrown if either elementMassFile or aminoAcidCompositionFile are not found 142 */ 143 public static final double getMolecularWeight(String sequence, File aminoAcidCompositionFile) throws FileNotFoundException, JAXBException{ 144 sequence = Utils.checkSequence(sequence); 145 ProteinSequence pSequence = null; 146 try { 147 pSequence = new ProteinSequence(sequence); 148 } catch (CompoundNotFoundException e) { 149 // the sequence was checked with Utils.checkSequence, this shouldn't happen 150 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 151 } 152 IPeptideProperties pp = new PeptidePropertiesImpl(); 153 return pp.getMolecularWeight(pSequence, aminoAcidCompositionFile); 154 } 155 156 /** 157 * An adaptor method would initialize amino acid composition table based on the input xml files and stores the table for usage in future calls to 158 * IPeptideProperties.getMolecularWeightBasedOnXML(ProteinSequence, AminoAcidCompositionTable). 159 * Note that ElementMass.xml is assumed to be able to be seen in default location. 160 * 161 * @param aminoAcidCompositionFile 162 * xml file that details the composition of amino acids 163 * @return the initialized amino acid composition table 164 * @throws JAXBException 165 * thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile 166 * @throws FileNotFoundException 167 * thrown if either elementMassFile or aminoAcidCompositionFile are not found 168 */ 169 public static final AminoAcidCompositionTable obtainAminoAcidCompositionTable(File aminoAcidCompositionFile) 170 throws JAXBException, FileNotFoundException{ 171 IPeptideProperties pp = new PeptidePropertiesImpl(); 172 return pp.obtainAminoAcidCompositionTable(aminoAcidCompositionFile); 173 } 174 175 /** 176 * An adaptor method would initialize amino acid composition table based on the input xml files and stores the table for usage in future calls to 177 * IPeptideProperties.getMolecularWeightBasedOnXML(ProteinSequence, AminoAcidCompositionTable). 178 * 179 * @param elementMassFile 180 * xml file that details the mass of each elements and isotopes 181 * @param aminoAcidCompositionFile 182 * xml file that details the composition of amino acids 183 * @return the initialized amino acid composition table 184 * @throws JAXBException 185 * thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile 186 * @throws FileNotFoundException 187 * thrown if either elementMassFile or aminoAcidCompositionFile are not found 188 */ 189 public static final AminoAcidCompositionTable obtainAminoAcidCompositionTable(File elementMassFile, File aminoAcidCompositionFile) 190 throws JAXBException, FileNotFoundException{ 191 IPeptideProperties pp = new PeptidePropertiesImpl(); 192 return pp.obtainAminoAcidCompositionTable(elementMassFile, aminoAcidCompositionFile); 193 } 194 195 /** 196 * An adaptor method that returns the molecular weight of sequence. The sequence argument must be a protein sequence consisting of only non-ambiguous characters. 197 * This method will sum the molecular weight of each amino acid in the 198 * sequence. Molecular weights are based on the AminoAcidCompositionTable. 199 * Those input files must be XML using the defined schema. 200 * 201 * @param sequence 202 * a protein sequence consisting of non-ambiguous characters only 203 * @param aminoAcidCompositionTable 204 * a amino acid composition table obtained by calling IPeptideProperties.obtainAminoAcidCompositionTable 205 * @return the total molecular weight of sequence + weight of water molecule 206 * thrown if the method IPeptideProperties.setMolecularWeightXML(File, File) is not successfully called before calling this method. 207 */ 208 public static double getMolecularWeightBasedOnXML(String sequence, AminoAcidCompositionTable aminoAcidCompositionTable){ 209 sequence = Utils.checkSequence(sequence, aminoAcidCompositionTable.getSymbolSet()); 210 ProteinSequence pSequence = null; 211 try { 212 pSequence = new ProteinSequence(sequence, aminoAcidCompositionTable.getAminoAcidCompoundSet()); 213 } catch (CompoundNotFoundException e) { 214 // the sequence was checked with Utils.checkSequence, this shouldn't happen 215 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 216 } 217 IPeptideProperties pp = new PeptidePropertiesImpl(); 218 return pp.getMolecularWeightBasedOnXML(pSequence, aminoAcidCompositionTable); 219 } 220 221 /** 222 * An adaptor method to returns the absorbance (optical density) of sequence. The sequence argument 223 * must be a protein sequence consisting of only non-ambiguous characters. 224 * The computation of absorbance (optical density) follows the 225 * documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>. 226 * 227 * @param sequence 228 * a protein sequence consisting of non-ambiguous characters only 229 * @param assumeCysReduced 230 * true if Cys are assumed to be reduced and false if Cys are assumed to form cystines 231 * @return the absorbance (optical density) of sequence 232 */ 233 public static final double getAbsorbance(String sequence, boolean assumeCysReduced){ 234 sequence = Utils.checkSequence(sequence); 235 ProteinSequence pSequence = null; 236 try { 237 pSequence = new ProteinSequence(sequence); 238 } catch (CompoundNotFoundException e) { 239 // the sequence was checked with Utils.checkSequence, this shouldn't happen 240 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 241 } 242 IPeptideProperties pp = new PeptidePropertiesImpl(); 243 return pp.getAbsorbance(pSequence, assumeCysReduced); 244 } 245 246 /** 247 * An adaptor method to return the extinction coefficient of sequence. The sequence argument 248 * must be a protein sequence consisting of only non-ambiguous characters. 249 * The extinction coefficient indicates how much light a protein absorbs at 250 * a certain wavelength. It is useful to have an estimation of this 251 * coefficient for following a protein which a spectrophotometer when 252 * purifying it. The computation of extinction coefficient follows the 253 * documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>. 254 * 255 * @param sequence 256 * a protein sequence consisting of non-ambiguous characters only 257 * @param assumeCysReduced 258 * true if Cys are assumed to be reduced and false if Cys are 259 * assumed to form cystines 260 * @return the extinction coefficient of sequence 261 */ 262 public static final double getExtinctionCoefficient(String sequence, boolean assumeCysReduced) { 263 sequence = Utils.checkSequence(sequence); 264 ProteinSequence pSequence = null; 265 try { 266 pSequence = new ProteinSequence(sequence); 267 } catch (CompoundNotFoundException e) { 268 // the sequence was checked with Utils.checkSequence, this shouldn't happen 269 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 270 } 271 IPeptideProperties pp = new PeptidePropertiesImpl(); 272 return pp.getExtinctionCoefficient(pSequence, assumeCysReduced); 273 } 274 275 /** 276 * An adaptor method to return the instability index of sequence. The sequence argument must be 277 * a protein sequence consisting of only non-ambiguous characters. 278 * The instability index provides an estimate of the stability of your 279 * protein in a test tube. The computation of instability index follows the 280 * documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>. 281 * 282 * @param sequence 283 * a protein sequence consisting of non-ambiguous characters only 284 * @return the instability index of sequence 285 */ 286 public static final double getInstabilityIndex(String sequence) { 287 sequence = Utils.checkSequence(sequence); 288 ProteinSequence pSequence = null; 289 try { 290 pSequence = new ProteinSequence(sequence); 291 } catch (CompoundNotFoundException e) { 292 // the sequence was checked with Utils.checkSequence, this shouldn't happen 293 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 294 } 295 IPeptideProperties pp = new PeptidePropertiesImpl(); 296 return pp.getInstabilityIndex(pSequence); 297 } 298 299 /** 300 * An adaptor method to return the apliphatic index of sequence. The sequence argument must be a 301 * protein sequence consisting of only non-ambiguous characters. 302 * The aliphatic index of a protein is defined as the relative volume 303 * occupied by aliphatic side chains (alanine, valine, isoleucine, and 304 * leucine). It may be regarded as a positive factor for the increase of 305 * thermostability of globular proteins. The computation of aliphatic index 306 * follows the documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>. 307 * A protein whose instability index is smaller than 40 is predicted as stable, a value above 40 predicts that the protein may be unstable. 308 * 309 * @param sequence 310 * a protein sequence consisting of non-ambiguous characters only 311 * @return the aliphatic index of sequence 312 */ 313 public static final double getApliphaticIndex(String sequence) { 314 sequence = Utils.checkSequence(sequence); 315 ProteinSequence pSequence = null; 316 try { 317 pSequence = new ProteinSequence(sequence); 318 } catch (CompoundNotFoundException e) { 319 // the sequence was checked with Utils.checkSequence, this shouldn't happen 320 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 321 } 322 323 IPeptideProperties pp = new PeptidePropertiesImpl(); 324 return pp.getApliphaticIndex(pSequence); 325 } 326 327 /** 328 * An adaptor method to return the average hydropathy value of sequence. The sequence argument 329 * must be a protein sequence consisting of only non-ambiguous characters. 330 * The average value for a sequence is calculated as the sum of hydropathy 331 * values of all the amino acids, divided by the number of residues in the 332 * sequence. Hydropathy values are based on (Kyte, J. and Doolittle, R.F. 333 * (1982) A simple method for displaying the hydropathic character of a 334 * protein. J. Mol. Biol. 157, 105-132). 335 * 336 * @param sequence 337 * a protein sequence consisting of non-ambiguous characters only 338 * @return the average hydropathy value of sequence 339 */ 340 public static final double getAvgHydropathy(String sequence) { 341 sequence = Utils.checkSequence(sequence); 342 ProteinSequence pSequence = null; 343 try { 344 pSequence = new ProteinSequence(sequence); 345 } catch (CompoundNotFoundException e) { 346 // the sequence was checked with Utils.checkSequence, this shouldn't happen 347 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 348 } 349 IPeptideProperties pp = new PeptidePropertiesImpl(); 350 return pp.getAvgHydropathy(pSequence); 351 } 352 353 /** 354 * An adaptor method to return the isoelectric point of sequence. The sequence argument must be 355 * a protein sequence consisting of only non-ambiguous characters. 356 * The isoelectric point is the pH at which the protein carries no net 357 * electrical charge. The isoelectric point will be computed based on 358 * approach stated in 359 * <a href="http://www.innovagen.se/custom-peptide-synthesis/peptide-property-calculator/peptide-property-calculator-notes.asp#PI">here</a> 360 * 361 * pKa values used will be either 362 * those used by Expasy which referenced "Electrophoresis 1994, 15, 529-539" 363 * OR 364 * A.Lehninger, Principles of Biochemistry, 4th Edition (2005), Chapter 3, page78, Table 3-1. 365 * 366 * @param sequence 367 * a protein sequence consisting of non-ambiguous characters only 368 * @param useExpasyValues 369 * whether to use Expasy values (Default) or Innovagen values 370 * @return the isoelectric point of sequence 371 */ 372 public static final double getIsoelectricPoint(String sequence, boolean useExpasyValues) { 373 sequence = Utils.checkSequence(sequence); 374 ProteinSequence pSequence = null; 375 try { 376 pSequence = new ProteinSequence(sequence); 377 } catch (CompoundNotFoundException e) { 378 // the sequence was checked with Utils.checkSequence, this shouldn't happen 379 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 380 } 381 IPeptideProperties pp = new PeptidePropertiesImpl(); 382 return pp.getIsoelectricPoint(pSequence, useExpasyValues); 383 } 384 385 public static final double getIsoelectricPoint(String sequence){ 386 return getIsoelectricPoint(sequence, true); 387 } 388 389 /** 390 * An adaptor method to return the net charge of sequence at pH 7. The sequence argument must be 391 * a protein sequence consisting of only non-ambiguous characters. 392 * The net charge will be computed using the approach stated in 393 * <a href="http://www.innovagen.se/custom-peptide-synthesis/peptide-property-calculator/peptide-property-calculator-notes.asp#PI">here</a> 394 * 395 * pKa values used will be either 396 * those used by Expasy which referenced "Electrophoresis 1994, 15, 529-539" 397 * OR 398 * A.Lehninger, Principles of Biochemistry, 4th Edition (2005), Chapter 3, page78, Table 3-1. 399 * 400 * @param sequence 401 * a protein sequence consisting of non-ambiguous characters only 402 * @param useExpasyValues 403 * whether to use Expasy values (Default) or Innovagen values 404 * @param pHPoint 405 * the pH value to use for computation of the net charge. Default at 7. 406 * @return the net charge of sequence at given pHPoint 407 */ 408 public static final double getNetCharge(String sequence, boolean useExpasyValues, double pHPoint){ 409 sequence = Utils.checkSequence(sequence); 410 ProteinSequence pSequence = null; 411 try { 412 pSequence = new ProteinSequence(sequence); 413 } catch (CompoundNotFoundException e) { 414 // the sequence was checked with Utils.checkSequence, this shouldn't happen 415 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 416 } 417 IPeptideProperties pp = new PeptidePropertiesImpl(); 418 return pp.getNetCharge(pSequence, useExpasyValues, pHPoint); 419 } 420 421 public static final double getNetCharge(String sequence, boolean useExpasyValues) { 422 return getNetCharge(sequence, useExpasyValues, 7.0); 423 } 424 425 public static final double getNetCharge(String sequence){ 426 return getNetCharge(sequence, true); 427 } 428 429 /** 430 * An adaptor method to return the composition of specified amino acid in the sequence. The 431 * sequence argument must be a protein sequence consisting of only 432 * non-ambiguous characters. The aminoAcidCode must be a non-ambiguous 433 * character. 434 * The composition of an amino acid is the total number of its occurrence, 435 * divided by the total length of the sequence. 436 * 437 * @param sequence 438 * a protein sequence consisting of non-ambiguous characters only 439 * @param aminoAcidCode 440 * the code of the amino acid to compute 441 * @return the composition of specified amino acid in the sequence 442 * @see SingleLetterAACode 443 */ 444 public static final double getEnrichment(String sequence, SingleLetterAACode aminoAcidCode) { 445 return getEnrichment(sequence, aminoAcidCode.toString()); 446 } 447 448 /** 449 * An adaptor method to return the composition of specified amino acid in the sequence. The 450 * sequence argument must be a protein sequence consisting of only 451 * non-ambiguous characters. The aminoAcidCode must be a non-ambiguous 452 * character. 453 * The composition of an amino acid is the total number of its occurrence, 454 * divided by the total length of the sequence. 455 * 456 * @param sequence 457 * a protein sequence consisting of non-ambiguous characters only 458 * @param aminoAcidCode 459 * the code of the amino acid to compute 460 * @return the composition of specified amino acid in the sequence 461 */ 462 public static final double getEnrichment(String sequence, char aminoAcidCode){ 463 return getEnrichment(sequence, aminoAcidCode); 464 } 465 466 /** 467 * An adaptor method to return the composition of specified amino acid in the sequence. The 468 * sequence argument must be a protein sequence consisting of only 469 * non-ambiguous characters. The aminoAcidCode must be a non-ambiguous 470 * character. 471 * The composition of an amino acid is the total number of its occurrence, 472 * divided by the total length of the sequence. 473 * 474 * @param sequence 475 * a protein sequence consisting of non-ambiguous characters only 476 * @param aminoAcidCode 477 * the code of the amino acid to compute 478 * @return the composition of specified amino acid in the sequence 479 */ 480 public static final double getEnrichment(String sequence, String aminoAcidCode){ 481 sequence = Utils.checkSequence(sequence); 482 ProteinSequence pSequence = null; 483 try { 484 pSequence = new ProteinSequence(sequence); 485 } catch (CompoundNotFoundException e) { 486 // the sequence was checked with Utils.checkSequence, this shouldn't happen 487 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 488 } 489 IPeptideProperties pp = new PeptidePropertiesImpl(); 490 AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); 491 return pp.getEnrichment(pSequence, aaSet.getCompoundForString(aminoAcidCode)); 492 } 493 494 /** 495 * An adaptor method to return the composition of the 20 standard amino acid in the sequence. 496 * The sequence argument must be a protein sequence consisting of only 497 * non-ambiguous characters. 498 * The composition of an amino acid is the total number of its occurrence, 499 * divided by the total length of the sequence. 500 * 501 * @param sequence 502 * a protein sequence consisting of non-ambiguous characters only 503 * @return the composition of the 20 standard amino acid in the sequence 504 * @see AminoAcidCompound 505 */ 506 public static final Map<AminoAcidCompound, Double> getAAComposition(String sequence) { 507 sequence = Utils.checkSequence(sequence); 508 ProteinSequence pSequence = null; 509 try { 510 pSequence = new ProteinSequence(sequence); 511 } catch (CompoundNotFoundException e) { 512 // the sequence was checked with Utils.checkSequence, this shouldn't happen 513 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 514 } 515 IPeptideProperties pp = new PeptidePropertiesImpl(); 516 return pp.getAAComposition(pSequence); 517 } 518 519 /** 520 * An adaptor method to return the composition of the 20 standard amino acid in the sequence. 521 * The sequence argument must be a protein sequence consisting of only 522 * non-ambiguous characters. 523 * The composition of an amino acid is the total number of its occurrence, 524 * divided by the total length of the sequence. 525 * 526 * @param sequence 527 * a protein sequence consisting of non-ambiguous characters only 528 * @return the composition of the 20 standard amino acid in the sequence 529 */ 530 public static final Map<String, Double> getAACompositionString(String sequence){ 531 Map<AminoAcidCompound, Double> aa2Composition = getAAComposition(sequence); 532 Map<String, Double> aaString2Composition = new HashMap<String, Double>(); 533 for(AminoAcidCompound aaCompound:aa2Composition.keySet()){ 534 aaString2Composition.put(aaCompound.getShortName(), aa2Composition.get(aaCompound)); 535 } 536 return aaString2Composition; 537 } 538 539 /** 540 * An adaptor method to return the composition of the 20 standard amino acid in the sequence. 541 * The sequence argument must be a protein sequence consisting of only 542 * non-ambiguous characters. 543 * The composition of an amino acid is the total number of its occurrence, 544 * divided by the total length of the sequence. 545 * 546 * @param sequence 547 * a protein sequence consisting of non-ambiguous characters only 548 * @return the composition of the 20 standard amino acid in the sequence 549 */ 550 public static final Map<Character, Double> getAACompositionChar(String sequence){ 551 Map<AminoAcidCompound, Double> aa2Composition = getAAComposition(sequence); 552 Map<Character, Double> aaChar2Composition = new HashMap<Character, Double>(); 553 for(AminoAcidCompound aaCompound:aa2Composition.keySet()){ 554 aaChar2Composition.put(aaCompound.getShortName().charAt(0), aa2Composition.get(aaCompound)); 555 } 556 return aaChar2Composition; 557 } 558 559 /** 560 * Returns the array of charges of each amino acid in a protein. At pH=7, two are negative charged: aspartic acid (Asp, D) and glutamic acid (Glu, E) (acidic side chains), 561 * and three are positive charged: lysine (Lys, K), arginine (Arg, R) and histidine (His, H) (basic side chains). 562 * 563 * @param sequence 564 * a protein sequence consisting of non-ambiguous characters only 565 * @return the array of charges of amino acids in the protein (1 if amino acid is positively charged, -1 if negatively charged, 0 if not charged) 566 */ 567 public static final int[] getChargesOfAminoAcids(String sequence) { 568 int[] charges = new int[sequence.length()]; 569 for ( int i=0; i < sequence.length(); i++ ) { 570 char aa = sequence.toCharArray()[i]; 571 charges[i] = AminoAcidProperties.getChargeOfAminoAcid(aa); 572 } 573 return charges; 574 } 575 576 /** 577 * Returns the array of polarity values of each amino acid in a protein sequence. 578 * 579 * @param sequence 580 * a protein sequence consisting of non-ambiguous characters only 581 * @return the array of polarity of amino acids in the protein (1 if amino acid is polar, 0 if not) 582 */ 583 public static final int[] getPolarityOfAminoAcids(String sequence) { 584 int[] polarity = new int[sequence.length()]; 585 for ( int i=0; i < sequence.length(); i++ ) { 586 char aa = sequence.toCharArray()[i]; 587 polarity[i] = AminoAcidProperties.getPolarityOfAminoAcid(aa); 588 } 589 return polarity; 590 } 591}