001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.aaproperties; 022 023import org.biojava.nbio.aaproperties.xml.AminoAcidCompositionTable; 024import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 025import org.biojava.nbio.core.sequence.ProteinSequence; 026import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 027import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 028import org.slf4j.Logger; 029import org.slf4j.LoggerFactory; 030 031import jakarta.xml.bind.JAXBException; 032import java.io.File; 033import java.io.FileNotFoundException; 034import java.util.Arrays; 035import java.util.HashMap; 036import java.util.HashSet; 037import java.util.Map; 038import java.util.Set; 039import java.util.stream.Collectors; 040 041/** 042 * This is an adaptor class which enable the ease of generating protein properties. 043 * At least one adaptor method is written for each available properties provided in IPeptideProperties. 044 * 045 * @author kohchuanhock 046 * @version 2011.08.22 047 * @since 3.0.2 048 * @see IPeptideProperties 049 * @see PeptidePropertiesImpl 050 */ 051public class PeptideProperties { 052 053 private final static Logger logger = LoggerFactory.getLogger(PeptideProperties.class); 054 055 /** 056 * Enumeration of 20 standard amino acid code 057 */ 058 public enum SingleLetterAACode { W, C, M, H, Y, F, Q, N, I, R, D, P, T, K, E, V, S, G, A, L} 059 060 /** 061 * Contains the 20 standard AA code in a set 062 */ 063 public static Set<Character> standardAASet; 064 065 /** 066 * To initialize the standardAASet 067 */ 068 static{ 069 standardAASet = Arrays.stream(SingleLetterAACode.values()) 070 .map(singleLetterAACode -> singleLetterAACode.toString().charAt(0)) 071 .collect(Collectors.toCollection(HashSet::new)); 072 } 073 074 /** 075 * An adaptor method to return the molecular weight of sequence. 076 * The sequence argument must be a protein sequence consisting of only non-ambiguous characters. 077 * This method will sum the molecular weight of each amino acid in the 078 * sequence. Molecular weights are based on <a href="http://web.expasy.org/findmod/findmod_masses.html">here</a>. 079 * 080 * @param sequence 081 * a protein sequence consisting of non-ambiguous characters only 082 * @return the total molecular weight of sequence + weight of water molecule 083 */ 084 public static final double getMolecularWeight(String sequence){ 085 sequence = Utils.checkSequence(sequence); 086 ProteinSequence pSequence = null; 087 try { 088 pSequence = new ProteinSequence(sequence); 089 } catch (CompoundNotFoundException e) { 090 // the sequence was checked with Utils.checkSequence, this shouldn't happen 091 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 092 } 093 IPeptideProperties pp = new PeptidePropertiesImpl(); 094 return pp.getMolecularWeight(pSequence); 095 } 096 097 /** 098 * An adaptor method to return the molecular weight of sequence. 099 * The sequence argument must be a protein sequence consisting of only non-ambiguous characters. 100 * This method will sum the molecular weight of each amino acid in the 101 * sequence. Molecular weights are based on the input xml file. 102 * 103 * @param sequence 104 * a protein sequence consisting of non-ambiguous characters only 105 * @param elementMassFile 106 * xml file that details the mass of each elements and isotopes 107 * @param aminoAcidCompositionFile 108 * xml file that details the composition of amino acids 109 * @return the total molecular weight of sequence + weight of water molecule 110 * @throws FileNotFoundException 111 * thrown if either elementMassFile or aminoAcidCompositionFile are not found 112 * @throws JAXBException 113 * thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile 114 */ 115 public static final double getMolecularWeight(String sequence, File elementMassFile, File aminoAcidCompositionFile) 116 throws FileNotFoundException, JAXBException{ 117 sequence = Utils.checkSequence(sequence); 118 ProteinSequence pSequence = null; 119 try { 120 pSequence = new ProteinSequence(sequence); 121 } catch (CompoundNotFoundException e) { 122 // the sequence was checked with Utils.checkSequence, this shouldn't happen 123 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 124 } 125 IPeptideProperties pp = new PeptidePropertiesImpl(); 126 return pp.getMolecularWeight(pSequence, elementMassFile, aminoAcidCompositionFile); 127 } 128 129 /** 130 * An adaptor method to return the molecular weight of sequence. The sequence argument must be a protein sequence consisting of only non-ambiguous characters. 131 * This method will sum the molecular weight of each amino acid in the 132 * sequence. Molecular weights are based on the input files. These input files must be XML using the defined schema. 133 * Note that it assumes that ElementMass.xml file can be found in default location. 134 * 135 * @param sequence 136 * a protein sequence consisting of non-ambiguous characters only 137 * xml file that details the mass of each elements and isotopes 138 * @param aminoAcidCompositionFile 139 * xml file that details the composition of amino acids 140 * @return the total molecular weight of sequence + weight of water molecule 141 * @throws JAXBException 142 * thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile 143 * @throws FileNotFoundException 144 * thrown if either elementMassFile or aminoAcidCompositionFile are not found 145 */ 146 public static final double getMolecularWeight(String sequence, File aminoAcidCompositionFile) throws FileNotFoundException, JAXBException{ 147 sequence = Utils.checkSequence(sequence); 148 ProteinSequence pSequence = null; 149 try { 150 pSequence = new ProteinSequence(sequence); 151 } catch (CompoundNotFoundException e) { 152 // the sequence was checked with Utils.checkSequence, this shouldn't happen 153 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 154 } 155 IPeptideProperties pp = new PeptidePropertiesImpl(); 156 return pp.getMolecularWeight(pSequence, aminoAcidCompositionFile); 157 } 158 159 /** 160 * An adaptor method would initialize amino acid composition table based on the input xml files and stores the table for usage in future calls to 161 * IPeptideProperties.getMolecularWeightBasedOnXML(ProteinSequence, AminoAcidCompositionTable). 162 * Note that ElementMass.xml is assumed to be able to be seen in default location. 163 * 164 * @param aminoAcidCompositionFile 165 * xml file that details the composition of amino acids 166 * @return the initialized amino acid composition table 167 * @throws JAXBException 168 * thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile 169 * @throws FileNotFoundException 170 * thrown if either elementMassFile or aminoAcidCompositionFile are not found 171 */ 172 public static final AminoAcidCompositionTable obtainAminoAcidCompositionTable(File aminoAcidCompositionFile) 173 throws JAXBException, FileNotFoundException{ 174 IPeptideProperties pp = new PeptidePropertiesImpl(); 175 return pp.obtainAminoAcidCompositionTable(aminoAcidCompositionFile); 176 } 177 178 /** 179 * An adaptor method would initialize amino acid composition table based on the input xml files and stores the table for usage in future calls to 180 * IPeptideProperties.getMolecularWeightBasedOnXML(ProteinSequence, AminoAcidCompositionTable). 181 * 182 * @param elementMassFile 183 * xml file that details the mass of each elements and isotopes 184 * @param aminoAcidCompositionFile 185 * xml file that details the composition of amino acids 186 * @return the initialized amino acid composition table 187 * @throws JAXBException 188 * thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile 189 * @throws FileNotFoundException 190 * thrown if either elementMassFile or aminoAcidCompositionFile are not found 191 */ 192 public static final AminoAcidCompositionTable obtainAminoAcidCompositionTable(File elementMassFile, File aminoAcidCompositionFile) 193 throws JAXBException, FileNotFoundException{ 194 IPeptideProperties pp = new PeptidePropertiesImpl(); 195 return pp.obtainAminoAcidCompositionTable(elementMassFile, aminoAcidCompositionFile); 196 } 197 198 /** 199 * An adaptor method that returns the molecular weight of sequence. The sequence argument must be a protein sequence consisting of only non-ambiguous characters. 200 * This method will sum the molecular weight of each amino acid in the 201 * sequence. Molecular weights are based on the AminoAcidCompositionTable. 202 * Those input files must be XML using the defined schema. 203 * 204 * @param sequence 205 * a protein sequence consisting of non-ambiguous characters only 206 * @param aminoAcidCompositionTable 207 * a amino acid composition table obtained by calling IPeptideProperties.obtainAminoAcidCompositionTable 208 * @return the total molecular weight of sequence + weight of water molecule 209 * thrown if the method IPeptideProperties.setMolecularWeightXML(File, File) is not successfully called before calling this method. 210 */ 211 public static double getMolecularWeightBasedOnXML(String sequence, AminoAcidCompositionTable aminoAcidCompositionTable){ 212 sequence = Utils.checkSequence(sequence, aminoAcidCompositionTable.getSymbolSet()); 213 ProteinSequence pSequence = null; 214 try { 215 pSequence = new ProteinSequence(sequence, aminoAcidCompositionTable.getAminoAcidCompoundSet()); 216 } catch (CompoundNotFoundException e) { 217 // the sequence was checked with Utils.checkSequence, this shouldn't happen 218 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 219 } 220 IPeptideProperties pp = new PeptidePropertiesImpl(); 221 return pp.getMolecularWeightBasedOnXML(pSequence, aminoAcidCompositionTable); 222 } 223 224 /** 225 * An adaptor method to returns the absorbance (optical density) of sequence. The sequence argument 226 * must be a protein sequence consisting of only non-ambiguous characters. 227 * The computation of absorbance (optical density) follows the 228 * documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>. 229 * 230 * @param sequence 231 * a protein sequence consisting of non-ambiguous characters only 232 * @param assumeCysReduced 233 * true if Cys are assumed to be reduced and false if Cys are assumed to form cystines 234 * @return the absorbance (optical density) of sequence 235 */ 236 public static final double getAbsorbance(String sequence, boolean assumeCysReduced){ 237 sequence = Utils.checkSequence(sequence); 238 ProteinSequence pSequence = null; 239 try { 240 pSequence = new ProteinSequence(sequence); 241 } catch (CompoundNotFoundException e) { 242 // the sequence was checked with Utils.checkSequence, this shouldn't happen 243 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 244 } 245 IPeptideProperties pp = new PeptidePropertiesImpl(); 246 return pp.getAbsorbance(pSequence, assumeCysReduced); 247 } 248 249 /** 250 * An adaptor method to return the extinction coefficient of sequence. The sequence argument 251 * must be a protein sequence consisting of only non-ambiguous characters. 252 * The extinction coefficient indicates how much light a protein absorbs at 253 * a certain wavelength. It is useful to have an estimation of this 254 * coefficient for following a protein which a spectrophotometer when 255 * purifying it. The computation of extinction coefficient follows the 256 * documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>. 257 * 258 * @param sequence 259 * a protein sequence consisting of non-ambiguous characters only 260 * @param assumeCysReduced 261 * true if Cys are assumed to be reduced and false if Cys are 262 * assumed to form cystines 263 * @return the extinction coefficient of sequence 264 */ 265 public static final double getExtinctionCoefficient(String sequence, boolean assumeCysReduced) { 266 sequence = Utils.checkSequence(sequence); 267 ProteinSequence pSequence = null; 268 try { 269 pSequence = new ProteinSequence(sequence); 270 } catch (CompoundNotFoundException e) { 271 // the sequence was checked with Utils.checkSequence, this shouldn't happen 272 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 273 } 274 IPeptideProperties pp = new PeptidePropertiesImpl(); 275 return pp.getExtinctionCoefficient(pSequence, assumeCysReduced); 276 } 277 278 /** 279 * An adaptor method to return the instability index of sequence. The sequence argument must be 280 * a protein sequence consisting of only non-ambiguous characters. 281 * The instability index provides an estimate of the stability of your 282 * protein in a test tube. The computation of instability index follows the 283 * documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>. 284 * 285 * @param sequence 286 * a protein sequence consisting of non-ambiguous characters only 287 * @return the instability index of sequence 288 */ 289 public static final double getInstabilityIndex(String sequence) { 290 sequence = Utils.checkSequence(sequence); 291 ProteinSequence pSequence = null; 292 try { 293 pSequence = new ProteinSequence(sequence); 294 } catch (CompoundNotFoundException e) { 295 // the sequence was checked with Utils.checkSequence, this shouldn't happen 296 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 297 } 298 IPeptideProperties pp = new PeptidePropertiesImpl(); 299 return pp.getInstabilityIndex(pSequence); 300 } 301 302 /** 303 * An adaptor method to return the apliphatic index of sequence. The sequence argument must be a 304 * protein sequence consisting of only non-ambiguous characters. 305 * The aliphatic index of a protein is defined as the relative volume 306 * occupied by aliphatic side chains (alanine, valine, isoleucine, and 307 * leucine). It may be regarded as a positive factor for the increase of 308 * thermostability of globular proteins. The computation of aliphatic index 309 * follows the documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>. 310 * A protein whose instability index is smaller than 40 is predicted as stable, a value above 40 predicts that the protein may be unstable. 311 * 312 * @param sequence 313 * a protein sequence consisting of non-ambiguous characters only 314 * @return the aliphatic index of sequence 315 */ 316 public static final double getApliphaticIndex(String sequence) { 317 sequence = Utils.checkSequence(sequence); 318 ProteinSequence pSequence = null; 319 try { 320 pSequence = new ProteinSequence(sequence); 321 } catch (CompoundNotFoundException e) { 322 // the sequence was checked with Utils.checkSequence, this shouldn't happen 323 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 324 } 325 326 IPeptideProperties pp = new PeptidePropertiesImpl(); 327 return pp.getApliphaticIndex(pSequence); 328 } 329 330 /** 331 * An adaptor method to return the average hydropathy value of sequence. The sequence argument 332 * must be a protein sequence consisting of only non-ambiguous characters. 333 * The average value for a sequence is calculated as the sum of hydropathy 334 * values of all the amino acids, divided by the number of residues in the 335 * sequence. Hydropathy values are based on (Kyte, J. and Doolittle, R.F. 336 * (1982) A simple method for displaying the hydropathic character of a 337 * protein. J. Mol. Biol. 157, 105-132). 338 * 339 * @param sequence 340 * a protein sequence consisting of non-ambiguous characters only 341 * @return the average hydropathy value of sequence 342 */ 343 public static final double getAvgHydropathy(String sequence) { 344 sequence = Utils.checkSequence(sequence); 345 ProteinSequence pSequence = null; 346 try { 347 pSequence = new ProteinSequence(sequence); 348 } catch (CompoundNotFoundException e) { 349 // the sequence was checked with Utils.checkSequence, this shouldn't happen 350 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 351 } 352 IPeptideProperties pp = new PeptidePropertiesImpl(); 353 return pp.getAvgHydropathy(pSequence); 354 } 355 356 /** 357 * An adaptor method to return the isoelectric point of sequence. The sequence argument must be 358 * a protein sequence consisting of only non-ambiguous characters. 359 * The isoelectric point is the pH at which the protein carries no net 360 * electrical charge. The isoelectric point will be computed based on 361 * approach stated in 362 * <a href="http://www.innovagen.se/custom-peptide-synthesis/peptide-property-calculator/peptide-property-calculator-notes.asp#PI">here</a> 363 * 364 * pKa values used will be either 365 * those used by Expasy which referenced "Electrophoresis 1994, 15, 529-539" 366 * OR 367 * A.Lehninger, Principles of Biochemistry, 4th Edition (2005), Chapter 3, page78, Table 3-1. 368 * 369 * @param sequence 370 * a protein sequence consisting of non-ambiguous characters only 371 * @param useExpasyValues 372 * whether to use Expasy values (Default) or Innovagen values 373 * @return the isoelectric point of sequence 374 */ 375 public static final double getIsoelectricPoint(String sequence, boolean useExpasyValues) { 376 sequence = Utils.checkSequence(sequence); 377 ProteinSequence pSequence = null; 378 try { 379 pSequence = new ProteinSequence(sequence); 380 } catch (CompoundNotFoundException e) { 381 // the sequence was checked with Utils.checkSequence, this shouldn't happen 382 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 383 } 384 IPeptideProperties pp = new PeptidePropertiesImpl(); 385 return pp.getIsoelectricPoint(pSequence, useExpasyValues); 386 } 387 388 public static final double getIsoelectricPoint(String sequence){ 389 return getIsoelectricPoint(sequence, true); 390 } 391 392 /** 393 * An adaptor method to return the net charge of sequence at pH 7. The sequence argument must be 394 * a protein sequence consisting of only non-ambiguous characters. 395 * The net charge will be computed using the approach stated in 396 * <a href="http://www.innovagen.se/custom-peptide-synthesis/peptide-property-calculator/peptide-property-calculator-notes.asp#PI">here</a> 397 * 398 * pKa values used will be either 399 * those used by Expasy which referenced "Electrophoresis 1994, 15, 529-539" 400 * OR 401 * A.Lehninger, Principles of Biochemistry, 4th Edition (2005), Chapter 3, page78, Table 3-1. 402 * 403 * @param sequence 404 * a protein sequence consisting of non-ambiguous characters only 405 * @param useExpasyValues 406 * whether to use Expasy values (Default) or Innovagen values 407 * @param pHPoint 408 * the pH value to use for computation of the net charge. Default at 7. 409 * @return the net charge of sequence at given pHPoint 410 */ 411 public static final double getNetCharge(String sequence, boolean useExpasyValues, double pHPoint){ 412 sequence = Utils.checkSequence(sequence); 413 ProteinSequence pSequence = null; 414 try { 415 pSequence = new ProteinSequence(sequence); 416 } catch (CompoundNotFoundException e) { 417 // the sequence was checked with Utils.checkSequence, this shouldn't happen 418 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 419 } 420 IPeptideProperties pp = new PeptidePropertiesImpl(); 421 return pp.getNetCharge(pSequence, useExpasyValues, pHPoint); 422 } 423 424 public static final double getNetCharge(String sequence, boolean useExpasyValues) { 425 return getNetCharge(sequence, useExpasyValues, 7.0); 426 } 427 428 public static final double getNetCharge(String sequence){ 429 return getNetCharge(sequence, true); 430 } 431 432 /** 433 * An adaptor method to return the composition of specified amino acid in the sequence. The 434 * sequence argument must be a protein sequence consisting of only 435 * non-ambiguous characters. The aminoAcidCode must be a non-ambiguous 436 * character. 437 * The composition of an amino acid is the total number of its occurrence, 438 * divided by the total length of the sequence. 439 * 440 * @param sequence 441 * a protein sequence consisting of non-ambiguous characters only 442 * @param aminoAcidCode 443 * the code of the amino acid to compute 444 * @return the composition of specified amino acid in the sequence 445 * @see SingleLetterAACode 446 */ 447 public static final double getEnrichment(String sequence, SingleLetterAACode aminoAcidCode) { 448 return getEnrichment(sequence, aminoAcidCode.toString()); 449 } 450 451 /** 452 * An adaptor method to return the composition of specified amino acid in the sequence. The 453 * sequence argument must be a protein sequence consisting of only 454 * non-ambiguous characters. The aminoAcidCode must be a non-ambiguous 455 * character. 456 * The composition of an amino acid is the total number of its occurrence, 457 * divided by the total length of the sequence. 458 * 459 * @param sequence 460 * a protein sequence consisting of non-ambiguous characters only 461 * @param aminoAcidCode 462 * the code of the amino acid to compute 463 * @return the composition of specified amino acid in the sequence 464 */ 465 public static final double getEnrichment(String sequence, char aminoAcidCode){ 466 return getEnrichment(sequence, aminoAcidCode); 467 } 468 469 /** 470 * An adaptor method to return the composition of specified amino acid in the sequence. The 471 * sequence argument must be a protein sequence consisting of only 472 * non-ambiguous characters. The aminoAcidCode must be a non-ambiguous 473 * character. 474 * The composition of an amino acid is the total number of its occurrence, 475 * divided by the total length of the sequence. 476 * 477 * @param sequence 478 * a protein sequence consisting of non-ambiguous characters only 479 * @param aminoAcidCode 480 * the code of the amino acid to compute 481 * @return the composition of specified amino acid in the sequence 482 */ 483 public static final double getEnrichment(String sequence, String aminoAcidCode){ 484 sequence = Utils.checkSequence(sequence); 485 ProteinSequence pSequence = null; 486 try { 487 pSequence = new ProteinSequence(sequence); 488 } catch (CompoundNotFoundException e) { 489 // the sequence was checked with Utils.checkSequence, this shouldn't happen 490 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 491 } 492 IPeptideProperties pp = new PeptidePropertiesImpl(); 493 AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); 494 return pp.getEnrichment(pSequence, aaSet.getCompoundForString(aminoAcidCode)); 495 } 496 497 /** 498 * An adaptor method to return the composition of the 20 standard amino acid in the sequence. 499 * The sequence argument must be a protein sequence consisting of only 500 * non-ambiguous characters. 501 * The composition of an amino acid is the total number of its occurrence, 502 * divided by the total length of the sequence. 503 * 504 * @param sequence 505 * a protein sequence consisting of non-ambiguous characters only 506 * @return the composition of the 20 standard amino acid in the sequence 507 * @see AminoAcidCompound 508 */ 509 public static final Map<AminoAcidCompound, Double> getAAComposition(String sequence) { 510 sequence = Utils.checkSequence(sequence); 511 ProteinSequence pSequence = null; 512 try { 513 pSequence = new ProteinSequence(sequence); 514 } catch (CompoundNotFoundException e) { 515 // the sequence was checked with Utils.checkSequence, this shouldn't happen 516 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 517 } 518 IPeptideProperties pp = new PeptidePropertiesImpl(); 519 return pp.getAAComposition(pSequence); 520 } 521 522 /** 523 * An adaptor method to return the composition of the 20 standard amino acid in the sequence. 524 * The sequence argument must be a protein sequence consisting of only 525 * non-ambiguous characters. 526 * The composition of an amino acid is the total number of its occurrence, 527 * divided by the total length of the sequence. 528 * 529 * @param sequence 530 * a protein sequence consisting of non-ambiguous characters only 531 * @return the composition of the 20 standard amino acid in the sequence 532 */ 533 public static final Map<String, Double> getAACompositionString(String sequence){ 534 Map<AminoAcidCompound, Double> aa2Composition = getAAComposition(sequence); 535 Map<String, Double> aaString2Composition = new HashMap<>(); 536 aaString2Composition = aa2Composition.keySet().stream() .collect(Collectors.toMap(aaCompound -> aaCompound.getShortName(),aaCompound ->aa2Composition.get(aaCompound))); 537 return aaString2Composition; 538 } 539 540 /** 541 * An adaptor method to return the composition of the 20 standard amino acid in the sequence. 542 * The sequence argument must be a protein sequence consisting of only 543 * non-ambiguous characters. 544 * The composition of an amino acid is the total number of its occurrence, 545 * divided by the total length of the sequence. 546 * 547 * @param sequence 548 * a protein sequence consisting of non-ambiguous characters only 549 * @return the composition of the 20 standard amino acid in the sequence 550 */ 551 public static final Map<Character, Double> getAACompositionChar(String sequence){ 552 Map<AminoAcidCompound, Double> aa2Composition = getAAComposition(sequence); 553 Map<Character, Double> aaChar2Composition = new HashMap<>(); 554 for(AminoAcidCompound aaCompound:aa2Composition.keySet()){ 555 aaChar2Composition.put(aaCompound.getShortName().charAt(0), aa2Composition.get(aaCompound)); 556 } 557 return aaChar2Composition; 558 } 559 560 /** 561 * Returns the array of charges of each amino acid in a protein. At pH=7, two are negative charged: aspartic acid (Asp, D) and glutamic acid (Glu, E) (acidic side chains), 562 * and three are positive charged: lysine (Lys, K), arginine (Arg, R) and histidine (His, H) (basic side chains). 563 * 564 * @param sequence 565 * a protein sequence consisting of non-ambiguous characters only 566 * @return the array of charges of amino acids in the protein (1 if amino acid is positively charged, -1 if negatively charged, 0 if not charged) 567 */ 568 public static final int[] getChargesOfAminoAcids(String sequence) { 569 int[] charges = new int[sequence.length()]; 570 for ( int i=0; i < sequence.length(); i++ ) { 571 char aa = sequence.toCharArray()[i]; 572 charges[i] = AminoAcidProperties.getChargeOfAminoAcid(aa); 573 } 574 return charges; 575 } 576 577 /** 578 * Returns the array of polarity values of each amino acid in a protein sequence. 579 * 580 * @param sequence 581 * a protein sequence consisting of non-ambiguous characters only 582 * @return the array of polarity of amino acids in the protein (1 if amino acid is polar, 0 if not) 583 */ 584 public static final int[] getPolarityOfAminoAcids(String sequence) { 585 int[] polarity = new int[sequence.length()]; 586 for ( int i=0; i < sequence.length(); i++ ) { 587 char aa = sequence.toCharArray()[i]; 588 polarity[i] = AminoAcidProperties.getPolarityOfAminoAcid(aa); 589 } 590 return polarity; 591 } 592 593 /** 594 * An adaptor method to return the aromaticity value of sequence. The sequence argument 595 * must be a protein sequence consisting of only non-ambiguous characters. 596 * <p> 597 * Calculates the aromaticity value of a protein according to Lobry, 1994. 598 * It is simply the relative frequency of Phe+Trp+Tyr. 599 * * 600 * 601 * @param sequence a protein sequence consisting of non-ambiguous characters only 602 * @return the aromaticity value of sequence 603 */ 604 public static final double getAromaticity(String sequence) { 605 sequence = Utils.checkSequence(sequence); 606 ProteinSequence pSequence = null; 607 try { 608 pSequence = new ProteinSequence(sequence); 609 } catch (CompoundNotFoundException e) { 610 // the sequence was checked with Utils.checkSequence, this shouldn't happen 611 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 612 } 613 IPeptideProperties pp = new PeptidePropertiesImpl(); 614 return pp.getAromaticity(pSequence); 615 } 616}