001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.aaproperties; 022 023import org.biojava.nbio.aaproperties.xml.AminoAcidCompositionTable; 024import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 025import org.biojava.nbio.core.sequence.ProteinSequence; 026import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 027import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 028import org.slf4j.Logger; 029import org.slf4j.LoggerFactory; 030 031import javax.xml.bind.JAXBException; 032import java.io.File; 033import java.io.FileNotFoundException; 034import java.util.Arrays; 035import java.util.HashMap; 036import java.util.HashSet; 037import java.util.Map; 038import java.util.Set; 039import java.util.stream.Collectors; 040import java.util.stream.Stream; 041 042/** 043 * This is an adaptor class which enable the ease of generating protein properties. 044 * At least one adaptor method is written for each available properties provided in IPeptideProperties. 045 * 046 * @author kohchuanhock 047 * @version 2011.08.22 048 * @since 3.0.2 049 * @see IPeptideProperties 050 * @see PeptidePropertiesImpl 051 */ 052public class PeptideProperties { 053 054 private final static Logger logger = LoggerFactory.getLogger(PeptideProperties.class); 055 056 /** 057 * Enumeration of 20 standard amino acid code 058 */ 059 public enum SingleLetterAACode { W, C, M, H, Y, F, Q, N, I, R, D, P, T, K, E, V, S, G, A, L} 060 061 /** 062 * Contains the 20 standard AA code in a set 063 */ 064 public static Set<Character> standardAASet; 065 066 /** 067 * To initialize the standardAASet 068 */ 069 static{ 070 standardAASet = Arrays.stream(SingleLetterAACode.values()) 071 .map(singleLetterAACode -> singleLetterAACode.toString().charAt(0)) 072 .collect(Collectors.toCollection(HashSet::new)); 073 } 074 075 /** 076 * An adaptor method to return the molecular weight of sequence. 077 * The sequence argument must be a protein sequence consisting of only non-ambiguous characters. 078 * This method will sum the molecular weight of each amino acid in the 079 * sequence. Molecular weights are based on <a href="http://web.expasy.org/findmod/findmod_masses.html">here</a>. 080 * 081 * @param sequence 082 * a protein sequence consisting of non-ambiguous characters only 083 * @return the total molecular weight of sequence + weight of water molecule 084 */ 085 public static final double getMolecularWeight(String sequence){ 086 sequence = Utils.checkSequence(sequence); 087 ProteinSequence pSequence = null; 088 try { 089 pSequence = new ProteinSequence(sequence); 090 } catch (CompoundNotFoundException e) { 091 // the sequence was checked with Utils.checkSequence, this shouldn't happen 092 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 093 } 094 IPeptideProperties pp = new PeptidePropertiesImpl(); 095 return pp.getMolecularWeight(pSequence); 096 } 097 098 /** 099 * An adaptor method to return the molecular weight of sequence. 100 * The sequence argument must be a protein sequence consisting of only non-ambiguous characters. 101 * This method will sum the molecular weight of each amino acid in the 102 * sequence. Molecular weights are based on the input xml file. 103 * 104 * @param sequence 105 * a protein sequence consisting of non-ambiguous characters only 106 * @param elementMassFile 107 * xml file that details the mass of each elements and isotopes 108 * @param aminoAcidCompositionFile 109 * xml file that details the composition of amino acids 110 * @return the total molecular weight of sequence + weight of water molecule 111 * @throws FileNotFoundException 112 * thrown if either elementMassFile or aminoAcidCompositionFile are not found 113 * @throws JAXBException 114 * thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile 115 */ 116 public static final double getMolecularWeight(String sequence, File elementMassFile, File aminoAcidCompositionFile) 117 throws FileNotFoundException, JAXBException{ 118 sequence = Utils.checkSequence(sequence); 119 ProteinSequence pSequence = null; 120 try { 121 pSequence = new ProteinSequence(sequence); 122 } catch (CompoundNotFoundException e) { 123 // the sequence was checked with Utils.checkSequence, this shouldn't happen 124 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 125 } 126 IPeptideProperties pp = new PeptidePropertiesImpl(); 127 return pp.getMolecularWeight(pSequence, elementMassFile, aminoAcidCompositionFile); 128 } 129 130 /** 131 * An adaptor method to return the molecular weight of sequence. The sequence argument must be a protein sequence consisting of only non-ambiguous characters. 132 * This method will sum the molecular weight of each amino acid in the 133 * sequence. Molecular weights are based on the input files. These input files must be XML using the defined schema. 134 * Note that it assumes that ElementMass.xml file can be found in default location. 135 * 136 * @param sequence 137 * a protein sequence consisting of non-ambiguous characters only 138 * xml file that details the mass of each elements and isotopes 139 * @param aminoAcidCompositionFile 140 * xml file that details the composition of amino acids 141 * @return the total molecular weight of sequence + weight of water molecule 142 * @throws JAXBException 143 * thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile 144 * @throws FileNotFoundException 145 * thrown if either elementMassFile or aminoAcidCompositionFile are not found 146 */ 147 public static final double getMolecularWeight(String sequence, File aminoAcidCompositionFile) throws FileNotFoundException, JAXBException{ 148 sequence = Utils.checkSequence(sequence); 149 ProteinSequence pSequence = null; 150 try { 151 pSequence = new ProteinSequence(sequence); 152 } catch (CompoundNotFoundException e) { 153 // the sequence was checked with Utils.checkSequence, this shouldn't happen 154 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 155 } 156 IPeptideProperties pp = new PeptidePropertiesImpl(); 157 return pp.getMolecularWeight(pSequence, aminoAcidCompositionFile); 158 } 159 160 /** 161 * An adaptor method would initialize amino acid composition table based on the input xml files and stores the table for usage in future calls to 162 * IPeptideProperties.getMolecularWeightBasedOnXML(ProteinSequence, AminoAcidCompositionTable). 163 * Note that ElementMass.xml is assumed to be able to be seen in default location. 164 * 165 * @param aminoAcidCompositionFile 166 * xml file that details the composition of amino acids 167 * @return the initialized amino acid composition table 168 * @throws JAXBException 169 * thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile 170 * @throws FileNotFoundException 171 * thrown if either elementMassFile or aminoAcidCompositionFile are not found 172 */ 173 public static final AminoAcidCompositionTable obtainAminoAcidCompositionTable(File aminoAcidCompositionFile) 174 throws JAXBException, FileNotFoundException{ 175 IPeptideProperties pp = new PeptidePropertiesImpl(); 176 return pp.obtainAminoAcidCompositionTable(aminoAcidCompositionFile); 177 } 178 179 /** 180 * An adaptor method would initialize amino acid composition table based on the input xml files and stores the table for usage in future calls to 181 * IPeptideProperties.getMolecularWeightBasedOnXML(ProteinSequence, AminoAcidCompositionTable). 182 * 183 * @param elementMassFile 184 * xml file that details the mass of each elements and isotopes 185 * @param aminoAcidCompositionFile 186 * xml file that details the composition of amino acids 187 * @return the initialized amino acid composition table 188 * @throws JAXBException 189 * thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile 190 * @throws FileNotFoundException 191 * thrown if either elementMassFile or aminoAcidCompositionFile are not found 192 */ 193 public static final AminoAcidCompositionTable obtainAminoAcidCompositionTable(File elementMassFile, File aminoAcidCompositionFile) 194 throws JAXBException, FileNotFoundException{ 195 IPeptideProperties pp = new PeptidePropertiesImpl(); 196 return pp.obtainAminoAcidCompositionTable(elementMassFile, aminoAcidCompositionFile); 197 } 198 199 /** 200 * An adaptor method that returns the molecular weight of sequence. The sequence argument must be a protein sequence consisting of only non-ambiguous characters. 201 * This method will sum the molecular weight of each amino acid in the 202 * sequence. Molecular weights are based on the AminoAcidCompositionTable. 203 * Those input files must be XML using the defined schema. 204 * 205 * @param sequence 206 * a protein sequence consisting of non-ambiguous characters only 207 * @param aminoAcidCompositionTable 208 * a amino acid composition table obtained by calling IPeptideProperties.obtainAminoAcidCompositionTable 209 * @return the total molecular weight of sequence + weight of water molecule 210 * thrown if the method IPeptideProperties.setMolecularWeightXML(File, File) is not successfully called before calling this method. 211 */ 212 public static double getMolecularWeightBasedOnXML(String sequence, AminoAcidCompositionTable aminoAcidCompositionTable){ 213 sequence = Utils.checkSequence(sequence, aminoAcidCompositionTable.getSymbolSet()); 214 ProteinSequence pSequence = null; 215 try { 216 pSequence = new ProteinSequence(sequence, aminoAcidCompositionTable.getAminoAcidCompoundSet()); 217 } catch (CompoundNotFoundException e) { 218 // the sequence was checked with Utils.checkSequence, this shouldn't happen 219 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 220 } 221 IPeptideProperties pp = new PeptidePropertiesImpl(); 222 return pp.getMolecularWeightBasedOnXML(pSequence, aminoAcidCompositionTable); 223 } 224 225 /** 226 * An adaptor method to returns the absorbance (optical density) of sequence. The sequence argument 227 * must be a protein sequence consisting of only non-ambiguous characters. 228 * The computation of absorbance (optical density) follows the 229 * documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>. 230 * 231 * @param sequence 232 * a protein sequence consisting of non-ambiguous characters only 233 * @param assumeCysReduced 234 * true if Cys are assumed to be reduced and false if Cys are assumed to form cystines 235 * @return the absorbance (optical density) of sequence 236 */ 237 public static final double getAbsorbance(String sequence, boolean assumeCysReduced){ 238 sequence = Utils.checkSequence(sequence); 239 ProteinSequence pSequence = null; 240 try { 241 pSequence = new ProteinSequence(sequence); 242 } catch (CompoundNotFoundException e) { 243 // the sequence was checked with Utils.checkSequence, this shouldn't happen 244 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 245 } 246 IPeptideProperties pp = new PeptidePropertiesImpl(); 247 return pp.getAbsorbance(pSequence, assumeCysReduced); 248 } 249 250 /** 251 * An adaptor method to return the extinction coefficient of sequence. The sequence argument 252 * must be a protein sequence consisting of only non-ambiguous characters. 253 * The extinction coefficient indicates how much light a protein absorbs at 254 * a certain wavelength. It is useful to have an estimation of this 255 * coefficient for following a protein which a spectrophotometer when 256 * purifying it. The computation of extinction coefficient follows the 257 * documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>. 258 * 259 * @param sequence 260 * a protein sequence consisting of non-ambiguous characters only 261 * @param assumeCysReduced 262 * true if Cys are assumed to be reduced and false if Cys are 263 * assumed to form cystines 264 * @return the extinction coefficient of sequence 265 */ 266 public static final double getExtinctionCoefficient(String sequence, boolean assumeCysReduced) { 267 sequence = Utils.checkSequence(sequence); 268 ProteinSequence pSequence = null; 269 try { 270 pSequence = new ProteinSequence(sequence); 271 } catch (CompoundNotFoundException e) { 272 // the sequence was checked with Utils.checkSequence, this shouldn't happen 273 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 274 } 275 IPeptideProperties pp = new PeptidePropertiesImpl(); 276 return pp.getExtinctionCoefficient(pSequence, assumeCysReduced); 277 } 278 279 /** 280 * An adaptor method to return the instability index of sequence. The sequence argument must be 281 * a protein sequence consisting of only non-ambiguous characters. 282 * The instability index provides an estimate of the stability of your 283 * protein in a test tube. The computation of instability index follows the 284 * documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>. 285 * 286 * @param sequence 287 * a protein sequence consisting of non-ambiguous characters only 288 * @return the instability index of sequence 289 */ 290 public static final double getInstabilityIndex(String sequence) { 291 sequence = Utils.checkSequence(sequence); 292 ProteinSequence pSequence = null; 293 try { 294 pSequence = new ProteinSequence(sequence); 295 } catch (CompoundNotFoundException e) { 296 // the sequence was checked with Utils.checkSequence, this shouldn't happen 297 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 298 } 299 IPeptideProperties pp = new PeptidePropertiesImpl(); 300 return pp.getInstabilityIndex(pSequence); 301 } 302 303 /** 304 * An adaptor method to return the apliphatic index of sequence. The sequence argument must be a 305 * protein sequence consisting of only non-ambiguous characters. 306 * The aliphatic index of a protein is defined as the relative volume 307 * occupied by aliphatic side chains (alanine, valine, isoleucine, and 308 * leucine). It may be regarded as a positive factor for the increase of 309 * thermostability of globular proteins. The computation of aliphatic index 310 * follows the documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>. 311 * A protein whose instability index is smaller than 40 is predicted as stable, a value above 40 predicts that the protein may be unstable. 312 * 313 * @param sequence 314 * a protein sequence consisting of non-ambiguous characters only 315 * @return the aliphatic index of sequence 316 */ 317 public static final double getApliphaticIndex(String sequence) { 318 sequence = Utils.checkSequence(sequence); 319 ProteinSequence pSequence = null; 320 try { 321 pSequence = new ProteinSequence(sequence); 322 } catch (CompoundNotFoundException e) { 323 // the sequence was checked with Utils.checkSequence, this shouldn't happen 324 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 325 } 326 327 IPeptideProperties pp = new PeptidePropertiesImpl(); 328 return pp.getApliphaticIndex(pSequence); 329 } 330 331 /** 332 * An adaptor method to return the average hydropathy value of sequence. The sequence argument 333 * must be a protein sequence consisting of only non-ambiguous characters. 334 * The average value for a sequence is calculated as the sum of hydropathy 335 * values of all the amino acids, divided by the number of residues in the 336 * sequence. Hydropathy values are based on (Kyte, J. and Doolittle, R.F. 337 * (1982) A simple method for displaying the hydropathic character of a 338 * protein. J. Mol. Biol. 157, 105-132). 339 * 340 * @param sequence 341 * a protein sequence consisting of non-ambiguous characters only 342 * @return the average hydropathy value of sequence 343 */ 344 public static final double getAvgHydropathy(String sequence) { 345 sequence = Utils.checkSequence(sequence); 346 ProteinSequence pSequence = null; 347 try { 348 pSequence = new ProteinSequence(sequence); 349 } catch (CompoundNotFoundException e) { 350 // the sequence was checked with Utils.checkSequence, this shouldn't happen 351 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 352 } 353 IPeptideProperties pp = new PeptidePropertiesImpl(); 354 return pp.getAvgHydropathy(pSequence); 355 } 356 357 /** 358 * An adaptor method to return the isoelectric point of sequence. The sequence argument must be 359 * a protein sequence consisting of only non-ambiguous characters. 360 * The isoelectric point is the pH at which the protein carries no net 361 * electrical charge. The isoelectric point will be computed based on 362 * approach stated in 363 * <a href="http://www.innovagen.se/custom-peptide-synthesis/peptide-property-calculator/peptide-property-calculator-notes.asp#PI">here</a> 364 * 365 * pKa values used will be either 366 * those used by Expasy which referenced "Electrophoresis 1994, 15, 529-539" 367 * OR 368 * A.Lehninger, Principles of Biochemistry, 4th Edition (2005), Chapter 3, page78, Table 3-1. 369 * 370 * @param sequence 371 * a protein sequence consisting of non-ambiguous characters only 372 * @param useExpasyValues 373 * whether to use Expasy values (Default) or Innovagen values 374 * @return the isoelectric point of sequence 375 */ 376 public static final double getIsoelectricPoint(String sequence, boolean useExpasyValues) { 377 sequence = Utils.checkSequence(sequence); 378 ProteinSequence pSequence = null; 379 try { 380 pSequence = new ProteinSequence(sequence); 381 } catch (CompoundNotFoundException e) { 382 // the sequence was checked with Utils.checkSequence, this shouldn't happen 383 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 384 } 385 IPeptideProperties pp = new PeptidePropertiesImpl(); 386 return pp.getIsoelectricPoint(pSequence, useExpasyValues); 387 } 388 389 public static final double getIsoelectricPoint(String sequence){ 390 return getIsoelectricPoint(sequence, true); 391 } 392 393 /** 394 * An adaptor method to return the net charge of sequence at pH 7. The sequence argument must be 395 * a protein sequence consisting of only non-ambiguous characters. 396 * The net charge will be computed using the approach stated in 397 * <a href="http://www.innovagen.se/custom-peptide-synthesis/peptide-property-calculator/peptide-property-calculator-notes.asp#PI">here</a> 398 * 399 * pKa values used will be either 400 * those used by Expasy which referenced "Electrophoresis 1994, 15, 529-539" 401 * OR 402 * A.Lehninger, Principles of Biochemistry, 4th Edition (2005), Chapter 3, page78, Table 3-1. 403 * 404 * @param sequence 405 * a protein sequence consisting of non-ambiguous characters only 406 * @param useExpasyValues 407 * whether to use Expasy values (Default) or Innovagen values 408 * @param pHPoint 409 * the pH value to use for computation of the net charge. Default at 7. 410 * @return the net charge of sequence at given pHPoint 411 */ 412 public static final double getNetCharge(String sequence, boolean useExpasyValues, double pHPoint){ 413 sequence = Utils.checkSequence(sequence); 414 ProteinSequence pSequence = null; 415 try { 416 pSequence = new ProteinSequence(sequence); 417 } catch (CompoundNotFoundException e) { 418 // the sequence was checked with Utils.checkSequence, this shouldn't happen 419 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 420 } 421 IPeptideProperties pp = new PeptidePropertiesImpl(); 422 return pp.getNetCharge(pSequence, useExpasyValues, pHPoint); 423 } 424 425 public static final double getNetCharge(String sequence, boolean useExpasyValues) { 426 return getNetCharge(sequence, useExpasyValues, 7.0); 427 } 428 429 public static final double getNetCharge(String sequence){ 430 return getNetCharge(sequence, true); 431 } 432 433 /** 434 * An adaptor method to return the composition of specified amino acid in the sequence. The 435 * sequence argument must be a protein sequence consisting of only 436 * non-ambiguous characters. The aminoAcidCode must be a non-ambiguous 437 * character. 438 * The composition of an amino acid is the total number of its occurrence, 439 * divided by the total length of the sequence. 440 * 441 * @param sequence 442 * a protein sequence consisting of non-ambiguous characters only 443 * @param aminoAcidCode 444 * the code of the amino acid to compute 445 * @return the composition of specified amino acid in the sequence 446 * @see SingleLetterAACode 447 */ 448 public static final double getEnrichment(String sequence, SingleLetterAACode aminoAcidCode) { 449 return getEnrichment(sequence, aminoAcidCode.toString()); 450 } 451 452 /** 453 * An adaptor method to return the composition of specified amino acid in the sequence. The 454 * sequence argument must be a protein sequence consisting of only 455 * non-ambiguous characters. The aminoAcidCode must be a non-ambiguous 456 * character. 457 * The composition of an amino acid is the total number of its occurrence, 458 * divided by the total length of the sequence. 459 * 460 * @param sequence 461 * a protein sequence consisting of non-ambiguous characters only 462 * @param aminoAcidCode 463 * the code of the amino acid to compute 464 * @return the composition of specified amino acid in the sequence 465 */ 466 public static final double getEnrichment(String sequence, char aminoAcidCode){ 467 return getEnrichment(sequence, aminoAcidCode); 468 } 469 470 /** 471 * An adaptor method to return the composition of specified amino acid in the sequence. The 472 * sequence argument must be a protein sequence consisting of only 473 * non-ambiguous characters. The aminoAcidCode must be a non-ambiguous 474 * character. 475 * The composition of an amino acid is the total number of its occurrence, 476 * divided by the total length of the sequence. 477 * 478 * @param sequence 479 * a protein sequence consisting of non-ambiguous characters only 480 * @param aminoAcidCode 481 * the code of the amino acid to compute 482 * @return the composition of specified amino acid in the sequence 483 */ 484 public static final double getEnrichment(String sequence, String aminoAcidCode){ 485 sequence = Utils.checkSequence(sequence); 486 ProteinSequence pSequence = null; 487 try { 488 pSequence = new ProteinSequence(sequence); 489 } catch (CompoundNotFoundException e) { 490 // the sequence was checked with Utils.checkSequence, this shouldn't happen 491 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 492 } 493 IPeptideProperties pp = new PeptidePropertiesImpl(); 494 AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); 495 return pp.getEnrichment(pSequence, aaSet.getCompoundForString(aminoAcidCode)); 496 } 497 498 /** 499 * An adaptor method to return the composition of the 20 standard amino acid in the sequence. 500 * The sequence argument must be a protein sequence consisting of only 501 * non-ambiguous characters. 502 * The composition of an amino acid is the total number of its occurrence, 503 * divided by the total length of the sequence. 504 * 505 * @param sequence 506 * a protein sequence consisting of non-ambiguous characters only 507 * @return the composition of the 20 standard amino acid in the sequence 508 * @see AminoAcidCompound 509 */ 510 public static final Map<AminoAcidCompound, Double> getAAComposition(String sequence) { 511 sequence = Utils.checkSequence(sequence); 512 ProteinSequence pSequence = null; 513 try { 514 pSequence = new ProteinSequence(sequence); 515 } catch (CompoundNotFoundException e) { 516 // the sequence was checked with Utils.checkSequence, this shouldn't happen 517 logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage()); 518 } 519 IPeptideProperties pp = new PeptidePropertiesImpl(); 520 return pp.getAAComposition(pSequence); 521 } 522 523 /** 524 * An adaptor method to return the composition of the 20 standard amino acid in the sequence. 525 * The sequence argument must be a protein sequence consisting of only 526 * non-ambiguous characters. 527 * The composition of an amino acid is the total number of its occurrence, 528 * divided by the total length of the sequence. 529 * 530 * @param sequence 531 * a protein sequence consisting of non-ambiguous characters only 532 * @return the composition of the 20 standard amino acid in the sequence 533 */ 534 public static final Map<String, Double> getAACompositionString(String sequence){ 535 Map<AminoAcidCompound, Double> aa2Composition = getAAComposition(sequence); 536 Map<String, Double> aaString2Composition = new HashMap<String, Double>(); 537 aaString2Composition = aa2Composition.keySet().stream() .collect(Collectors.toMap(aaCompound -> aaCompound.getShortName(),aaCompound ->aa2Composition.get(aaCompound))); 538 return aaString2Composition; 539 } 540 541 /** 542 * An adaptor method to return the composition of the 20 standard amino acid in the sequence. 543 * The sequence argument must be a protein sequence consisting of only 544 * non-ambiguous characters. 545 * The composition of an amino acid is the total number of its occurrence, 546 * divided by the total length of the sequence. 547 * 548 * @param sequence 549 * a protein sequence consisting of non-ambiguous characters only 550 * @return the composition of the 20 standard amino acid in the sequence 551 */ 552 public static final Map<Character, Double> getAACompositionChar(String sequence){ 553 Map<AminoAcidCompound, Double> aa2Composition = getAAComposition(sequence); 554 Map<Character, Double> aaChar2Composition = new HashMap<Character, Double>(); 555 for(AminoAcidCompound aaCompound:aa2Composition.keySet()){ 556 aaChar2Composition.put(aaCompound.getShortName().charAt(0), aa2Composition.get(aaCompound)); 557 } 558 return aaChar2Composition; 559 } 560 561 /** 562 * Returns the array of charges of each amino acid in a protein. At pH=7, two are negative charged: aspartic acid (Asp, D) and glutamic acid (Glu, E) (acidic side chains), 563 * and three are positive charged: lysine (Lys, K), arginine (Arg, R) and histidine (His, H) (basic side chains). 564 * 565 * @param sequence 566 * a protein sequence consisting of non-ambiguous characters only 567 * @return the array of charges of amino acids in the protein (1 if amino acid is positively charged, -1 if negatively charged, 0 if not charged) 568 */ 569 public static final int[] getChargesOfAminoAcids(String sequence) { 570 int[] charges = new int[sequence.length()]; 571 for ( int i=0; i < sequence.length(); i++ ) { 572 char aa = sequence.toCharArray()[i]; 573 charges[i] = AminoAcidProperties.getChargeOfAminoAcid(aa); 574 } 575 return charges; 576 } 577 578 /** 579 * Returns the array of polarity values of each amino acid in a protein sequence. 580 * 581 * @param sequence 582 * a protein sequence consisting of non-ambiguous characters only 583 * @return the array of polarity of amino acids in the protein (1 if amino acid is polar, 0 if not) 584 */ 585 public static final int[] getPolarityOfAminoAcids(String sequence) { 586 int[] polarity = new int[sequence.length()]; 587 for ( int i=0; i < sequence.length(); i++ ) { 588 char aa = sequence.toCharArray()[i]; 589 polarity[i] = AminoAcidProperties.getPolarityOfAminoAcid(aa); 590 } 591 return polarity; 592 } 593}