001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.aaproperties; 022 023import org.biojava.nbio.aaproperties.xml.AminoAcidCompositionTable; 024import org.biojava.nbio.aaproperties.xml.ElementTable; 025import org.biojava.nbio.aaproperties.xml.MyValidationEventHandler; 026import org.biojava.nbio.core.sequence.ProteinSequence; 027import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 028import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 029import org.slf4j.Logger; 030import org.slf4j.LoggerFactory; 031 032import jakarta.xml.bind.JAXBContext; 033import jakarta.xml.bind.JAXBException; 034import jakarta.xml.bind.Unmarshaller; 035import java.io.File; 036import java.io.FileInputStream; 037import java.io.FileNotFoundException; 038import java.util.HashMap; 039import java.util.Map; 040 041/** 042 * This class contains the actual implementation of IPeptideProperties and is wrapped around by PeptideProperties for ease of use. 043 * 044 * @author kohchuanhock 045 * @version 2011.08.22 046 * @since 3.0.2 047 * @see IPeptideProperties 048 * @see PeptideProperties 049 */ 050public class PeptidePropertiesImpl implements IPeptideProperties{ 051 052 private final static Logger logger = LoggerFactory.getLogger(PeptidePropertiesImpl.class); 053 054 /** 055 * @return the molecular weight of water 056 */ 057 private double getWaterMoleculeWeight(){ 058 final double hydrogenMW = 1.0079; 059 final double hydroxideMW = 17.0073; 060 //H 1.0079 OH 17.0073 061 return hydrogenMW + hydroxideMW; 062 } 063 064 private char[] getSequence(String sequence, boolean ignoreCase){ 065 if(ignoreCase){ 066 return sequence.toUpperCase().toCharArray(); 067 }else{ 068 return sequence.toCharArray(); 069 } 070 } 071 072 @Override 073 public double getMolecularWeight(ProteinSequence sequence) { 074 double value = 0.0; 075 AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); 076 char[] seq = getSequence(sequence.toString(), true);//ignore case 077 for(char aa:seq){ 078 AminoAcidCompound c = aaSet.getCompoundForString(String.valueOf(aa)); 079 if(Constraints.aa2MolecularWeight.containsKey(c)){ 080 value += Constraints.aa2MolecularWeight.get(c); 081 } 082 } 083 if(value == 0) 084 return value; 085 else 086 return value + getWaterMoleculeWeight(); 087 } 088 089 @Override 090 public double getMolecularWeight(ProteinSequence sequence, File aminoAcidCompositionFile) throws JAXBException, FileNotFoundException { 091 File elementMassFile = new File("./src/main/resources/ElementMass.xml"); 092 if(!elementMassFile.exists()){ 093 throw new FileNotFoundException("Cannot locate ElementMass.xml. " + 094 "Please use getMolecularWeight(ProteinSequence, File, File) to specify ElementMass.xml location."); 095 } 096 return getMolecularWeightBasedOnXML(sequence, obtainAminoAcidCompositionTable(elementMassFile, aminoAcidCompositionFile)); 097 } 098 099 @Override 100 public double getMolecularWeight(ProteinSequence sequence, File elementMassFile, File aminoAcidCompositionFile) 101 throws JAXBException, FileNotFoundException{ 102 return getMolecularWeightBasedOnXML(sequence, obtainAminoAcidCompositionTable(elementMassFile, aminoAcidCompositionFile)); 103 } 104 105 @Override 106 public double getMolecularWeightBasedOnXML(ProteinSequence sequence, AminoAcidCompositionTable aminoAcidCompositionTable){ 107 double value = 0.0; 108 char[] seq = sequence.toString().toCharArray(); 109 for(char aa:seq){ 110 Double weight = aminoAcidCompositionTable.getMolecularWeight(aa); 111 if(weight != null){ 112 value += weight; 113 } 114 } 115 if(value == 0.0) 116 return value; 117 else 118 return value + getWaterMoleculeWeight(); 119 } 120 121 @Override 122 public AminoAcidCompositionTable obtainAminoAcidCompositionTable(File aminoAcidCompositionFile) 123 throws JAXBException, FileNotFoundException{ 124 File elementMassFile = new File("./src/main/resources/ElementMass.xml"); 125 if(!elementMassFile.exists()){ 126 throw new FileNotFoundException("Cannot locate ElementMass.xml. " + 127 "Please use getMolecularWeight(ProteinSequence, File, File) to specify ElementMass.xml location."); 128 } 129 return obtainAminoAcidCompositionTable(elementMassFile, aminoAcidCompositionFile); 130 } 131 132 @Override 133 public AminoAcidCompositionTable obtainAminoAcidCompositionTable(File elementMassFile, File aminoAcidCompositionFile) 134 throws JAXBException, FileNotFoundException{ 135 //Parse elementMassFile 136 ElementTable iTable = new ElementTable(); 137 // Get a JAXB Context for the object we created above 138 JAXBContext jc = JAXBContext.newInstance(iTable.getClass()); 139 Unmarshaller u = jc.createUnmarshaller(); 140 u.setEventHandler(new MyValidationEventHandler()); 141 iTable = (ElementTable)u.unmarshal(new FileInputStream(elementMassFile)); 142 iTable.populateMaps(); 143 144 //Parse aminoAcidCompositionFile 145 AminoAcidCompositionTable aTable = new AminoAcidCompositionTable(); 146 // Get a JAXB Context for the object we created above 147 JAXBContext jc2 = JAXBContext.newInstance(aTable.getClass()); 148 Unmarshaller u2 = jc2.createUnmarshaller(); 149 u2.setEventHandler(new MyValidationEventHandler()); 150 aTable = (AminoAcidCompositionTable)u2.unmarshal(new FileInputStream(aminoAcidCompositionFile)); 151 aTable.computeMolecularWeight(iTable); 152 return aTable; 153 } 154 155 @Override 156 public double getExtinctionCoefficient(ProteinSequence sequence, boolean assumeCysReduced) { 157 //Tyr => Y 158 //Trp => W 159 //Cys => C 160 //E(Prot) = Numb(Tyr)*Ext(Tyr) + Numb(Trp)*Ext(Trp) + Numb(Cystine)*Ext(Cystine) 161 //where (for proteins in water measured at 280 nm): Ext(Tyr) = 1490, Ext(Trp) = 5500, Ext(Cystine) = 125; 162 AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); 163 Map<AminoAcidCompound, Integer> extinctAA2Count = this.getExtinctAACount(sequence); 164 165 double eProt; 166 if(!assumeCysReduced){ 167 eProt = extinctAA2Count.get(aaSet.getCompoundForString("Y")) * 168 Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("Y")) + 169 extinctAA2Count.get(aaSet.getCompoundForString("W")) * 170 Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("W")) + 171 extinctAA2Count.get(aaSet.getCompoundForString("C")) * 172 Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("C")); 173 }else 174 eProt = extinctAA2Count.get(aaSet.getCompoundForString("Y")) * 175 Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("Y")) + 176 extinctAA2Count.get(aaSet.getCompoundForString("W")) * 177 Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("W")); 178 179 return eProt; 180 } 181 182 @Override 183 public double getAbsorbance(ProteinSequence sequence, boolean assumeCysReduced){ 184 //Absorb(Prot) = E(Prot) / Molecular_weight 185 double mw = this.getMolecularWeight(sequence); 186 double eProt = this.getExtinctionCoefficient(sequence, assumeCysReduced); 187 if (mw == 0.0) { 188 logger.warn("Molecular weight is 0.0, can't divide by 0: setting absorbance to 0.0"); 189 return 0.0; 190 } 191 return eProt / mw; 192 } 193 194 private Map<AminoAcidCompound, Integer> getExtinctAACount(ProteinSequence sequence){ 195 //Cys => C, Tyr => Y, Trp => W 196 int numW = 0; 197 int smallW = 0; 198 double numC = 0; 199 double smallC = 0; 200 int numY = 0; 201 int smallY = 0; 202 for(char aa:sequence.getSequenceAsString().toCharArray()){ 203 switch(aa){ 204 case 'W': numW++; break; 205 case 'w': smallW++; break; 206 case 'C': numC += 0.5; break; 207 case 'c': smallC += 0.5; break; 208 case 'Y': numY++; break; 209 case 'y': smallY++; break; 210 } 211 } 212 AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); 213 Map<AminoAcidCompound, Integer> extinctAA2Count = new HashMap<>(); 214 //Ignore Case is always true 215 extinctAA2Count.put(aaSet.getCompoundForString("W"), numW + smallW); 216 extinctAA2Count.put(aaSet.getCompoundForString("C"), (int) (numC + smallC)); 217 extinctAA2Count.put(aaSet.getCompoundForString("Y"), numY + smallY); 218 return extinctAA2Count; 219 } 220 221 @Override 222 public double getInstabilityIndex(ProteinSequence sequence) { 223 double sum = 0.0; 224 String s = sequence.getSequenceAsString().toUpperCase(); 225 for(int i = 0; i < sequence.getLength() - 1; i++){ 226 String dipeptide = s.substring(i, i+2); 227 if(Constraints.diAA2Instability.containsKey(dipeptide)){ 228 sum += Constraints.diAA2Instability.get(dipeptide); 229 } 230 } 231 int denominator = s.length() - Utils.getNumberOfInvalidChar(s, null, true); 232 233 if (denominator==0) { 234 logger.warn("Valid length of sequence is 0, can't divide by 0 to calculate instability index: setting instability index value to 0.0"); 235 return 0.0; 236 } 237 return sum * 10.0 / denominator; 238 } 239 240 @Override 241 public double getApliphaticIndex(ProteinSequence sequence) { 242// Aliphatic index = X(Ala) + a * X(Val) + b * ( X(Ile) + X(Leu) ) 243// where X(Ala), X(Val), X(Ile), and X(Leu) are mole percent (100 X mole fraction) 244// of alanine, valine, isoleucine, and leucine. 245// The coefficients a and b are the relative volume of valine side chain (a = 2.9) 246// and of Leu/Ile side chains (b = 3.9) to the side chain of alanine. 247// Ala => A, Val => V, Ile => I, Leu => L 248 AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); 249 Map<AminoAcidCompound, Double> aa2Composition = getAAComposition(sequence); 250 final double a = 2.9; 251 final double b = 3.9; 252 double xAla = aa2Composition.get(aaSet.getCompoundForString("A")); 253 double xVal = aa2Composition.get(aaSet.getCompoundForString("V")); 254 double xIle = aa2Composition.get(aaSet.getCompoundForString("I")); 255 double xLeu = aa2Composition.get(aaSet.getCompoundForString("L")); 256 return (xAla + (a * xVal) + (b * (xIle + xLeu))) * 100; 257 } 258 259 @Override 260 public double getAvgHydropathy(ProteinSequence sequence) { 261 int validLength = 0; 262 double total = 0.0; 263 AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); 264 char[] seq = this.getSequence(sequence.toString(), true); 265 for(char aa:seq){ 266 AminoAcidCompound c = aaSet.getCompoundForString(String.valueOf(aa)); 267 if(Constraints.aa2Hydrophathicity.containsKey(c)){ 268 total += Constraints.aa2Hydrophathicity.get(c); 269 validLength++; 270 } 271 } 272 if (validLength==0) { 273 logger.warn("Valid length of sequence is 0, can't divide by 0 to calculate average hydropathy: setting average hydropathy to 0"); 274 return 0.0; 275 } 276 277 return total / validLength; 278 } 279 280 @Override 281 public double getIsoelectricPoint(ProteinSequence sequence, boolean useExpasyValues) { 282 if(useExpasyValues){ 283 return this.getIsoelectricPointExpasy(sequence.toString().toUpperCase()); 284 }else{ 285 return this.getIsoelectricPointInnovagen(sequence); 286 } 287 } 288 289 private double getIsoelectricPointInnovagen(ProteinSequence sequence){ 290 double currentPH = 7.0; 291 double changeSize = 7.0; 292 String sequenceString = sequence.toString(); 293 char nTerminalChar = sequenceString.charAt(0); 294 char cTerminalChar = sequenceString.charAt(sequenceString.length() - 1); 295 296 Map<AminoAcidCompound, Integer> chargedAA2Count = this.getChargedAACount(sequence); 297 double margin; 298 final double difference = 0.0001; 299 300 while(true){ 301 margin = this.getNetChargeInnovagen(chargedAA2Count, currentPH, nTerminalChar, cTerminalChar); 302 //Within allowed difference 303 if(margin <= difference && margin >= -difference) break; 304 changeSize /= 2.0; 305 if(margin > 0){ 306 currentPH += changeSize; 307 }else{ 308 currentPH -= changeSize; 309 } 310 } 311 return currentPH; 312 } 313 314 /* 315 * Pseudo code obtained from email correspondance with ExPASy Helpdesk, Gregoire Rossier 316 */ 317 // 318 // Table of pk values : 319 // Note: the current algorithm does not use the last two columns. 320 // Each row corresponds to an amino acid starting with Ala. J, O and U are 321 // inexistant, but here only in order to have the complete alphabet. 322 // 323 // Ct Nt Sm Sc Sn 324 // 325 private final double[][] cPk = { 326 {3.55, 7.59, 0.0}, // A 327 {3.55, 7.50, 0.0}, // B 328 {3.55, 7.50, 9.00}, // C 329// {4.55, 7.50, 4.05}, // D 330// {4.75, 7.70, 4.45}, // E 331 {3.55, 7.50, 4.05}, // D 332 {3.55, 7.70, 4.45}, // E 333 {3.55, 7.50, 0}, // F 334 {3.55, 7.50, 0}, // G 335 {3.55, 7.50, 5.98}, // H 336 {3.55, 7.50, 0.0}, // I 337 {0.0, 0.0, 0.0}, // J 338 {3.55, 7.50, 10.00}, // K 339 {3.55, 7.50, 0.0}, // L 340 {3.55, 7.00, 0.0},// M 341 {3.55, 7.50, 0.0},// N 342 {0.00, 0.00, 0.0},// O 343 {3.55, 8.36, 0.0},// P 344 {3.55, 7.50, 0.0}, // Q 345 {3.55, 7.50, 12.0},// R 346 {3.55, 6.93, 0.0},// S 347 {3.55, 6.82, 0.0}, // T 348 {0.00, 0.00, 0.0}, // U 349 {3.55, 7.44, 0.0},// V 350 {3.55, 7.50, 0.0},// W 351 {3.55, 7.50, 0.0},// X 352 {3.55, 7.50, 10.00},// Y 353 {3.55, 7.50, 0.0}}; // Z 354 355 private final double PH_MIN = 0.0; /* minimum pH value */ 356 private final double PH_MAX = 14.0; /* maximum pH value */ 357 private final double MAXLOOP = 2000.0; /* maximum number of iterations */ 358 private final double EPSI = 0.0001; /* desired precision */ 359 360 private double exp10(double pka){ 361 return Math.pow(10, pka); 362 } 363 364 private double getIsoelectricPointExpasy(String sequence){ 365 // 366 // Compute the amino-acid composition. 367 // 368 int[] comp = new int[26]; 369 for(int i = 0; i < sequence.length(); i++){ 370 int index = sequence.charAt(i) - 'A'; 371 if(index < 0 || index >= 26) continue; 372 comp[index]++; 373 } 374 // 375 // Look up N-terminal and C-terminal residue. 376 // 377 int nTermResidue = -1; 378 int index = 0; 379 while((nTermResidue < 0 || nTermResidue >= 26) && index < 25){ 380 nTermResidue = sequence.charAt(index++) - 'A'; 381 } 382 383 int cTermResidue = -1; 384 index = 1; 385 while((cTermResidue < 0 || cTermResidue >= 26) && index < 25){ 386 cTermResidue = sequence.charAt(sequence.length() - index++) - 'A'; 387 } 388 389 double phMin = PH_MIN; 390 double phMax = PH_MAX; 391 392 double phMid = 0.0; 393 double charge = 1.0; 394 for (int i = 0; i < MAXLOOP && (phMax - phMin) > EPSI; i++){ 395 phMid = phMin + (phMax - phMin) / 2.0; 396 397 charge = getNetChargeExpasy(comp, nTermResidue, cTermResidue, phMid); 398 399 if (charge > 0.0) phMin = phMid; 400 else phMax = phMid; 401 } 402 return phMid; 403 } 404 405 @Override 406 public double getIsoelectricPoint(ProteinSequence sequence){ 407 return getIsoelectricPoint(sequence, true); 408 } 409 410 @Override 411 public double getNetCharge(ProteinSequence sequence) { 412 return getNetCharge(sequence, true); 413 } 414 415 @Override 416 public double getNetCharge(ProteinSequence sequence, boolean useExpasyValues){ 417 return getNetCharge(sequence, true, 7.0); 418 } 419 420 @Override 421 public double getNetCharge(ProteinSequence sequence, boolean useExpasyValues, double pHPoint){ 422 if(useExpasyValues){ 423 return getNetChargeExpasy(sequence.toString().toUpperCase(), pHPoint); 424 }else{ 425 return getNetChargeInnovagen(sequence, pHPoint); 426 } 427 } 428 429 private double getNetChargeExpasy(String sequence, double pHPoint){ 430 // 431 // Compute the amino-acid composition. 432 // 433 int[] comp = new int[26]; 434 for(int i = 0; i < sequence.length(); i++){ 435 int index = sequence.charAt(i) - 'A'; 436 if(index < 0 || index >= 26) continue; 437 comp[index]++; 438 } 439 // 440 // Look up N-terminal and C-terminal residue. 441 // 442 int nTermResidue = sequence.charAt(0) - 'A'; 443 int cTermResidue = sequence.charAt(sequence.length() - 1) - 'A'; 444 return getNetChargeExpasy(comp, nTermResidue, cTermResidue, pHPoint); 445 } 446 447 private double getNetChargeExpasy(int[] comp, int nTermResidue, int cTermResidue, double ph){ 448 double cter = 0.0; 449 if(cTermResidue >= 0 && cTermResidue < 26) cter = exp10(-cPk[cTermResidue][0]) / (exp10(-cPk[cTermResidue][0]) + exp10(-ph)); 450 double nter = 0.0; 451 if(nTermResidue >= 0 && nTermResidue < 26) nter = exp10(-ph) / (exp10(-cPk[nTermResidue][1]) + exp10(-ph)); 452 453 double carg = comp['R' - 'A'] * exp10(-ph) / (exp10(-cPk['R' - 'A'][2]) + exp10(-ph)); 454 double chis = comp['H' - 'A'] * exp10(-ph) / (exp10(-cPk['H' - 'A'][2]) + exp10(-ph)); 455 double clys = comp['K' - 'A'] * exp10(-ph) / (exp10(-cPk['K' - 'A'][2]) + exp10(-ph)); 456 457 double casp = comp['D' - 'A'] * exp10(-cPk['D' - 'A'][2]) / (exp10(-cPk['D' - 'A'][2]) + exp10(-ph)); 458 double cglu = comp['E' - 'A'] * exp10(-cPk['E' - 'A'][2]) / (exp10(-cPk['E' - 'A'][2]) + exp10(-ph)); 459 460 double ccys = comp['C' - 'A'] * exp10(-cPk['C' - 'A'][2]) / (exp10(-cPk['C' - 'A'][2]) + exp10(-ph)); 461 double ctyr = comp['Y' - 'A'] * exp10(-cPk['Y' - 'A'][2]) / (exp10(-cPk['Y' - 'A'][2]) + exp10(-ph)); 462 463 return (carg + clys + chis + nter) - (casp + cglu + ctyr + ccys + cter); 464 } 465 466 private double getNetChargeInnovagen(ProteinSequence sequence, double pHPoint) { 467 Map<AminoAcidCompound, Integer> chargedAA2Count = this.getChargedAACount(sequence); 468 String sequenceString = sequence.getSequenceAsString(); 469 return getNetChargeInnovagen(chargedAA2Count, pHPoint, sequenceString.charAt(0), sequenceString.charAt(sequenceString.length() - 1)); 470 } 471 472 private double getNetChargeInnovagen(Map<AminoAcidCompound, Integer> chargedAA2Count, double ph, char nTerminalChar, char cTerminalChar){ 473 //Constraints.aa2PKa is aleady reinitialized in getChargedAACount hence no need to do it again 474 475 //Lys => K, Arg => R, His => H 476 //Asp => D, Glu => E, Cys => C, Tyr => Y 477 AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); 478 479 double nTerminalCharge = 0.0; 480 AminoAcidCompound nTermCompound = aaSet.getCompoundForString(String.valueOf(nTerminalChar)); 481 if(Constraints.aa2NTerminalPka.containsKey(nTermCompound)){ 482 nTerminalCharge = this.getPosCharge(Constraints.aa2NTerminalPka.get(nTermCompound), ph); 483 } 484 485 double cTerminalCharge = 0.0; 486 AminoAcidCompound cTermCompound = aaSet.getCompoundForString(String.valueOf(cTerminalChar)); 487 if(Constraints.aa2CTerminalPka.containsKey(cTermCompound)){ 488 cTerminalCharge = this.getNegCharge(Constraints.aa2CTerminalPka.get(cTermCompound), ph); 489 } 490 491 double kCharge = chargedAA2Count.get(aaSet.getCompoundForString("K")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("K")), ph); 492 double rCharge = chargedAA2Count.get(aaSet.getCompoundForString("R")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("R")), ph); 493 double hCharge = chargedAA2Count.get(aaSet.getCompoundForString("H")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("H")), ph); 494 double dCharge = chargedAA2Count.get(aaSet.getCompoundForString("D")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("D")), ph); 495 double eCharge = chargedAA2Count.get(aaSet.getCompoundForString("E")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("E")), ph); 496 double cCharge = chargedAA2Count.get(aaSet.getCompoundForString("C")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("C")), ph); 497 double yCharge = chargedAA2Count.get(aaSet.getCompoundForString("Y")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("Y")), ph); 498// if((kCharge + rCharge + hCharge) == 0.0 && (dCharge + eCharge + cCharge + yCharge) == 0.0){ 499// return 0.0; 500// } 501 return (nTerminalCharge + kCharge + rCharge + hCharge) - (dCharge + eCharge + cCharge + yCharge + cTerminalCharge); 502 } 503 504 private double getPosCharge(double pka, double ph){ 505 return Math.pow(10, pka) / (Math.pow(10, pka) + Math.pow(10, ph)); 506 } 507 508 private double getNegCharge(double pka, double ph){ 509 return Math.pow(10, ph) / (Math.pow(10, pka) + Math.pow(10, ph)); 510 } 511 512 private Map<AminoAcidCompound, Integer> getChargedAACount(ProteinSequence sequence){ 513 //Lys => K, Arg => R, His => H 514 //Asp => D, Glu => E, Cys => C, Tyr => Y 515 int numK = 0; 516 int numR = 0; 517 int numH = 0; 518 int numD = 0; 519 int numE = 0; 520 int numC = 0; 521 int numY = 0; 522 char[] seq = this.getSequence(sequence.getSequenceAsString(), true); 523 for(char aa:seq){ 524 switch(aa){ 525 case 'K': numK++; break; 526 case 'R': numR++; break; 527 case 'H': numH++; break; 528 case 'D': numD++; break; 529 case 'E': numE++; break; 530 case 'C': numC++; break; 531 case 'Y': numY++; break; 532 } 533 } 534 AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); 535 Map<AminoAcidCompound, Integer> chargedAA2Count = new HashMap<>(); 536 chargedAA2Count.put(aaSet.getCompoundForString("K"), numK); 537 chargedAA2Count.put(aaSet.getCompoundForString("R"), numR); 538 chargedAA2Count.put(aaSet.getCompoundForString("H"), numH); 539 chargedAA2Count.put(aaSet.getCompoundForString("D"), numD); 540 chargedAA2Count.put(aaSet.getCompoundForString("E"), numE); 541 chargedAA2Count.put(aaSet.getCompoundForString("C"), numC); 542 chargedAA2Count.put(aaSet.getCompoundForString("Y"), numY); 543 return chargedAA2Count; 544 } 545 546 @Override 547 public double getEnrichment(ProteinSequence sequence, AminoAcidCompound aminoAcidCode) { 548 double counter = 0.0; 549 char[] seq = this.getSequence(sequence.getSequenceAsString(), true); 550 for(char aa:seq){ 551 if(aminoAcidCode.getShortName().equals(String.valueOf(aa))){ 552 counter++; 553 } 554 } 555 return counter/sequence.getLength(); 556 } 557 558 @Override 559 public Map<AminoAcidCompound, Double> getAAComposition(ProteinSequence sequence) { 560 int validLength = 0; 561 Map<AminoAcidCompound, Double> aa2Composition = new HashMap<>(); 562 AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); 563 for(AminoAcidCompound aa:aaSet.getAllCompounds()){ 564 aa2Composition.put(aa, 0.0); 565 } 566 char[] seq = this.getSequence(sequence.toString(), true); 567 for(char aa:seq){ 568 if(PeptideProperties.standardAASet.contains(aa)){ 569 AminoAcidCompound compound = aaSet.getCompoundForString(String.valueOf(aa)); 570 aa2Composition.put(compound, aa2Composition.get(compound) + 1.0); 571 validLength++; 572 } 573 } 574 if(validLength > 0){ 575 for(AminoAcidCompound aa:aaSet.getAllCompounds()){ 576 aa2Composition.put(aa, aa2Composition.get(aa) / validLength); 577 } 578 }else{ 579 for(AminoAcidCompound aa:aaSet.getAllCompounds()){ 580 aa2Composition.put(aa, 0.0); 581 } 582 } 583 return aa2Composition; 584 } 585 586 587 @Override 588 public double getAromaticity(ProteinSequence sequence) { 589 int validLength = sequence.getSequenceAsString().length(); 590 591 if (validLength == 0) { 592 logger.warn("Valid length of sequence is 0, can't divide by 0 to calculate aromaticity: setting aromaticity to 0"); 593 return 0.0; 594 } 595 596 //Phe - Phenylalanine 597 int totalF = 0; 598 //Tyr - Tyrosine 599 int totalY = 0; 600 //Trp - Tryptophan 601 int totalW = 0; 602 603 char[] seq = this.getSequence(sequence.toString(), true); 604 for (char aa : seq) { 605 char amino = Character.toUpperCase(aa); 606 switch (amino) { 607 case 'F': 608 totalF++; 609 break; 610 case 'Y': 611 totalY++; 612 break; 613 case 'W': 614 totalW++; 615 break; 616 } 617 } 618 619 return (totalF + totalY + totalW) / (double) (validLength); 620 } 621} 622