001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.aaproperties;
022
import org.biojava.nbio.aaproperties.xml.AminoAcidCompositionTable;
import org.biojava.nbio.aaproperties.xml.ElementTable;
import org.biojava.nbio.aaproperties.xml.MyValidationEventHandler;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import jakarta.xml.bind.JAXBContext;
import jakarta.xml.bind.JAXBException;
import jakarta.xml.bind.Unmarshaller;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
040
041/**
042 * This class contains the actual implementation of IPeptideProperties and is wrapped around by PeptideProperties for ease of use.
043 *
044 * @author kohchuanhock
045 * @version 2011.08.22
046 * @since 3.0.2
047 * @see IPeptideProperties
048 * @see PeptideProperties
049 */
050public class PeptidePropertiesImpl implements IPeptideProperties{
051
052        private final static Logger logger = LoggerFactory.getLogger(PeptidePropertiesImpl.class);
053
054        /**
055         * @return the molecular weight of water
056         */
057        private double getWaterMoleculeWeight(){
058                final double hydrogenMW = 1.0079;
059                final double hydroxideMW = 17.0073;
060                //H     1.0079  OH      17.0073
061                return hydrogenMW + hydroxideMW;
062        }
063
064        private char[] getSequence(String sequence, boolean ignoreCase){
065                if(ignoreCase){
066                        return sequence.toUpperCase().toCharArray();
067                }else{
068                        return sequence.toCharArray();
069                }
070        }
071
072        @Override
073        public double getMolecularWeight(ProteinSequence sequence) {
074                double value = 0.0;
075                AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
076                char[] seq = getSequence(sequence.toString(), true);//ignore case
077                for(char aa:seq){
078                        AminoAcidCompound c = aaSet.getCompoundForString(String.valueOf(aa));
079                        if(Constraints.aa2MolecularWeight.containsKey(c)){
080                                value += Constraints.aa2MolecularWeight.get(c);
081                        }
082                }
083                if(value == 0)
084                        return value;
085                else
086                        return value + getWaterMoleculeWeight();
087        }
088
089        @Override
090        public double getMolecularWeight(ProteinSequence sequence, File aminoAcidCompositionFile) throws JAXBException, FileNotFoundException {
091                File elementMassFile = new File("./src/main/resources/ElementMass.xml");
092                if(!elementMassFile.exists()){
093                        throw new FileNotFoundException("Cannot locate ElementMass.xml. " +
094                                        "Please use getMolecularWeight(ProteinSequence, File, File) to specify ElementMass.xml location.");
095                }
096                return getMolecularWeightBasedOnXML(sequence, obtainAminoAcidCompositionTable(elementMassFile, aminoAcidCompositionFile));
097        }
098
099        @Override
100        public double getMolecularWeight(ProteinSequence sequence, File elementMassFile, File aminoAcidCompositionFile)
101                        throws JAXBException, FileNotFoundException{
102                return getMolecularWeightBasedOnXML(sequence, obtainAminoAcidCompositionTable(elementMassFile, aminoAcidCompositionFile));
103        }
104
105        @Override
106        public double getMolecularWeightBasedOnXML(ProteinSequence sequence, AminoAcidCompositionTable aminoAcidCompositionTable){
107                double value = 0.0;
108                char[] seq = sequence.toString().toCharArray();
109                for(char aa:seq){
110                        Double weight = aminoAcidCompositionTable.getMolecularWeight(aa);
111                        if(weight != null){
112                                value += weight;
113                        }
114                }
115                if(value == 0.0)
116                        return value;
117                else
118                        return value + getWaterMoleculeWeight();
119        }
120
121        @Override
122        public AminoAcidCompositionTable obtainAminoAcidCompositionTable(File aminoAcidCompositionFile)
123                throws JAXBException, FileNotFoundException{
124                File elementMassFile = new File("./src/main/resources/ElementMass.xml");
125                if(!elementMassFile.exists()){
126                        throw new FileNotFoundException("Cannot locate ElementMass.xml. " +
127                                        "Please use getMolecularWeight(ProteinSequence, File, File) to specify ElementMass.xml location.");
128                }
129                return obtainAminoAcidCompositionTable(elementMassFile, aminoAcidCompositionFile);
130        }
131
132        @Override
133        public AminoAcidCompositionTable obtainAminoAcidCompositionTable(File elementMassFile, File aminoAcidCompositionFile)
134                throws JAXBException, FileNotFoundException{
135                //Parse elementMassFile
136                ElementTable iTable = new ElementTable();
137                // Get a JAXB Context for the object we created above
138                JAXBContext jc = JAXBContext.newInstance(iTable.getClass());
139                Unmarshaller u = jc.createUnmarshaller();
140                u.setEventHandler(new MyValidationEventHandler());
141                iTable = (ElementTable)u.unmarshal(new FileInputStream(elementMassFile));
142                iTable.populateMaps();
143
144                //Parse aminoAcidCompositionFile
145                AminoAcidCompositionTable aTable = new AminoAcidCompositionTable();
146                // Get a JAXB Context for the object we created above
147                JAXBContext jc2 = JAXBContext.newInstance(aTable.getClass());
148                Unmarshaller u2 = jc2.createUnmarshaller();
149                u2.setEventHandler(new MyValidationEventHandler());
150                aTable = (AminoAcidCompositionTable)u2.unmarshal(new FileInputStream(aminoAcidCompositionFile));
151                aTable.computeMolecularWeight(iTable);
152                return aTable;
153        }
154
155        @Override
156        public double getExtinctionCoefficient(ProteinSequence sequence, boolean assumeCysReduced) {
157                //Tyr => Y
158                //Trp => W
159                //Cys => C
160                //E(Prot) = Numb(Tyr)*Ext(Tyr) + Numb(Trp)*Ext(Trp) + Numb(Cystine)*Ext(Cystine)
161                //where (for proteins in water measured at 280 nm): Ext(Tyr) = 1490, Ext(Trp) = 5500, Ext(Cystine) = 125;
162                AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
163                Map<AminoAcidCompound, Integer> extinctAA2Count = this.getExtinctAACount(sequence);
164
165                double eProt;
166                if(!assumeCysReduced){
167                        eProt = extinctAA2Count.get(aaSet.getCompoundForString("Y")) *
168                                Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("Y")) +
169                                extinctAA2Count.get(aaSet.getCompoundForString("W")) *
170                                Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("W")) +
171                                extinctAA2Count.get(aaSet.getCompoundForString("C")) *
172                                Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("C"));
173                }else
174                        eProt = extinctAA2Count.get(aaSet.getCompoundForString("Y")) *
175                                Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("Y")) +
176                                extinctAA2Count.get(aaSet.getCompoundForString("W")) *
177                                Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("W"));
178
179                return eProt;
180        }
181
182        @Override
183        public double getAbsorbance(ProteinSequence sequence, boolean assumeCysReduced){
184                //Absorb(Prot) = E(Prot) / Molecular_weight
185                double mw = this.getMolecularWeight(sequence);
186                double eProt = this.getExtinctionCoefficient(sequence, assumeCysReduced);
187                if (mw == 0.0) {
188                        logger.warn("Molecular weight is 0.0, can't divide by 0: setting absorbance to 0.0");
189                        return 0.0;
190                }
191                return eProt / mw;
192        }
193
194        private Map<AminoAcidCompound, Integer> getExtinctAACount(ProteinSequence sequence){
195                //Cys => C, Tyr => Y, Trp => W
196                int numW = 0;
197                int smallW = 0;
198                double numC = 0;
199                double smallC = 0;
200                int numY = 0;
201                int smallY = 0;
202                for(char aa:sequence.getSequenceAsString().toCharArray()){
203                        switch(aa){
204                        case 'W': numW++; break;
205                        case 'w': smallW++; break;
206                        case 'C': numC += 0.5; break;
207                        case 'c': smallC += 0.5; break;
208                        case 'Y': numY++; break;
209                        case 'y': smallY++; break;
210                        }
211                }
212                AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
213                Map<AminoAcidCompound, Integer> extinctAA2Count = new HashMap<>();
214                //Ignore Case is always true
215                extinctAA2Count.put(aaSet.getCompoundForString("W"), numW + smallW);
216                extinctAA2Count.put(aaSet.getCompoundForString("C"), (int) (numC + smallC));
217                extinctAA2Count.put(aaSet.getCompoundForString("Y"), numY + smallY);
218                return extinctAA2Count;
219        }
220
221        @Override
222        public double getInstabilityIndex(ProteinSequence sequence) {
223                double sum = 0.0;
224                String s = sequence.getSequenceAsString().toUpperCase();
225                for(int i = 0; i < sequence.getLength() - 1; i++){
226                        String dipeptide = s.substring(i, i+2);
227                        if(Constraints.diAA2Instability.containsKey(dipeptide)){
228                                sum += Constraints.diAA2Instability.get(dipeptide);
229                        }
230                }
231                int denominator = s.length() - Utils.getNumberOfInvalidChar(s, null, true);
232
233                if (denominator==0) {
234                        logger.warn("Valid length of sequence is 0, can't divide by 0 to calculate instability index: setting instability index value to 0.0");
235                        return 0.0;
236                }
237                return sum * 10.0 / denominator;
238        }
239
240        @Override
241        public double getApliphaticIndex(ProteinSequence sequence) {
242//              Aliphatic index = X(Ala) + a * X(Val) + b * ( X(Ile) + X(Leu) )
243//              where X(Ala), X(Val), X(Ile), and X(Leu) are mole percent (100 X mole fraction)
244//              of alanine, valine, isoleucine, and leucine.
245//              The coefficients a and b are the relative volume of valine side chain (a = 2.9)
246//              and of Leu/Ile side chains (b = 3.9) to the side chain of alanine.
247//              Ala => A, Val => V, Ile => I, Leu => L
248                AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
249                Map<AminoAcidCompound, Double> aa2Composition = getAAComposition(sequence);
250                final double a = 2.9;
251                final double b = 3.9;
252                double xAla = aa2Composition.get(aaSet.getCompoundForString("A"));
253                double xVal = aa2Composition.get(aaSet.getCompoundForString("V"));
254                double xIle = aa2Composition.get(aaSet.getCompoundForString("I"));
255                double xLeu = aa2Composition.get(aaSet.getCompoundForString("L"));
256                return (xAla + (a * xVal) + (b * (xIle + xLeu))) * 100;
257        }
258
259        @Override
260        public double getAvgHydropathy(ProteinSequence sequence) {
261                int validLength = 0;
262                double total = 0.0;
263                AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
264                char[] seq = this.getSequence(sequence.toString(), true);
265                for(char aa:seq){
266                        AminoAcidCompound c = aaSet.getCompoundForString(String.valueOf(aa));
267                        if(Constraints.aa2Hydrophathicity.containsKey(c)){
268                                total += Constraints.aa2Hydrophathicity.get(c);
269                                validLength++;
270                        }
271                }
272                if (validLength==0) {
273                        logger.warn("Valid length of sequence is 0, can't divide by 0 to calculate average hydropathy: setting average hydropathy to 0");
274                        return 0.0;
275                }
276
277                return total / validLength;
278        }
279
280        @Override
281        public double getIsoelectricPoint(ProteinSequence sequence, boolean useExpasyValues) {
282                if(useExpasyValues){
283                        return this.getIsoelectricPointExpasy(sequence.toString().toUpperCase());
284                }else{
285                        return this.getIsoelectricPointInnovagen(sequence);
286                }
287        }
288
289        private double getIsoelectricPointInnovagen(ProteinSequence sequence){
290                double currentPH = 7.0;
291                double changeSize = 7.0;
292                String sequenceString = sequence.toString();
293                char nTerminalChar = sequenceString.charAt(0);
294                char cTerminalChar = sequenceString.charAt(sequenceString.length() - 1);
295
296                Map<AminoAcidCompound, Integer> chargedAA2Count = this.getChargedAACount(sequence);
297                double margin;
298                final double difference = 0.0001;
299
300                while(true){
301                        margin = this.getNetChargeInnovagen(chargedAA2Count, currentPH, nTerminalChar, cTerminalChar);
302                        //Within allowed difference
303                        if(margin <= difference && margin >= -difference) break;
304                        changeSize /= 2.0;
305                        if(margin > 0){
306                                currentPH += changeSize;
307                        }else{
308                                currentPH -= changeSize;
309                        }
310                }
311                return currentPH;
312        }
313
314        /*
315         *  Pseudo code obtained from email correspondance with ExPASy Helpdesk, Gregoire Rossier
316         */
317        //
318        // Table of pk values :
319        // Note: the current algorithm does not use the last two columns.
320        // Each row corresponds to an amino acid starting with Ala. J, O and U are
321        // inexistant, but here only in order to have the complete alphabet.
322        //
323        // Ct Nt Sm Sc Sn
324        //
325        private final double[][] cPk = {
326                        {3.55, 7.59, 0.0},  // A
327                        {3.55, 7.50, 0.0},  // B
328                        {3.55, 7.50, 9.00}, // C
329//                      {4.55, 7.50, 4.05}, // D
330//                      {4.75, 7.70, 4.45}, // E
331                        {3.55, 7.50, 4.05}, // D
332                        {3.55, 7.70, 4.45}, // E
333                        {3.55, 7.50, 0}, // F
334                        {3.55, 7.50, 0}, // G
335                        {3.55, 7.50, 5.98}, // H
336                        {3.55, 7.50, 0.0}, // I
337                        {0.0, 0.0, 0.0}, // J
338                        {3.55, 7.50, 10.00}, // K
339                        {3.55, 7.50, 0.0}, // L
340                        {3.55, 7.00, 0.0},// M
341                        {3.55, 7.50, 0.0},// N
342                        {0.00, 0.00, 0.0},// O
343                        {3.55, 8.36, 0.0},// P
344                        {3.55, 7.50, 0.0}, // Q
345                        {3.55, 7.50, 12.0},// R
346                        {3.55, 6.93, 0.0},// S
347                        {3.55, 6.82, 0.0}, // T
348                        {0.00, 0.00, 0.0}, // U
349                        {3.55, 7.44, 0.0},// V
350                        {3.55, 7.50, 0.0},// W
351                        {3.55, 7.50, 0.0},// X
352                        {3.55, 7.50, 10.00},// Y
353                        {3.55, 7.50, 0.0}}; // Z
354
355        private final double PH_MIN = 0.0; /* minimum pH value */
356        private final double PH_MAX = 14.0; /* maximum pH value */
357        private final double MAXLOOP = 2000.0; /* maximum number of iterations */
358        private final double EPSI = 0.0001; /* desired precision */
359
360        private double exp10(double pka){
361                return Math.pow(10, pka);
362        }
363
364        private double getIsoelectricPointExpasy(String sequence){
365                //
366                // Compute the amino-acid composition.
367                //
368                int[] comp = new int[26];
369                for(int i = 0; i < sequence.length(); i++){
370                        int index = sequence.charAt(i) - 'A';
371                        if(index < 0 || index >= 26) continue;
372                        comp[index]++;
373                }
374                //
375                // Look up N-terminal and C-terminal residue.
376                //
377                int nTermResidue = -1;
378                int index = 0;
379                while((nTermResidue < 0 || nTermResidue >= 26) && index < 25){
380                        nTermResidue = sequence.charAt(index++) - 'A';
381                }
382
383                int cTermResidue = -1;
384                index = 1;
385                while((cTermResidue < 0 || cTermResidue >= 26) && index < 25){
386                        cTermResidue = sequence.charAt(sequence.length() - index++) - 'A';
387                }
388
389                double phMin = PH_MIN;
390                double phMax = PH_MAX;
391
392                double phMid = 0.0;
393                double charge = 1.0;
394                for (int i = 0; i < MAXLOOP && (phMax - phMin) > EPSI; i++){
395                        phMid = phMin + (phMax - phMin) / 2.0;
396
397                        charge = getNetChargeExpasy(comp, nTermResidue, cTermResidue, phMid);
398
399                        if (charge > 0.0) phMin = phMid;
400                        else phMax = phMid;
401                }
402                return phMid;
403        }
404
405        @Override
406        public double getIsoelectricPoint(ProteinSequence sequence){
407                return getIsoelectricPoint(sequence, true);
408        }
409
410        @Override
411        public double getNetCharge(ProteinSequence sequence) {
412                return getNetCharge(sequence, true);
413        }
414
415        @Override
416        public double getNetCharge(ProteinSequence sequence, boolean useExpasyValues){
417                return getNetCharge(sequence, true, 7.0);
418        }
419
420        @Override
421        public double getNetCharge(ProteinSequence sequence, boolean useExpasyValues, double pHPoint){
422                if(useExpasyValues){
423                        return getNetChargeExpasy(sequence.toString().toUpperCase(), pHPoint);
424                }else{
425                        return getNetChargeInnovagen(sequence, pHPoint);
426                }
427        }
428
429        private double getNetChargeExpasy(String sequence, double pHPoint){
430                //
431                // Compute the amino-acid composition.
432                //
433                int[] comp = new int[26];
434                for(int i = 0; i < sequence.length(); i++){
435                        int index = sequence.charAt(i) - 'A';
436                        if(index < 0 || index >= 26) continue;
437                        comp[index]++;
438                }
439                //
440                // Look up N-terminal and C-terminal residue.
441                //
442                int nTermResidue = sequence.charAt(0) - 'A';
443                int cTermResidue = sequence.charAt(sequence.length() - 1) - 'A';
444                return getNetChargeExpasy(comp, nTermResidue, cTermResidue, pHPoint);
445        }
446
447        private double getNetChargeExpasy(int[] comp, int nTermResidue, int cTermResidue, double ph){
448                double cter = 0.0;
449                if(cTermResidue >= 0 && cTermResidue < 26) cter = exp10(-cPk[cTermResidue][0]) / (exp10(-cPk[cTermResidue][0]) + exp10(-ph));
450                double nter = 0.0;
451                if(nTermResidue >= 0 && nTermResidue < 26) nter = exp10(-ph) / (exp10(-cPk[nTermResidue][1]) + exp10(-ph));
452
453                double carg = comp['R' - 'A'] * exp10(-ph) / (exp10(-cPk['R' - 'A'][2]) + exp10(-ph));
454                double chis = comp['H' - 'A'] * exp10(-ph) / (exp10(-cPk['H' - 'A'][2]) + exp10(-ph));
455                double clys = comp['K' - 'A'] * exp10(-ph) / (exp10(-cPk['K' - 'A'][2]) + exp10(-ph));
456
457                double casp = comp['D' - 'A'] * exp10(-cPk['D' - 'A'][2]) / (exp10(-cPk['D' - 'A'][2]) + exp10(-ph));
458                double cglu = comp['E' - 'A'] * exp10(-cPk['E' - 'A'][2]) / (exp10(-cPk['E' - 'A'][2]) + exp10(-ph));
459
460                double ccys = comp['C' - 'A'] * exp10(-cPk['C' - 'A'][2]) / (exp10(-cPk['C' - 'A'][2]) + exp10(-ph));
461                double ctyr = comp['Y' - 'A'] * exp10(-cPk['Y' - 'A'][2]) / (exp10(-cPk['Y' - 'A'][2]) + exp10(-ph));
462
463                return (carg + clys + chis + nter) - (casp + cglu + ctyr + ccys + cter);
464        }
465
466        private double getNetChargeInnovagen(ProteinSequence sequence, double pHPoint) {
467                Map<AminoAcidCompound, Integer> chargedAA2Count = this.getChargedAACount(sequence);
468                String sequenceString = sequence.getSequenceAsString();
469                return getNetChargeInnovagen(chargedAA2Count, pHPoint, sequenceString.charAt(0), sequenceString.charAt(sequenceString.length() - 1));
470        }
471
472        private double getNetChargeInnovagen(Map<AminoAcidCompound, Integer> chargedAA2Count, double ph, char nTerminalChar, char cTerminalChar){
473                //Constraints.aa2PKa is aleady reinitialized in getChargedAACount hence no need to do it again
474
475                //Lys => K, Arg => R, His => H
476                //Asp => D, Glu => E, Cys => C, Tyr => Y
477                AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
478
479                double nTerminalCharge = 0.0;
480                AminoAcidCompound nTermCompound = aaSet.getCompoundForString(String.valueOf(nTerminalChar));
481                if(Constraints.aa2NTerminalPka.containsKey(nTermCompound)){
482                        nTerminalCharge = this.getPosCharge(Constraints.aa2NTerminalPka.get(nTermCompound), ph);
483                }
484
485                double cTerminalCharge = 0.0;
486                AminoAcidCompound cTermCompound = aaSet.getCompoundForString(String.valueOf(cTerminalChar));
487                if(Constraints.aa2CTerminalPka.containsKey(cTermCompound)){
488                        cTerminalCharge = this.getNegCharge(Constraints.aa2CTerminalPka.get(cTermCompound), ph);
489                }
490
491                double kCharge = chargedAA2Count.get(aaSet.getCompoundForString("K")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("K")), ph);
492                double rCharge = chargedAA2Count.get(aaSet.getCompoundForString("R")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("R")), ph);
493                double hCharge = chargedAA2Count.get(aaSet.getCompoundForString("H")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("H")), ph);
494                double dCharge = chargedAA2Count.get(aaSet.getCompoundForString("D")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("D")), ph);
495                double eCharge = chargedAA2Count.get(aaSet.getCompoundForString("E")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("E")), ph);
496                double cCharge = chargedAA2Count.get(aaSet.getCompoundForString("C")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("C")), ph);
497                double yCharge = chargedAA2Count.get(aaSet.getCompoundForString("Y")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("Y")), ph);
498//              if((kCharge + rCharge + hCharge) == 0.0 && (dCharge + eCharge + cCharge + yCharge) == 0.0){
499//                      return 0.0;
500//              }
501                return (nTerminalCharge + kCharge + rCharge + hCharge) - (dCharge + eCharge + cCharge + yCharge + cTerminalCharge);
502        }
503
504        private double getPosCharge(double pka, double ph){
505                return Math.pow(10, pka) / (Math.pow(10, pka) + Math.pow(10, ph));
506        }
507
508        private double getNegCharge(double pka, double ph){
509                return Math.pow(10, ph) / (Math.pow(10, pka) + Math.pow(10, ph));
510        }
511
512        private Map<AminoAcidCompound, Integer> getChargedAACount(ProteinSequence sequence){
513                //Lys => K, Arg => R, His => H
514                //Asp => D, Glu => E, Cys => C, Tyr => Y
515                int numK = 0;
516                int numR = 0;
517                int numH = 0;
518                int numD = 0;
519                int numE = 0;
520                int numC = 0;
521                int numY = 0;
522                char[] seq = this.getSequence(sequence.getSequenceAsString(), true);
523                for(char aa:seq){
524                        switch(aa){
525                        case 'K': numK++; break;
526                        case 'R': numR++; break;
527                        case 'H': numH++; break;
528                        case 'D': numD++; break;
529                        case 'E': numE++; break;
530                        case 'C': numC++; break;
531                        case 'Y': numY++; break;
532                        }
533                }
534                AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
535                Map<AminoAcidCompound, Integer> chargedAA2Count = new HashMap<>();
536                chargedAA2Count.put(aaSet.getCompoundForString("K"), numK);
537                chargedAA2Count.put(aaSet.getCompoundForString("R"), numR);
538                chargedAA2Count.put(aaSet.getCompoundForString("H"), numH);
539                chargedAA2Count.put(aaSet.getCompoundForString("D"), numD);
540                chargedAA2Count.put(aaSet.getCompoundForString("E"), numE);
541                chargedAA2Count.put(aaSet.getCompoundForString("C"), numC);
542                chargedAA2Count.put(aaSet.getCompoundForString("Y"), numY);
543                return chargedAA2Count;
544        }
545
546        @Override
547        public double getEnrichment(ProteinSequence sequence, AminoAcidCompound aminoAcidCode) {
548                double counter = 0.0;
549                char[] seq = this.getSequence(sequence.getSequenceAsString(), true);
550                for(char aa:seq){
551                        if(aminoAcidCode.getShortName().equals(String.valueOf(aa))){
552                                counter++;
553                        }
554                }
555                return counter/sequence.getLength();
556        }
557
558        @Override
559        public Map<AminoAcidCompound, Double> getAAComposition(ProteinSequence sequence) {
560                int validLength = 0;
561                Map<AminoAcidCompound, Double> aa2Composition = new HashMap<>();
562                AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
563                for(AminoAcidCompound aa:aaSet.getAllCompounds()){
564                        aa2Composition.put(aa, 0.0);
565                }
566                char[] seq = this.getSequence(sequence.toString(), true);
567                for(char aa:seq){
568                        if(PeptideProperties.standardAASet.contains(aa)){
569                                AminoAcidCompound compound = aaSet.getCompoundForString(String.valueOf(aa));
570                                aa2Composition.put(compound, aa2Composition.get(compound) + 1.0);
571                                validLength++;
572                        }
573                }
574                if(validLength > 0){
575                        for(AminoAcidCompound aa:aaSet.getAllCompounds()){
576                                aa2Composition.put(aa, aa2Composition.get(aa) / validLength);
577                        }
578                }else{
579                        for(AminoAcidCompound aa:aaSet.getAllCompounds()){
580                                aa2Composition.put(aa, 0.0);
581                        }
582                }
583                return aa2Composition;
584        }
585
586
587        @Override
588        public double getAromaticity(ProteinSequence sequence) {
589                int validLength = sequence.getSequenceAsString().length();
590
591                if (validLength == 0) {
592                        logger.warn("Valid length of sequence is 0, can't divide by 0 to calculate aromaticity: setting aromaticity to 0");
593                        return 0.0;
594                }
595
596                //Phe - Phenylalanine
597                int totalF = 0;
598                //Tyr - Tyrosine
599                int totalY = 0;
600                //Trp - Tryptophan
601                int totalW = 0;
602
603                char[] seq = this.getSequence(sequence.toString(), true);
604                for (char aa : seq) {
605                        char amino = Character.toUpperCase(aa);
606                        switch (amino) {
607                                case 'F':
608                                        totalF++;
609                                        break;
610                                case 'Y':
611                                        totalY++;
612                                        break;
613                                case 'W':
614                                        totalW++;
615                                        break;
616                        }
617                }
618
619                return (totalF + totalY + totalW) / (double) (validLength);
620        }
621}
622