001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.aaproperties.profeat;
022
023import org.biojava.nbio.core.sequence.ProteinSequence;
024
025import java.util.Map;
026
027public interface IProfeatProperties {
028        /**
029         * Based on Table 2 of http://nar.oxfordjournals.org/content/34/suppl_2/W32.full.pdf<br/>
030         * An interface class to generate the properties of a protein sequence based on its converted attributes.<br/>
031         * The seven different attributes are<p/>
032         * Hydrophobicity (Polar, Neutral, Hydrophobicity)<br/>
033         * Normalized van der Waals volume (Range 0 - 2.78, 2.95 - 4.0, 4.03 - 8.08)<br/>
034         * Polarity (Value 4.9 - 6.2, 8.0 - 9.2, 10.4 - 13.0)<br/>
035         * Polarizability (Value 0 - 1.08, 0.128 - 0.186, 0.219 - 0.409)<br/>
036         * Charge (Positive, Neutral, Negative)<br/>
037         * Secondary structure (Helix, Strand, Coil)<br/>
038         * Solvent accessibility (Buried, Exposed, Intermediate)<br/>
039         *
040         * @author kohchuanhock
041         * @version 2011.06.16
042         * @since 3.0.2
043         */
044
045        /**
046         * Enumeration of the seven different attributes
047         */
048        public enum ATTRIBUTE {HYDROPHOBICITY, VOLUME, POLARITY, POLARIZABILITY, CHARGE, SECONDARYSTRUCTURE, SOLVENTACCESSIBILITY};
049        /**
050         * Enumeration of the three different groupings for each attributes
051         */
052        public enum GROUPING {GROUP1, GROUP2, GROUP3};
053        /**
054         * Enumeration of the transition between groupA and groupB
055         */
056        public enum TRANSITION {BETWEEN_11, BETWEEN_22, BETWEEN_33, BETWEEN_12, BETWEEN_13, BETWEEN_23};
057        /**
058         * Enumeration of the distribution for the first, first 25%, first 50%, first 75% and 100% of the grouping
059         */
060        public enum DISTRIBUTION {FIRST, FIRST25, FIRST50, FIRST75, ALL};
061
062        /**
063         * Returns the composition of the specific grouping for the given attribute.
064         *
065         * @param sequence
066         *      a protein sequence consisting of non-ambiguous characters only
067         * @param attribute
068         *      one of the seven attributes (Hydrophobicity, Volume, Polarity, Polarizability, Charge, SecondaryStructure or SolventAccessibility)
069         * @param group
070         *      the grouping to be computed
071         * @return
072         *      returns the composition of the specific grouping for the given attribute
073         * @throws Exception
074         *      throws Exception if attribute or group are unknown
075         */
076        public double getComposition(ProteinSequence sequence, ATTRIBUTE attribute, GROUPING group) throws Exception;
077
078        public Map<GROUPING, Double> getComposition(ProteinSequence sequence, ATTRIBUTE attribute) throws Exception;
079
080        public Map<ATTRIBUTE, Map<GROUPING, Double>> getComposition(ProteinSequence sequence) throws Exception;
081
082        /**
083         * Returns the number of transition between the specified groups for the given attribute with respect to the length of sequence.
084         *
085         * @param sequence
086         *      a protein sequence consisting of non-ambiguous characters only
087         * @param attribute
088         *      one of the seven attributes (Hydrophobicity, Volume, Polarity, Polarizability, Charge, SecondaryStructure or SolventAccessibility)
089         * @param transition
090         *      the interested transition between the groups
091         * @return
092         *  returns the number of transition between the specified groups for the given attribute with respect to the length of sequence.
093         * @throws Exception
094         *      throws Exception if attribute or group are unknown
095         */
096        public double getTransition(ProteinSequence sequence, ATTRIBUTE attribute, TRANSITION transition) throws Exception;
097
098        public Map<TRANSITION, Double> getTransition(ProteinSequence sequence, ATTRIBUTE attribute) throws Exception;
099
100        public Map<ATTRIBUTE, Map<TRANSITION, Double>> getTransition(ProteinSequence sequence) throws Exception;
101
102        /**
103         * Computes and return the position with respect to the sequence where the given distribution of the grouping can be found.<br/>
104         * Example: "1111122222"<br/>
105         * For the above example,<br/>
106         * position of the GROUPING.GROUP1 && DISTRIBUTION.FIRST = 0/10 (because the first occurrence of '1' is at position 0)<br/>
107         * position of the GROUPING.GROUP1 && DISTRIBUTION.ALL = 4/10 (because all occurrences of '1' happens on and before position 4)<br/>
108         *
109         * @param sequence
110         *      a protein sequence consisting of non-ambiguous characters only
111         * @param attribute
112         *      one of the seven attributes (Hydrophobicity, Volume, Polarity, Polarizability, Charge, SecondaryStructure or SolventAccessibility)
113         * @param group
114         *      one the three groups for the attribute
115         * @param distribution
116         *      the distribution of the grouping
117         *
118         * @return
119         *      the position with respect to the length of sequence where the given distribution of the grouping can be found.<br/>
120         * @throws Exception
121         *      throws Exception if attribute or group are unknown
122         */
123        public double getDistributionPosition(ProteinSequence sequence, ATTRIBUTE attribute, GROUPING group, DISTRIBUTION distribution) throws Exception;
124
125        public Map<DISTRIBUTION, Double> getDistributionPosition(ProteinSequence sequence, ATTRIBUTE attribute, GROUPING group) throws Exception;
126
127        public Map<GROUPING, Map<DISTRIBUTION, Double>> getDistributionPosition(ProteinSequence sequence, ATTRIBUTE attribute) throws Exception;
128
129        public Map<ATTRIBUTE , Map<GROUPING, Map<DISTRIBUTION, Double>>> getDistributionPosition(ProteinSequence sequence) throws Exception;
130}