001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.aaproperties.profeat.convertor;
022
023import org.biojava.nbio.core.sequence.ProteinSequence;
024
025public abstract class Convertor {
026        /**
027         * Based on Table 2 of http://nar.oxfordjournals.org/content/34/suppl_2/W32.full.pdf<br/>
028         * An abstract class to convert a protein sequence into representation of different attribute with each attribute having 3 groups.<br/>
029         * The seven different attributes are<p/>
030         * Hydrophobicity (Polar, Neutral, Hydrophobicity)<br/>
031         * Normalized van der Waals volume (Range 0 - 2.78, 2.95 - 4.0, 4.03 - 8.08)<br/>
032         * Polarity (Value 4.9 - 6.2, 8.0 - 9.2, 10.4 - 13.0)<br/>
033         * Polarizability (Value 0 - 1.08, 0.128 - 0.186, 0.219 - 0.409)<br/>
034         * Charge (Positive, Neutral, Negative)<br/>
035         * Secondary structure (Helix, Strand, Coil)<br/>
036         * Solvent accessibility (Buried, Exposed, Intermediate)<br/>
037         *
038         * @author kohchuanhock
039         * @version 2011.06.09
040         */
041        public final static char group1 = '1';
042        public final static char group2 = '2';
043        public final static char group3 = '3';
044        public final static char unknownGroup = '0';
045
046        /**
047         * Returns the grouping of the amino acid character.
048         * The aminoAcid argument is preferably of non-ambiguous characters.
049         * Standard amino acids will be converted to '1', '2' or '3' depending on its grouping
050         * Non-standard amino acids are simply converted to '0'.
051         *
052         * @param aminoAcid
053         *              an amino acid character preferably of non-ambiguous characters
054         * @return its grouping
055         */
056        public abstract char convert(char aminoAcid);
057
058        /**
059         * Returns the groupings of the attribute
060         * @return the groupings of the attribute
061         */
062        public abstract String[] getGrouping();
063
064        /**
065         * Return the attribute of the grouping
066         * @return the attribute of the grouping
067         */
068        public abstract String getAttribute();
069
070        /**
071         * Returns the converted sequence.
072         * The sequence argument must be a protein sequence consisting of preferably non-ambiguous characters only.
073         * Standard amino acids will be converted to '1', '2' or '3' depending on its grouping
074         * Non-standard amino acids are simply converted to '0'.
075         *
076         * @param sequence
077         *              a protein sequence consisting of preferably non-ambiguous characters only
078         * @return the converted sequence
079         */
080        public String convert(ProteinSequence sequence){
081                String convertedSequence = "";
082                String uppercaseSequence = sequence.getSequenceAsString().toUpperCase();
083                for(int x = 0; x < uppercaseSequence.length(); x++){
084                        convertedSequence += String.valueOf(convert(uppercaseSequence.charAt(x)));
085                }
086                return convertedSequence;
087        }
088
089}