001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.aaproperties.profeat.convertor;
022
023import java.util.stream.Collectors;
024
025import org.biojava.nbio.core.sequence.ProteinSequence;
026
027public abstract class Convertor {
028        /**
029         * Based on Table 2 of http://nar.oxfordjournals.org/content/34/suppl_2/W32.full.pdf<br/>
030         * An abstract class to convert a protein sequence into representation of different attribute with each attribute having 3 groups.<br/>
031         * The seven different attributes are<p>
032         * Hydrophobicity (Polar, Neutral, Hydrophobicity)<br/>
033         * Normalized van der Waals volume (Range 0 - 2.78, 2.95 - 4.0, 4.03 - 8.08)<br/>
034         * Polarity (Value 4.9 - 6.2, 8.0 - 9.2, 10.4 - 13.0)<br/>
035         * Polarizability (Value 0 - 1.08, 0.128 - 0.186, 0.219 - 0.409)<br/>
036         * Charge (Positive, Neutral, Negative)<br/>
037         * Secondary structure (Helix, Strand, Coil)<br/>
038         * Solvent accessibility (Buried, Exposed, Intermediate)<br/>
039         *
040         * @author kohchuanhock
041         * @version 2011.06.09
042         */
043        public final static char group1 = '1';
044        public final static char group2 = '2';
045        public final static char group3 = '3';
046        public final static char unknownGroup = '0';
047
048        /**
049         * Returns the grouping of the amino acid character.
050         * The aminoAcid argument is preferably of non-ambiguous characters.
051         * Standard amino acids will be converted to '1', '2' or '3' depending on its grouping
052         * Non-standard amino acids are simply converted to '0'.
053         *
054         * @param aminoAcid
055         *              an amino acid character preferably of non-ambiguous characters
056         * @return its grouping
057         */
058        public abstract char convert(char aminoAcid);
059
060        /**
061         * Returns the groupings of the attribute
062         * @return the groupings of the attribute
063         */
064        public abstract String[] getGrouping();
065
066        /**
067         * Return the attribute of the grouping
068         * @return the attribute of the grouping
069         */
070        public abstract String getAttribute();
071
072        /**
073         * Returns the converted sequence.
074         * The sequence argument must be a protein sequence consisting of preferably non-ambiguous characters only.
075         * Standard amino acids will be converted to '1', '2' or '3' depending on its grouping
076         * Non-standard amino acids are simply converted to '0'.
077         *
078         * @param sequence
079         *              a protein sequence consisting of preferably non-ambiguous characters only
080         * @return the converted sequence
081         */
082        public String convert(ProteinSequence sequence){
083                String uppercaseSequence = sequence.getSequenceAsString().toUpperCase();
084                String convertedSequence = uppercaseSequence.chars().mapToObj(upperCaseSeq -> String.valueOf(convert((char)(upperCaseSeq)))).collect(Collectors.joining());
085                return convertedSequence;
086        }
087}