001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.aaproperties.profeat.convertor; 022 023import org.biojava.nbio.core.sequence.ProteinSequence; 024 025public abstract class Convertor { 026 /** 027 * Based on Table 2 of http://nar.oxfordjournals.org/content/34/suppl_2/W32.full.pdf<br/> 028 * An abstract class to convert a protein sequence into representation of different attribute with each attribute having 3 groups.<br/> 029 * The seven different attributes are<p/> 030 * Hydrophobicity (Polar, Neutral, Hydrophobicity)<br/> 031 * Normalized van der Waals volume (Range 0 - 2.78, 2.95 - 4.0, 4.03 - 8.08)<br/> 032 * Polarity (Value 4.9 - 6.2, 8.0 - 9.2, 10.4 - 13.0)<br/> 033 * Polarizability (Value 0 - 1.08, 0.128 - 0.186, 0.219 - 0.409)<br/> 034 * Charge (Positive, Neutral, Negative)<br/> 035 * Secondary structure (Helix, Strand, Coil)<br/> 036 * Solvent accessibility (Buried, Exposed, Intermediate)<br/> 037 * 038 * @author kohchuanhock 039 * @version 2011.06.09 040 */ 041 public final static char group1 = '1'; 042 public final static char group2 = '2'; 043 public final static char group3 = '3'; 044 public final static char unknownGroup = '0'; 045 046 /** 047 * Returns the grouping of the amino acid character. 048 * The aminoAcid argument is preferably of non-ambiguous characters. 049 * Standard amino acids will be converted to '1', '2' or '3' depending on its grouping 050 * Non-standard amino acids are simply converted to '0'. 051 * 052 * @param aminoAcid 053 * an amino acid character preferably of non-ambiguous characters 054 * @return its grouping 055 */ 056 public abstract char convert(char aminoAcid); 057 058 /** 059 * Returns the groupings of the attribute 060 * @return the groupings of the attribute 061 */ 062 public abstract String[] getGrouping(); 063 064 /** 065 * Return the attribute of the grouping 066 * @return the attribute of the grouping 067 */ 068 public abstract String getAttribute(); 069 070 /** 071 * Returns the converted sequence. 072 * The sequence argument must be a protein sequence consisting of preferably non-ambiguous characters only. 073 * Standard amino acids will be converted to '1', '2' or '3' depending on its grouping 074 * Non-standard amino acids are simply converted to '0'. 075 * 076 * @param sequence 077 * a protein sequence consisting of preferably non-ambiguous characters only 078 * @return the converted sequence 079 */ 080 public String convert(ProteinSequence sequence){ 081 String convertedSequence = ""; 082 String uppercaseSequence = sequence.getSequenceAsString().toUpperCase(); 083 for(int x = 0; x < uppercaseSequence.length(); x++){ 084 convertedSequence += String.valueOf(convert(uppercaseSequence.charAt(x))); 085 } 086 return convertedSequence; 087 } 088 089}