001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * created at Mar 4, 2008
021 */
022package org.biojava.nbio.structure.io.mmcif.chem;
023
024import org.biojava.nbio.structure.io.mmcif.ChemicalComponentDictionary;
025import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
026
027import java.util.*;
028
029/** Some tools for working with chemical compounds.
030 *
031 * @author Andreas Prlic
032 * @since 1.7
033 *
034 */
035public class ChemCompTools {
036
037        private static final Character UNKNOWN_ONE_LETTER_CODE = 'X';
038        private static final Character UNKNOWN_NUCLEOTIDE = 'N';
039
040        /**
041         * Lookup table to convert standard amino acid's monomer ids to one-letter-codes
042         */
043        private static final Map<String, Character> AMINO_ACID_LOOKUP_3TO1;
044
045        /**
046         * Lookup table to convert standard amino acid's one-letter-codes to monomer ids
047         */
048        private static final Map<Character, String> AMINO_ACID_LOOKUP_1TO3;
049
050        /**
051         * Lookup table to convert standard nucleic acid's monomer ids to one-letter-codes
052         */
053        private static final Map<String, Character> DNA_LOOKUP_2TO1;
054
055        /**
056         * Lookup table to convert standard nucleic acid's one-letter-codes to monomer ids
057         */
058        private static final Map<Character, String> DNA_LOOKUP_1TO2;
059
060        /**
061         * Static block that initializes lookup maps and initializes their <tt>ResidueInfo</tt> instances
062         */
063        static
064        {
065                Map<String, Character> foo = new HashMap<String, Character>();
066                foo.put("ALA", 'A');
067                foo.put("ASP", 'D');
068                foo.put("ASN", 'N');
069                foo.put("ASX", 'B');
070                foo.put("ARG", 'R');
071                foo.put("CYS", 'C');
072                foo.put("GLU", 'E');
073                foo.put("GLN", 'Q');
074                foo.put("GLY", 'G');
075                foo.put("GLX", 'Z');
076                foo.put("HIS", 'H');
077                foo.put("ILE", 'I');
078                foo.put("LYS", 'K');
079                foo.put("LEU", 'L');
080                foo.put("MET", 'M');
081                foo.put("PHE", 'F');
082                foo.put("PRO", 'P');
083                foo.put("SER", 'S');
084                foo.put("THR", 'T');
085                foo.put("TRP", 'W');
086                foo.put("TYR", 'Y');
087                foo.put("VAL", 'V');
088                AMINO_ACID_LOOKUP_3TO1 = Collections.unmodifiableMap((Collections.synchronizedMap(foo)));
089
090                Map<Character, String> bar = new HashMap<Character, String>();
091                bar.put('A', "ALA");
092                bar.put('D', "ASP");
093                bar.put('N', "ASN");
094                bar.put('B', "ASX");
095                bar.put('R', "ARG");
096                bar.put('C', "CYS");
097                bar.put('E', "GLU");
098                bar.put('Q', "GLN");
099                bar.put('G', "GLY");
100                bar.put('Z', "GLX");
101                bar.put('H', "HIS");
102                bar.put('I', "ILE");
103                bar.put('K', "LYS");
104                bar.put('L', "LEU");
105                bar.put('M', "MET");
106                bar.put('F', "PHE");
107                bar.put('P', "PRO");
108                bar.put('S', "SER");
109                bar.put('T', "THR");
110                bar.put('W', "TRP");
111                bar.put('Y', "TYR");
112                bar.put('V', "VAL");
113                AMINO_ACID_LOOKUP_1TO3 = Collections.unmodifiableMap(Collections.synchronizedMap(bar));
114
115                foo = new HashMap<String, Character>();
116                foo.put("DA",'A');
117                foo.put("DC",'C');
118                foo.put("DG",'G');
119                foo.put("DI",'I');
120                foo.put("DU",'U');
121                foo.put("DT",'T');
122                DNA_LOOKUP_2TO1 = Collections.unmodifiableMap((Collections.synchronizedMap(foo)));
123
124                bar = new HashMap<Character, String>();
125                bar.put('A',"DA");
126                bar.put('C',"DC");
127                bar.put('G',"DG");
128                bar.put('I',"DI");
129                bar.put('U',"DU");
130                bar.put('T',"DT");
131                DNA_LOOKUP_1TO2 = Collections.unmodifiableMap(Collections.synchronizedMap(bar));
132
133
134                // initialise standard chemical components
135                List<String> stdMonIds = new ArrayList<String>();
136                stdMonIds.addAll(AMINO_ACID_LOOKUP_3TO1.keySet());
137                stdMonIds.addAll(DNA_LOOKUP_2TO1.keySet());
138
139
140
141        }
142
143        public static Character getAminoOneLetter(String chemCompId){
144                return  AMINO_ACID_LOOKUP_3TO1.get(chemCompId);
145        }
146
147
148        public static Character getDNAOneLetter(String chemCompId){
149                return  DNA_LOOKUP_2TO1.get(chemCompId) ;
150        }
151
152        public static String getAminoThreeLetter(Character c){
153                return AMINO_ACID_LOOKUP_1TO3.get(c);
154        }
155
156        public static String getDNATwoLetter(Character c){
157                return DNA_LOOKUP_1TO2.get(c);
158        }
159
160        public static final boolean isStandardChemComp(ChemComp cc){
161
162                String pid = cc.getMon_nstd_parent_comp_id();
163                String one = cc.getOne_letter_code();
164
165                PolymerType polymerType = cc.getPolymerType();
166
167                // standard residues have no parent
168                if ((pid == null) || (pid.equals("?"))){
169
170                        // and they have a one letter code
171                        if ( ( one != null) && ( ! one.equals("?") )){
172
173                                // peptides and dpeptides must not have X
174                                if ( (polymerType == PolymerType.peptide) ||
175                                                ( polymerType == PolymerType.dpeptide)) {
176                                        return performPeptideCheck(cc, one);
177
178                                }
179                                if (polymerType == PolymerType.rna){
180                                        return performRNACheck(cc);
181                                }
182                                if (polymerType == PolymerType.dna) {
183
184                                        return performDNACheck(cc);
185
186                                }
187
188                                //System.err.println("Non standard chem comp: " + cc);
189                                return false;
190                        }
191                }
192                return false;
193        }
194
195
196        private static boolean performRNACheck(ChemComp cc) {
197                if (cc.getId().length() == 1)
198                        return true;
199                else
200                        return false;
201        }
202
203
204        private static boolean performDNACheck(ChemComp cc) {
205                if ( cc.getId().equals(UNKNOWN_NUCLEOTIDE.toString()))
206                        return false;
207
208                Character c = getDNAOneLetter(cc.getId());
209                if ( c==null){
210                        // we did not find it in the list of standard nucleotides
211                        return false;
212                }
213                return true;
214        }
215
216
217        private static boolean performPeptideCheck(ChemComp cc, String one) {
218                if (one.equals(UNKNOWN_ONE_LETTER_CODE.toString())) {
219                        return false;
220                }
221                Character c =  getAminoOneLetter(cc.getId());
222                if ( c==null){
223                        // we did not find it in the list of standard aminos
224                        return false;
225                }
226                return true;
227        }
228
229
230        // TODO: component 175 has 3 chars as a one letter code...
231        // Figure out what to do with it...
232        // so does: 4F3,5ZA and others
233        public static Character getOneLetterCode(ChemComp cc, ChemicalComponentDictionary dictionary){
234                if ( cc.getResidueType() == ResidueType.nonPolymer )
235                        return null;
236
237                if ( cc.isStandard())
238                        return cc.getOne_letter_code().charAt(0);
239
240                ChemComp parent = dictionary.getParent(cc);
241                if ( parent == null){
242                        //System.err.println("parent is null " + cc);
243                        return cc.getOne_letter_code().charAt(0);
244                }
245                PolymerType poly = cc.getPolymerType();
246                if (( poly == PolymerType.peptide) || ( poly == PolymerType.dpeptide)){
247                        Character c = getAminoOneLetter(parent.getId());
248                        if ( c == null)
249                                c = UNKNOWN_ONE_LETTER_CODE;
250                        return c;
251                }
252                if ( poly == PolymerType.dna){
253                        Character c = getDNAOneLetter(parent.getId());
254                        if (c == null)
255                                c = UNKNOWN_NUCLEOTIDE;
256                        return c;
257
258                }
259                return cc.getMon_nstd_parent_comp_id().charAt(0);
260        }
261}