001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * created at Mar 4, 2008 021 */ 022package org.biojava.nbio.structure.io.mmcif.chem; 023 024import org.biojava.nbio.structure.io.mmcif.ChemicalComponentDictionary; 025import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 026 027import java.util.*; 028 029/** Some tools for working with chemical compounds. 030 * 031 * @author Andreas Prlic 032 * @since 1.7 033 * 034 */ 035public class ChemCompTools { 036 037 private static final Character UNKNOWN_ONE_LETTER_CODE = 'X'; 038 private static final Character UNKNOWN_NUCLEOTIDE = 'N'; 039 040 /** 041 * Lookup table to convert standard amino acid's monomer ids to one-letter-codes 042 */ 043 private static final Map<String, Character> AMINO_ACID_LOOKUP_3TO1; 044 045 /** 046 * Lookup table to convert standard amino acid's one-letter-codes to monomer ids 047 */ 048 private static final Map<Character, String> AMINO_ACID_LOOKUP_1TO3; 049 050 /** 051 * Lookup table to convert standard nucleic acid's monomer ids to one-letter-codes 052 */ 053 private static final Map<String, Character> DNA_LOOKUP_2TO1; 054 055 /** 056 * Lookup table to convert standard nucleic acid's one-letter-codes to monomer ids 057 */ 058 private static final Map<Character, String> DNA_LOOKUP_1TO2; 059 060 /** 061 * Static block that initializes lookup maps and initializes their <tt>ResidueInfo</tt> instances 062 */ 063 static 064 { 065 Map<String, Character> foo = new HashMap<String, Character>(); 066 foo.put("ALA", 'A'); 067 foo.put("ASP", 'D'); 068 foo.put("ASN", 'N'); 069 foo.put("ASX", 'B'); 070 foo.put("ARG", 'R'); 071 foo.put("CYS", 'C'); 072 foo.put("GLU", 'E'); 073 foo.put("GLN", 'Q'); 074 foo.put("GLY", 'G'); 075 foo.put("GLX", 'Z'); 076 foo.put("HIS", 'H'); 077 foo.put("ILE", 'I'); 078 foo.put("LYS", 'K'); 079 foo.put("LEU", 'L'); 080 foo.put("MET", 'M'); 081 foo.put("PHE", 'F'); 082 foo.put("PRO", 'P'); 083 foo.put("SER", 'S'); 084 foo.put("THR", 'T'); 085 foo.put("TRP", 'W'); 086 foo.put("TYR", 'Y'); 087 foo.put("VAL", 'V'); 088 AMINO_ACID_LOOKUP_3TO1 = Collections.unmodifiableMap((Collections.synchronizedMap(foo))); 089 090 Map<Character, String> bar = new HashMap<Character, String>(); 091 bar.put('A', "ALA"); 092 bar.put('D', "ASP"); 093 bar.put('N', "ASN"); 094 bar.put('B', "ASX"); 095 bar.put('R', "ARG"); 096 bar.put('C', "CYS"); 097 bar.put('E', "GLU"); 098 bar.put('Q', "GLN"); 099 bar.put('G', "GLY"); 100 bar.put('Z', "GLX"); 101 bar.put('H', "HIS"); 102 bar.put('I', "ILE"); 103 bar.put('K', "LYS"); 104 bar.put('L', "LEU"); 105 bar.put('M', "MET"); 106 bar.put('F', "PHE"); 107 bar.put('P', "PRO"); 108 bar.put('S', "SER"); 109 bar.put('T', "THR"); 110 bar.put('W', "TRP"); 111 bar.put('Y', "TYR"); 112 bar.put('V', "VAL"); 113 AMINO_ACID_LOOKUP_1TO3 = Collections.unmodifiableMap(Collections.synchronizedMap(bar)); 114 115 foo = new HashMap<String, Character>(); 116 foo.put("DA",'A'); 117 foo.put("DC",'C'); 118 foo.put("DG",'G'); 119 foo.put("DI",'I'); 120 foo.put("DU",'U'); 121 foo.put("DT",'T'); 122 DNA_LOOKUP_2TO1 = Collections.unmodifiableMap((Collections.synchronizedMap(foo))); 123 124 bar = new HashMap<Character, String>(); 125 bar.put('A',"DA"); 126 bar.put('C',"DC"); 127 bar.put('G',"DG"); 128 bar.put('I',"DI"); 129 bar.put('U',"DU"); 130 bar.put('T',"DT"); 131 DNA_LOOKUP_1TO2 = Collections.unmodifiableMap(Collections.synchronizedMap(bar)); 132 133 134 // initialise standard chemical components 135 List<String> stdMonIds = new ArrayList<String>(); 136 stdMonIds.addAll(AMINO_ACID_LOOKUP_3TO1.keySet()); 137 stdMonIds.addAll(DNA_LOOKUP_2TO1.keySet()); 138 139 140 141 } 142 143 public static Character getAminoOneLetter(String chemCompId){ 144 return AMINO_ACID_LOOKUP_3TO1.get(chemCompId); 145 } 146 147 148 public static Character getDNAOneLetter(String chemCompId){ 149 return DNA_LOOKUP_2TO1.get(chemCompId) ; 150 } 151 152 public static String getAminoThreeLetter(Character c){ 153 return AMINO_ACID_LOOKUP_1TO3.get(c); 154 } 155 156 public static String getDNATwoLetter(Character c){ 157 return DNA_LOOKUP_1TO2.get(c); 158 } 159 160 public static final boolean isStandardChemComp(ChemComp cc){ 161 162 String pid = cc.getMon_nstd_parent_comp_id(); 163 String one = cc.getOne_letter_code(); 164 165 PolymerType polymerType = cc.getPolymerType(); 166 167 // standard residues have no parent 168 if ((pid == null) || (pid.equals("?"))){ 169 170 // and they have a one letter code 171 if ( ( one != null) && ( ! one.equals("?") )){ 172 173 // peptides and dpeptides must not have X 174 if ( (polymerType == PolymerType.peptide) || 175 ( polymerType == PolymerType.dpeptide)) { 176 return performPeptideCheck(cc, one); 177 178 } 179 if (polymerType == PolymerType.rna){ 180 return performRNACheck(cc); 181 } 182 if (polymerType == PolymerType.dna) { 183 184 return performDNACheck(cc); 185 186 } 187 188 //System.err.println("Non standard chem comp: " + cc); 189 return false; 190 } 191 } 192 return false; 193 } 194 195 196 private static boolean performRNACheck(ChemComp cc) { 197 if (cc.getId().length() == 1) 198 return true; 199 else 200 return false; 201 } 202 203 204 private static boolean performDNACheck(ChemComp cc) { 205 if ( cc.getId().equals(UNKNOWN_NUCLEOTIDE.toString())) 206 return false; 207 208 Character c = getDNAOneLetter(cc.getId()); 209 if ( c==null){ 210 // we did not find it in the list of standard nucleotides 211 return false; 212 } 213 return true; 214 } 215 216 217 private static boolean performPeptideCheck(ChemComp cc, String one) { 218 if (one.equals(UNKNOWN_ONE_LETTER_CODE.toString())) { 219 return false; 220 } 221 Character c = getAminoOneLetter(cc.getId()); 222 if ( c==null){ 223 // we did not find it in the list of standard aminos 224 return false; 225 } 226 return true; 227 } 228 229 230 // TODO: component 175 has 3 chars as a one letter code... 231 // Figure out what to do with it... 232 // so does: 4F3,5ZA and others 233 public static Character getOneLetterCode(ChemComp cc, ChemicalComponentDictionary dictionary){ 234 if ( cc.getResidueType() == ResidueType.nonPolymer ) 235 return null; 236 237 if ( cc.isStandard()) 238 return cc.getOne_letter_code().charAt(0); 239 240 ChemComp parent = dictionary.getParent(cc); 241 if ( parent == null){ 242 //System.err.println("parent is null " + cc); 243 return cc.getOne_letter_code().charAt(0); 244 } 245 PolymerType poly = cc.getPolymerType(); 246 if (( poly == PolymerType.peptide) || ( poly == PolymerType.dpeptide)){ 247 Character c = getAminoOneLetter(parent.getId()); 248 if ( c == null) 249 c = UNKNOWN_ONE_LETTER_CODE; 250 return c; 251 } 252 if ( poly == PolymerType.dna){ 253 Character c = getDNAOneLetter(parent.getId()); 254 if (c == null) 255 c = UNKNOWN_NUCLEOTIDE; 256 return c; 257 258 } 259 return cc.getMon_nstd_parent_comp_id().charAt(0); 260 } 261}