package org.biojava.nbio.structure.chem;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

/**
 * Static helpers for translating chemical component ids to and from one-letter codes and for
 * deciding whether a {@link ChemComp} is one of the standard polymer building blocks.
 *
 * <p>All lookup tables are populated once in the static initializer and are never modified
 * afterwards; they are only exposed through read-only views.
 */
public class ChemCompTools {
    /** One-letter code used for a peptide residue whose parent is not a standard amino acid. */
    private static final Character UNKNOWN_ONE_LETTER_CODE = 'X';

    /** One-letter code used for a DNA residue whose parent is not a standard nucleotide. */
    private static final Character UNKNOWN_NUCLEOTIDE = 'N';

    /** Placeholder string that marks a parent id or one-letter code as "not set". */
    private static final String MISSING_VALUE = "?";

    /**
     * Lookup table to convert standard amino acid monomer ids to one-letter codes.
     */
    private static final Map<String, Character> AMINO_ACID_LOOKUP_3TO1;

    /**
     * Lookup table to convert standard amino acid one-letter codes to monomer ids.
     */
    private static final Map<Character, String> AMINO_ACID_LOOKUP_1TO3;

    /**
     * Lookup table to convert standard nucleic acid monomer ids to one-letter codes.
     */
    private static final Map<String, Character> DNA_LOOKUP_2TO1;

    /**
     * Lookup table to convert standard nucleic acid one-letter codes to monomer ids.
     */
    private static final Map<Character, String> DNA_LOOKUP_1TO2;

    /*
     * Fills the forward (id -> code) tables and derives the reverse (code -> id) tables from
     * them, so both directions always agree.
     */
    static {
        Map<String, Character> aminoCodes = new HashMap<>();
        aminoCodes.put("ALA", 'A');
        aminoCodes.put("ASP", 'D');
        aminoCodes.put("ASN", 'N');
        aminoCodes.put("ASX", 'B');
        aminoCodes.put("ARG", 'R');
        aminoCodes.put("CYS", 'C');
        aminoCodes.put("GLU", 'E');
        aminoCodes.put("GLN", 'Q');
        aminoCodes.put("GLY", 'G');
        aminoCodes.put("GLX", 'Z');
        aminoCodes.put("HIS", 'H');
        aminoCodes.put("ILE", 'I');
        aminoCodes.put("LYS", 'K');
        aminoCodes.put("LEU", 'L');
        aminoCodes.put("MET", 'M');
        aminoCodes.put("PHE", 'F');
        aminoCodes.put("PRO", 'P');
        aminoCodes.put("SER", 'S');
        aminoCodes.put("THR", 'T');
        aminoCodes.put("TRP", 'W');
        aminoCodes.put("TYR", 'Y');
        aminoCodes.put("VAL", 'V');
        AMINO_ACID_LOOKUP_3TO1 = Collections.unmodifiableMap(aminoCodes);
        AMINO_ACID_LOOKUP_1TO3 = invert(aminoCodes);

        Map<String, Character> dnaCodes = new HashMap<>();
        dnaCodes.put("DA", 'A');
        dnaCodes.put("DC", 'C');
        dnaCodes.put("DG", 'G');
        dnaCodes.put("DI", 'I');
        dnaCodes.put("DU", 'U');
        dnaCodes.put("DT", 'T');
        DNA_LOOKUP_2TO1 = Collections.unmodifiableMap(dnaCodes);
        DNA_LOOKUP_1TO2 = invert(dnaCodes);
    }

    /**
     * Builds the read-only reverse (code -> id) view of a forward (id -> code) table.
     *
     * @param forward table mapping monomer ids to one-letter codes
     * @return unmodifiable map from one-letter code to monomer id
     * @throws IllegalStateException if two ids share the same one-letter code
     */
    private static Map<Character, String> invert(Map<String, Character> forward) {
        Map<Character, String> reverse = new HashMap<>();
        for (Map.Entry<String, Character> entry : forward.entrySet()) {
            String previous = reverse.put(entry.getValue(), entry.getKey());
            if (previous != null) {
                throw new IllegalStateException("Duplicate one-letter code '" + entry.getValue()
                        + "' for " + previous + " and " + entry.getKey());
            }
        }
        return Collections.unmodifiableMap(reverse);
    }

    /**
     * Looks up the one-letter code of a standard amino acid.
     *
     * @param chemCompId three-letter monomer id, e.g. {@code "ALA"}
     * @return the one-letter code, or {@code null} if the id is not in the amino acid table
     */
    public static Character getAminoOneLetter(String chemCompId) {
        return AMINO_ACID_LOOKUP_3TO1.get(chemCompId);
    }

    /**
     * Looks up the one-letter code of a standard DNA nucleotide.
     *
     * @param chemCompId two-letter monomer id, e.g. {@code "DA"}
     * @return the one-letter code, or {@code null} if the id is not in the DNA table
     */
    public static Character getDNAOneLetter(String chemCompId) {
        return DNA_LOOKUP_2TO1.get(chemCompId);
    }

    /**
     * Looks up the three-letter monomer id of a standard amino acid.
     *
     * @param c one-letter code, e.g. {@code 'A'}
     * @return the monomer id, or {@code null} if the code is not in the amino acid table
     */
    public static String getAminoThreeLetter(Character c) {
        return AMINO_ACID_LOOKUP_1TO3.get(c);
    }

    /**
     * Looks up the two-letter monomer id of a standard DNA nucleotide.
     *
     * @param c one-letter code, e.g. {@code 'A'}
     * @return the monomer id, or {@code null} if the code is not in the DNA table
     */
    public static String getDNATwoLetter(Character c) {
        return DNA_LOOKUP_1TO2.get(c);
    }

    /**
     * Returns the polymer type attached to a residue type.
     *
     * @param residueType residue type, may be {@code null}
     * @return {@code residueType.polymerType}, or {@code null} when {@code residueType} is
     *         {@code null}
     */
    public static PolymerType getPolymerType(ResidueType residueType) {
        return residueType == null ? null : residueType.polymerType;
    }

    /**
     * Decides whether a chemical component counts as a standard residue.
     *
     * <p>The component is reported as standard only if all of the following hold:
     * <ol>
     *   <li>its parent id is {@code null} or {@code "?"} (standard residues have no parent);</li>
     *   <li>its one-letter code is neither {@code null} nor {@code "?"};</li>
     *   <li>the check for its polymer type passes:
     *     <ul>
     *       <li>peptide / dpeptide: the one-letter code is not {@code "X"} and the id is in the
     *           amino acid table;</li>
     *       <li>rna: the id is exactly one character long;</li>
     *       <li>dna: the id is not {@code "N"} and is in the DNA table.</li>
     *     </ul>
     *   </li>
     * </ol>
     * Any other polymer type yields {@code false}.
     *
     * @param cc component to classify
     * @return {@code true} if the component passes the checks above
     */
    public static boolean isStandardChemComp(ChemComp cc) {
        String pid = cc.getMonNstdParentCompId();
        String one = cc.getOneLetterCode();

        ResidueType residueType = ResidueType.getResidueTypeFromString(cc.getType());
        PolymerType polymerType = getPolymerType(residueType);

        // standard residues have no parent
        boolean hasParent = pid != null && !pid.equals(MISSING_VALUE);
        if (hasParent) {
            return false;
        }
        // and they have a one letter code
        if (one == null || one.equals(MISSING_VALUE)) {
            return false;
        }

        if (polymerType == PolymerType.peptide || polymerType == PolymerType.dpeptide) {
            return performPeptideCheck(cc, one);
        }
        if (polymerType == PolymerType.rna) {
            return performRNACheck(cc);
        }
        if (polymerType == PolymerType.dna) {
            return performDNACheck(cc);
        }
        return false;
    }

    /**
     * RNA check: the component id must consist of exactly one character. A component without an
     * id is not standard.
     */
    private static boolean performRNACheck(ChemComp cc) {
        String id = cc.getId();
        return id != null && id.length() == 1;
    }

    /**
     * DNA check: the component id must not be the unknown-nucleotide code and must be present in
     * the table of standard nucleotides. A component without an id is not standard.
     */
    private static boolean performDNACheck(ChemComp cc) {
        String id = cc.getId();
        if (id == null || UNKNOWN_NUCLEOTIDE.toString().equals(id)) {
            return false;
        }
        // standard only if the id is listed among the standard nucleotides
        return getDNAOneLetter(id) != null;
    }

    /**
     * Peptide check: the one-letter code must not be the unknown code and the component id must
     * be present in the table of standard amino acids.
     */
    private static boolean performPeptideCheck(ChemComp cc, String one) {
        if (UNKNOWN_ONE_LETTER_CODE.toString().equals(one)) {
            return false;
        }
        // standard only if the id is listed among the standard amino acids
        return getAminoOneLetter(cc.getId()) != null;
    }

    /**
     * Returns the first character of {@code value}, or {@code null} when there is none.
     */
    private static Character firstCharOrNull(String value) {
        if (value == null || value.isEmpty()) {
            return null;
        }
        return value.charAt(0);
    }

    // TODO: component 175 has 3 chars as a one letter code...
    // Figure out what to do with it...
    // so does: 4F3, 5ZA and others
    /**
     * Determines a single-character code for a chemical component.
     *
     * <ul>
     *   <li>Non-polymer components have no code: {@code null} is returned.</li>
     *   <li>For a standard component, or one whose parent cannot be resolved through
     *       {@code dictionary}, the first character of its own one-letter code is returned.</li>
     *   <li>For a peptide / dpeptide with a parent, the parent's amino acid code is returned,
     *       or {@code 'X'} if the parent is not in the amino acid table.</li>
     *   <li>For DNA with a parent, the parent's nucleotide code is returned, or {@code 'N'} if
     *       the parent is not in the DNA table.</li>
     *   <li>Otherwise the first character of the component's parent id is returned.</li>
     * </ul>
     * Only the first character of a multi-character code is used (see the TODO above).
     *
     * @param cc component to translate
     * @param dictionary dictionary used to resolve the parent of {@code cc}
     * @return the code, or {@code null} if the component is a non-polymer or the string the code
     *         would be taken from is {@code null} or empty
     */
    public static Character getOneLetterCode(ChemComp cc, ChemicalComponentDictionary dictionary) {
        if (cc.getResidueType() == ResidueType.nonPolymer) {
            return null;
        }

        if (cc.isStandard()) {
            return firstCharOrNull(cc.getOneLetterCode());
        }

        ChemComp parent = dictionary.getParent(cc);
        if (parent == null) {
            // no parent to fall back on: use the component's own code
            return firstCharOrNull(cc.getOneLetterCode());
        }

        PolymerType poly = cc.getPolymerType();
        if (poly == PolymerType.peptide || poly == PolymerType.dpeptide) {
            Character code = getAminoOneLetter(parent.getId());
            return code != null ? code : UNKNOWN_ONE_LETTER_CODE;
        }
        if (poly == PolymerType.dna) {
            Character code = getDNAOneLetter(parent.getId());
            return code != null ? code : UNKNOWN_NUCLEOTIDE;
        }
        return firstCharOrNull(cc.getMonNstdParentCompId());
    }
}