001package org.biojava.nbio.structure.chem;
002
003import java.util.Collections;
004import java.util.HashMap;
005import java.util.Map;
006
007public class ChemCompTools {
008    private static final Character UNKNOWN_ONE_LETTER_CODE = 'X';
009    private static final Character UNKNOWN_NUCLEOTIDE = 'N';
010    /**
011     * Lookup table to convert standard amino acid's monomer ids to one-letter-codes
012     */
013    private static final Map<String, Character> AMINO_ACID_LOOKUP_3TO1;
014    /**
015     * Lookup table to convert standard amino acid's one-letter-codes to monomer ids
016     */
017    private static final Map<Character, String> AMINO_ACID_LOOKUP_1TO3;
018    /**
019     * Lookup table to convert standard nucleic acid's monomer ids to one-letter-codes
020     */
021    private static final Map<String, Character> DNA_LOOKUP_2TO1;
022    /**
023     * Lookup table to convert standard nucleic acid's one-letter-codes to monomer ids
024     */
025    private static final Map<Character, String> DNA_LOOKUP_1TO2;
026    /*
027      Static block that initializes lookup maps and initializes their <tt>ResidueInfo</tt> instances
028     */
029    static {
030        Map<String, Character> foo = new HashMap<>();
031        foo.put("ALA", 'A');
032        foo.put("ASP", 'D');
033        foo.put("ASN", 'N');
034        foo.put("ASX", 'B');
035        foo.put("ARG", 'R');
036        foo.put("CYS", 'C');
037        foo.put("GLU", 'E');
038        foo.put("GLN", 'Q');
039        foo.put("GLY", 'G');
040        foo.put("GLX", 'Z');
041        foo.put("HIS", 'H');
042        foo.put("ILE", 'I');
043        foo.put("LYS", 'K');
044        foo.put("LEU", 'L');
045        foo.put("MET", 'M');
046        foo.put("PHE", 'F');
047        foo.put("PRO", 'P');
048        foo.put("SER", 'S');
049        foo.put("THR", 'T');
050        foo.put("TRP", 'W');
051        foo.put("TYR", 'Y');
052        foo.put("VAL", 'V');
053        AMINO_ACID_LOOKUP_3TO1 = Collections.unmodifiableMap((Collections.synchronizedMap(foo)));
054
055        Map<Character, String> bar = new HashMap<>();
056        bar.put('A', "ALA");
057        bar.put('D', "ASP");
058        bar.put('N', "ASN");
059        bar.put('B', "ASX");
060        bar.put('R', "ARG");
061        bar.put('C', "CYS");
062        bar.put('E', "GLU");
063        bar.put('Q', "GLN");
064        bar.put('G', "GLY");
065        bar.put('Z', "GLX");
066        bar.put('H', "HIS");
067        bar.put('I', "ILE");
068        bar.put('K', "LYS");
069        bar.put('L', "LEU");
070        bar.put('M', "MET");
071        bar.put('F', "PHE");
072        bar.put('P', "PRO");
073        bar.put('S', "SER");
074        bar.put('T', "THR");
075        bar.put('W', "TRP");
076        bar.put('Y', "TYR");
077        bar.put('V', "VAL");
078        AMINO_ACID_LOOKUP_1TO3 = Collections.unmodifiableMap(Collections.synchronizedMap(bar));
079
080        foo = new HashMap<>();
081        foo.put("DA", 'A');
082        foo.put("DC", 'C');
083        foo.put("DG", 'G');
084        foo.put("DI", 'I');
085        foo.put("DU", 'U');
086        foo.put("DT", 'T');
087        DNA_LOOKUP_2TO1 = Collections.unmodifiableMap((Collections.synchronizedMap(foo)));
088
089        bar = new HashMap<>();
090        bar.put('A', "DA");
091        bar.put('C', "DC");
092        bar.put('G', "DG");
093        bar.put('I', "DI");
094        bar.put('U', "DU");
095        bar.put('T', "DT");
096        DNA_LOOKUP_1TO2 = Collections.unmodifiableMap(Collections.synchronizedMap(bar));
097    }
098
099    public static Character getAminoOneLetter(String chemCompId) {
100        return AMINO_ACID_LOOKUP_3TO1.get(chemCompId);
101    }
102
103    public static Character getDNAOneLetter(String chemCompId) {
104        return DNA_LOOKUP_2TO1.get(chemCompId);
105    }
106
107    public static String getAminoThreeLetter(Character c) {
108        return AMINO_ACID_LOOKUP_1TO3.get(c);
109    }
110
111    public static String getDNATwoLetter(Character c) {
112        return DNA_LOOKUP_1TO2.get(c);
113    }
114
115    public static PolymerType getPolymerType(ResidueType residueType) {
116        if (residueType != null) {
117            return residueType.polymerType;
118        }
119        return null;
120    }
121
122    public static boolean isStandardChemComp(ChemComp cc) {
123        String pid = cc.getMonNstdParentCompId();
124        String one = cc.getOneLetterCode();
125
126        ResidueType residueType = ResidueType.getResidueTypeFromString(cc.getType());
127        PolymerType polymerType = getPolymerType(residueType);
128
129        // standard residues have no parent
130        if (pid == null || pid.equals("?")) {
131            // and they have a one letter code
132            if (one != null && !one.equals("?")) {
133                // peptides and dpeptides must not have X
134                if (polymerType == PolymerType.peptide || polymerType == PolymerType.dpeptide) {
135                    return performPeptideCheck(cc, one);
136                }
137                if (polymerType == PolymerType.rna) {
138                    return performRNACheck(cc);
139                }
140                if (polymerType == PolymerType.dna) {
141                    return performDNACheck(cc);
142                }
143
144                //System.err.println("Non standard chem comp: " + cc);
145                return false;
146            }
147        }
148        return false;
149    }
150
151    private static boolean performRNACheck(ChemComp cc) {
152        return cc.getId().length() == 1;
153    }
154
155    private static boolean performDNACheck(ChemComp cc) {
156        if (cc.getId().equals(UNKNOWN_NUCLEOTIDE.toString())) {
157            return false;
158        }
159
160        Character c = getDNAOneLetter(cc.getId());
161        // we did not find it in the list of standard nucleotides
162        return c != null;
163    }
164
165    private static boolean performPeptideCheck(ChemComp cc, String one) {
166        if (one.equals(UNKNOWN_ONE_LETTER_CODE.toString())) {
167            return false;
168        }
169        Character c = getAminoOneLetter(cc.getId());
170        // we did not find it in the list of standard aminos
171        return c != null;
172    }
173
174    // TODO: component 175 has 3 chars as a one letter code...
175    // Figure out what to do with it...
176    // so does: 4F3,5ZA and others
177    public static Character getOneLetterCode(ChemComp cc, ChemicalComponentDictionary dictionary) {
178        if (cc.getResidueType() == ResidueType.nonPolymer) {
179            return null;
180        }
181
182        if (cc.isStandard()) {
183            return cc.getOneLetterCode().charAt(0);
184        }
185
186        ChemComp parent = dictionary.getParent(cc);
187        if (parent == null) {
188            //System.err.println("parent is null " + cc);
189            return cc.getOneLetterCode().charAt(0);
190        }
191        PolymerType poly = cc.getPolymerType();
192        if (poly == PolymerType.peptide || poly == PolymerType.dpeptide) {
193            Character c = getAminoOneLetter(parent.getId());
194            if (c == null) {
195                c = UNKNOWN_ONE_LETTER_CODE;
196            }
197            return c;
198        }
199        if (poly == PolymerType.dna) {
200            Character c = getDNAOneLetter(parent.getId());
201            if (c == null) {
202                c = UNKNOWN_NUCLEOTIDE;
203            }
204            return c;
205
206        }
207        return cc.getMonNstdParentCompId().charAt(0);
208    }
209}