001package org.biojava.nbio.structure.chem;
002
import org.biojava.nbio.core.util.SoftHashMap;
import org.biojava.nbio.structure.AminoAcid;
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.HetatomImpl;
import org.biojava.nbio.structure.NucleotideImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Locale;
import java.util.Objects;
011
012public class ChemCompGroupFactory {
013    private static final Logger logger = LoggerFactory.getLogger(ChemCompGroupFactory.class);
014    private static ChemCompProvider chemCompProvider = new DownloadChemCompProvider();
015    private static final SoftHashMap<String, ChemComp> cache = new SoftHashMap<>(0);
016
017    public static ChemComp getChemComp(String recordName) {
018        recordName = recordName.toUpperCase().trim();
019
020        // we are using the cache, to avoid hitting the file system too often.
021        ChemComp cc = cache.get(recordName);
022        if (cc != null) {
023            logger.debug("Chem comp {} read from cache", cc.getThreeLetterCode());
024            return cc;
025        }
026
027        // not cached, get the chem comp from the provider
028        logger.debug("Chem comp {} read from provider {}", recordName, chemCompProvider.getClass().getCanonicalName());
029        cc = chemCompProvider.getChemComp(recordName);
030
031        // Note that this also caches null or empty responses
032        cache.put(recordName, cc);
033        return cc;
034    }
035
036    /**
037     * The new ChemCompProvider will be set in the static variable,
038     * so this provider will be used from now on until it is changed
039     * again. Note that this change can have unexpected behavior of
040     * code executed afterwards.
041     * <p>
042     * Changing the provider also resets the cache, so any groups
043     * previously accessed will be reread or re-downloaded.
044     *
045     * @param provider
046     */
047    public static void setChemCompProvider(ChemCompProvider provider) {
048        logger.debug("Setting new chem comp provider to {}", provider.getClass().getCanonicalName());
049        chemCompProvider = provider;
050        // clear cache
051        cache.clear();
052    }
053
054    public static ChemCompProvider getChemCompProvider(){
055        return chemCompProvider;
056    }
057
058    /**
059     * Force the in-memory cache to be reset.
060     *
061     * Note that the ChemCompProvider may have additional memory or disk caches that need to be cleared too.
062     */
063    public static void clearCache() {
064        cache.clear();
065    }
066
067    public static Group getGroupFromChemCompDictionary(String recordName) {
068        // make sure we work with upper case records
069        recordName = recordName.toUpperCase().trim();
070        Group g;
071        ChemComp cc =  getChemComp(recordName);
072
073        if (cc == null) {
074            return null;
075        }
076
077        if (PolymerType.PROTEIN_ONLY.contains(cc.getPolymerType())) {
078            AminoAcid aa = new AminoAcidImpl();
079
080            String one_letter = cc.getOneLetterCode();
081            if (one_letter == null || "X".equals(one_letter) || "?".equals(one_letter) || one_letter.length() == 0) {
082                String parent = cc.getMonNstdParentCompId();
083                if (parent != null && parent.length() == 3) {
084                    String parentid = cc.getMonNstdParentCompId();
085                    ChemComp parentCC = getChemComp(parentid);
086                    one_letter = parentCC.getOneLetterCode();
087                }
088            }
089
090            if (one_letter == null || one_letter.length() == 0 || "?".equals(one_letter)) {
091                // e.g. problem with PRR, which probably should have a parent of ALA, but as of 20110127 does not.
092                logger.warn("Problem with chemical component: {} Did not find one letter code! Setting it to 'X'",
093                        recordName);
094                aa.setAminoType('X');
095            } else  {
096                aa.setAminoType(one_letter.charAt(0));
097            }
098
099            g = aa;
100        } else if (PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())) {
101            g = new NucleotideImpl();
102        } else {
103            g = new HetatomImpl();
104        }
105
106        g.setChemComp(cc);
107        return g;
108    }
109
110    public static String getOneLetterCode(ChemComp cc) {
111        String oneLetter = cc.getOneLetterCode();
112        if (oneLetter == null || "X".equals(oneLetter) || "?".equals(oneLetter)) {
113            String parentId = cc.getMonNstdParentCompId();
114            if (parentId == null) {
115                return oneLetter;
116            }
117            // cases like OIM have multiple parents (comma separated), we shouldn't try grab a chemcomp for those strings
118            if (parentId.length() > 3) {
119                return oneLetter;
120            }
121            ChemComp parentCC = ChemCompGroupFactory.getChemComp(parentId);
122            if (parentCC == null) {
123                return oneLetter;
124            }
125            oneLetter = parentCC.getOneLetterCode();
126        }
127        return oneLetter;
128    }
129}