001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on May 23, 2010
021 *
022 */
023package org.biojava.nbio.structure.io.mmcif;
024
025import org.biojava.nbio.core.util.SoftHashMap;
026import org.biojava.nbio.structure.AminoAcid;
027import org.biojava.nbio.structure.AminoAcidImpl;
028import org.biojava.nbio.structure.Group;
029import org.biojava.nbio.structure.HetatomImpl;
030import org.biojava.nbio.structure.NucleotideImpl;
031import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
032import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
033import org.slf4j.Logger;
034import org.slf4j.LoggerFactory;
035
036
037public class ChemCompGroupFactory {
038
039        private static final Logger logger = LoggerFactory.getLogger(ChemCompGroupFactory.class);
040
041        private static ChemCompProvider chemCompProvider = new DownloadChemCompProvider();
042
043        private static SoftHashMap<String, ChemComp> cache = new SoftHashMap<String, ChemComp>(0);
044
045        public static ChemComp getChemComp(String recordName){
046
047                recordName = recordName.toUpperCase().trim();
048
049                // we are using the cache, to avoid hitting the file system too often.
050                ChemComp cc = cache.get(recordName);
051                if ( cc != null) {
052                        logger.debug("Chem comp "+cc.getThree_letter_code()+" read from cache");
053                        return cc;
054                }
055
056                // not cached, get the chem comp from the provider
057                logger.debug("Chem comp "+recordName+" read from provider "+chemCompProvider.getClass().getCanonicalName());
058                cc = chemCompProvider.getChemComp(recordName);
059
060                // Note that this also caches null or empty responses
061                cache.put(recordName, cc);
062                return cc;
063        }
064
065        /**
066         * The new ChemCompProvider will be set in the static variable,
067         * so this provider will be used from now on until it is changed
068         * again. Note that this change can have unexpected behavior of
069         * code executed afterwards.
070         * <p>
071         * Changing the provider also resets the cache, so any groups
072         * previously accessed will be reread or re-downloaded.
073         *
074         * @param provider
075         */
076        public static void setChemCompProvider(ChemCompProvider provider) {
077                logger.debug("Setting new chem comp provider to "+provider.getClass().getCanonicalName());
078                chemCompProvider = provider;
079                // clear cache
080                cache.clear();
081        }
082
083        public static ChemCompProvider getChemCompProvider(){
084                return chemCompProvider;
085        }
086
087        /**
088         * Force the in-memory cache to be reset.
089         *
090         * Note that the ChemCompProvider may have additional memory or disk caches that need to be cleared too.
091         */
092        public static void clearCache() {
093                cache.clear();
094        }
095
096        public static Group getGroupFromChemCompDictionary(String recordName) {
097
098                // make sure we work with upper case records
099                recordName = recordName.toUpperCase().trim();
100
101                Group g = null;
102
103
104                ChemComp cc =  getChemComp(recordName);
105
106                if ( cc == null)
107                        return null;
108
109                if ( PolymerType.PROTEIN_ONLY.contains( cc.getPolymerType() ) ){
110                        AminoAcid aa = new AminoAcidImpl();
111
112                        String one_letter = cc.getOne_letter_code();
113                        if ( one_letter == null || one_letter.equals("X") || one_letter.equals("?") || one_letter.length()==0){
114                                String parent = cc.getMon_nstd_parent_comp_id();
115                                if ( parent != null && parent.length() == 3){
116                                        String parentid = cc.getMon_nstd_parent_comp_id() ;
117                                        ChemComp parentCC = getChemComp(parentid);
118                                        one_letter = parentCC.getOne_letter_code();
119                                }
120                        }
121
122                        if ( one_letter == null || one_letter.length()==0 || one_letter.equals("?")) {
123                                // e.g. problem with PRR, which probably should have a parent of ALA, but as of 20110127 does not.
124                                logger.warn("Problem with chemical component: " + recordName + "  Did not find one letter code! Setting it to 'X'");
125                                aa.setAminoType('X');
126
127                        } else  {
128                                aa.setAminoType(one_letter.charAt(0));
129                        }
130
131
132                        g = aa;
133                } else if ( PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())) {
134                        NucleotideImpl nuc = new NucleotideImpl();
135
136                        g = nuc;
137
138
139                } else {
140
141                        g = new HetatomImpl();
142                }
143
144                g.setChemComp(cc);
145
146
147                return g;
148        }
149
150
151        public  static String getOneLetterCode(ChemComp cc){
152                String oneLetter = cc.getOne_letter_code();
153                if ( oneLetter == null || oneLetter.equals("X") || oneLetter.equals("?")) {
154                        String parentId = cc.getMon_nstd_parent_comp_id() ;
155                        if ( parentId == null)
156                                return oneLetter;
157                        // cases like OIM have multiple parents (comma separated), we shouldn't try grab a chemcomp for those strings
158                        if (parentId.length()>3)
159                                return oneLetter;
160                        ChemComp parentCC = ChemCompGroupFactory.getChemComp(parentId);
161                        if ( parentCC == null)
162                                return oneLetter;
163                        oneLetter = parentCC.getOne_letter_code();
164                }
165                return oneLetter;
166        }
167
168}