001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on May 23, 2010
021 *
022 */
023package org.biojava.nbio.structure.io.mmcif;
024
025import org.biojava.nbio.core.util.SoftHashMap;
026import org.biojava.nbio.structure.AminoAcid;
027import org.biojava.nbio.structure.AminoAcidImpl;
028import org.biojava.nbio.structure.Group;
029import org.biojava.nbio.structure.HetatomImpl;
030import org.biojava.nbio.structure.NucleotideImpl;
031import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
032import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
033import org.slf4j.Logger;
034import org.slf4j.LoggerFactory;
035
036
037public class ChemCompGroupFactory {
038
039        private static final Logger logger = LoggerFactory.getLogger(ChemCompGroupFactory.class);
040
041        private static ChemCompProvider chemCompProvider = new DownloadChemCompProvider();
042
043        private static SoftHashMap<String, ChemComp> cache = new SoftHashMap<String, ChemComp>(0);
044
045        public static ChemComp getChemComp(String recordName){
046
047                recordName = recordName.toUpperCase().trim();
048
049                // we are using the cache, to avoid hitting the file system too often.
050                ChemComp cc = cache.get(recordName);
051                if ( cc != null) {
052                        logger.debug("Chem comp "+cc.getThree_letter_code()+" read from cache");
053                        return cc;
054                }
055
056                // not cached, get the chem comp from the provider
057                logger.debug("Chem comp "+recordName+" read from provider "+chemCompProvider.getClass().getCanonicalName());
058                cc = chemCompProvider.getChemComp(recordName);
059
060                // Note that this also caches null or empty responses
061                cache.put(recordName, cc);
062                return cc;
063        }
064
065        /**
066         * The new ChemCompProvider will be set in the static variable,
067         * so this provider will be used from now on until it is changed
068         * again. Note that this change can have unexpected behavior of
069         * code executed afterwards.
070         * <p>
071         * Changing the provider does not reset the cache, so Chemical
072         * Component definitions already downloaded from previous providers
073         * will be used. To reset the cache see {@link #getCache()).
074         *
075         * @param provider
076         */
077        public static void setChemCompProvider(ChemCompProvider provider) {
078                logger.debug("Setting new chem comp provider to "+provider.getClass().getCanonicalName());
079                chemCompProvider = provider;
080                // clear cache
081                cache.clear();
082        }
083
084        public static ChemCompProvider getChemCompProvider(){
085                return chemCompProvider;
086        }
087
088        public static Group getGroupFromChemCompDictionary(String recordName) {
089
090                // make sure we work with upper case records
091                recordName = recordName.toUpperCase().trim();
092
093                Group g = null;
094
095
096                ChemComp cc =  getChemComp(recordName);
097
098                if ( cc == null)
099                        return null;
100
101                if ( PolymerType.PROTEIN_ONLY.contains( cc.getPolymerType() ) ){
102                        AminoAcid aa = new AminoAcidImpl();
103
104                        String one_letter = cc.getOne_letter_code();
105                        if ( one_letter == null || one_letter.equals("X") || one_letter.equals("?") || one_letter.length()==0){
106                                String parent = cc.getMon_nstd_parent_comp_id();
107                                if ( parent != null && parent.length() == 3){
108                                        String parentid = cc.getMon_nstd_parent_comp_id() ;
109                                        ChemComp parentCC = getChemComp(parentid);
110                                        one_letter = parentCC.getOne_letter_code();
111                                }
112                        }
113
114                        if ( one_letter == null || one_letter.length()==0 || one_letter.equals("?")) {
115                                // e.g. problem with PRR, which probably should have a parent of ALA, but as of 20110127 does not.
116                                logger.warn("Problem with chemical component: " + recordName + "  Did not find one letter code! Setting it to 'X'");
117                                aa.setAminoType('X');
118
119                        } else  {
120                                aa.setAminoType(one_letter.charAt(0));
121                        }
122
123
124                        g = aa;
125                } else if ( PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())) {
126                        NucleotideImpl nuc = new NucleotideImpl();
127
128                        g = nuc;
129
130
131                } else {
132
133                        g = new HetatomImpl();
134                }
135
136                g.setChemComp(cc);
137
138
139                return g;
140        }
141
142
143        public  static String getOneLetterCode(ChemComp cc){
144                String oneLetter = cc.getOne_letter_code();
145                if ( oneLetter == null || oneLetter.equals("X") || oneLetter.equals("?")) {
146                        String parentId = cc.getMon_nstd_parent_comp_id() ;
147                        if ( parentId == null)
148                                return oneLetter;
149                        // cases like OIM have multiple parents (comma separated), we shouldn't try grab a chemcomp for those strings
150                        if (parentId.length()>3) 
151                                return oneLetter;                       
152                        ChemComp parentCC = ChemCompGroupFactory.getChemComp(parentId);
153                        if ( parentCC == null)
154                                return oneLetter;
155                        oneLetter = parentCC.getOne_letter_code();
156                }
157                return oneLetter;
158        }
159
160}