001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmcif;
022
023import org.biojava.nbio.structure.align.util.UserConfiguration;
024import org.biojava.nbio.structure.io.LocalPDBDirectory;
025import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
026import org.biojava.nbio.core.util.InputStreamProvider;
027import org.slf4j.Logger;
028import org.slf4j.LoggerFactory;
029
030import java.io.*;
031import java.net.URL;
032import java.util.concurrent.atomic.AtomicBoolean;
033
034/**
035 * A ChemComp provider that downloads and caches the components.cif file from the wwPDB site. It then loads
036 * all chemical components at startup and keeps them in memory. This provider is not used as a default
037 * since it is slower at startup and requires more memory than the {@link DownloadChemCompProvider} that is used by default.
038 *
039 * @author Andreas Prlic
040 *
041 */
042public class AllChemCompProvider implements ChemCompProvider, Runnable{
043
044        private static final Logger logger = LoggerFactory.getLogger(AllChemCompProvider.class);
045
046        public static final String COMPONENTS_FILE_LOCATION = "pub/pdb/data/monomers/components.cif.gz";
047
048
049        private static String path;
050
051        private static String serverName;
052
053
054        // there will be only one copy of the dictionary across all instances
055        // to reduce memory impact
056        static ChemicalComponentDictionary dict;
057
058        // flags to make sure there is only one thread running that is loading the dictionary
059        static AtomicBoolean loading       = new AtomicBoolean(false);
060        static AtomicBoolean isInitialized = new AtomicBoolean(false);
061
062        public AllChemCompProvider(){
063
064                if ( loading.get()) {
065                        logger.warn("other thread is already loading all chemcomps, no need to init twice");
066                        return;
067                }
068                if ( isInitialized.get())
069                        return;
070
071                loading.set(true);
072
073                Thread t = new Thread(this);
074                t.start();
075
076        }
077
078
079        /** make sure all paths are initialized correctly
080         *
081         */
082        private static void initPath(){
083
084                if (path==null) {
085                        UserConfiguration config = new UserConfiguration();
086                        path = config.getCacheFilePath();
087                }
088        }
089
090        private static void initServerName() {
091
092                if (serverName==null) {
093                        serverName = LocalPDBDirectory.getServerName();
094                }
095        }
096
097        private void ensureFileExists() {
098
099
100                String fileName = getLocalFileName();
101                File f = new File(fileName);
102
103                if ( ! f.exists()) {
104                        try {
105                        downloadFile();
106                        } catch (IOException e) {
107                                logger.error("Caught IOException",e);
108                        }
109                }
110
111
112
113        }
114
115        /** Downloads the components.cif.gz file from the wwPDB site.
116         *
117         */
118        public static void downloadFile() throws IOException {
119
120                initPath();
121
122                initServerName();
123
124                String localName = getLocalFileName();
125
126                String u = serverName + "/" + COMPONENTS_FILE_LOCATION;
127
128                downloadFileFromRemote(new URL(u), new File(localName));
129
130
131        }
132
133
134        private static  void downloadFileFromRemote(URL remoteURL, File localFile) throws FileNotFoundException, IOException{
135                logger.info("Downloading " + remoteURL + " to: " + localFile);
136                FileOutputStream out = new FileOutputStream(localFile);
137
138                InputStream in = remoteURL.openStream();
139                byte[] buf = new byte[4 * 1024]; // 4K buffer
140                int bytesRead;
141                while ((bytesRead = in.read(buf)) != -1) {
142                        out.write(buf, 0, bytesRead);
143                }
144                in.close();
145                out.close();
146
147
148        }
149
150
151        private static String getLocalFileName(){
152
153                File dir = new File(path, DownloadChemCompProvider.CHEM_COMP_CACHE_DIRECTORY);
154
155                if (! dir.exists()){
156                        logger.info("Creating directory {}", dir.toString());
157                        dir.mkdir();
158                }
159
160                String fileName = new File(dir, "components.cif.gz").toString();
161
162                return fileName;
163        }
164
165        /** Load all {@link ChemComp} definitions into memory.
166         *
167         */
168        private void loadAllChemComps() throws IOException {
169                String fileName = getLocalFileName();
170                logger.debug("Loading " + fileName);
171                InputStreamProvider isp = new InputStreamProvider();
172
173
174                InputStream inStream = isp.getInputStream(fileName);
175
176                MMcifParser parser = new SimpleMMcifParser();
177
178                ChemCompConsumer consumer = new ChemCompConsumer();
179
180                // The Consumer builds up the BioJava - structure object.
181                // you could also hook in your own and build up you own data model.
182                parser.addMMcifConsumer(consumer);
183
184                parser.parse(new BufferedReader(new InputStreamReader(inStream)));
185
186                dict = consumer.getDictionary();
187
188                inStream.close();
189
190        }
191
192
193        /** {@inheritDoc}
194         *
195         */
196        @Override
197        public ChemComp getChemComp(String recordName) {
198
199                while ( loading.get()) {
200
201                        // another thread is still initializing the definitions
202                        try {
203                                // wait half a second
204
205                                Thread.sleep(500);
206                        } catch (InterruptedException e) {
207                                logger.error("Interrepted thread while waiting: "+e.getMessage());
208                                //e.printStackTrace();
209                        }
210                }
211
212
213
214                return dict.getChemComp(recordName);
215        }
216
217
218        /** Do the actual loading of the dictionary in a thread.
219         *
220         */
221        @Override
222        public void run() {
223                long timeS = System.currentTimeMillis();
224
225                initPath();
226
227                ensureFileExists();
228
229                try {
230                        loadAllChemComps();
231
232                        long timeE = System.currentTimeMillis();
233                        logger.debug("Time to init chem comp dictionary: " + (timeE - timeS) / 1000 + " sec.");
234
235
236                } catch (IOException e) {
237                        logger.error("Could not load chemical components definition file {}. Error: {}", getLocalFileName(), e.getMessage());
238
239                } finally {
240                        loading.set(false);
241                        isInitialized.set(true);
242                }
243        }
244
245}