001/*
002
003 *                    BioJava development code
004 *
005 * This code may be freely distributed and modified under the
006 * terms of the GNU Lesser General Public Licence.  This should
007 * be distributed with the code.  If you do not have a copy,
008 * see:
009 *
010 *      http://www.gnu.org/copyleft/lesser.html
011 *
012 * Copyright for this code is held jointly by the individual
013 * authors.  These should be listed in @author doc comments.
014 *
015 * For more information on the BioJava project and its aims,
016 * or to join the biojava-l mailing list, visit the home page
017 * at:
018 *
019 *      http://www.biojava.org/
020 *
021 */
022package org.biojava.nbio.structure.io.mmcif;
023
024import org.biojava.nbio.structure.align.util.UserConfiguration;
025import org.biojava.nbio.structure.io.LocalPDBDirectory;
026import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
027import org.biojava.nbio.core.util.InputStreamProvider;
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030
031import java.io.*;
032import java.net.URL;
033import java.util.concurrent.atomic.AtomicBoolean;
034
035/**
036 * A ChemComp provider that downloads and caches the components.cif file from the wwPDB site. It then loads
037 * all chemical components at startup and keeps them in memory. This provider is not used as a default
038 * since it is slower at startup and requires more memory than the {@link DownloadChemCompProvider} that is used by default.
039 *
040 * @author Andreas Prlic
041 *
042 */
043public class AllChemCompProvider implements ChemCompProvider, Runnable{
044
045        private static final Logger logger = LoggerFactory.getLogger(AllChemCompProvider.class);
046
047        public static final String COMPONENTS_FILE_LOCATION = "pub/pdb/data/monomers/components.cif.gz";
048
049
050        private static String path;
051
052        private static String serverName;
053
054
055        // there will be only one copy of the dictionary across all instances
056        // to reduce memory impact
057        static ChemicalComponentDictionary dict;
058
059        // flags to make sure there is only one thread running that is loading the dictionary
060        static AtomicBoolean loading       = new AtomicBoolean(false);
061        static AtomicBoolean isInitialized = new AtomicBoolean(false);
062
063        public AllChemCompProvider(){
064
065                if ( loading.get()) {
066                        logger.warn("other thread is already loading all chemcomps, no need to init twice");
067                        return;
068                }
069                if ( isInitialized.get())
070                        return;
071
072                loading.set(true);
073
074                Thread t = new Thread(this);
075                t.start();
076
077        }
078
079
080        /** make sure all paths are initialized correctly
081         *
082         */
083        private static void initPath(){
084
085                if (path==null) {
086                        UserConfiguration config = new UserConfiguration();
087                        path = config.getCacheFilePath();
088                }
089        }
090
091        private static void initServerName() {
092
093                if (serverName==null) {
094                        serverName = LocalPDBDirectory.getServerName();
095                }
096        }
097
098        private void ensureFileExists() {
099
100
101                String fileName = getLocalFileName();
102                File f = new File(fileName);
103
104                if ( ! f.exists()) {
105                        try {
106                        downloadFile();
107                        } catch (IOException e) {
108                                logger.error("Caught IOException",e);
109                        }
110                }
111
112
113
114        }
115
116        /** Downloads the components.cif.gz file from the wwPDB site.
117         *
118         */
119        public static void downloadFile() throws IOException {
120
121                initPath();
122
123                initServerName();
124
125                String localName = getLocalFileName();
126
127                String u = serverName + "/" + COMPONENTS_FILE_LOCATION;
128
129                downloadFileFromRemote(new URL(u), new File(localName));
130
131
132        }
133
134
135        private static  void downloadFileFromRemote(URL remoteURL, File localFile) throws FileNotFoundException, IOException{
136                logger.info("Downloading " + remoteURL + " to: " + localFile);
137                FileOutputStream out = new FileOutputStream(localFile);
138
139                InputStream in = remoteURL.openStream();
140                byte[] buf = new byte[4 * 1024]; // 4K buffer
141                int bytesRead;
142                while ((bytesRead = in.read(buf)) != -1) {
143                        out.write(buf, 0, bytesRead);
144                }
145                in.close();
146                out.close();
147
148
149        }
150
151
152        private static String getLocalFileName(){
153
154                File dir = new File(path, DownloadChemCompProvider.CHEM_COMP_CACHE_DIRECTORY);
155
156                if (! dir.exists()){
157                        logger.info("Creating directory {}", dir.toString());
158                        dir.mkdir();
159                }
160
161                String fileName = new File(dir, "components.cif.gz").toString();
162
163                return fileName;
164        }
165
166        /** Load all {@link ChemComp} definitions into memory.
167         *
168         */
169        private void loadAllChemComps() throws IOException {
170                String fileName = getLocalFileName();
171                logger.debug("Loading " + fileName);
172                InputStreamProvider isp = new InputStreamProvider();
173
174
175                InputStream inStream = isp.getInputStream(fileName);
176
177                MMcifParser parser = new SimpleMMcifParser();
178
179                ChemCompConsumer consumer = new ChemCompConsumer();
180
181                // The Consumer builds up the BioJava - structure object.
182                // you could also hook in your own and build up you own data model.
183                parser.addMMcifConsumer(consumer);
184
185                parser.parse(new BufferedReader(new InputStreamReader(inStream)));
186
187                dict = consumer.getDictionary();
188
189                inStream.close();
190
191        }
192
193
194        /** {@inheritDoc}
195         *
196         */
197        @Override
198        public ChemComp getChemComp(String recordName) {
199
200                while ( loading.get()) {
201
202                        // another thread is still initializing the definitions
203                        try {
204                                // wait half a second
205
206                                Thread.sleep(500);
207                        } catch (InterruptedException e) {
208                                logger.error("Interrepted thread while waiting: "+e.getMessage());
209                                //e.printStackTrace();
210                        }
211                }
212
213
214
215                return dict.getChemComp(recordName);
216        }
217
218
219        /** Do the actual loading of the dictionary in a thread.
220         *
221         */
222        @Override
223        public void run() {
224                long timeS = System.currentTimeMillis();
225
226                initPath();
227
228                ensureFileExists();
229
230                try {
231                        loadAllChemComps();
232
233                        long timeE = System.currentTimeMillis();
234                        logger.debug("Time to init chem comp dictionary: " + (timeE - timeS) / 1000 + " sec.");
235
236
237                } catch (IOException e) {
238                        logger.error("Could not load chemical components definition file {}. Error: {}", getLocalFileName(), e.getMessage());
239
240                } finally {
241                        loading.set(false);
242                        isInitialized.set(true);
243                }
244        }
245
246}