001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmcif;
022
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.zip.GZIPOutputStream;

import org.biojava.nbio.core.util.InputStreamProvider;
import org.biojava.nbio.structure.align.util.HTTPConnectionTools;
import org.biojava.nbio.structure.align.util.UserConfiguration;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
048
049
050
051/** This provider of chemical components can download and cache chemical component definition files from the RCSB PDB web site.
052 *  It is the default way to access these definitions.
053 *  If this provider is called he first time, it will download and install all chemical
054 *  component definitions in a local directory.
055 *  Once the definition files have been installed, it has quick startup time and low memory requirements.
056 *
057 *  An alternative provider, that keeps all definitions in memory is the {@link AllChemCompProvider}. Another provider, that
058 *  does not require any network access, but only can support a limited set of chemical component definitions, is the {@link ReducedChemCompProvider}.
059 *
060 *
061 * @author Andreas Prlic
062 *
063 */
064public class DownloadChemCompProvider implements ChemCompProvider {
065
066        private static final Logger logger = LoggerFactory.getLogger(DownloadChemCompProvider.class);
067
068        public static final String CHEM_COMP_CACHE_DIRECTORY = "chemcomp";
069
070        public static final String SERVER_LOCATION = "http://files.rcsb.org/ligands/download/";
071
072
073        private static File path;
074        //private static final String FILE_SEPARATOR = System.getProperty("file.separator");
075        private static final String NEWLINE = System.getProperty("line.separator");
076
077
078        // flags to make sure there is only one thread running that is loading the dictionary
079        static AtomicBoolean loading = new AtomicBoolean(false);
080
081        static final List<String> protectedIDs = new ArrayList<String> ();
082        static {
083                protectedIDs.add("CON");
084                protectedIDs.add("PRN");
085                protectedIDs.add("AUX");
086                protectedIDs.add("NUL");
087        }
088
089        /** by default we will download only some of the files. User has to request that all files should be downloaded...
090         *
091         */
092        boolean downloadAll = false;
093
094        public DownloadChemCompProvider(){
095                logger.debug("Initialising DownloadChemCompProvider");
096
097                // note that path is static, so this is just to make sure that all non-static methods will have path initialised
098                initPath();
099        }
100
101        public DownloadChemCompProvider(String cacheFilePath){
102                logger.debug("Initialising DownloadChemCompProvider");
103
104                // note that path is static, so this is just to make sure that all non-static methods will have path initialised
105                path = new File(cacheFilePath);
106        }
107
108        private static void initPath(){
109
110                if (path==null) {
111                        UserConfiguration config = new UserConfiguration();
112                        path = new File(config.getCacheFilePath());
113                }
114        }
115
116        /**
117         * Checks if the chemical components already have been installed into the PDB directory.
118         * If not, will download the chemical components definitions file and split it up into small
119         * subfiles.
120         */
121        public void checkDoFirstInstall(){
122
123                if ( ! downloadAll ) {
124                        return;
125                }
126
127
128                // this makes sure there is a file separator between every component,
129                // if path has a trailing file separator or not, it will work for both cases
130                File dir = new File(path, CHEM_COMP_CACHE_DIRECTORY);
131                File f = new File(dir, "components.cif.gz");
132
133                if ( ! f.exists()) {
134
135                        downloadAllDefinitions();
136
137                } else {
138                        // file exists.. did it get extracted?
139
140                        FilenameFilter filter =new FilenameFilter() {
141
142                                @Override
143                                public boolean accept(File dir, String file) {
144                                        return file.endsWith(".cif.gz");
145                                }
146                        };
147                        String[] files = dir.list(filter);
148                        if ( files.length < 500) {
149                                // not all did get unpacked
150                                try {
151                                        split();
152                                } catch (IOException e) {
153                                        logger.error("Could not split file {} into individual chemical component files. Error: {}",
154                                                        f.toString(), e.getMessage());
155                                }
156                        }
157                }
158        }
159
160        private void split() throws IOException {
161
162                logger.info("Installing individual chem comp files ...");
163
164                File dir = new File(path, CHEM_COMP_CACHE_DIRECTORY);
165                File f = new File(dir, "components.cif.gz");
166
167
168                int counter = 0;
169                InputStreamProvider prov = new InputStreamProvider();
170
171                try( BufferedReader buf = new BufferedReader (new InputStreamReader (prov.getInputStream(f)));
172                                ) {
173                        String line = null;
174                        line = buf.readLine ();
175                        StringWriter writer = new StringWriter();
176
177                        String currentID = null;
178                        while (line != null){
179
180                                if ( line.startsWith("data_")) {
181                                        // a new record found!
182
183                                        if ( currentID != null) {
184                                                writeID(writer.toString(), currentID);
185                                                counter++;
186                                        }
187
188                                        currentID = line.substring(5);
189                                        writer = new StringWriter();
190                                }
191
192                                writer.append(line);
193                                writer.append(NEWLINE);
194
195                                line = buf.readLine ();
196                        }
197
198                        // write the last record...
199                        writeID(writer.toString(),currentID);
200                        counter++;
201
202                }
203
204                logger.info("Created " + counter + " chemical component files.");
205        }
206
207        /**
208         * Output chemical contents to a file
209         * @param contents File contents
210         * @param currentID Chemical ID, used to determine the filename
211         * @throws IOException
212         */
213        private void writeID(String contents, String currentID) throws IOException{
214
215                String localName = DownloadChemCompProvider.getLocalFileName(currentID);
216
217                try ( PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(localName))) ) {
218
219                        pw.print(contents.toString());
220                        pw.flush();
221                }
222        }
223
224        /**
225         * Loads the definitions for this {@link ChemComp} from a local file and instantiates a new object.
226         *
227         * @param recordName the ID of the {@link ChemComp}
228         * @return a new {@link ChemComp} definition.
229         */
230        @Override
231        public  ChemComp getChemComp(String recordName) {
232
233                // make sure we work with upper case records
234                recordName = recordName.toUpperCase().trim();
235
236                boolean haveFile = true;
237                if ( recordName.equals("?")){
238                        return null;
239                }
240
241                if ( ! fileExists(recordName)) {
242                        // check if we should install all components
243                        checkDoFirstInstall();
244                }
245                if ( ! fileExists(recordName)) {
246                        // we previously have installed already the definitions,
247                        // just do an incrememntal update
248                        haveFile = downloadChemCompRecord(recordName);
249                }
250
251                // Added check that download was successful and chemical component is available.
252                if (haveFile) {
253                        String filename = getLocalFileName(recordName);
254                        InputStream inStream = null;
255                        try {
256
257                                InputStreamProvider isp = new InputStreamProvider();
258
259                                inStream = isp.getInputStream(filename);
260
261                                MMcifParser parser = new SimpleMMcifParser();
262
263                                ChemCompConsumer consumer = new ChemCompConsumer();
264
265                                // The Consumer builds up the BioJava - structure object.
266                                // you could also hook in your own and build up you own data model.
267                                parser.addMMcifConsumer(consumer);
268
269                                parser.parse(new BufferedReader(new InputStreamReader(inStream)));
270
271                                ChemicalComponentDictionary dict = consumer.getDictionary();
272
273                                ChemComp chemComp = dict.getChemComp(recordName);
274
275                                return chemComp;
276
277                        } catch (IOException e) {
278
279                                logger.error("Could not parse chemical component file {}. Error: {}. "
280                                                + "There will be no chemical component info available for {}", filename, e.getMessage(), recordName);
281
282                        }
283                        finally{
284                                // Now close it
285                                if(inStream!=null){
286                                        try {
287                                                inStream.close();
288                                        } catch (IOException e) {
289                                                // This would be weird...
290                                                logger.error("Could not close chemical component file {}. A resource leak could occur!!", filename);
291                                        }
292                                }
293
294                        }
295                }
296
297                // see https://github.com/biojava/biojava/issues/315
298                // probably a network error happened. Try to use the ReducedChemCOmpProvider
299                ReducedChemCompProvider reduced = new ReducedChemCompProvider();
300
301                return reduced.getChemComp(recordName);
302
303        }
304
305        /** Returns the file name that contains the definition for this {@link ChemComp}
306         *
307         * @param recordName the ID of the {@link ChemComp}
308         * @return full path to the file
309         */
310        public static String getLocalFileName(String recordName){
311
312                if ( protectedIDs.contains(recordName)){
313                        recordName = "_" + recordName;
314                }
315
316                initPath();
317
318                File f = new File(path, CHEM_COMP_CACHE_DIRECTORY);
319                if (! f.exists()){
320                        logger.info("Creating directory " + f);
321
322                        boolean success = f.mkdir();
323                        // we've checked in initPath that path is writable, so there's no need to check if it succeeds
324                        // in the unlikely case that in the meantime it isn't writable at least we log an error
325                        if (!success) logger.error("Directory {} could not be created",f);
326
327                }
328
329                File theFile = new File(f,recordName + ".cif.gz");
330
331                return theFile.toString();
332        }
333
334        private static  boolean fileExists(String recordName){
335
336                String fileName = getLocalFileName(recordName);
337
338                File f = new File(fileName);
339
340                return f.exists();
341
342        }
343
344        /**
345         * @param recordName : three-letter name
346         * @return true if successful download
347         */
348        private static boolean downloadChemCompRecord(String recordName) {
349
350                String localName = getLocalFileName(recordName);
351                File newFile;
352                try{
353                        newFile = File.createTempFile("chemcomp"+recordName, "cif");
354                }
355                catch(IOException e){
356                        logger.error("Could not write to temp directory {} to create the chemical component download temp file", System.getProperty("java.io.tmpdir"));
357                        return false;
358                }
359                String u = SERVER_LOCATION + recordName + ".cif";
360
361                logger.debug("downloading " + u);
362
363                URL url = null;
364
365
366                try {
367                        url = new URL(u);
368
369                        HttpURLConnection uconn = HTTPConnectionTools.openHttpURLConnection(url);
370
371                        try( PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(newFile)));
372                                        BufferedReader fileBuffer = new BufferedReader(new InputStreamReader(uconn.getInputStream()));
373                                        ) {
374
375                                String line;
376
377                                while ((line = fileBuffer.readLine()) != null) {
378                                        pw.println(line);
379                                }
380
381                                pw.flush();
382                        }
383                        // Now we move this across to where it actually wants to be
384                        Files.move(newFile.toPath(), Paths.get(localName), StandardCopyOption.REPLACE_EXISTING);
385
386                        return true;
387                }  catch (IOException e){
388                        logger.error("Could not download "+url.toString()+" OR store locally to "+localName+" Error ="+e.getMessage());
389                        newFile.delete();
390                }
391                return false;
392        }
393
394        private void downloadAllDefinitions() {
395
396                if ( loading.get()){
397                        logger.info("Waiting for other thread to install chemical components...");
398                }
399
400                while ( loading.get() ) {
401
402                        // another thread is already downloading the components definitions
403                        // wait for the other thread to finish...
404
405                        try {
406                                // wait half a second
407
408                                Thread.sleep(500);
409                        } catch (InterruptedException e) {
410                                //e.printStackTrace();
411                                logger.error("Thread interrupted "+e.getMessage());
412                        }
413
414                        logger.info("Another thread installed the chemical components.");
415                        return;
416
417                }
418
419                loading.set(true);
420                long timeS = System.currentTimeMillis();
421
422                logger.info("Performing first installation of chemical components.");
423                logger.info("Downloading components.cif.gz ...");
424
425
426                try {
427                        AllChemCompProvider.downloadFile();
428                } catch (IOException e){
429                        logger.error("Could not download the all chemical components file. Error: {}. "
430                                        + "Chemical components information won't be available", e.getMessage());
431                        // no point in trying to split if the file could not be downloaded
432                        loading.set(false);
433                        return;
434                }
435                try {
436                        split();
437                } catch (IOException e) {
438                        logger.error("Could not split all chem comp file into individual chemical component files. Error: {}",
439                                 e.getMessage());
440                        // no point in reporting time
441                        loading.set(false);
442                        return;
443                }
444                long timeE = System.currentTimeMillis();
445                logger.info("time to install chem comp dictionary: " + (timeE - timeS) / 1000 + " sec.");
446                loading.set(false);
447
448        }
449
450        /** By default this provider will download only some of the {@link ChemComp} files.
451         * The user has to request that all files should be downloaded by setting this parameter to true.
452         *
453         *  @return flag if the all components should be downloaded and installed at startup. (default: false)
454         */
455        public boolean isDownloadAll() {
456                return downloadAll;
457        }
458
459        /** By default this provider will download only some of the {@link ChemComp} files.
460         * The user has to request that all files should be downloaded by setting this parameter to true.
461         *
462         * @param  flag if the all components should be downloaded and installed at startup. (default: false)
463         */
464        public void setDownloadAll(boolean downloadAll) {
465                this.downloadAll = downloadAll;
466        }
467
468
469
470
471
472}