001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmcif;
022
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.zip.GZIPOutputStream;

import org.biojava.nbio.core.util.InputStreamProvider;
import org.biojava.nbio.structure.align.util.URLConnectionTools;
import org.biojava.nbio.structure.align.util.UserConfiguration;
import org.biojava.nbio.structure.io.LocalPDBDirectory;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
049
050
051
052/**
053 * This provider of chemical components can download and cache chemical component definition files from the RCSB PDB web site.
054 *  It is the default way to access these definitions.
055 *  If this provider is called he first time, it will download and install all chemical
056 *  component definitions in a local directory.
057 *  Once the definition files have been installed, it has quick startup time and low memory requirements.
058 *
059 *  An alternative provider, that keeps all definitions in memory is the {@link AllChemCompProvider}. Another provider, that
060 *  does not require any network access, but only can support a limited set of chemical component definitions, is the {@link ReducedChemCompProvider}.
061 *
062 *
063 * @author Andreas Prlic
064 *
065 */
066public class DownloadChemCompProvider implements ChemCompProvider {
067
068        private static final Logger logger = LoggerFactory.getLogger(DownloadChemCompProvider.class);
069
070        public static final String CHEM_COMP_CACHE_DIRECTORY = "chemcomp";
071
072        public static final String DEFAULT_SERVER_URL = "http://files.rcsb.org/ligands/download/";
073
074        public static String serverBaseUrl = DEFAULT_SERVER_URL;
075
076        /**
077         * Use default RCSB server layout (true) or internal RCSB server layout (false)
078         */
079        public static boolean useDefaultUrlLayout = true;
080
081
082        private static File path;
083        //private static final String FILE_SEPARATOR = System.getProperty("file.separator");
084        private static final String NEWLINE = System.getProperty("line.separator");
085
086
087        // flags to make sure there is only one thread running that is loading the dictionary
088        static AtomicBoolean loading = new AtomicBoolean(false);
089
090        static final List<String> protectedIDs = new ArrayList<String> ();
091        static {
092                protectedIDs.add("CON");
093                protectedIDs.add("PRN");
094                protectedIDs.add("AUX");
095                protectedIDs.add("NUL");
096        }
097
098        private static ChemCompProvider fallback = null; // Fallback provider if the download fails
099
100        /** by default we will download only some of the files. User has to request that all files should be downloaded...
101         *
102         */
103        boolean downloadAll = false;
104
105        public DownloadChemCompProvider(){
106                this(null);
107        }
108
109        public DownloadChemCompProvider(String cacheFilePath){
110                logger.debug("Initialising DownloadChemCompProvider");
111
112                // note that path is static, so this is just to make sure that all non-static methods will have path initialised
113                if(cacheFilePath != null) {
114                        path = new File(cacheFilePath);
115                }
116        }
117
118        /**
119         * Get this provider's cache path
120         * @return
121         */
122        public static File getPath(){
123                if (path==null) {
124                        UserConfiguration config = new UserConfiguration();
125                        path = new File(config.getCacheFilePath());
126                }
127                return path;
128        }
129
130        /**
131         * Checks if the chemical components already have been installed into the PDB directory.
132         * If not, will download the chemical components definitions file and split it up into small
133         * subfiles.
134         */
135        public void checkDoFirstInstall(){
136
137                if ( ! downloadAll ) {
138                        return;
139                }
140
141
142                // this makes sure there is a file separator between every component,
143                // if path has a trailing file separator or not, it will work for both cases
144                File dir = new File(getPath(), CHEM_COMP_CACHE_DIRECTORY);
145                File f = new File(dir, "components.cif.gz");
146
147                if ( ! f.exists()) {
148
149                        downloadAllDefinitions();
150
151                } else {
152                        // file exists.. did it get extracted?
153
154                        FilenameFilter filter =new FilenameFilter() {
155
156                                @Override
157                                public boolean accept(File dir, String file) {
158                                        return file.endsWith(".cif.gz");
159                                }
160                        };
161                        String[] files = dir.list(filter);
162                        if ( files.length < 500) {
163                                // not all did get unpacked
164                                try {
165                                        split();
166                                } catch (IOException e) {
167                                        logger.error("Could not split file {} into individual chemical component files. Error: {}",
168                                                        f.toString(), e.getMessage());
169                                }
170                        }
171                }
172        }
173
174        private void split() throws IOException {
175
176                logger.info("Installing individual chem comp files ...");
177
178                File dir = new File(getPath(), CHEM_COMP_CACHE_DIRECTORY);
179                File f = new File(dir, "components.cif.gz");
180
181
182                int counter = 0;
183                InputStreamProvider prov = new InputStreamProvider();
184
185                try( BufferedReader buf = new BufferedReader (new InputStreamReader (prov.getInputStream(f)));
186                                ) {
187                        String line = null;
188                        line = buf.readLine ();
189                        StringWriter writer = new StringWriter();
190
191                        String currentID = null;
192                        while (line != null){
193
194                                if ( line.startsWith("data_")) {
195                                        // a new record found!
196
197                                        if ( currentID != null) {
198                                                writeID(writer.toString(), currentID);
199                                                counter++;
200                                        }
201
202                                        currentID = line.substring(5);
203                                        writer = new StringWriter();
204                                }
205
206                                writer.append(line);
207                                writer.append(NEWLINE);
208
209                                line = buf.readLine ();
210                        }
211
212                        // write the last record...
213                        writeID(writer.toString(),currentID);
214                        counter++;
215
216                }
217
218                logger.info("Created " + counter + " chemical component files.");
219        }
220
221        /**
222         * Output chemical contents to a file
223         * @param contents File contents
224         * @param currentID Chemical ID, used to determine the filename
225         * @throws IOException
226         */
227        private void writeID(String contents, String currentID) throws IOException{
228
229                String localName = getLocalFileName(currentID);
230
231                try ( PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(localName))) ) {
232
233                        pw.print(contents);
234                        pw.flush();
235                }
236        }
237
238        /**
239         * Loads the definitions for this {@link ChemComp} from a local file and instantiates a new object.
240         *
241         * @param recordName the ID of the {@link ChemComp}
242         * @return a new {@link ChemComp} definition.
243         */
244        @Override
245        public  ChemComp getChemComp(String recordName) {
246
247                // make sure we work with upper case records
248                recordName = recordName.toUpperCase().trim();
249
250                boolean haveFile = true;
251                if ( recordName.equals("?")){
252                        return null;
253                }
254
255                if ( ! fileExists(recordName)) {
256                        // check if we should install all components
257                        checkDoFirstInstall();
258                }
259                if ( ! fileExists(recordName)) {
260                        // we previously have installed already the definitions,
261                        // just do an incrememntal update
262                        haveFile = downloadChemCompRecord(recordName);
263                }
264
265                // Added check that download was successful and chemical component is available.
266                if (haveFile) {
267                        String filename = getLocalFileName(recordName);
268                        InputStream inStream = null;
269                        try {
270
271                                InputStreamProvider isp = new InputStreamProvider();
272
273                                inStream = isp.getInputStream(filename);
274
275                                MMcifParser parser = new SimpleMMcifParser();
276
277                                ChemCompConsumer consumer = new ChemCompConsumer();
278
279                                // The Consumer builds up the BioJava - structure object.
280                                // you could also hook in your own and build up you own data model.
281                                parser.addMMcifConsumer(consumer);
282
283                                parser.parse(new BufferedReader(new InputStreamReader(inStream)));
284
285                                ChemicalComponentDictionary dict = consumer.getDictionary();
286
287                                ChemComp chemComp = dict.getChemComp(recordName);
288
289                                // May be null if the file was corrupt. Fall back on ReducedChemCompProvider in that case
290                                if(chemComp != null) {
291                                        return chemComp;
292                                }
293
294                        } catch (IOException e) {
295
296                                logger.warn(
297                                                "Could not download chemical component file {} for {}. Error: {}. Now trying to use the local chemical component definitions.",
298                                                filename, recordName, e.getMessage());
299
300                        }
301                        finally{
302                                // Now close it
303                                if(inStream!=null){
304                                        try {
305                                                inStream.close();
306                                        } catch (IOException e) {
307                                                // This would be weird...
308                                                logger.error("Could not close chemical component file {}. A resource leak could occur!!", filename);
309                                        }
310                                }
311
312                        }
313                }
314
315                // see https://github.com/biojava/biojava/issues/315
316                // probably a network error happened. Try to use the ReducedChemCOmpProvider
317                if( fallback == null) {
318                        fallback = new ReducedChemCompProvider();
319                }
320
321                logger.warn("Falling back to ReducedChemCompProvider for {}. This could indicate a network error.", recordName);
322                return fallback.getChemComp(recordName);
323
324        }
325
326        /**
327         * Returns the file name that contains the definition for this {@link ChemComp}
328         *
329         * @param recordName the ID of the {@link ChemComp}
330         * @return full path to the file
331         */
332        public static String getLocalFileName(String recordName){
333
334                if ( protectedIDs.contains(recordName)){
335                        recordName = "_" + recordName;
336                }
337
338                File f = new File(getPath(), CHEM_COMP_CACHE_DIRECTORY);
339                if (! f.exists()){
340                        logger.info("Creating directory " + f);
341
342                        boolean success = f.mkdir();
343                        // we've checked in initPath that path is writable, so there's no need to check if it succeeds
344                        // in the unlikely case that in the meantime it isn't writable at least we log an error
345                        if (!success)
346                                logger.error("Directory {} could not be created",f);
347
348                }
349
350                File theFile = new File(f,recordName + ".cif.gz");
351
352                return theFile.toString();
353        }
354
355        private static  boolean fileExists(String recordName){
356
357                String fileName = getLocalFileName(recordName);
358
359                File f = new File(fileName);
360
361                // delete files that are too short to have contents
362                if( f.length() < LocalPDBDirectory.MIN_PDB_FILE_SIZE ) {
363                        // Delete defensively.
364                        // Note that if delete is unsuccessful, we re-download the file anyways
365                        f.delete();
366                        return false;
367                }
368
369                return f.exists();
370
371        }
372
373        /**
374         * @param recordName : three-letter name
375         * @return true if successful download
376         */
377        private static boolean downloadChemCompRecord(String recordName) {
378
379                String localName = getLocalFileName(recordName);
380                File newFile;
381                try{
382                        newFile = File.createTempFile("chemcomp"+recordName, "cif");
383                        logger.debug("Will write chem comp file to temp file {}", newFile.toString());
384                }
385                catch(IOException e){
386                        logger.error("Could not write to temp directory {} to create the chemical component download temp file", System.getProperty("java.io.tmpdir"));
387                        return false;
388                }
389                String u;
390                if(useDefaultUrlLayout){
391                        u = serverBaseUrl + recordName + ".cif";
392                }
393                else{
394                        u = serverBaseUrl + recordName.charAt(0) + "/"  + recordName +"/" + recordName + ".cif";
395                }
396
397                logger.debug("downloading " + u);
398
399                URL url = null;
400
401
402                try {
403                        url = new URL(u);
404                        URLConnection uconn = URLConnectionTools.openURLConnection(url);
405
406                        try( PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(newFile)));
407                                        BufferedReader fileBuffer = new BufferedReader(new InputStreamReader(uconn.getInputStream()));
408                                        ) {
409
410                                String line;
411
412                                while ((line = fileBuffer.readLine()) != null) {
413                                        pw.println(line);
414                                }
415
416                                pw.flush();
417                        }
418                        // Now we move this across to where it actually wants to be
419                        Files.move(newFile.toPath(), Paths.get(localName), StandardCopyOption.REPLACE_EXISTING);
420
421                        return true;
422                }  catch (IOException e){
423                        logger.error("Could not download "+url.toString()+" OR store locally to "+localName+" Error ="+e.getMessage());
424                        newFile.delete();
425                }
426                return false;
427        }
428
429        private void downloadAllDefinitions() {
430
431                if ( loading.get()){
432                        logger.info("Waiting for other thread to install chemical components...");
433                }
434
435                while ( loading.get() ) {
436
437                        // another thread is already downloading the components definitions
438                        // wait for the other thread to finish...
439
440                        try {
441                                // wait half a second
442
443                                Thread.sleep(500);
444                        } catch (InterruptedException e) {
445                                //e.printStackTrace();
446                                logger.error("Thread interrupted "+e.getMessage());
447                        }
448
449                        logger.info("Another thread installed the chemical components.");
450                        return;
451
452                }
453
454                loading.set(true);
455                long timeS = System.currentTimeMillis();
456
457                logger.info("Performing first installation of chemical components.");
458                logger.info("Downloading components.cif.gz ...");
459
460
461                try {
462                        AllChemCompProvider.downloadFile();
463                } catch (IOException e){
464                        logger.error("Could not download the all chemical components file. Error: {}. "
465                                        + "Chemical components information won't be available", e.getMessage());
466                        // no point in trying to split if the file could not be downloaded
467                        loading.set(false);
468                        return;
469                }
470                try {
471                        split();
472                } catch (IOException e) {
473                        logger.error("Could not split all chem comp file into individual chemical component files. Error: {}",
474                                 e.getMessage());
475                        // no point in reporting time
476                        loading.set(false);
477                        return;
478                }
479                long timeE = System.currentTimeMillis();
480                logger.info("time to install chem comp dictionary: " + (timeE - timeS) / 1000 + " sec.");
481                loading.set(false);
482
483        }
484
485        /** By default this provider will download only some of the {@link ChemComp} files.
486         * The user has to request that all files should be downloaded by setting this parameter to true.
487         *
488         *  @return flag if the all components should be downloaded and installed at startup. (default: false)
489         */
490        public boolean isDownloadAll() {
491                return downloadAll;
492        }
493
494        /** By default this provider will download only some of the {@link ChemComp} files.
495         * The user has to request that all files should be downloaded by setting this parameter to true.
496         *
497         * @param  flag if the all components should be downloaded and installed at startup. (default: false)
498         */
499        public void setDownloadAll(boolean downloadAll) {
500                this.downloadAll = downloadAll;
501        }
502
503
504
505
506
507}