001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmcif;
022
023import java.io.BufferedReader;
024import java.io.File;
025import java.io.FileOutputStream;
026import java.io.FilenameFilter;
027import java.io.IOException;
028import java.io.InputStream;
029import java.io.InputStreamReader;
030import java.io.PrintWriter;
031import java.io.StringWriter;
032import java.net.URL;
033import java.net.URLConnection;
034import java.nio.file.Files;
035import java.nio.file.Paths;
036import java.nio.file.StandardCopyOption;
037import java.util.ArrayList;
038import java.util.List;
039import java.util.concurrent.atomic.AtomicBoolean;
040import java.util.zip.GZIPOutputStream;
041
042import org.biojava.nbio.core.util.InputStreamProvider;
043import org.biojava.nbio.structure.align.util.URLConnectionTools;
044import org.biojava.nbio.structure.align.util.UserConfiguration;
045import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
046import org.slf4j.Logger;
047import org.slf4j.LoggerFactory;
048
049
050
051/** 
052 * This provider of chemical components can download and cache chemical component definition files from the RCSB PDB web site.
053 * It is the default way to access these definitions.
054 * If this provider is called he first time, it will download and install all chemical
055 * component definitions in a local directory.
056 * Once the definition files have been installed, it has quick startup time and low memory requirements.
057 *
058 * An alternative provider, that keeps all definitions in memory is the {@link AllChemCompProvider}. Another provider, that
059 * does not require any network access, but only can support a limited set of chemical component definitions, is the {@link ReducedChemCompProvider}.
060 *
061 *
062 * @author Andreas Prlic
063 *
064 */
065public class DownloadChemCompProvider implements ChemCompProvider {
066
067        private static final Logger logger = LoggerFactory.getLogger(DownloadChemCompProvider.class);
068
069        public static final String CHEM_COMP_CACHE_DIRECTORY = "chemcomp";
070
071        public static final String DEFAULT_SERVER_URL = "http://files.rcsb.org/ligands/download/";
072        
073        public static String serverBaseUrl = DEFAULT_SERVER_URL;
074        
075        /**
076         * Use default RCSB server layout (true) or internal RCSB server layout (false)
077         */
078        public static boolean useDefaultUrlLayout = true;
079
080
081        private static File path;
082        //private static final String FILE_SEPARATOR = System.getProperty("file.separator");
083        private static final String NEWLINE = System.getProperty("line.separator");
084
085
086        // flags to make sure there is only one thread running that is loading the dictionary
087        static AtomicBoolean loading = new AtomicBoolean(false);
088
089        static final List<String> protectedIDs = new ArrayList<String> ();
090        static {
091                protectedIDs.add("CON");
092                protectedIDs.add("PRN");
093                protectedIDs.add("AUX");
094                protectedIDs.add("NUL");
095        }
096
097        /** by default we will download only some of the files. User has to request that all files should be downloaded...
098         *
099         */
100        boolean downloadAll = false;
101
102        public DownloadChemCompProvider(){
103                logger.debug("Initialising DownloadChemCompProvider");
104
105                // note that path is static, so this is just to make sure that all non-static methods will have path initialised
106                initPath();
107        }
108
109        public DownloadChemCompProvider(String cacheFilePath){
110                logger.debug("Initialising DownloadChemCompProvider");
111
112                // note that path is static, so this is just to make sure that all non-static methods will have path initialised
113                path = new File(cacheFilePath);
114        }
115
116        private static void initPath(){
117
118                if (path==null) {
119                        UserConfiguration config = new UserConfiguration();
120                        path = new File(config.getCacheFilePath());
121                }
122        }
123
124        /**
125         * Checks if the chemical components already have been installed into the PDB directory.
126         * If not, will download the chemical components definitions file and split it up into small
127         * subfiles.
128         */
129        public void checkDoFirstInstall(){
130
131                if ( ! downloadAll ) {
132                        return;
133                }
134
135
136                // this makes sure there is a file separator between every component,
137                // if path has a trailing file separator or not, it will work for both cases
138                File dir = new File(path, CHEM_COMP_CACHE_DIRECTORY);
139                File f = new File(dir, "components.cif.gz");
140
141                if ( ! f.exists()) {
142
143                        downloadAllDefinitions();
144
145                } else {
146                        // file exists.. did it get extracted?
147
148                        FilenameFilter filter =new FilenameFilter() {
149
150                                @Override
151                                public boolean accept(File dir, String file) {
152                                        return file.endsWith(".cif.gz");
153                                }
154                        };
155                        String[] files = dir.list(filter);
156                        if ( files.length < 500) {
157                                // not all did get unpacked
158                                try {
159                                        split();
160                                } catch (IOException e) {
161                                        logger.error("Could not split file {} into individual chemical component files. Error: {}",
162                                                        f.toString(), e.getMessage());
163                                }
164                        }
165                }
166        }
167
168        private void split() throws IOException {
169
170                logger.info("Installing individual chem comp files ...");
171
172                File dir = new File(path, CHEM_COMP_CACHE_DIRECTORY);
173                File f = new File(dir, "components.cif.gz");
174
175
176                int counter = 0;
177                InputStreamProvider prov = new InputStreamProvider();
178
179                try( BufferedReader buf = new BufferedReader (new InputStreamReader (prov.getInputStream(f)));
180                                ) {
181                        String line = null;
182                        line = buf.readLine ();
183                        StringWriter writer = new StringWriter();
184
185                        String currentID = null;
186                        while (line != null){
187
188                                if ( line.startsWith("data_")) {
189                                        // a new record found!
190
191                                        if ( currentID != null) {
192                                                writeID(writer.toString(), currentID);
193                                                counter++;
194                                        }
195
196                                        currentID = line.substring(5);
197                                        writer = new StringWriter();
198                                }
199
200                                writer.append(line);
201                                writer.append(NEWLINE);
202
203                                line = buf.readLine ();
204                        }
205
206                        // write the last record...
207                        writeID(writer.toString(),currentID);
208                        counter++;
209
210                }
211
212                logger.info("Created " + counter + " chemical component files.");
213        }
214
215        /**
216         * Output chemical contents to a file
217         * @param contents File contents
218         * @param currentID Chemical ID, used to determine the filename
219         * @throws IOException
220         */
221        private void writeID(String contents, String currentID) throws IOException{
222
223                String localName = DownloadChemCompProvider.getLocalFileName(currentID);
224
225                try ( PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(localName))) ) {
226
227                        pw.print(contents);
228                        pw.flush();
229                }
230        }
231
232        /**
233         * Loads the definitions for this {@link ChemComp} from a local file and instantiates a new object.
234         *
235         * @param recordName the ID of the {@link ChemComp}
236         * @return a new {@link ChemComp} definition.
237         */
238        @Override
239        public  ChemComp getChemComp(String recordName) {
240
241                // make sure we work with upper case records
242                recordName = recordName.toUpperCase().trim();
243
244                boolean haveFile = true;
245                if ( recordName.equals("?")){
246                        return null;
247                }
248
249                if ( ! fileExists(recordName)) {
250                        // check if we should install all components
251                        checkDoFirstInstall();
252                }
253                if ( ! fileExists(recordName)) {
254                        // we previously have installed already the definitions,
255                        // just do an incrememntal update
256                        haveFile = downloadChemCompRecord(recordName);
257                }
258
259                // Added check that download was successful and chemical component is available.
260                if (haveFile) {
261                        String filename = getLocalFileName(recordName);
262                        InputStream inStream = null;
263                        try {
264
265                                InputStreamProvider isp = new InputStreamProvider();
266
267                                inStream = isp.getInputStream(filename);
268
269                                MMcifParser parser = new SimpleMMcifParser();
270
271                                ChemCompConsumer consumer = new ChemCompConsumer();
272
273                                // The Consumer builds up the BioJava - structure object.
274                                // you could also hook in your own and build up you own data model.
275                                parser.addMMcifConsumer(consumer);
276
277                                parser.parse(new BufferedReader(new InputStreamReader(inStream)));
278
279                                ChemicalComponentDictionary dict = consumer.getDictionary();
280
281                                ChemComp chemComp = dict.getChemComp(recordName);
282
283                                return chemComp;
284
285                        } catch (IOException e) {
286
287                                logger.error("Could not parse chemical component file {}. Error: {}. "
288                                                + "There will be no chemical component info available for {}", filename, e.getMessage(), recordName);
289
290                        }
291                        finally{
292                                // Now close it
293                                if(inStream!=null){
294                                        try {
295                                                inStream.close();
296                                        } catch (IOException e) {
297                                                // This would be weird...
298                                                logger.error("Could not close chemical component file {}. A resource leak could occur!!", filename);
299                                        }
300                                }
301
302                        }
303                }
304
305                // see https://github.com/biojava/biojava/issues/315
306                // probably a network error happened. Try to use the ReducedChemCOmpProvider
307                ReducedChemCompProvider reduced = new ReducedChemCompProvider();
308
309                return reduced.getChemComp(recordName);
310
311        }
312
313        /** 
314         * Returns the file name that contains the definition for this {@link ChemComp}
315         *
316         * @param recordName the ID of the {@link ChemComp}
317         * @return full path to the file
318         */
319        public static String getLocalFileName(String recordName){
320
321                if ( protectedIDs.contains(recordName)){
322                        recordName = "_" + recordName;
323                }
324
325                initPath();
326
327                File f = new File(path, CHEM_COMP_CACHE_DIRECTORY);
328                if (! f.exists()){
329                        logger.info("Creating directory " + f);
330
331                        boolean success = f.mkdir();
332                        // we've checked in initPath that path is writable, so there's no need to check if it succeeds
333                        // in the unlikely case that in the meantime it isn't writable at least we log an error
334                        if (!success) logger.error("Directory {} could not be created",f);
335
336                }
337
338                File theFile = new File(f,recordName + ".cif.gz");
339
340                return theFile.toString();
341        }
342
343        private static  boolean fileExists(String recordName){
344
345                String fileName = getLocalFileName(recordName);
346
347                File f = new File(fileName);
348
349                return f.exists();
350
351        }
352
353        /**
354         * @param recordName : three-letter name
355         * @return true if successful download
356         */
357        private static boolean downloadChemCompRecord(String recordName) {
358
359                String localName = getLocalFileName(recordName);
360                File newFile;
361                try{
362                        newFile = File.createTempFile("chemcomp"+recordName, "cif");
363                        logger.debug("Will write chem comp file to temp file {}", newFile.toString());
364                }
365                catch(IOException e){
366                        logger.error("Could not write to temp directory {} to create the chemical component download temp file", System.getProperty("java.io.tmpdir"));
367                        return false;
368                }
369                String u;
370                if(useDefaultUrlLayout){
371                        u = serverBaseUrl + recordName + ".cif";
372                }
373                else{
374                        u = serverBaseUrl + recordName.charAt(0) + "/"  + recordName +"/" + recordName + ".cif";
375                }
376
377                logger.debug("downloading " + u);
378
379                URL url = null;
380
381
382                try {
383                        url = new URL(u);
384                        URLConnection uconn = URLConnectionTools.openURLConnection(url);
385
386                        try( PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(newFile)));
387                                        BufferedReader fileBuffer = new BufferedReader(new InputStreamReader(uconn.getInputStream()));
388                                        ) {
389
390                                String line;
391
392                                while ((line = fileBuffer.readLine()) != null) {
393                                        pw.println(line);
394                                }
395
396                                pw.flush();
397                        }
398                        // Now we move this across to where it actually wants to be
399                        Files.move(newFile.toPath(), Paths.get(localName), StandardCopyOption.REPLACE_EXISTING);
400
401                        return true;
402                }  catch (IOException e){
403                        logger.error("Could not download "+url.toString()+" OR store locally to "+localName+" Error ="+e.getMessage());
404                        newFile.delete();
405                }
406                return false;
407        }
408
409        private void downloadAllDefinitions() {
410
411                if ( loading.get()){
412                        logger.info("Waiting for other thread to install chemical components...");
413                }
414
415                while ( loading.get() ) {
416
417                        // another thread is already downloading the components definitions
418                        // wait for the other thread to finish...
419
420                        try {
421                                // wait half a second
422
423                                Thread.sleep(500);
424                        } catch (InterruptedException e) {
425                                //e.printStackTrace();
426                                logger.error("Thread interrupted "+e.getMessage());
427                        }
428
429                        logger.info("Another thread installed the chemical components.");
430                        return;
431
432                }
433
434                loading.set(true);
435                long timeS = System.currentTimeMillis();
436
437                logger.info("Performing first installation of chemical components.");
438                logger.info("Downloading components.cif.gz ...");
439
440
441                try {
442                        AllChemCompProvider.downloadFile();
443                } catch (IOException e){
444                        logger.error("Could not download the all chemical components file. Error: {}. "
445                                        + "Chemical components information won't be available", e.getMessage());
446                        // no point in trying to split if the file could not be downloaded
447                        loading.set(false);
448                        return;
449                }
450                try {
451                        split();
452                } catch (IOException e) {
453                        logger.error("Could not split all chem comp file into individual chemical component files. Error: {}",
454                                        e.getMessage());
455                        // no point in reporting time
456                        loading.set(false);
457                        return;
458                }
459                long timeE = System.currentTimeMillis();
460                logger.info("time to install chem comp dictionary: " + (timeE - timeS) / 1000 + " sec.");
461                loading.set(false);
462
463        }
464
465        /** By default this provider will download only some of the {@link ChemComp} files.
466         * The user has to request that all files should be downloaded by setting this parameter to true.
467         *
468         *  @return flag if the all components should be downloaded and installed at startup. (default: false)
469         */
470        public boolean isDownloadAll() {
471                return downloadAll;
472        }
473
474        /** By default this provider will download only some of the {@link ChemComp} files.
475         * The user has to request that all files should be downloaded by setting this parameter to true.
476         *
477         * @param  flag if the all components should be downloaded and installed at startup. (default: false)
478         */
479        public void setDownloadAll(boolean downloadAll) {
480                this.downloadAll = downloadAll;
481        }
482
483
484
485
486
487}