001package org.biojava.nbio.structure.chem;
002
003import org.biojava.nbio.structure.align.util.UserConfiguration;
004import org.biojava.nbio.structure.io.LocalPDBDirectory;
005import org.biojava.nbio.structure.io.cif.ChemCompConverter;
006import org.slf4j.Logger;
007import org.slf4j.LoggerFactory;
008
009import java.io.File;
010import java.io.FileOutputStream;
011import java.io.IOException;
012import java.io.InputStream;
013import java.net.URL;
014import java.nio.file.Paths;
015import java.util.concurrent.atomic.AtomicBoolean;
016
017/**
018 * A ChemComp provider that downloads and caches the components.cif file from the wwPDB site. It then loads all chemical
019 * components at startup and keeps them in memory. This provider is not used as a default since it is slower at startup
020 * and requires more memory than the {@link DownloadChemCompProvider} that is used by default.
021 *
022 * @author Andreas Prlic
023 */
024public class AllChemCompProvider implements ChemCompProvider, Runnable {
025    private static final Logger logger = LoggerFactory.getLogger(AllChemCompProvider.class);
026    public static final String COMPONENTS_FILE_LOCATION = "pub/pdb/data/monomers/components.cif.gz";
027    private static String path;
028    private static String serverName;
029
030    // there will be only one copy of the dictionary across all instances
031    // to reduce memory impact
032    static ChemicalComponentDictionary dict;
033
034    // flags to make sure there is only one thread running that is loading the dictionary
035    static AtomicBoolean loading = new AtomicBoolean(false);
036    static AtomicBoolean isInitialized = new AtomicBoolean(false);
037
038    public AllChemCompProvider() {
039        if (loading.get()) {
040            logger.warn("other thread is already loading all chemcomps, no need to init twice");
041            return;
042        }
043        if (isInitialized.get()) {
044            return;
045        }
046
047        loading.set(true);
048
049        Thread t = new Thread(this);
050        t.start();
051    }
052
053    /**
054     * make sure all paths are initialized correctly
055     */
056    private static void initPath() {
057        if (path == null) {
058            UserConfiguration config = new UserConfiguration();
059            path = config.getCacheFilePath();
060        }
061    }
062
063    private static void initServerName() {
064        if (serverName == null) {
065            serverName = LocalPDBDirectory.getServerName();
066        }
067    }
068
069    private void ensureFileExists() {
070        String fileName = getLocalFileName();
071        File f = new File(fileName);
072
073        if (!f.exists()) {
074            try {
075                downloadFile();
076            } catch (IOException e) {
077                logger.error("Caught IOException", e);
078            }
079        }
080    }
081
082    /**
083     * Downloads the components.cif.gz file from the wwPDB site.
084     */
085    public static void downloadFile() throws IOException {
086        initPath();
087        initServerName();
088        String localName = getLocalFileName();
089        String u = serverName + "/" + COMPONENTS_FILE_LOCATION;
090        downloadFileFromRemote(new URL(u), new File(localName));
091    }
092
093    private static  void downloadFileFromRemote(URL remoteURL, File localFile) throws IOException {
094        logger.info("Downloading {} to: {}", remoteURL, localFile);
095        FileOutputStream out = new FileOutputStream(localFile);
096
097        InputStream in = remoteURL.openStream();
098        byte[] buf = new byte[4 * 1024]; // 4K buffer
099        int bytesRead;
100        while ((bytesRead = in.read(buf)) != -1) {
101            out.write(buf, 0, bytesRead);
102        }
103        in.close();
104        out.close();
105    }
106
107    private static String getLocalFileName(){
108        File dir = new File(path, DownloadChemCompProvider.CHEM_COMP_CACHE_DIRECTORY);
109
110        if (!dir.exists()) {
111            logger.info("Creating directory {}", dir.toString());
112            dir.mkdir();
113        }
114
115        return new File(dir, "components.cif.gz").toString();
116    }
117
118    /**
119     * Load all {@link ChemComp} definitions into memory.
120     */
121    private void loadAllChemComps() throws IOException {
122        String fileName = getLocalFileName();
123        logger.debug("Loading {}", fileName);
124        dict = ChemCompConverter.fromPath(Paths.get(fileName));
125    }
126
127    /**
128     *  {@inheritDoc}
129     */
130    @Override
131    public ChemComp getChemComp(String recordName) {
132        while (loading.get()) {
133            // another thread is still initializing the definitions
134            try {
135                // wait half a second
136                Thread.sleep(500);
137            } catch (InterruptedException e) {
138                logger.error("Interrepted thread while waiting: {}", e.getMessage());
139                //e.printStackTrace();
140            }
141        }
142
143        return dict.getChemComp(recordName);
144    }
145
146    /**
147     * Do the actual loading of the dictionary in a thread.
148     */
149    @Override
150    public void run() {
151        long timeS = System.currentTimeMillis();
152        initPath();
153        ensureFileExists();
154
155        try {
156            loadAllChemComps();
157            long timeE = System.currentTimeMillis();
158            logger.debug("Time to init chem comp dictionary: {} sec.", (timeE - timeS) / 1000);
159        } catch (IOException e) {
160            logger.error("Could not load chemical components definition file {}. Error: {}", getLocalFileName(), e.getMessage());
161        } finally {
162            loading.set(false);
163            isInitialized.set(true);
164        }
165    }
166}
167