001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.io.mmcif; 022 023import org.biojava.nbio.structure.align.util.UserConfiguration; 024import org.biojava.nbio.structure.io.LocalPDBDirectory; 025import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 026import org.biojava.nbio.core.util.InputStreamProvider; 027import org.slf4j.Logger; 028import org.slf4j.LoggerFactory; 029 030import java.io.*; 031import java.net.URL; 032import java.util.concurrent.atomic.AtomicBoolean; 033 034/** 035 * A ChemComp provider that downloads and caches the components.cif file from the wwPDB site. It then loads 036 * all chemical components at startup and keeps them in memory. This provider is not used as a default 037 * since it is slower at startup and requires more memory than the {@link DownloadChemCompProvider} that is used by default. 038 * 039 * @author Andreas Prlic 040 * 041 */ 042public class AllChemCompProvider implements ChemCompProvider, Runnable{ 043 044 private static final Logger logger = LoggerFactory.getLogger(AllChemCompProvider.class); 045 046 public static final String COMPONENTS_FILE_LOCATION = "pub/pdb/data/monomers/components.cif.gz"; 047 048 049 private static String path; 050 051 private static String serverName; 052 053 054 // there will be only one copy of the dictionary across all instances 055 // to reduce memory impact 056 static ChemicalComponentDictionary dict; 057 058 // flags to make sure there is only one thread running that is loading the dictionary 059 static AtomicBoolean loading = new AtomicBoolean(false); 060 static AtomicBoolean isInitialized = new AtomicBoolean(false); 061 062 public AllChemCompProvider(){ 063 064 if ( loading.get()) { 065 logger.warn("other thread is already loading all chemcomps, no need to init twice"); 066 return; 067 } 068 if ( isInitialized.get()) 069 return; 070 071 loading.set(true); 072 073 Thread t = new Thread(this); 074 t.start(); 075 076 } 077 078 079 /** make sure all paths are initialized correctly 080 * 081 */ 082 private static void initPath(){ 083 084 if (path==null) { 085 UserConfiguration config = new UserConfiguration(); 086 path = config.getCacheFilePath(); 087 } 088 } 089 090 private static void initServerName() { 091 092 if (serverName==null) { 093 serverName = LocalPDBDirectory.getServerName(); 094 } 095 } 096 097 private void ensureFileExists() { 098 099 100 String fileName = getLocalFileName(); 101 File f = new File(fileName); 102 103 if ( ! f.exists()) { 104 try { 105 downloadFile(); 106 } catch (IOException e) { 107 logger.error("Caught IOException",e); 108 } 109 } 110 111 112 113 } 114 115 /** Downloads the components.cif.gz file from the wwPDB site. 116 * 117 */ 118 public static void downloadFile() throws IOException { 119 120 initPath(); 121 122 initServerName(); 123 124 String localName = getLocalFileName(); 125 126 String u = serverName + "/" + COMPONENTS_FILE_LOCATION; 127 128 downloadFileFromRemote(new URL(u), new File(localName)); 129 130 131 } 132 133 134 private static void downloadFileFromRemote(URL remoteURL, File localFile) throws FileNotFoundException, IOException{ 135 logger.info("Downloading " + remoteURL + " to: " + localFile); 136 FileOutputStream out = new FileOutputStream(localFile); 137 138 InputStream in = remoteURL.openStream(); 139 byte[] buf = new byte[4 * 1024]; // 4K buffer 140 int bytesRead; 141 while ((bytesRead = in.read(buf)) != -1) { 142 out.write(buf, 0, bytesRead); 143 } 144 in.close(); 145 out.close(); 146 147 148 } 149 150 151 private static String getLocalFileName(){ 152 153 File dir = new File(path, DownloadChemCompProvider.CHEM_COMP_CACHE_DIRECTORY); 154 155 if (! dir.exists()){ 156 logger.info("Creating directory {}", dir.toString()); 157 dir.mkdir(); 158 } 159 160 String fileName = new File(dir, "components.cif.gz").toString(); 161 162 return fileName; 163 } 164 165 /** Load all {@link ChemComp} definitions into memory. 166 * 167 */ 168 private void loadAllChemComps() throws IOException { 169 String fileName = getLocalFileName(); 170 logger.debug("Loading " + fileName); 171 InputStreamProvider isp = new InputStreamProvider(); 172 173 174 InputStream inStream = isp.getInputStream(fileName); 175 176 MMcifParser parser = new SimpleMMcifParser(); 177 178 ChemCompConsumer consumer = new ChemCompConsumer(); 179 180 // The Consumer builds up the BioJava - structure object. 181 // you could also hook in your own and build up you own data model. 182 parser.addMMcifConsumer(consumer); 183 184 parser.parse(new BufferedReader(new InputStreamReader(inStream))); 185 186 dict = consumer.getDictionary(); 187 188 inStream.close(); 189 190 } 191 192 193 /** {@inheritDoc} 194 * 195 */ 196 @Override 197 public ChemComp getChemComp(String recordName) { 198 199 while ( loading.get()) { 200 201 // another thread is still initializing the definitions 202 try { 203 // wait half a second 204 205 Thread.sleep(500); 206 } catch (InterruptedException e) { 207 logger.error("Interrepted thread while waiting: "+e.getMessage()); 208 //e.printStackTrace(); 209 } 210 } 211 212 213 214 return dict.getChemComp(recordName); 215 } 216 217 218 /** Do the actual loading of the dictionary in a thread. 219 * 220 */ 221 @Override 222 public void run() { 223 long timeS = System.currentTimeMillis(); 224 225 initPath(); 226 227 ensureFileExists(); 228 229 try { 230 loadAllChemComps(); 231 232 long timeE = System.currentTimeMillis(); 233 logger.debug("Time to init chem comp dictionary: " + (timeE - timeS) / 1000 + " sec."); 234 235 236 } catch (IOException e) { 237 logger.error("Could not load chemical components definition file {}. Error: {}", getLocalFileName(), e.getMessage()); 238 239 } finally { 240 loading.set(false); 241 isInitialized.set(true); 242 } 243 } 244 245}