/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.io.mmcif;

import org.biojava.nbio.structure.align.util.UserConfiguration;
import org.biojava.nbio.structure.io.LocalPDBDirectory;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.biojava.nbio.core.util.InputStreamProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.URL;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * A ChemComp provider that downloads and caches the components.cif file from the wwPDB site. It then loads
 * all chemical components at startup and keeps them in memory. This provider is not used as a default
 * since it is slower at startup and requires more memory than the {@link DownloadChemCompProvider} that is used by default.
 *
 * <p>The dictionary is shared by all instances through static state. Constructing an instance starts a
 * background loading thread unless a load is already running or has already finished;
 * {@link #getChemComp(String)} blocks until that load is over.
 *
 * @author Andreas Prlic
 *
 */
public class AllChemCompProvider implements ChemCompProvider, Runnable {

    private static final Logger logger = LoggerFactory.getLogger(AllChemCompProvider.class);

    /** Location of the components file, relative to the server name. */
    public static final String COMPONENTS_FILE_LOCATION = "pub/pdb/data/monomers/components.cif.gz";

    /** How long (in milliseconds) {@link #getChemComp(String)} sleeps between checks of the loading flag. */
    private static final long LOADING_POLL_INTERVAL_MS = 500;

    /** Size (in bytes) of the copy buffer used while downloading. */
    private static final int DOWNLOAD_BUFFER_SIZE = 4 * 1024;

    // base directory of the local cache, lazily set by initPath()
    private static String path;

    // server to download from, lazily set by initServerName()
    private static String serverName;

    // there will be only one copy of the dictionary across all instances
    // to reduce memory impact
    static ChemicalComponentDictionary dict;

    // flags to make sure there is only one thread running that is loading the dictionary
    static AtomicBoolean loading = new AtomicBoolean(false);
    static AtomicBoolean isInitialized = new AtomicBoolean(false);

    /**
     * Starts loading all chemical components in a background thread, unless the dictionary
     * has already been loaded or another thread is loading it right now.
     */
    public AllChemCompProvider() {

        if (isInitialized.get()) {
            return;
        }

        // Claim the loader role atomically: a separate get() followed by set(true)
        // would allow two threads to both start a loader.
        if (!loading.compareAndSet(false, true)) {
            logger.warn("other thread is already loading all chemcomps, no need to init twice");
            return;
        }

        // A loader may have finished between the isInitialized check and the
        // compareAndSet above. run() publishes isInitialized before it releases
        // "loading", so this re-check is reliable.
        if (isInitialized.get()) {
            loading.set(false);
            return;
        }

        Thread t = new Thread(this);
        t.start();
    }

    /**
     * Make sure the cache path is initialized: takes it from a new
     * {@link UserConfiguration} the first time it is needed.
     */
    private static void initPath() {

        if (path == null) {
            UserConfiguration config = new UserConfiguration();
            path = config.getCacheFilePath();
        }
    }

    /**
     * Make sure the server name is initialized: takes it from
     * {@link LocalPDBDirectory#getServerName()} the first time it is needed.
     */
    private static void initServerName() {

        if (serverName == null) {
            serverName = LocalPDBDirectory.getServerName();
        }
    }

    /**
     * Downloads the components file if there is no local copy yet.
     * A failed download is logged and not propagated.
     */
    private void ensureFileExists() {

        File f = new File(getLocalFileName());

        if (!f.exists()) {
            try {
                downloadFile();
            } catch (IOException e) {
                logger.error("Caught IOException", e);
            }
        }
    }

    /**
     * Downloads the components.cif.gz file from the wwPDB site.
     *
     * @throws IOException if the file can not be downloaded or stored locally
     */
    public static void downloadFile() throws IOException {

        initPath();

        initServerName();

        String localName = getLocalFileName();

        String u = serverName + "/" + COMPONENTS_FILE_LOCATION;

        downloadFileFromRemote(new URL(u), new File(localName));
    }

    /**
     * Copies the content of a remote URL to a local file.
     *
     * <p>The data is first written to a temporary sibling file ({@code <name>.tmp}) and only
     * moved to {@code localFile} once the copy has completed. A failed or interrupted download
     * therefore never leaves an empty or truncated {@code localFile} behind, which
     * {@link #ensureFileExists()} would otherwise mistake for a valid copy.
     *
     * @param remoteURL the URL to read from
     * @param localFile the file to create or replace
     * @throws FileNotFoundException if the temporary file can not be opened for writing
     * @throws IOException if reading, writing or moving the file fails
     */
    private static void downloadFileFromRemote(URL remoteURL, File localFile) throws FileNotFoundException, IOException {
        logger.info("Downloading {} to: {}", remoteURL, localFile);

        File tmpFile = new File(localFile.getAbsolutePath() + ".tmp");

        InputStream in = null;
        OutputStream out = null;
        boolean complete = false;
        try {
            // open the remote side first, so that an unreachable server does not create any file
            in = remoteURL.openStream();
            out = new FileOutputStream(tmpFile);

            byte[] buf = new byte[DOWNLOAD_BUFFER_SIZE];
            int bytesRead;
            while ((bytesRead = in.read(buf)) != -1) {
                out.write(buf, 0, bytesRead);
            }

            // close explicitly (not quietly), so that a failure to write out the last bytes is reported
            out.close();
            out = null;
            complete = true;
        } finally {
            closeQuietly(in);
            closeQuietly(out);
            if (!complete && tmpFile.exists() && !tmpFile.delete()) {
                logger.warn("Could not delete incomplete download {}", tmpFile);
            }
        }

        // File.renameTo may refuse to overwrite an existing file on some platforms
        if (localFile.exists() && !localFile.delete()) {
            if (!tmpFile.delete()) {
                logger.warn("Could not delete temporary file {}", tmpFile);
            }
            throw new IOException("Could not replace existing file " + localFile);
        }
        if (!tmpFile.renameTo(localFile)) {
            throw new IOException("Could not move " + tmpFile + " to " + localFile);
        }
    }

    /**
     * Closes a stream, logging (at debug level) instead of propagating any error.
     * Only meant for cleanup paths where a close failure must not hide the primary error.
     *
     * @param stream the stream to close, may be null
     */
    private static void closeQuietly(Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            logger.debug("Error while closing stream", e);
        }
    }

    /**
     * Returns the location of the local copy of the components file, creating the
     * cache directory (including missing parent directories) if it does not exist yet.
     *
     * @return the path of components.cif.gz inside the chemical component cache directory
     */
    private static String getLocalFileName() {

        File dir = new File(path, DownloadChemCompProvider.CHEM_COMP_CACHE_DIRECTORY);

        if (!dir.exists()) {
            logger.info("Creating directory {}", dir.toString());
            // mkdirs() rather than mkdir(): the parent directories may not exist yet either
            if (!dir.mkdirs() && !dir.isDirectory()) {
                logger.warn("Could not create directory {}", dir);
            }
        }

        return new File(dir, "components.cif.gz").toString();
    }

    /**
     * Load all {@link ChemComp} definitions into memory.
     *
     * @throws IOException if the local components file can not be read
     */
    private void loadAllChemComps() throws IOException {
        String fileName = getLocalFileName();
        logger.debug("Loading {}", fileName);
        InputStreamProvider isp = new InputStreamProvider();

        InputStream inStream = isp.getInputStream(fileName);
        try {
            MMcifParser parser = new SimpleMMcifParser();

            ChemCompConsumer consumer = new ChemCompConsumer();

            // The consumer collects the parsed records; the resulting dictionary
            // is fetched from it once parsing is done.
            parser.addMMcifConsumer(consumer);

            parser.parse(new BufferedReader(new InputStreamReader(inStream)));

            dict = consumer.getDictionary();
        } finally {
            // also release the stream when parsing fails
            closeQuietly(inStream);
        }
    }

    /**
     * {@inheritDoc}
     *
     * <p>Blocks while the dictionary is still being loaded. If the waiting thread gets
     * interrupted, waiting continues, and the interrupt status is restored before returning.
     *
     * @throws IllegalStateException if no dictionary is available once loading is over
     */
    @Override
    public ChemComp getChemComp(String recordName) {

        boolean interrupted = false;
        try {
            while (loading.get()) {

                // another thread is still initializing the definitions
                try {
                    Thread.sleep(LOADING_POLL_INTERVAL_MS);
                } catch (InterruptedException e) {
                    // keep waiting, but remember the interrupt so that it is not lost for the caller
                    interrupted = true;
                    logger.error("Interrepted thread while waiting: " + e.getMessage());
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }

        ChemicalComponentDictionary dictionary = dict;
        if (dictionary == null) {
            // no dictionary was stored, e.g. because run() logged a loading error
            throw new IllegalStateException(
                    "Chemical component dictionary is not available, can not look up " + recordName);
        }

        return dictionary.getChemComp(recordName);
    }

    /**
     * Do the actual loading of the dictionary in a thread.
     *
     * <p>Whether loading succeeds or not, the provider is flagged as initialized and the
     * loading flag is cleared at the end, so that waiting callers are released.
     */
    @Override
    public void run() {
        long timeS = System.currentTimeMillis();

        initPath();

        ensureFileExists();

        try {
            loadAllChemComps();

            long timeE = System.currentTimeMillis();
            logger.debug("Time to init chem comp dictionary: " + (timeE - timeS) / 1000 + " sec.");

        } catch (IOException e) {
            // pass the exception itself along, so that the stack trace is not lost
            logger.error("Could not load chemical components definition file " + getLocalFileName()
                    + ". Error: " + e.getMessage(), e);

        } finally {
            // Publish "initialized" before releasing "loading": the constructor relies on
            // this order when it re-checks isInitialized after claiming the loading flag.
            isInitialized.set(true);
            loading.set(false);
        }
    }

}