001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Dec 28, 2005 021 * 022 */ 023package org.biojava.nbio.core.util; 024 025import java.io.*; 026import java.net.URISyntaxException; 027import java.net.URL; 028import java.util.Enumeration; 029import java.util.jar.JarEntry; 030import java.util.jar.JarFile; 031import java.util.zip.GZIPInputStream; 032import java.util.zip.ZipEntry; 033import java.util.zip.ZipFile; 034 035//import org.slf4j.Logger; 036//import org.slf4j.LoggerFactory; 037 038 039/** A class that provides an InputStream from a File. The file can be compressed or uncompressed. 040 * 041 * Currently supported 042 * compressions: 043 * <ul> 044 * <li>Gzip (extension .gz)</li> 045 * <li>Zip (extension .zip) in this case a stream to the first entry in the zip file is returned </li> 046 * <li>Jar (extension .jar) same as .Zip; only stream to first entry is returned </li> 047 * <li>Z (extension .Z) compressed using the unix compress command </li> 048 * <li>for any other extension, no compression is assumed </li> 049 * </ul> 050 * 051 * 052 * @author Andreas Prlic 053 * @since 1.5 054 * @version %I% %G% 055 * 056 */ 057public class InputStreamProvider { 058 059 //private final static Logger logger = LoggerFactory.getLogger(InputStreamProvider.class); 060 061 /** 062 * The magic number found at the start of a GZIP stream. 063 */ 064 public static final int GZIP_MAGIC = 0x1f8b; 065 public static final String CACHE_PROPERTY = "biojava.cache.files"; 066 067 private boolean cacheRawFiles ; 068 069 public InputStreamProvider() { 070 super(); 071 cacheRawFiles = false; 072 073 String prop = System.getProperty(CACHE_PROPERTY); 074 if ( prop != null && prop.equals("true")) { 075 cacheRawFiles = true; 076 077 } 078 079 } 080 081 /** 082 * Get an InputStream for given file path. 083 * The caller is responsible for closing the stream or otherwise 084 * a resource leak can occur. 085 * @param pathToFile the path of the file. 086 * @return an InputStream for the file located at the path. 087 * @throws IOException 088 */ 089 public InputStream getInputStream(String pathToFile) 090 throws IOException 091 { 092 File f = new File(pathToFile); 093 return getInputStream(f); 094 } 095 096 097 /** open the file and read the magic number from the beginning 098 * this is used to determine the compression type 099 * 100 * @param in an input stream to read from 101 * @return the magic number 102 * @throws IOException 103 */ 104 private int getMagicNumber(InputStream in) 105 throws IOException { 106 107 108 int t = in.read(); 109 if (t < 0) throw new EOFException("Failed to read magic number"); 110 int magic = (t & 0xff) << 8; 111 t = in.read(); 112 if (t < 0) throw new EOFException("Failed to read magic number"); 113 magic += t & 0xff; 114 115 return magic; 116 } 117 118 119 public InputStream getInputStream(URL u) 120 throws IOException{ 121 122 if (u.getProtocol().equals("file")) { 123 try { 124 return getInputStream(new File(u.toURI().getPath())); 125 } catch (URISyntaxException e) { 126 throw new RuntimeException(e); 127 } 128 } 129 130 int magic = 0; 131 132 133 InputStream inStream = u.openStream(); 134 magic = getMagicNumber(inStream); 135 inStream.close(); 136 137 138 if (magic == UncompressInputStream.LZW_MAGIC ) { 139 // a Z compressed file 140 return openCompressedURL(u); 141 } else if (magic == GZIP_MAGIC ) { 142 return openGZIPURL(u); 143 } else if ( u.getPath().endsWith(".gz")) { 144 return openGZIPURL(u); 145 } else if ( u.getPath().endsWith(".Z")) { 146 // unix compressed 147 return openCompressedURL(u); 148 149 } else { 150 inStream = u.openStream(); 151 return inStream; 152 } 153 154 } 155 156 157 /** 158 * Get an InputStream for the file. 159 * The caller is responsible for closing the stream or otherwise 160 * a resource leak can occur. 161 * @param f a File 162 * @return an InputStream for the file 163 * @throws IOException 164 */ 165 public InputStream getInputStream(File f) 166 throws IOException 167 { 168 169 // use the magic numbers to determine the compression type, 170 // use file extension only as 2nd choice 171 172 int magic = 0; 173 174 175 InputStream test = getInputStreamFromFile(f); 176 magic = getMagicNumber(test); 177 test.close(); 178 179 180 InputStream inputStream = null; 181 182 String fileName = f.getName(); 183 184 if (magic == UncompressInputStream.LZW_MAGIC ) { 185 // a Z compressed file 186 return openCompressedFile(f); 187 } 188 189 else if (magic == GZIP_MAGIC ) { 190 return openGZIPFile(f); 191 } 192 193 else if ( fileName.endsWith(".gz")) { 194 return openGZIPFile(f); 195 } 196 197 else if ( fileName.endsWith(".zip")){ 198 199 ZipFile zipfile = new ZipFile(f); 200 201 // stream to first entry is returned ... 202 ZipEntry entry; 203 Enumeration<? extends ZipEntry> e = zipfile.entries(); 204 if ( e.hasMoreElements()){ 205 entry = e.nextElement(); 206 inputStream = zipfile.getInputStream(entry); 207 } else { 208 throw new IOException ("Zip file has no entries"); 209 } 210 211 } 212 213 else if ( fileName.endsWith(".jar")) { 214 215 JarFile jarFile = new JarFile(f); 216 217 // stream to first entry is returned 218 JarEntry entry; 219 Enumeration<JarEntry> e = jarFile.entries(); 220 if ( e.hasMoreElements()){ 221 entry = e.nextElement(); 222 inputStream = jarFile.getInputStream(entry); 223 } else { 224 throw new IOException ("Jar file has no entries"); 225 } 226 } 227 228 else if ( fileName.endsWith(".Z")) { 229 // unix compressed 230 return openCompressedFile(f); 231 232 } 233 234 else { 235 236 // no particular extension found, assume that it is an uncompressed file 237 inputStream = getInputStreamFromFile(f); 238 } 239 240 return inputStream; 241 } 242 243 244 /** 245 * Wrapper for new FileInputStream. if System.property biojava.cache.files is set, will try to load files from memory cache. 246 * 247 * @param f 248 * @return 249 * @throws FileNotFoundException 250 */ 251 private InputStream getInputStreamFromFile(File f) throws FileNotFoundException{ 252 InputStream stream = null; 253 254 255 256 if ( cacheRawFiles ){ 257 stream = FlatFileCache.getInputStream(f.getAbsolutePath()); 258 259 if ( stream == null){ 260 FlatFileCache.addToCache(f.getAbsolutePath(),f); 261 stream = FlatFileCache.getInputStream(f.getAbsolutePath()); 262 } 263 } 264 265 if ( stream == null) 266 stream = new FileInputStream(f); 267 268 return stream; 269 } 270 271 272 private InputStream openCompressedFile(File f) 273 throws IOException{ 274 275 InputStream is = getInputStreamFromFile(f); 276 InputStream inputStream = new UncompressInputStream(is); 277 return inputStream; 278 } 279 280 private InputStream openCompressedURL(URL u) 281 throws IOException{ 282 283 InputStream is = u.openStream(); 284 InputStream inputStream = new UncompressInputStream(is); 285 return inputStream; 286 } 287 288 289 private InputStream openGZIPFile(File f) 290 throws IOException{ 291 292 InputStream is = getInputStreamFromFile(f); 293 InputStream inputStream = new GZIPInputStream(is); 294 return inputStream; 295 } 296 297 private InputStream openGZIPURL(URL u) 298 throws IOException{ 299 300 InputStream is = u.openStream(); 301 InputStream inputStream = new GZIPInputStream(is); 302 return inputStream; 303 } 304}