001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Dec 28, 2005 021 * 022 */ 023package org.biojava.utils.io; 024 025import java.io.EOFException; 026import java.io.File; 027import java.io.FileInputStream; 028import java.io.FileNotFoundException; 029import java.io.IOException; 030import java.io.InputStream; 031import java.net.URL; 032import java.util.Enumeration; 033import java.util.jar.JarEntry; 034import java.util.jar.JarFile; 035import java.util.zip.GZIPInputStream; 036import java.util.zip.ZipEntry; 037import java.util.zip.ZipFile; 038 039 040/** A class that provides an InputStream from a File. The file can be compressed or uncompressed. 041 * 042 * Currently supported 043 * compressions: 044 * <ul> 045 * <li>Gzip (extension .gz)</li> 046 * <li>Zip (extension .zip) in this case a stream to the first entry in the zip file is returned </li> 047 * <li>Jar (extension .jar) same as .Zip; only stream to first entry is returned </li> 048 * <li>Z (extension .Z) compressed using the unix compress command </li> 049 * <li>for any other extension, no compression is assumed </li> 050 * </ul> 051 * 052 * 053 * @author Andreas Prlic 054 * @since 1.5 055 * @version %I% %G% 056 * 057 */ 058public class InputStreamProvider { 059 060 /** 061 * The magic number found at the start of a GZIP stream. 062 */ 063 public static final int GZIP_MAGIC = 0x1f8b; 064 public static final String CACHE_PROPERTY = "biojava.cache.files"; 065 boolean cacheRawFiles ; 066 067 FlatFileCache cache ; 068 public InputStreamProvider() { 069 super(); 070 cacheRawFiles = false; 071 072 String prop = System.getProperty(CACHE_PROPERTY); 073 if ( prop != null && prop.equals("true")) { 074 cacheRawFiles = true; 075 cache = FlatFileCache.getInstance(); 076 } 077 078 } 079 080 /** get an InputStream for this file 081 * 082 * @param pathToFile the path of the file. 083 * @return an InputStream for the file located at the path. 084 * @throws IOException 085 */ 086 public InputStream getInputStream(String pathToFile) 087 throws IOException 088 { 089 File f = new File(pathToFile); 090 return getInputStream(f); 091 } 092 093 094 /** open the file and read the magic number from the beginning 095 * this is used to determine the compression type 096 * 097 * @param in an input stream to read from 098 * @return the magic number 099 * @throws IOException 100 */ 101 private int getMagicNumber(InputStream in) 102 throws IOException { 103 104 105 int t = in.read(); 106 if (t < 0) throw new EOFException("Failed to read magic number"); 107 int magic = (t & 0xff) << 8; 108 t = in.read(); 109 if (t < 0) throw new EOFException("Failed to read magic number"); 110 magic += t & 0xff; 111 112 return magic; 113 } 114 115 116 public InputStream getInputStream(URL u) 117 throws IOException{ 118 119 int magic = 0; 120 121 try { 122 InputStream inStream = u.openStream(); 123 magic = getMagicNumber(inStream); 124 inStream.close(); 125 } catch (Exception e){ 126 e.printStackTrace(); 127 }; 128 129 if (magic == UncompressInputStream.LZW_MAGIC ) { 130 // a Z compressed file 131 return openCompressedURL(u); 132 } else if (magic == GZIP_MAGIC ) { 133 return openGZIPURL(u); 134 } else if ( u.toString().endsWith(".gz")) { 135 return openGZIPURL(u); 136 } else if ( u.toString().endsWith(".Z")) { 137 // unix compressed 138 return openCompressedURL(u); 139 140 } else { 141 InputStream inStream = u.openStream(); 142 return inStream; 143 } 144 145 } 146 147 148 /** get an InputStream for the file 149 * 150 * @param f a File 151 * @return an InputStream for the file 152 * @throws IOException 153 */ 154 public InputStream getInputStream(File f) 155 throws IOException 156 { 157 158 // use the magic numbers to determine the compression type, 159 // use file extension only as 2nd choice 160 161 int magic = 0; 162 163 try { 164 InputStream test = getInputStreamFromFile(f); 165 magic = getMagicNumber(test); 166 test.close(); 167 } catch (Exception e){ 168 System.err.println("Problem while loading: " + f); 169 e.printStackTrace(); 170 }; 171 172 InputStream inputStream = null; 173 174 String fileName = f.getName(); 175 176 if (magic == UncompressInputStream.LZW_MAGIC ) { 177 // a Z compressed file 178 return openCompressedFile(f); 179 } 180 181 else if (magic == GZIP_MAGIC ) { 182 return openGZIPFile(f); 183 } 184 185 else if ( fileName.endsWith(".gz")) { 186 return openGZIPFile(f); 187 } 188 189 else if ( fileName.endsWith(".zip")){ 190 191 ZipFile zipfile = new ZipFile(f); 192 193 // stream to first entry is returned ... 194 ZipEntry entry; 195 Enumeration e = zipfile.entries(); 196 if ( e.hasMoreElements()){ 197 entry = (ZipEntry) e.nextElement(); 198 inputStream = zipfile.getInputStream(entry); 199 200 } else { 201 throw new IOException ("Zip file has no entries"); 202 } 203 204 } 205 206 else if ( fileName.endsWith(".jar")) { 207 208 JarFile jarFile = new JarFile(f); 209 210 // stream to first entry is returned 211 JarEntry entry; 212 Enumeration e = jarFile.entries(); 213 if ( e.hasMoreElements()){ 214 entry = (JarEntry) e.nextElement(); 215 216 inputStream = jarFile.getInputStream(entry); 217 } else { 218 throw new IOException ("Jar file has no entries"); 219 } 220 } 221 222 else if ( fileName.endsWith(".Z")) { 223 // unix compressed 224 return openCompressedFile(f); 225 226 } 227 228 else { 229 230 // no particular extension found, assume that it is an uncompressed file 231 inputStream = getInputStreamFromFile(f); 232 } 233 234 return inputStream; 235 } 236 237 238 /** Wrapper for new FileInputStream. if System.property biojava.cache.files is set, will try to load files from memory cache. 239 * 240 * @param f 241 * @return 242 * @throws FileNotFoundException 243 */ 244 private InputStream getInputStreamFromFile(File f) throws FileNotFoundException{ 245 InputStream stream = null; 246 247 248 249 if ( cacheRawFiles ){ 250 stream = cache.getInputStream(f.getAbsolutePath()); 251 252 if ( stream == null){ 253 cache.addToCache(f.getAbsolutePath(),f); 254 stream = cache.getInputStream(f.getAbsolutePath()); 255 } 256 } 257 258 if ( stream == null) 259 stream = new FileInputStream(f); 260 261 return stream; 262 } 263 264 265 private InputStream openCompressedFile(File f) 266 throws IOException{ 267 268 InputStream is = getInputStreamFromFile(f); 269 InputStream inputStream = new UncompressInputStream(is); 270 return inputStream; 271 } 272 273 private InputStream openCompressedURL(URL u) 274 throws IOException{ 275 276 InputStream is = u.openStream(); 277 InputStream inputStream = new UncompressInputStream(is); 278 return inputStream; 279 } 280 281 282 private InputStream openGZIPFile(File f) 283 throws IOException{ 284 285 InputStream is = getInputStreamFromFile(f); 286 InputStream inputStream = new GZIPInputStream(is); 287 return inputStream; 288 } 289 290 private InputStream openGZIPURL(URL u) 291 throws IOException{ 292 293 InputStream is = u.openStream(); 294 InputStream inputStream = new GZIPInputStream(is); 295 return inputStream; 296 } 297}