001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Dec 28, 2005 021 * 022 */ 023package org.biojava.nbio.core.util; 024 025import java.io.*; 026import java.net.URL; 027import java.util.Enumeration; 028import java.util.jar.JarEntry; 029import java.util.jar.JarFile; 030import java.util.zip.GZIPInputStream; 031import java.util.zip.ZipEntry; 032import java.util.zip.ZipFile; 033 034//import org.slf4j.Logger; 035//import org.slf4j.LoggerFactory; 036 037 038/** A class that provides an InputStream from a File. The file can be compressed or uncompressed. 039 * 040 * Currently supported 041 * compressions: 042 * <ul> 043 * <li>Gzip (extension .gz)</li> 044 * <li>Zip (extension .zip) in this case a stream to the first entry in the zip file is returned </li> 045 * <li>Jar (extension .jar) same as .Zip; only stream to first entry is returned </li> 046 * <li>Z (extension .Z) compressed using the unix compress command </li> 047 * <li>for any other extension, no compression is assumed </li> 048 * </ul> 049 * 050 * 051 * @author Andreas Prlic 052 * @since 1.5 053 * @version %I% %G% 054 * 055 */ 056public class InputStreamProvider { 057 058 //private final static Logger logger = LoggerFactory.getLogger(InputStreamProvider.class); 059 060 /** 061 * The magic number found at the start of a GZIP stream. 062 */ 063 public static final int GZIP_MAGIC = 0x1f8b; 064 public static final String CACHE_PROPERTY = "biojava.cache.files"; 065 066 private boolean cacheRawFiles ; 067 068 FlatFileCache cache ; 069 public InputStreamProvider() { 070 super(); 071 cacheRawFiles = false; 072 073 String prop = System.getProperty(CACHE_PROPERTY); 074 if ( prop != null && prop.equals("true")) { 075 cacheRawFiles = true; 076 cache = FlatFileCache.getInstance(); 077 } 078 079 } 080 081 /** 082 * Get an InputStream for given file path. 083 * The caller is responsible for closing the stream or otherwise 084 * a resource leak can occur. 085 * @param pathToFile the path of the file. 086 * @return an InputStream for the file located at the path. 087 * @throws IOException 088 */ 089 public InputStream getInputStream(String pathToFile) 090 throws IOException 091 { 092 File f = new File(pathToFile); 093 return getInputStream(f); 094 } 095 096 097 /** open the file and read the magic number from the beginning 098 * this is used to determine the compression type 099 * 100 * @param in an input stream to read from 101 * @return the magic number 102 * @throws IOException 103 */ 104 private int getMagicNumber(InputStream in) 105 throws IOException { 106 107 108 int t = in.read(); 109 if (t < 0) throw new EOFException("Failed to read magic number"); 110 int magic = (t & 0xff) << 8; 111 t = in.read(); 112 if (t < 0) throw new EOFException("Failed to read magic number"); 113 magic += t & 0xff; 114 115 return magic; 116 } 117 118 119 public InputStream getInputStream(URL u) 120 throws IOException{ 121 122 int magic = 0; 123 124 125 InputStream inStream = u.openStream(); 126 magic = getMagicNumber(inStream); 127 inStream.close(); 128 129 130 if (magic == UncompressInputStream.LZW_MAGIC ) { 131 // a Z compressed file 132 return openCompressedURL(u); 133 } else if (magic == GZIP_MAGIC ) { 134 return openGZIPURL(u); 135 } else if ( u.getPath().endsWith(".gz")) { 136 return openGZIPURL(u); 137 } else if ( u.getPath().endsWith(".Z")) { 138 // unix compressed 139 return openCompressedURL(u); 140 141 } else { 142 inStream = u.openStream(); 143 return inStream; 144 } 145 146 } 147 148 149 /** 150 * Get an InputStream for the file. 151 * The caller is responsible for closing the stream or otherwise 152 * a resource leak can occur. 153 * @param f a File 154 * @return an InputStream for the file 155 * @throws IOException 156 */ 157 public InputStream getInputStream(File f) 158 throws IOException 159 { 160 161 // use the magic numbers to determine the compression type, 162 // use file extension only as 2nd choice 163 164 int magic = 0; 165 166 167 InputStream test = getInputStreamFromFile(f); 168 magic = getMagicNumber(test); 169 test.close(); 170 171 172 InputStream inputStream = null; 173 174 String fileName = f.getName(); 175 176 if (magic == UncompressInputStream.LZW_MAGIC ) { 177 // a Z compressed file 178 return openCompressedFile(f); 179 } 180 181 else if (magic == GZIP_MAGIC ) { 182 return openGZIPFile(f); 183 } 184 185 else if ( fileName.endsWith(".gz")) { 186 return openGZIPFile(f); 187 } 188 189 else if ( fileName.endsWith(".zip")){ 190 191 ZipFile zipfile = new ZipFile(f); 192 193 // stream to first entry is returned ... 194 ZipEntry entry; 195 Enumeration<? extends ZipEntry> e = zipfile.entries(); 196 if ( e.hasMoreElements()){ 197 entry = e.nextElement(); 198 inputStream = zipfile.getInputStream(entry); 199 } else { 200 throw new IOException ("Zip file has no entries"); 201 } 202 203 } 204 205 else if ( fileName.endsWith(".jar")) { 206 207 JarFile jarFile = new JarFile(f); 208 209 // stream to first entry is returned 210 JarEntry entry; 211 Enumeration<JarEntry> e = jarFile.entries(); 212 if ( e.hasMoreElements()){ 213 entry = e.nextElement(); 214 inputStream = jarFile.getInputStream(entry); 215 } else { 216 throw new IOException ("Jar file has no entries"); 217 } 218 } 219 220 else if ( fileName.endsWith(".Z")) { 221 // unix compressed 222 return openCompressedFile(f); 223 224 } 225 226 else { 227 228 // no particular extension found, assume that it is an uncompressed file 229 inputStream = getInputStreamFromFile(f); 230 } 231 232 return inputStream; 233 } 234 235 236 /** 237 * Wrapper for new FileInputStream. if System.property biojava.cache.files is set, will try to load files from memory cache. 238 * 239 * @param f 240 * @return 241 * @throws FileNotFoundException 242 */ 243 private InputStream getInputStreamFromFile(File f) throws FileNotFoundException{ 244 InputStream stream = null; 245 246 247 248 if ( cacheRawFiles ){ 249 stream = FlatFileCache.getInputStream(f.getAbsolutePath()); 250 251 if ( stream == null){ 252 FlatFileCache.addToCache(f.getAbsolutePath(),f); 253 stream = FlatFileCache.getInputStream(f.getAbsolutePath()); 254 } 255 } 256 257 if ( stream == null) 258 stream = new FileInputStream(f); 259 260 return stream; 261 } 262 263 264 private InputStream openCompressedFile(File f) 265 throws IOException{ 266 267 InputStream is = getInputStreamFromFile(f); 268 InputStream inputStream = new UncompressInputStream(is); 269 return inputStream; 270 } 271 272 private InputStream openCompressedURL(URL u) 273 throws IOException{ 274 275 InputStream is = u.openStream(); 276 InputStream inputStream = new UncompressInputStream(is); 277 return inputStream; 278 } 279 280 281 private InputStream openGZIPFile(File f) 282 throws IOException{ 283 284 InputStream is = getInputStreamFromFile(f); 285 InputStream inputStream = new GZIPInputStream(is); 286 return inputStream; 287 } 288 289 private InputStream openGZIPURL(URL u) 290 throws IOException{ 291 292 InputStream is = u.openStream(); 293 InputStream inputStream = new GZIPInputStream(is); 294 return inputStream; 295 } 296}