001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Dec 28, 2005 021 * 022 */ 023package org.biojava.nbio.core.util; 024 025import java.io.*; 026import java.net.URL; 027import java.util.Enumeration; 028import java.util.jar.JarEntry; 029import java.util.jar.JarFile; 030import java.util.zip.GZIPInputStream; 031import java.util.zip.ZipEntry; 032import java.util.zip.ZipFile; 033 034//import org.slf4j.Logger; 035//import org.slf4j.LoggerFactory; 036 037 038/** A class that provides an InputStream from a File. The file can be compressed or uncompressed. 039 * 040 * Currently supported 041 * compressions: 042 * <ul> 043 * <li>Gzip (extension .gz)</li> 044 * <li>Zip (extension .zip) in this case a stream to the first entry in the zip file is returned </li> 045 * <li>Jar (extension .jar) same as .Zip; only stream to first entry is returned </li> 046 * <li>Z (extension .Z) compressed using the unix compress command </li> 047 * <li>for any other extension, no compression is assumed </li> 048 * </ul> 049 * 050 * 051 * @author Andreas Prlic 052 * @since 1.5 053 * @version %I% %G% 054 * 055 */ 056public class InputStreamProvider { 057 058 //private final static Logger logger = LoggerFactory.getLogger(InputStreamProvider.class); 059 060 /** 061 * The magic number found at the start of a GZIP stream. 062 */ 063 public static final int GZIP_MAGIC = 0x1f8b; 064 public static final String CACHE_PROPERTY = "biojava.cache.files"; 065 066 private boolean cacheRawFiles ; 067 068 public InputStreamProvider() { 069 super(); 070 cacheRawFiles = false; 071 072 String prop = System.getProperty(CACHE_PROPERTY); 073 if ( prop != null && prop.equals("true")) { 074 cacheRawFiles = true; 075 076 } 077 078 } 079 080 /** 081 * Get an InputStream for given file path. 082 * The caller is responsible for closing the stream or otherwise 083 * a resource leak can occur. 084 * @param pathToFile the path of the file. 085 * @return an InputStream for the file located at the path. 086 * @throws IOException 087 */ 088 public InputStream getInputStream(String pathToFile) 089 throws IOException 090 { 091 File f = new File(pathToFile); 092 return getInputStream(f); 093 } 094 095 096 /** open the file and read the magic number from the beginning 097 * this is used to determine the compression type 098 * 099 * @param in an input stream to read from 100 * @return the magic number 101 * @throws IOException 102 */ 103 private int getMagicNumber(InputStream in) 104 throws IOException { 105 106 107 int t = in.read(); 108 if (t < 0) throw new EOFException("Failed to read magic number"); 109 int magic = (t & 0xff) << 8; 110 t = in.read(); 111 if (t < 0) throw new EOFException("Failed to read magic number"); 112 magic += t & 0xff; 113 114 return magic; 115 } 116 117 118 public InputStream getInputStream(URL u) 119 throws IOException{ 120 121 int magic = 0; 122 123 124 InputStream inStream = u.openStream(); 125 magic = getMagicNumber(inStream); 126 inStream.close(); 127 128 129 if (magic == UncompressInputStream.LZW_MAGIC ) { 130 // a Z compressed file 131 return openCompressedURL(u); 132 } else if (magic == GZIP_MAGIC ) { 133 return openGZIPURL(u); 134 } else if ( u.getPath().endsWith(".gz")) { 135 return openGZIPURL(u); 136 } else if ( u.getPath().endsWith(".Z")) { 137 // unix compressed 138 return openCompressedURL(u); 139 140 } else { 141 inStream = u.openStream(); 142 return inStream; 143 } 144 145 } 146 147 148 /** 149 * Get an InputStream for the file. 150 * The caller is responsible for closing the stream or otherwise 151 * a resource leak can occur. 152 * @param f a File 153 * @return an InputStream for the file 154 * @throws IOException 155 */ 156 public InputStream getInputStream(File f) 157 throws IOException 158 { 159 160 // use the magic numbers to determine the compression type, 161 // use file extension only as 2nd choice 162 163 int magic = 0; 164 165 166 InputStream test = getInputStreamFromFile(f); 167 magic = getMagicNumber(test); 168 test.close(); 169 170 171 InputStream inputStream = null; 172 173 String fileName = f.getName(); 174 175 if (magic == UncompressInputStream.LZW_MAGIC ) { 176 // a Z compressed file 177 return openCompressedFile(f); 178 } 179 180 else if (magic == GZIP_MAGIC ) { 181 return openGZIPFile(f); 182 } 183 184 else if ( fileName.endsWith(".gz")) { 185 return openGZIPFile(f); 186 } 187 188 else if ( fileName.endsWith(".zip")){ 189 190 ZipFile zipfile = new ZipFile(f); 191 192 // stream to first entry is returned ... 193 ZipEntry entry; 194 Enumeration<? extends ZipEntry> e = zipfile.entries(); 195 if ( e.hasMoreElements()){ 196 entry = e.nextElement(); 197 inputStream = zipfile.getInputStream(entry); 198 } else { 199 throw new IOException ("Zip file has no entries"); 200 } 201 202 } 203 204 else if ( fileName.endsWith(".jar")) { 205 206 JarFile jarFile = new JarFile(f); 207 208 // stream to first entry is returned 209 JarEntry entry; 210 Enumeration<JarEntry> e = jarFile.entries(); 211 if ( e.hasMoreElements()){ 212 entry = e.nextElement(); 213 inputStream = jarFile.getInputStream(entry); 214 } else { 215 throw new IOException ("Jar file has no entries"); 216 } 217 } 218 219 else if ( fileName.endsWith(".Z")) { 220 // unix compressed 221 return openCompressedFile(f); 222 223 } 224 225 else { 226 227 // no particular extension found, assume that it is an uncompressed file 228 inputStream = getInputStreamFromFile(f); 229 } 230 231 return inputStream; 232 } 233 234 235 /** 236 * Wrapper for new FileInputStream. if System.property biojava.cache.files is set, will try to load files from memory cache. 237 * 238 * @param f 239 * @return 240 * @throws FileNotFoundException 241 */ 242 private InputStream getInputStreamFromFile(File f) throws FileNotFoundException{ 243 InputStream stream = null; 244 245 246 247 if ( cacheRawFiles ){ 248 stream = FlatFileCache.getInputStream(f.getAbsolutePath()); 249 250 if ( stream == null){ 251 FlatFileCache.addToCache(f.getAbsolutePath(),f); 252 stream = FlatFileCache.getInputStream(f.getAbsolutePath()); 253 } 254 } 255 256 if ( stream == null) 257 stream = new FileInputStream(f); 258 259 return stream; 260 } 261 262 263 private InputStream openCompressedFile(File f) 264 throws IOException{ 265 266 InputStream is = getInputStreamFromFile(f); 267 InputStream inputStream = new UncompressInputStream(is); 268 return inputStream; 269 } 270 271 private InputStream openCompressedURL(URL u) 272 throws IOException{ 273 274 InputStream is = u.openStream(); 275 InputStream inputStream = new UncompressInputStream(is); 276 return inputStream; 277 } 278 279 280 private InputStream openGZIPFile(File f) 281 throws IOException{ 282 283 InputStream is = getInputStreamFromFile(f); 284 InputStream inputStream = new GZIPInputStream(is); 285 return inputStream; 286 } 287 288 private InputStream openGZIPURL(URL u) 289 throws IOException{ 290 291 InputStream is = u.openStream(); 292 InputStream inputStream = new GZIPInputStream(is); 293 return inputStream; 294 } 295}