001/*
002 *                  BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 * 
020 * Created on Dec 28, 2005
021 *
022 */
023package org.biojava.utils.io;
024
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Enumeration;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
038
039
040/** A class that provides an InputStream from a File. The file can be compressed or uncompressed. 
041 *  
042 * Currently supported
043 * compressions:
044 * <ul>
045 * <li>Gzip (extension .gz)</li>
046 * <li>Zip (extension .zip) in this case a stream to the first entry in the zip file is returned </li> 
047 * <li>Jar (extension .jar) same as .Zip; only stream to first entry is returned </li>
048 * <li>Z (extension .Z) compressed using the unix compress command </li>
049 * <li>for any other extension, no compression is assumed </li>
050 * </ul>
051 * 
052 * 
053 * @author Andreas Prlic
054 * @since 1.5
055 * @version %I% %G%
056 *
057 */
058public class InputStreamProvider {
059
060   /**
061    * The magic number found at the start of a GZIP stream.
062    */
063   public static final int GZIP_MAGIC = 0x1f8b;
064   public static final String CACHE_PROPERTY = "biojava.cache.files";
065   boolean cacheRawFiles ;
066
067   FlatFileCache cache ;
068   public InputStreamProvider() {
069      super();
070      cacheRawFiles = false;
071
072      String prop = System.getProperty(CACHE_PROPERTY);
073      if ( prop != null && prop.equals("true")) {
074         cacheRawFiles = true;
075         cache = FlatFileCache.getInstance();
076      }
077
078   }
079
080   /** get an InputStream for this file 
081    * 
082    * @param pathToFile the path of the file.
083    * @return an InputStream for the file located at the path.
084    * @throws IOException
085    */
086   public InputStream getInputStream(String pathToFile)
087   throws IOException
088   {
089      File f = new File(pathToFile);
090      return getInputStream(f);
091   }
092
093
094   /** open the file and read the magic number from the beginning
095    * this is used to determine the compression type
096    * 
097    * @param in an input stream to read from
098    * @return the magic number
099    * @throws IOException
100    */
101   private int getMagicNumber(InputStream in) 
102   throws IOException {
103
104
105      int t = in.read();
106      if (t < 0) throw new EOFException("Failed to read magic number");
107      int magic = (t & 0xff) << 8;
108      t = in.read();
109      if (t < 0) throw new EOFException("Failed to read magic number");
110      magic += t & 0xff;
111
112      return magic;
113   }
114
115
116   public InputStream getInputStream(URL u)
117   throws IOException{
118
119      int magic = 0;
120
121      try {
122         InputStream inStream = u.openStream(); 
123         magic = getMagicNumber(inStream);
124         inStream.close();
125      } catch (Exception e){
126         e.printStackTrace();
127      };
128
129      if (magic == UncompressInputStream.LZW_MAGIC ) {
130         // a Z compressed file
131         return openCompressedURL(u);
132      } else if (magic == GZIP_MAGIC ) {
133         return openGZIPURL(u); 
134      } else if ( u.toString().endsWith(".gz")) {
135         return openGZIPURL(u);
136      } else if ( u.toString().endsWith(".Z")) {
137         // unix compressed 
138         return openCompressedURL(u);
139
140      } else {
141         InputStream inStream = u.openStream();
142         return inStream;
143      }
144
145   }
146
147
148   /** get an InputStream for the file
149    * 
150    * @param f a File
151    * @return an InputStream for the file
152    * @throws IOException
153    */
154   public  InputStream getInputStream(File f) 
155   throws IOException
156   {
157
158      // use the magic numbers to determine the compression type, 
159      // use file extension only as 2nd choice 
160
161      int magic = 0;
162
163      try {
164         InputStream test = getInputStreamFromFile(f);
165         magic = getMagicNumber(test);
166         test.close();
167      } catch (Exception e){
168          System.err.println("Problem while loading: " + f);
169         e.printStackTrace();
170      };
171
172      InputStream inputStream = null;
173
174      String fileName = f.getName();
175
176      if (magic == UncompressInputStream.LZW_MAGIC ) {
177         // a Z compressed file
178         return openCompressedFile(f);
179      }
180
181      else if (magic == GZIP_MAGIC ) {
182         return openGZIPFile(f); 
183      }
184
185      else if ( fileName.endsWith(".gz")) {
186         return openGZIPFile(f);
187      } 
188
189      else if ( fileName.endsWith(".zip")){
190
191         ZipFile zipfile = new ZipFile(f);
192
193         // stream to first entry is returned ...
194         ZipEntry entry;
195         Enumeration e = zipfile.entries();
196         if ( e.hasMoreElements()){
197            entry = (ZipEntry) e.nextElement();
198            inputStream = zipfile.getInputStream(entry);
199
200         } else {
201            throw new IOException ("Zip file has no entries");
202         }
203
204      } 
205
206      else if ( fileName.endsWith(".jar")) {
207
208         JarFile jarFile = new JarFile(f);
209
210         // stream to first entry is returned
211         JarEntry entry;
212         Enumeration e = jarFile.entries();
213         if ( e.hasMoreElements()){
214            entry = (JarEntry) e.nextElement();
215
216            inputStream = jarFile.getInputStream(entry);
217         } else {
218            throw new IOException ("Jar file has no entries");
219         }
220      } 
221
222      else if ( fileName.endsWith(".Z")) {
223         // unix compressed 
224         return openCompressedFile(f);
225
226      }
227
228      else {
229
230         // no particular extension found, assume that it is an uncompressed file
231         inputStream = getInputStreamFromFile(f);
232      }
233
234      return inputStream;
235   }
236
237
238   /** Wrapper for new FileInputStream. if System.property biojava.cache.files is set, will try to load files from memory cache.
239    * 
240    * @param f
241    * @return
242    * @throws FileNotFoundException
243    */
244   private InputStream getInputStreamFromFile(File f) throws FileNotFoundException{
245      InputStream stream = null;
246
247
248
249      if ( cacheRawFiles ){
250         stream = cache.getInputStream(f.getAbsolutePath());
251
252         if ( stream == null){
253            cache.addToCache(f.getAbsolutePath(),f);
254            stream = cache.getInputStream(f.getAbsolutePath());
255         }
256      }
257
258      if ( stream == null)
259         stream = new FileInputStream(f);                  
260      
261      return stream;
262   }
263
264
265   private InputStream openCompressedFile(File f)
266   throws IOException{
267
268      InputStream is           =  getInputStreamFromFile(f);
269      InputStream inputStream =  new UncompressInputStream(is);
270      return inputStream;
271   }
272
273   private InputStream openCompressedURL(URL u)
274   throws IOException{
275
276      InputStream is           =  u.openStream();
277      InputStream inputStream =  new UncompressInputStream(is);
278      return inputStream;
279   }
280
281
282   private InputStream openGZIPFile(File f) 
283   throws IOException{
284
285      InputStream is      = getInputStreamFromFile(f);
286      InputStream inputStream = new GZIPInputStream(is);
287      return inputStream;
288   }
289
290   private InputStream openGZIPURL(URL u) 
291   throws IOException{
292
293      InputStream is      = u.openStream();
294      InputStream inputStream = new GZIPInputStream(is);
295      return inputStream;
296   }
297}