001/*
002 *                  BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Dec 28, 2005
021 *
022 */
023package org.biojava.nbio.core.util;
024
025import java.io.*;
026import java.net.URL;
027import java.util.Enumeration;
028import java.util.jar.JarEntry;
029import java.util.jar.JarFile;
030import java.util.zip.GZIPInputStream;
031import java.util.zip.ZipEntry;
032import java.util.zip.ZipFile;
033
034//import org.slf4j.Logger;
035//import org.slf4j.LoggerFactory;
036
037
038/** A class that provides an InputStream from a File. The file can be compressed or uncompressed.
039 *
040 * Currently supported
041 * compressions:
042 * <ul>
043 * <li>Gzip (extension .gz)</li>
044 * <li>Zip (extension .zip) in this case a stream to the first entry in the zip file is returned </li>
045 * <li>Jar (extension .jar) same as .Zip; only stream to first entry is returned </li>
046 * <li>Z (extension .Z) compressed using the unix compress command </li>
047 * <li>for any other extension, no compression is assumed </li>
048 * </ul>
049 *
050 *
051 * @author Andreas Prlic
052 * @since 1.5
053 * @version %I% %G%
054 *
055 */
056public class InputStreamProvider {
057
058        //private final static Logger logger = LoggerFactory.getLogger(InputStreamProvider.class);
059
060        /**
061         * The magic number found at the start of a GZIP stream.
062         */
063        public static final int GZIP_MAGIC = 0x1f8b;
064        public static final String CACHE_PROPERTY = "biojava.cache.files";
065
066        private boolean cacheRawFiles ;
067
068        FlatFileCache cache ;
069        public InputStreamProvider() {
070                super();
071                cacheRawFiles = false;
072
073                String prop = System.getProperty(CACHE_PROPERTY);
074                if ( prop != null && prop.equals("true")) {
075                        cacheRawFiles = true;
076                        cache = FlatFileCache.getInstance();
077                }
078
079        }
080
081        /**
082         * Get an InputStream for given file path.
083         * The caller is responsible for closing the stream or otherwise
084         * a resource leak can occur.
085         * @param pathToFile the path of the file.
086         * @return an InputStream for the file located at the path.
087         * @throws IOException
088         */
089        public InputStream getInputStream(String pathToFile)
090        throws IOException
091        {
092                File f = new File(pathToFile);
093                return getInputStream(f);
094        }
095
096
097        /** open the file and read the magic number from the beginning
098         * this is used to determine the compression type
099         *
100         * @param in an input stream to read from
101         * @return the magic number
102         * @throws IOException
103         */
104        private int getMagicNumber(InputStream in)
105        throws IOException {
106
107
108                int t = in.read();
109                if (t < 0) throw new EOFException("Failed to read magic number");
110                int magic = (t & 0xff) << 8;
111                t = in.read();
112                if (t < 0) throw new EOFException("Failed to read magic number");
113                magic += t & 0xff;
114
115                return magic;
116        }
117
118
119        public InputStream getInputStream(URL u)
120        throws IOException{
121
122                int magic = 0;
123
124
125                InputStream inStream = u.openStream();
126                magic = getMagicNumber(inStream);
127                inStream.close();
128
129
130                if (magic == UncompressInputStream.LZW_MAGIC ) {
131                        // a Z compressed file
132                        return openCompressedURL(u);
133                } else if (magic == GZIP_MAGIC ) {
134                        return openGZIPURL(u);
135                } else if ( u.getPath().endsWith(".gz")) {
136                        return openGZIPURL(u);
137                } else if ( u.getPath().endsWith(".Z")) {
138                        // unix compressed
139                        return openCompressedURL(u);
140
141                } else {
142                        inStream = u.openStream();
143                        return inStream;
144                }
145
146        }
147
148
149        /**
150         * Get an InputStream for the file.
151         * The caller is responsible for closing the stream or otherwise
152         * a resource leak can occur.
153         * @param f a File
154         * @return an InputStream for the file
155         * @throws IOException
156         */
157        public  InputStream getInputStream(File f)
158        throws IOException
159        {
160
161                // use the magic numbers to determine the compression type,
162                // use file extension only as 2nd choice
163
164                int magic = 0;
165
166
167                InputStream test = getInputStreamFromFile(f);
168                magic = getMagicNumber(test);
169                test.close();
170
171
172                InputStream inputStream = null;
173
174                String fileName = f.getName();
175
176                if (magic == UncompressInputStream.LZW_MAGIC ) {
177                        // a Z compressed file
178                        return openCompressedFile(f);
179                }
180
181                else if (magic == GZIP_MAGIC ) {
182                        return openGZIPFile(f);
183                }
184
185                else if ( fileName.endsWith(".gz")) {
186                        return openGZIPFile(f);
187                }
188
189                else if ( fileName.endsWith(".zip")){
190
191                        ZipFile zipfile = new ZipFile(f);
192
193                        // stream to first entry is returned ...
194                        ZipEntry entry;
195                        Enumeration<? extends ZipEntry> e = zipfile.entries();
196                        if ( e.hasMoreElements()){
197                                entry = e.nextElement();
198                                inputStream = zipfile.getInputStream(entry);
199                        } else {
200                                throw new IOException ("Zip file has no entries");
201                        }
202
203                }
204
205                else if ( fileName.endsWith(".jar")) {
206
207                        JarFile jarFile = new JarFile(f);
208
209                        // stream to first entry is returned
210                        JarEntry entry;
211                        Enumeration<JarEntry> e = jarFile.entries();
212                        if ( e.hasMoreElements()){
213                                entry = e.nextElement();
214                                inputStream = jarFile.getInputStream(entry);
215                        } else {
216                                throw new IOException ("Jar file has no entries");
217                        }
218                }
219
220                else if ( fileName.endsWith(".Z")) {
221                        // unix compressed
222                        return openCompressedFile(f);
223
224                }
225
226                else {
227
228                        // no particular extension found, assume that it is an uncompressed file
229                        inputStream = getInputStreamFromFile(f);
230                }
231
232                return inputStream;
233        }
234
235
236        /**
237         * Wrapper for new FileInputStream. if System.property biojava.cache.files is set, will try to load files from memory cache.
238         *
239         * @param f
240         * @return
241         * @throws FileNotFoundException
242         */
243        private InputStream getInputStreamFromFile(File f) throws FileNotFoundException{
244                InputStream stream = null;
245
246
247
248                if ( cacheRawFiles ){
249                        stream = FlatFileCache.getInputStream(f.getAbsolutePath());
250
251                        if ( stream == null){
252                                FlatFileCache.addToCache(f.getAbsolutePath(),f);
253                                stream = FlatFileCache.getInputStream(f.getAbsolutePath());
254                        }
255                }
256
257                if ( stream == null)
258                        stream = new FileInputStream(f);
259
260                return stream;
261        }
262
263
264        private InputStream openCompressedFile(File f)
265        throws IOException{
266
267                InputStream is           =  getInputStreamFromFile(f);
268                InputStream inputStream =  new UncompressInputStream(is);
269                return inputStream;
270        }
271
272        private InputStream openCompressedURL(URL u)
273        throws IOException{
274
275                InputStream is           =  u.openStream();
276                InputStream inputStream =  new UncompressInputStream(is);
277                return inputStream;
278        }
279
280
281        private InputStream openGZIPFile(File f)
282        throws IOException{
283
284                InputStream is      = getInputStreamFromFile(f);
285                InputStream inputStream = new GZIPInputStream(is);
286                return inputStream;
287        }
288
289        private InputStream openGZIPURL(URL u)
290        throws IOException{
291
292                InputStream is      = u.openStream();
293                InputStream inputStream = new GZIPInputStream(is);
294                return inputStream;
295        }
296}