001/*
002 *                  BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Dec 28, 2005
021 *
022 */
023package org.biojava.nbio.core.util;
024
025import java.io.*;
026import java.net.URL;
027import java.util.Enumeration;
028import java.util.jar.JarEntry;
029import java.util.jar.JarFile;
030import java.util.zip.GZIPInputStream;
031import java.util.zip.ZipEntry;
032import java.util.zip.ZipFile;
033
034//import org.slf4j.Logger;
035//import org.slf4j.LoggerFactory;
036
037
038/** A class that provides an InputStream from a File. The file can be compressed or uncompressed.
039 *
040 * Currently supported
041 * compressions:
042 * <ul>
043 * <li>Gzip (extension .gz)</li>
044 * <li>Zip (extension .zip) in this case a stream to the first entry in the zip file is returned </li>
045 * <li>Jar (extension .jar) same as .Zip; only stream to first entry is returned </li>
046 * <li>Z (extension .Z) compressed using the unix compress command </li>
047 * <li>for any other extension, no compression is assumed </li>
048 * </ul>
049 *
050 *
051 * @author Andreas Prlic
052 * @since 1.5
053 * @version %I% %G%
054 *
055 */
056public class InputStreamProvider {
057
058        //private final static Logger logger = LoggerFactory.getLogger(InputStreamProvider.class);
059
060        /**
061         * The magic number found at the start of a GZIP stream.
062         */
063        public static final int GZIP_MAGIC = 0x1f8b;
064        public static final String CACHE_PROPERTY = "biojava.cache.files";
065
066        private boolean cacheRawFiles ;
067
068        public InputStreamProvider() {
069                super();
070                cacheRawFiles = false;
071
072                String prop = System.getProperty(CACHE_PROPERTY);
073                if ( prop != null && prop.equals("true")) {
074                        cacheRawFiles = true;
075
076                }
077
078        }
079
080        /**
081         * Get an InputStream for given file path.
082         * The caller is responsible for closing the stream or otherwise
083         * a resource leak can occur.
084         * @param pathToFile the path of the file.
085         * @return an InputStream for the file located at the path.
086         * @throws IOException
087         */
088        public InputStream getInputStream(String pathToFile)
089        throws IOException
090        {
091                File f = new File(pathToFile);
092                return getInputStream(f);
093        }
094
095
096        /** open the file and read the magic number from the beginning
097         * this is used to determine the compression type
098         *
099         * @param in an input stream to read from
100         * @return the magic number
101         * @throws IOException
102         */
103        private int getMagicNumber(InputStream in)
104        throws IOException {
105
106
107                int t = in.read();
108                if (t < 0) throw new EOFException("Failed to read magic number");
109                int magic = (t & 0xff) << 8;
110                t = in.read();
111                if (t < 0) throw new EOFException("Failed to read magic number");
112                magic += t & 0xff;
113
114                return magic;
115        }
116
117
118        public InputStream getInputStream(URL u)
119        throws IOException{
120
121                int magic = 0;
122
123
124                InputStream inStream = u.openStream();
125                magic = getMagicNumber(inStream);
126                inStream.close();
127
128
129                if (magic == UncompressInputStream.LZW_MAGIC ) {
130                        // a Z compressed file
131                        return openCompressedURL(u);
132                } else if (magic == GZIP_MAGIC ) {
133                        return openGZIPURL(u);
134                } else if ( u.getPath().endsWith(".gz")) {
135                        return openGZIPURL(u);
136                } else if ( u.getPath().endsWith(".Z")) {
137                        // unix compressed
138                        return openCompressedURL(u);
139
140                } else {
141                        inStream = u.openStream();
142                        return inStream;
143                }
144
145        }
146
147
148        /**
149         * Get an InputStream for the file.
150         * The caller is responsible for closing the stream or otherwise
151         * a resource leak can occur.
152         * @param f a File
153         * @return an InputStream for the file
154         * @throws IOException
155         */
156        public  InputStream getInputStream(File f)
157        throws IOException
158        {
159
160                // use the magic numbers to determine the compression type,
161                // use file extension only as 2nd choice
162
163                int magic = 0;
164
165
166                InputStream test = getInputStreamFromFile(f);
167                magic = getMagicNumber(test);
168                test.close();
169
170
171                InputStream inputStream = null;
172
173                String fileName = f.getName();
174
175                if (magic == UncompressInputStream.LZW_MAGIC ) {
176                        // a Z compressed file
177                        return openCompressedFile(f);
178                }
179
180                else if (magic == GZIP_MAGIC ) {
181                        return openGZIPFile(f);
182                }
183
184                else if ( fileName.endsWith(".gz")) {
185                        return openGZIPFile(f);
186                }
187
188                else if ( fileName.endsWith(".zip")){
189
190                        ZipFile zipfile = new ZipFile(f);
191
192                        // stream to first entry is returned ...
193                        ZipEntry entry;
194                        Enumeration<? extends ZipEntry> e = zipfile.entries();
195                        if ( e.hasMoreElements()){
196                                entry = e.nextElement();
197                                inputStream = zipfile.getInputStream(entry);
198                        } else {
199                                throw new IOException ("Zip file has no entries");
200                        }
201
202                }
203
204                else if ( fileName.endsWith(".jar")) {
205
206                        JarFile jarFile = new JarFile(f);
207
208                        // stream to first entry is returned
209                        JarEntry entry;
210                        Enumeration<JarEntry> e = jarFile.entries();
211                        if ( e.hasMoreElements()){
212                                entry = e.nextElement();
213                                inputStream = jarFile.getInputStream(entry);
214                        } else {
215                                throw new IOException ("Jar file has no entries");
216                        }
217                }
218
219                else if ( fileName.endsWith(".Z")) {
220                        // unix compressed
221                        return openCompressedFile(f);
222
223                }
224
225                else {
226
227                        // no particular extension found, assume that it is an uncompressed file
228                        inputStream = getInputStreamFromFile(f);
229                }
230
231                return inputStream;
232        }
233
234
235        /**
236         * Wrapper for new FileInputStream. if System.property biojava.cache.files is set, will try to load files from memory cache.
237         *
238         * @param f
239         * @return
240         * @throws FileNotFoundException
241         */
242        private InputStream getInputStreamFromFile(File f) throws FileNotFoundException{
243                InputStream stream = null;
244
245
246
247                if ( cacheRawFiles ){
248                        stream = FlatFileCache.getInputStream(f.getAbsolutePath());
249
250                        if ( stream == null){
251                                FlatFileCache.addToCache(f.getAbsolutePath(),f);
252                                stream = FlatFileCache.getInputStream(f.getAbsolutePath());
253                        }
254                }
255
256                if ( stream == null)
257                        stream = new FileInputStream(f);
258
259                return stream;
260        }
261
262
263        private InputStream openCompressedFile(File f)
264        throws IOException{
265
266                InputStream is           =  getInputStreamFromFile(f);
267                InputStream inputStream =  new UncompressInputStream(is);
268                return inputStream;
269        }
270
271        private InputStream openCompressedURL(URL u)
272        throws IOException{
273
274                InputStream is           =  u.openStream();
275                InputStream inputStream =  new UncompressInputStream(is);
276                return inputStream;
277        }
278
279
280        private InputStream openGZIPFile(File f)
281        throws IOException{
282
283                InputStream is      = getInputStreamFromFile(f);
284                InputStream inputStream = new GZIPInputStream(is);
285                return inputStream;
286        }
287
288        private InputStream openGZIPURL(URL u)
289        throws IOException{
290
291                InputStream is      = u.openStream();
292                InputStream inputStream = new GZIPInputStream(is);
293                return inputStream;
294        }
295}