001/*
002 *                  BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Dec 28, 2005
021 *
022 */
023package org.biojava.nbio.core.util;
024
025import java.io.*;
026import java.net.URISyntaxException;
027import java.net.URL;
028import java.util.Enumeration;
029import java.util.jar.JarEntry;
030import java.util.jar.JarFile;
031import java.util.zip.GZIPInputStream;
032import java.util.zip.ZipEntry;
033import java.util.zip.ZipFile;
034
035//import org.slf4j.Logger;
036//import org.slf4j.LoggerFactory;
037
038
039/** A class that provides an InputStream from a File. The file can be compressed or uncompressed.
040 *
041 * Currently supported
042 * compressions:
043 * <ul>
044 * <li>Gzip (extension .gz)</li>
045 * <li>Zip (extension .zip) in this case a stream to the first entry in the zip file is returned </li>
046 * <li>Jar (extension .jar) same as .Zip; only stream to first entry is returned </li>
047 * <li>Z (extension .Z) compressed using the unix compress command </li>
048 * <li>for any other extension, no compression is assumed </li>
049 * </ul>
050 *
051 *
052 * @author Andreas Prlic
053 * @since 1.5
054 * @version %I% %G%
055 *
056 */
057public class InputStreamProvider {
058
059        //private final static Logger logger = LoggerFactory.getLogger(InputStreamProvider.class);
060
061        /**
062         * The magic number found at the start of a GZIP stream.
063         */
064        public static final int GZIP_MAGIC = 0x1f8b;
065        public static final String CACHE_PROPERTY = "biojava.cache.files";
066
067        private boolean cacheRawFiles ;
068
069        public InputStreamProvider() {
070                super();
071                cacheRawFiles = false;
072
073                String prop = System.getProperty(CACHE_PROPERTY);
074                if ( "true".equals(prop)) {
075                        cacheRawFiles = true;
076
077                }
078
079        }
080
081        /**
082         * Get an InputStream for given file path.
083         * The caller is responsible for closing the stream or otherwise
084         * a resource leak can occur.
085         * @param pathToFile the path of the file.
086         * @return an InputStream for the file located at the path.
087         * @throws IOException
088         */
089        public InputStream getInputStream(String pathToFile)
090        throws IOException
091        {
092                File f = new File(pathToFile);
093                return getInputStream(f);
094        }
095
096
097        /** open the file and read the magic number from the beginning
098         * this is used to determine the compression type
099         *
100         * @param in an input stream to read from
101         * @return the magic number
102         * @throws IOException
103         */
104        private int getMagicNumber(InputStream in)
105        throws IOException {
106
107
108                int t = in.read();
109                if (t < 0) throw new EOFException("Failed to read magic number");
110                int magic = (t & 0xff) << 8;
111                t = in.read();
112                if (t < 0) throw new EOFException("Failed to read magic number");
113                magic += t & 0xff;
114
115                return magic;
116        }
117
118
119        public InputStream getInputStream(URL u)
120        throws IOException{
121                
122                if ("file".equals(u.getProtocol())) {
123                        try {
124                                return getInputStream(new File(u.toURI().getPath()));
125                        } catch (URISyntaxException e) {
126                                throw new RuntimeException(e);
127                        }
128                }
129
130                int magic = 0;
131
132
133                InputStream inStream = u.openStream();
134                magic = getMagicNumber(inStream);
135                inStream.close();
136
137
138                if (magic == UncompressInputStream.LZW_MAGIC ) {
139                        // a Z compressed file
140                        return openCompressedURL(u);
141                } else if (magic == GZIP_MAGIC ) {
142                        return openGZIPURL(u);
143                } else if ( u.getPath().endsWith(".gz")) {
144                        return openGZIPURL(u);
145                } else if ( u.getPath().endsWith(".Z")) {
146                        // unix compressed
147                        return openCompressedURL(u);
148
149                } else {
150                        inStream = u.openStream();
151                        return inStream;
152                }
153
154        }
155
156
157        /**
158         * Get an InputStream for the file.
159         * The caller is responsible for closing the stream or otherwise
160         * a resource leak can occur.
161         * @param f a File
162         * @return an InputStream for the file
163         * @throws IOException
164         */
165        public  InputStream getInputStream(File f)
166        throws IOException
167        {
168
169                // use the magic numbers to determine the compression type,
170                // use file extension only as 2nd choice
171
172                int magic = 0;
173
174
175                InputStream test = getInputStreamFromFile(f);
176                magic = getMagicNumber(test);
177                test.close();
178
179
180                InputStream inputStream = null;
181
182                String fileName = f.getName();
183
184                if (magic == UncompressInputStream.LZW_MAGIC ) {
185                        // a Z compressed file
186                        return openCompressedFile(f);
187                }
188
189                else if (magic == GZIP_MAGIC ) {
190                        return openGZIPFile(f);
191                }
192
193                else if ( fileName.endsWith(".gz")) {
194                        return openGZIPFile(f);
195                }
196
197                else if ( fileName.endsWith(".zip")){
198
199                        ZipFile zipfile = new ZipFile(f);
200
201                        // stream to first entry is returned ...
202                        ZipEntry entry;
203                        Enumeration<? extends ZipEntry> e = zipfile.entries();
204                        if ( e.hasMoreElements()){
205                                entry = e.nextElement();
206                                inputStream = zipfile.getInputStream(entry);
207                        } else {
208                                throw new IOException ("Zip file has no entries");
209                        }
210
211                }
212
213                else if ( fileName.endsWith(".jar")) {
214
215                        JarFile jarFile = new JarFile(f);
216
217                        // stream to first entry is returned
218                        JarEntry entry;
219                        Enumeration<JarEntry> e = jarFile.entries();
220                        if ( e.hasMoreElements()){
221                                entry = e.nextElement();
222                                inputStream = jarFile.getInputStream(entry);
223                        } else {
224                                throw new IOException ("Jar file has no entries");
225                        }
226                }
227
228                else if ( fileName.endsWith(".Z")) {
229                        // unix compressed
230                        return openCompressedFile(f);
231
232                }
233
234                else {
235
236                        // no particular extension found, assume that it is an uncompressed file
237                        inputStream = getInputStreamFromFile(f);
238                }
239
240                return inputStream;
241        }
242
243
244        /**
245         * Wrapper for new FileInputStream. if System.property biojava.cache.files is set, will try to load files from memory cache.
246         *
247         * @param f
248         * @return
249         * @throws FileNotFoundException
250         */
251        private InputStream getInputStreamFromFile(File f) throws FileNotFoundException{
252                InputStream stream = null;
253
254
255
256                if ( cacheRawFiles ){
257                        stream = FlatFileCache.getInputStream(f.getAbsolutePath());
258
259                        if ( stream == null){
260                                FlatFileCache.addToCache(f.getAbsolutePath(),f);
261                                stream = FlatFileCache.getInputStream(f.getAbsolutePath());
262                        }
263                }
264
265                if ( stream == null)
266                        stream = new FileInputStream(f);
267
268                return stream;
269        }
270
271
272        private InputStream openCompressedFile(File f)
273        throws IOException{
274
275                InputStream is           =  getInputStreamFromFile(f);
276                InputStream inputStream =  new UncompressInputStream(is);
277                return inputStream;
278        }
279
280        private InputStream openCompressedURL(URL u)
281        throws IOException{
282
283                InputStream is           =  u.openStream();
284                InputStream inputStream =  new UncompressInputStream(is);
285                return inputStream;
286        }
287
288
289        private InputStream openGZIPFile(File f)
290        throws IOException{
291
292                InputStream is      = getInputStreamFromFile(f);
293                InputStream inputStream = new GZIPInputStream(is);
294                return inputStream;
295        }
296
297        private InputStream openGZIPURL(URL u)
298        throws IOException{
299
300                InputStream is      = u.openStream();
301                InputStream inputStream = new GZIPInputStream(is);
302                return inputStream;
303        }
304}