001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021/*
022 * @(#)UncompressInputStream.java           0.3-3 06/05/2001
023 *
024 *  This file is part of the HTTPClient package
025 *  Copyright (C) 1996-2001 Ronald Tschalar
026 *
027 *  This library is free software; you can redistribute it and/or
028 *  modify it under the terms of the GNU Lesser General Public
029 *  License as published by the Free Software Foundation; either
030 *  version 2 of the License, or (at your option) any later version.
031 *
032 *  This library is distributed in the hope that it will be useful,
033 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
034 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
035 *  Lesser General Public License for more details.
036 *
037 *  You should have received a copy of the GNU Lesser General Public
038 *  License along with this library; if not, write to the Free
039 *  Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
040 *  MA 02111-1307, USA
041 *
042 *  For questions, suggestions, bug-reports, enhancement-requests etc.
043 *  I may be contacted at:
044 *
045 *  ronald@xxxxxxxxxxxxx
046 *
047 *  The HTTPClient's home page is located at:
048 *
049 *  http://www.innovation.ch/java/HTTPClient/
050 */
051
052package org.biojava.nbio.core.util;
053
054import org.slf4j.Logger;
055import org.slf4j.LoggerFactory;
056
057import java.io.*;
058
059
060/**
061 * This class decompresses an input stream containing data compressed with
062 * the unix "compress" utility (LZC, a LZW variant). This code is based
063 * heavily on the <var>unlzw.c</var> code in <var>gzip-1.2.4</var> (written
064 * by Peter Jannesen) and the original compress code.
065 *
066 *
067 *  This version has been modified from the original 0.3-3 version by the
068 *  Unidata Program Center (support@xxxxxxxxxxxxxxxx) to make the constructor
069 *  public and to fix a couple of bugs.
070 *  Also:
071 *   - markSupported() returns false
072 *   - add uncompress() static method
073 *
074 * @version 0.3-3 06/05/2001
075 * @author  Ronald Tschalar
076 * @author  Unidata Program Center
077 * @author  Richard Holland - making LZW_MAGIC package-visible.
078 */
079public class UncompressInputStream extends FilterInputStream {
080
081        private final static Logger logger = LoggerFactory.getLogger(UncompressInputStream.class);
082
083        /**
084         * @param is the input stream to decompress
085         * @throws IOException if the header is malformed
086         */
087        public UncompressInputStream(InputStream is) throws IOException {
088                super(is);
089                parse_header();
090        }
091
092
093        byte[] one = new byte[1];
094
095        @Override
096public synchronized int read() throws IOException {
097                int b = read(one, 0, 1);
098                if (b == 1)
099                        return (one[0] & 0xff);
100                else
101                        return -1;
102        }
103
104
105        // string table stuff
106        private static final int TBL_CLEAR = 0x100;
107        private static final int TBL_FIRST = TBL_CLEAR + 1;
108
109        private int[] tab_prefix;
110        private byte[] tab_suffix;
111        private int[] zeros = new int[256];
112        private byte[] stack;
113
114        // various state
115        private boolean block_mode;
116        private int n_bits;
117        private int maxbits;
118        private int maxmaxcode;
119        private int maxcode;
120        private int bitmask;
121        private int oldcode;
122        private byte finchar;
123        private int stackp;
124        private int free_ent;
125
126        // input buffer
127        private byte[] data = new byte[10000];
128        private int bit_pos = 0, end = 0, got = 0;
129        private boolean eof = false;
130        private static final int EXTRA = 64;
131
132
133        @Override
134public synchronized int read(byte[] buf, int off, int len)
135                        throws IOException {
136                if (eof) return -1;
137                int start = off;
138
139/* Using local copies of various variables speeds things up by as
140                 * much as 30% !
141                 */
142                int[] l_tab_prefix = tab_prefix;
143                byte[] l_tab_suffix = tab_suffix;
144                byte[] l_stack = stack;
145                int l_n_bits = n_bits;
146                int l_maxcode = maxcode;
147                int l_maxmaxcode = maxmaxcode;
148                int l_bitmask = bitmask;
149                int l_oldcode = oldcode;
150                byte l_finchar = finchar;
151                int l_stackp = stackp;
152                int l_free_ent = free_ent;
153                byte[] l_data = data;
154                int l_bit_pos = bit_pos;
155
156
157// empty stack if stuff still left
158
159                int s_size = l_stack.length - l_stackp;
160                if (s_size > 0) {
161                        int num = (s_size >= len) ? len : s_size;
162                        System.arraycopy(l_stack, l_stackp, buf, off, num);
163                        off += num;
164                        len -= num;
165                        l_stackp += num;
166                }
167
168                if (len == 0) {
169                        stackp = l_stackp;
170                        return off - start;
171                }
172
173
174// loop, filling local buffer until enough data has been decompressed
175
176                main_loop: do {
177                        if (end < EXTRA) fill();
178
179                        int bit_in = (got > 0) ? (end - end % l_n_bits) << 3 :
180                                        (end << 3) - (l_n_bits - 1);
181
182                        while (l_bit_pos < bit_in) {
183                                // handle 1-byte reads correctly
184                                if (len == 0) {
185                                        n_bits = l_n_bits;
186                                        maxcode = l_maxcode;
187                                        maxmaxcode = l_maxmaxcode;
188                                        bitmask = l_bitmask;
189                                        oldcode = l_oldcode;
190                                        finchar = l_finchar;
191                                        stackp = l_stackp;
192                                        free_ent = l_free_ent;
193                                        bit_pos = l_bit_pos;
194
195                                        return off - start;
196                                }
197
198                                // check for code-width expansion
199
200                                if (l_free_ent > l_maxcode) {
201                                        int n_bytes = l_n_bits << 3;
202                                        l_bit_pos = (l_bit_pos - 1) +
203                                                        n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes;
204
205                                        l_n_bits++;
206                                        l_maxcode = (l_n_bits == maxbits) ? l_maxmaxcode :
207                                                        (1 << l_n_bits) - 1;
208
209                                        logger.debug("Code-width expanded to ", l_n_bits);
210
211                                        l_bitmask = (1 << l_n_bits) - 1;
212                                        l_bit_pos = resetbuf(l_bit_pos);
213                                        continue main_loop;
214                                }
215
216
217                                // read next code
218
219                                int pos = l_bit_pos >> 3;
220                                int code = (((l_data[pos] & 0xFF) | ((l_data[pos + 1] & 0xFF) << 8) |
221                                                ((l_data[pos + 2] & 0xFF) << 16))
222                                                >> (l_bit_pos & 0x7)) & l_bitmask;
223                                l_bit_pos += l_n_bits;
224
225
226                                // handle first iteration
227
228                                if (l_oldcode == -1) {
229                                        if (code >= 256)
230                                                throw new IOException("corrupt input: " + code +
231                                                                " > 255");
232                                        l_finchar = (byte) (l_oldcode = code);
233                                        buf[off++] = l_finchar;
234                                        len--;
235                                        continue;
236                                }
237
238
239                                // handle CLEAR code
240
241                                if (code == TBL_CLEAR && block_mode) {
242                                        System.arraycopy(zeros, 0, l_tab_prefix, 0, zeros.length);
243                                        l_free_ent = TBL_FIRST - 1;
244
245                                        int n_bytes = l_n_bits << 3;
246                                        l_bit_pos = (l_bit_pos - 1) +
247                                                        n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes;
248                                        l_n_bits = INIT_BITS;
249                                        l_maxcode = (1 << l_n_bits) - 1;
250                                        l_bitmask = l_maxcode;
251
252                                        logger.debug("Code tables reset");
253
254                                        l_bit_pos = resetbuf(l_bit_pos);
255                                        continue main_loop;
256                                }
257
258
259                                // setup
260
261                                int incode = code;
262                                l_stackp = l_stack.length;
263
264
265                                // Handle KwK case
266
267                                if (code >= l_free_ent) {
268                                        if (code > l_free_ent)
269                                                throw new IOException("corrupt input: code=" + code +
270                                                                ", free_ent=" + l_free_ent);
271
272                                        l_stack[--l_stackp] = l_finchar;
273                                        code = l_oldcode;
274                                }
275
276
277                                // Generate output characters in reverse order
278
279                                while (code >= 256) {
280                                        l_stack[--l_stackp] = l_tab_suffix[code];
281                                        code = l_tab_prefix[code];
282                                }
283                                l_finchar = l_tab_suffix[code];
284                                buf[off++] = l_finchar;
285                                len--;
286
287
288                                // And put them out in forward order
289
290                                s_size = l_stack.length - l_stackp;
291                                int num = (s_size >= len) ? len : s_size;
292                                System.arraycopy(l_stack, l_stackp, buf, off, num);
293                                off += num;
294                                len -= num;
295                                l_stackp += num;
296
297
298                                // generate new entry in table
299
300                                if (l_free_ent < l_maxmaxcode) {
301                                        l_tab_prefix[l_free_ent] = l_oldcode;
302                                        l_tab_suffix[l_free_ent] = l_finchar;
303                                        l_free_ent++;
304                                }
305
306
307                                // Remember previous code
308
309                                l_oldcode = incode;
310
311
312                                // if output buffer full, then return
313
314                                if (len == 0) {
315                                        n_bits = l_n_bits;
316                                        maxcode = l_maxcode;
317                                        bitmask = l_bitmask;
318                                        oldcode = l_oldcode;
319                                        finchar = l_finchar;
320                                        stackp = l_stackp;
321                                        free_ent = l_free_ent;
322                                        bit_pos = l_bit_pos;
323
324                                        return off - start;
325                                }
326                        }
327
328                        l_bit_pos = resetbuf(l_bit_pos);
329                } while (got > 0);
330
331                n_bits = l_n_bits;
332                maxcode = l_maxcode;
333                bitmask = l_bitmask;
334                oldcode = l_oldcode;
335                finchar = l_finchar;
336                stackp = l_stackp;
337                free_ent = l_free_ent;
338                bit_pos = l_bit_pos;
339
340                eof = true;
341                return off - start;
342        }
343
344
345        /**
346         * Moves the unread data in the buffer to the beginning and resets
347         * the pointers.
348         */
349        private final int resetbuf(int bit_pos) {
350                int pos = bit_pos >> 3;
351                System.arraycopy(data, pos, data, 0, end - pos);
352                end -= pos;
353                return 0;
354        }
355
356
357        private final void fill() throws IOException {
358                got = in.read(data, end, data.length - 1 - end);
359                if (got > 0) end += got;
360        }
361
362
363        @Override
364public synchronized long skip(long num) throws IOException {
365                byte[] tmp = new byte[(int) num];
366                int got = read(tmp, 0, (int) num);
367
368                if (got > 0)
369                        return got;
370                else
371                        return 0L;
372        }
373
374
375        @Override
376public synchronized int available() throws IOException {
377                if (eof) return 0;
378
379                return in.available();
380        }
381
382
383        static final int LZW_MAGIC = 0x1f9d;
384        private static final int MAX_BITS = 16;
385        private static final int INIT_BITS = 9;
386        private static final int HDR_MAXBITS = 0x1f;
387        private static final int HDR_EXTENDED = 0x20;
388        private static final int HDR_FREE = 0x40;
389        private static final int HDR_BLOCK_MODE = 0x80;
390
391        private void parse_header() throws IOException {
392// read in and check magic number
393
394                int t = in.read();
395                if (t < 0) throw new EOFException("Failed to read magic number");
396                int magic = (t & 0xff) << 8;
397                t = in.read();
398                if (t < 0) throw new EOFException("Failed to read magic number");
399                magic += t & 0xff;
400                if (magic != LZW_MAGIC)
401                        throw new IOException("Input not in compress format (read " +
402                                        "magic number 0x" +
403                                        Integer.toHexString(magic) + ")");
404
405
406// read in header byte
407
408                int header = in.read();
409                if (header < 0) throw new EOFException("Failed to read header");
410
411                block_mode = (header & HDR_BLOCK_MODE) > 0;
412                maxbits = header & HDR_MAXBITS;
413
414                if (maxbits > MAX_BITS)
415                        throw new IOException("Stream compressed with " + maxbits +
416                                        " bits, but can only handle " + MAX_BITS +
417                                        " bits");
418
419                if ((header & HDR_EXTENDED) > 0)
420                        throw new IOException("Header extension bit set");
421
422                if ((header & HDR_FREE) > 0)
423                        throw new IOException("Header bit 6 set");
424
425                logger.debug("block mode: {}", block_mode);
426                logger.debug("max bits:   {}", maxbits);
427
428
429// initialize stuff
430
431                maxmaxcode = 1 << maxbits;
432                n_bits = INIT_BITS;
433                maxcode = (1 << n_bits) - 1;
434                bitmask = maxcode;
435                oldcode = -1;
436                finchar = 0;
437                free_ent = block_mode ? TBL_FIRST : 256;
438
439                tab_prefix = new int[1 << maxbits];
440                tab_suffix = new byte[1 << maxbits];
441                stack = new byte[1 << maxbits];
442                stackp = stack.length;
443
444                for (int idx = 255; idx >= 0; idx--)
445                        tab_suffix[idx] = (byte) idx;
446        }
447
448        /**
449         * This stream does not support mark/reset on the stream.
450         *
451         * @return false
452         */
453        @Override
454public boolean markSupported() {
455                return false;
456        }
457
458        static public void uncompress( String fileInName, FileOutputStream out) throws IOException {
459                long start = System.currentTimeMillis();
460
461                InputStream in = new UncompressInputStream(  new FileInputStream(fileInName));
462
463        //  int total = 0;
464                byte[] buffer = new byte[100000];
465                while (true) {
466                        int bytesRead = in.read(buffer);
467                        if (bytesRead == -1) break;
468                        out.write(buffer, 0, bytesRead);
469         //   total += bytesRead;
470                }
471                in.close();
472                out.close();
473
474                if (debugTiming) {
475                        long end = System.currentTimeMillis();
476                //  logger.debug("Decompressed " + total + " bytes");
477                        logger.warn("Time: {} seconds", (end - start) / 1000);
478                }
479        }
480
481
482        private static final boolean debugTiming = false;
483
484        public static void main(String args[]) throws Exception {
485                if (args.length != 1) {
486                        logger.info("Usage: UncompressInputStream <file>");
487                        System.exit(1);
488                }
489
490                InputStream in =
491                                new UncompressInputStream(new FileInputStream(args[0]));
492
493                byte[] buf = new byte[100000];
494                int tot = 0;
495                long beg = System.currentTimeMillis();
496
497                while (true) {
498                        int got = in.read(buf);
499                        if (got < 0) break;
500                        System.out.write(buf, 0, got);
501                        tot += got;
502                }
503
504                long end = System.currentTimeMillis();
505                logger.info("Decompressed {} bytes", tot);
506                logger.info("Time: {} seconds", (end - beg) / 1000);
507                in.close();
508        }
509}