001/*
002 * @(#)UncompressInputStream.java           0.3-3 06/05/2001
003 *
004 *  This file is part of the HTTPClient package
005 *  Copyright (C) 1996-2001 Ronald Tschalar
006 *
007 *  This library is free software; you can redistribute it and/or
008 *  modify it under the terms of the GNU Lesser General Public
009 *  License as published by the Free Software Foundation; either
010 *  version 2 of the License, or (at your option) any later version.
011 *
012 *  This library is distributed in the hope that it will be useful,
013 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
014 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
015 *  Lesser General Public License for more details.
016 *
017 *  You should have received a copy of the GNU Lesser General Public
018 *  License along with this library; if not, write to the Free
019 *  Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
020 *  MA 02111-1307, USA
021 *
022 *  For questions, suggestions, bug-reports, enhancement-requests etc.
023 *  I may be contacted at:
024 *
025 *  ronald@xxxxxxxxxxxxx
026 *
027 *  The HTTPClient's home page is located at:
028 *
029 *  http://www.innovation.ch/java/HTTPClient/
030 */
031
032package org.biojava.utils.io;
033
034import java.io.EOFException;
035import java.io.FileInputStream;
036import java.io.FileOutputStream;
037import java.io.FilterInputStream;
038import java.io.IOException;
039import java.io.InputStream;
040
041
042/**
043 * This class decompresses an input stream containing data compressed with
044 * the unix "compress" utility (LZC, a LZW variant). This code is based
045 * heavily on the <var>unlzw.c</var> code in <var>gzip-1.2.4</var> (written
046 * by Peter Jannesen) and the original compress code.
047 *
048 *
049 *  This version has been modified from the original 0.3-3 version by the
050 *  Unidata Program Center (support@xxxxxxxxxxxxxxxx) to make the constructor
051 *  public and to fix a couple of bugs.
052 *  Also:
053 *   - markSupported() returns false
054 *   - add uncompress() static method
055 *
056 * @version 0.3-3 06/05/2001
057 * @author  Ronald Tschalar
058 * @author  Unidata Program Center
059 * @author  Richard Holland - making LZW_MAGIC package-visible.
060 */
061public class UncompressInputStream extends FilterInputStream {
062  /**
063   * @param is the input stream to decompress
064   * @throws IOException if the header is malformed
065   */
066  public UncompressInputStream(InputStream is) throws IOException {
067    super(is);
068    parse_header();
069  }
070
071
072  byte[] one = new byte[1];
073
074  public synchronized int read() throws IOException {
075    int b = read(one, 0, 1);
076    if (b == 1)
077      return (one[0] & 0xff);
078    else
079      return -1;
080  }
081
082
083  // string table stuff
084  private static final int TBL_CLEAR = 0x100;
085  private static final int TBL_FIRST = TBL_CLEAR + 1;
086
087  private int[] tab_prefix;
088  private byte[] tab_suffix;
089  private int[] zeros = new int[256];
090  private byte[] stack;
091
092  // various state
093  private boolean block_mode;
094  private int n_bits;
095  private int maxbits;
096  private int maxmaxcode;
097  private int maxcode;
098  private int bitmask;
099  private int oldcode;
100  private byte finchar;
101  private int stackp;
102  private int free_ent;
103
104  // input buffer
105  private byte[] data = new byte[10000];
106  private int bit_pos = 0, end = 0, got = 0;
107  private boolean eof = false;
108  private static final int EXTRA = 64;
109
110
111  public synchronized int read(byte[] buf, int off, int len)
112      throws IOException {
113    if (eof) return -1;
114    int start = off;
115
116/* Using local copies of various variables speeds things up by as
117     * much as 30% !
118     */
119    int[] l_tab_prefix = tab_prefix;
120    byte[] l_tab_suffix = tab_suffix;
121    byte[] l_stack = stack;
122    int l_n_bits = n_bits;
123    int l_maxcode = maxcode;
124    int l_maxmaxcode = maxmaxcode;
125    int l_bitmask = bitmask;
126    int l_oldcode = oldcode;
127    byte l_finchar = finchar;
128    int l_stackp = stackp;
129    int l_free_ent = free_ent;
130    byte[] l_data = data;
131    int l_bit_pos = bit_pos;
132
133
134// empty stack if stuff still left
135
136    int s_size = l_stack.length - l_stackp;
137    if (s_size > 0) {
138      int num = (s_size >= len) ? len : s_size;
139      System.arraycopy(l_stack, l_stackp, buf, off, num);
140      off += num;
141      len -= num;
142      l_stackp += num;
143    }
144
145    if (len == 0) {
146      stackp = l_stackp;
147      return off - start;
148    }
149
150
151// loop, filling local buffer until enough data has been decompressed
152
153    main_loop: do {
154      if (end < EXTRA) fill();
155
156      int bit_in = (got > 0) ? (end - end % l_n_bits) << 3 :
157          (end << 3) - (l_n_bits - 1);
158
159      while (l_bit_pos < bit_in) {
160        // handle 1-byte reads correctly
161        if (len == 0) {
162          n_bits = l_n_bits;
163          maxcode = l_maxcode;
164          maxmaxcode = l_maxmaxcode;
165          bitmask = l_bitmask;
166          oldcode = l_oldcode;
167          finchar = l_finchar;
168          stackp = l_stackp;
169          free_ent = l_free_ent;
170          bit_pos = l_bit_pos;
171
172          return off - start;
173        }
174
175        // check for code-width expansion
176
177        if (l_free_ent > l_maxcode) {
178          int n_bytes = l_n_bits << 3;
179          l_bit_pos = (l_bit_pos - 1) +
180              n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes;
181
182          l_n_bits++;
183          l_maxcode = (l_n_bits == maxbits) ? l_maxmaxcode :
184              (1 << l_n_bits) - 1;
185
186          if (debug)
187            System.err.println("Code-width expanded to " + l_n_bits);
188
189          l_bitmask = (1 << l_n_bits) - 1;
190          l_bit_pos = resetbuf(l_bit_pos);
191          continue main_loop;
192        }
193
194
195        // read next code
196
197        int pos = l_bit_pos >> 3;
198        int code = (((l_data[pos] & 0xFF) | ((l_data[pos + 1] & 0xFF) << 8) |
199            ((l_data[pos + 2] & 0xFF) << 16))
200            >> (l_bit_pos & 0x7)) & l_bitmask;
201        l_bit_pos += l_n_bits;
202
203
204        // handle first iteration
205
206        if (l_oldcode == -1) {
207          if (code >= 256)
208            throw new IOException("corrupt input: " + code +
209                " > 255");
210          l_finchar = (byte) (l_oldcode = code);
211          buf[off++] = l_finchar;
212          len--;
213          continue;
214        }
215
216
217        // handle CLEAR code
218
219        if (code == TBL_CLEAR && block_mode) {
220          System.arraycopy(zeros, 0, l_tab_prefix, 0, zeros.length);
221          l_free_ent = TBL_FIRST - 1;
222
223          int n_bytes = l_n_bits << 3;
224          l_bit_pos = (l_bit_pos - 1) +
225              n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes;
226          l_n_bits = INIT_BITS;
227          l_maxcode = (1 << l_n_bits) - 1;
228          l_bitmask = l_maxcode;
229
230          if (debug) System.err.println("Code tables reset");
231
232          l_bit_pos = resetbuf(l_bit_pos);
233          continue main_loop;
234        }
235
236
237        // setup
238
239        int incode = code;
240        l_stackp = l_stack.length;
241
242
243        // Handle KwK case
244
245        if (code >= l_free_ent) {
246          if (code > l_free_ent)
247            throw new IOException("corrupt input: code=" + code +
248                ", free_ent=" + l_free_ent);
249
250          l_stack[--l_stackp] = l_finchar;
251          code = l_oldcode;
252        }
253
254
255        // Generate output characters in reverse order
256
257        while (code >= 256) {
258          l_stack[--l_stackp] = l_tab_suffix[code];
259          code = l_tab_prefix[code];
260        }
261        l_finchar = l_tab_suffix[code];
262        buf[off++] = l_finchar;
263        len--;
264
265
266        // And put them out in forward order
267
268        s_size = l_stack.length - l_stackp;
269        int num = (s_size >= len) ? len : s_size;
270        System.arraycopy(l_stack, l_stackp, buf, off, num);
271        off += num;
272        len -= num;
273        l_stackp += num;
274
275
276        // generate new entry in table
277
278        if (l_free_ent < l_maxmaxcode) {
279          l_tab_prefix[l_free_ent] = l_oldcode;
280          l_tab_suffix[l_free_ent] = l_finchar;
281          l_free_ent++;
282        }
283
284
285        // Remember previous code
286
287        l_oldcode = incode;
288
289
290        // if output buffer full, then return
291
292        if (len == 0) {
293          n_bits = l_n_bits;
294          maxcode = l_maxcode;
295          bitmask = l_bitmask;
296          oldcode = l_oldcode;
297          finchar = l_finchar;
298          stackp = l_stackp;
299          free_ent = l_free_ent;
300          bit_pos = l_bit_pos;
301
302          return off - start;
303        }
304      }
305
306      l_bit_pos = resetbuf(l_bit_pos);
307    } while (got > 0);
308
309    n_bits = l_n_bits;
310    maxcode = l_maxcode;
311    bitmask = l_bitmask;
312    oldcode = l_oldcode;
313    finchar = l_finchar;
314    stackp = l_stackp;
315    free_ent = l_free_ent;
316    bit_pos = l_bit_pos;
317
318    eof = true;
319    return off - start;
320  }
321
322
323  /**
324   * Moves the unread data in the buffer to the beginning and resets
325   * the pointers.
326   */
327  private final int resetbuf(int bit_pos) {
328    int pos = bit_pos >> 3;
329    System.arraycopy(data, pos, data, 0, end - pos);
330    end -= pos;
331    return 0;
332  }
333
334
335  private final void fill() throws IOException {
336    got = in.read(data, end, data.length - 1 - end);
337    if (got > 0) end += got;
338  }
339
340
341  public synchronized long skip(long num) throws IOException {
342    byte[] tmp = new byte[(int) num];
343    int got = read(tmp, 0, (int) num);
344
345    if (got > 0)
346      return (long) got;
347    else
348      return 0L;
349  }
350
351
352  public synchronized int available() throws IOException {
353    if (eof) return 0;
354
355    return in.available();
356  }
357
358
359  static final int LZW_MAGIC = 0x1f9d;
360  private static final int MAX_BITS = 16;
361  private static final int INIT_BITS = 9;
362  private static final int HDR_MAXBITS = 0x1f;
363  private static final int HDR_EXTENDED = 0x20;
364  private static final int HDR_FREE = 0x40;
365  private static final int HDR_BLOCK_MODE = 0x80;
366
367  private void parse_header() throws IOException {
368// read in and check magic number
369
370    int t = in.read();
371    if (t < 0) throw new EOFException("Failed to read magic number");
372    int magic = (t & 0xff) << 8;
373    t = in.read();
374    if (t < 0) throw new EOFException("Failed to read magic number");
375    magic += t & 0xff;
376    if (magic != LZW_MAGIC)
377      throw new IOException("Input not in compress format (read " +
378          "magic number 0x" +
379          Integer.toHexString(magic) + ")");
380
381
382// read in header byte
383
384    int header = in.read();
385    if (header < 0) throw new EOFException("Failed to read header");
386
387    block_mode = (header & HDR_BLOCK_MODE) > 0;
388    maxbits = header & HDR_MAXBITS;
389
390    if (maxbits > MAX_BITS)
391      throw new IOException("Stream compressed with " + maxbits +
392          " bits, but can only handle " + MAX_BITS +
393          " bits");
394
395    if ((header & HDR_EXTENDED) > 0)
396      throw new IOException("Header extension bit set");
397
398    if ((header & HDR_FREE) > 0)
399      throw new IOException("Header bit 6 set");
400
401    if (debug) {
402      System.err.println("block mode: " + block_mode);
403      System.err.println("max bits:   " + maxbits);
404    }
405
406
407// initialize stuff
408
409    maxmaxcode = 1 << maxbits;
410    n_bits = INIT_BITS;
411    maxcode = (1 << n_bits) - 1;
412    bitmask = maxcode;
413    oldcode = -1;
414    finchar = 0;
415    free_ent = block_mode ? TBL_FIRST : 256;
416
417    tab_prefix = new int[1 << maxbits];
418    tab_suffix = new byte[1 << maxbits];
419    stack = new byte[1 << maxbits];
420    stackp = stack.length;
421
422    for (int idx = 255; idx >= 0; idx--)
423      tab_suffix[idx] = (byte) idx;
424  }
425
426  /**
427   * This stream does not support mark/reset on the stream.
428   *
429   * @return false
430   */
431  public boolean markSupported() {
432    return false;
433  }
434
435  static public void uncompress( String fileInName, FileOutputStream out) throws IOException {
436    long start = System.currentTimeMillis();
437
438    InputStream in = new UncompressInputStream(  new FileInputStream(fileInName));
439
440    int total = 0;
441    byte[] buffer = new byte[100000];
442    while (true) {
443      int bytesRead = in.read(buffer);
444      if (bytesRead == -1) break;
445      out.write(buffer, 0, bytesRead);
446      total += bytesRead;
447    }
448    in.close();
449    out.close();
450
451    if (debugTiming) {
452      long end = System.currentTimeMillis();
453      System.err.println("Decompressed " + total + " bytes");
454      System.err.println("Time: " + (end - start) / 1000. + " seconds");
455    }
456  }
457
458
459  private static final boolean debug = false, debugTiming = false;
460
461  public static void main(String args[]) throws Exception {
462    if (args.length != 1) {
463      System.err.println("Usage: UncompressInputStream <file>");
464      System.exit(1);
465    }
466
467    InputStream in =
468        new UncompressInputStream(new FileInputStream(args[0]));
469
470    byte[] buf = new byte[100000];
471    int tot = 0;
472    long beg = System.currentTimeMillis();
473
474    while (true) {
475      int got = in.read(buf);
476      if (got < 0) break;
477      System.out.write(buf, 0, got);
478      tot += got;
479    }
480
481    long end = System.currentTimeMillis();
482    System.err.println("Decompressed " + tot + " bytes");
483    System.err.println("Time: " + (end - beg) / 1000. + " seconds");
484  }
485}