001/* 002 * @(#)UncompressInputStream.java 0.3-3 06/05/2001 003 * 004 * This file is part of the HTTPClient package 005 * Copyright (C) 1996-2001 Ronald Tschalar 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * You should have received a copy of the GNU Lesser General Public 018 * License along with this library; if not, write to the Free 019 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, 020 * MA 02111-1307, USA 021 * 022 * For questions, suggestions, bug-reports, enhancement-requests etc. 023 * I may be contacted at: 024 * 025 * ronald@xxxxxxxxxxxxx 026 * 027 * The HTTPClient's home page is located at: 028 * 029 * http://www.innovation.ch/java/HTTPClient/ 030 */ 031 032package org.biojava.utils.io; 033 034import java.io.EOFException; 035import java.io.FileInputStream; 036import java.io.FileOutputStream; 037import java.io.FilterInputStream; 038import java.io.IOException; 039import java.io.InputStream; 040 041 042/** 043 * This class decompresses an input stream containing data compressed with 044 * the unix "compress" utility (LZC, a LZW variant). This code is based 045 * heavily on the <var>unlzw.c</var> code in <var>gzip-1.2.4</var> (written 046 * by Peter Jannesen) and the original compress code. 047 * 048 * 049 * This version has been modified from the original 0.3-3 version by the 050 * Unidata Program Center (support@xxxxxxxxxxxxxxxx) to make the constructor 051 * public and to fix a couple of bugs. 052 * Also: 053 * - markSupported() returns false 054 * - add uncompress() static method 055 * 056 * @version 0.3-3 06/05/2001 057 * @author Ronald Tschalar 058 * @author Unidata Program Center 059 * @author Richard Holland - making LZW_MAGIC package-visible. 060 */ 061public class UncompressInputStream extends FilterInputStream { 062 /** 063 * @param is the input stream to decompress 064 * @throws IOException if the header is malformed 065 */ 066 public UncompressInputStream(InputStream is) throws IOException { 067 super(is); 068 parse_header(); 069 } 070 071 072 byte[] one = new byte[1]; 073 074 public synchronized int read() throws IOException { 075 int b = read(one, 0, 1); 076 if (b == 1) 077 return (one[0] & 0xff); 078 else 079 return -1; 080 } 081 082 083 // string table stuff 084 private static final int TBL_CLEAR = 0x100; 085 private static final int TBL_FIRST = TBL_CLEAR + 1; 086 087 private int[] tab_prefix; 088 private byte[] tab_suffix; 089 private int[] zeros = new int[256]; 090 private byte[] stack; 091 092 // various state 093 private boolean block_mode; 094 private int n_bits; 095 private int maxbits; 096 private int maxmaxcode; 097 private int maxcode; 098 private int bitmask; 099 private int oldcode; 100 private byte finchar; 101 private int stackp; 102 private int free_ent; 103 104 // input buffer 105 private byte[] data = new byte[10000]; 106 private int bit_pos = 0, end = 0, got = 0; 107 private boolean eof = false; 108 private static final int EXTRA = 64; 109 110 111 public synchronized int read(byte[] buf, int off, int len) 112 throws IOException { 113 if (eof) return -1; 114 int start = off; 115 116/* Using local copies of various variables speeds things up by as 117 * much as 30% ! 118 */ 119 int[] l_tab_prefix = tab_prefix; 120 byte[] l_tab_suffix = tab_suffix; 121 byte[] l_stack = stack; 122 int l_n_bits = n_bits; 123 int l_maxcode = maxcode; 124 int l_maxmaxcode = maxmaxcode; 125 int l_bitmask = bitmask; 126 int l_oldcode = oldcode; 127 byte l_finchar = finchar; 128 int l_stackp = stackp; 129 int l_free_ent = free_ent; 130 byte[] l_data = data; 131 int l_bit_pos = bit_pos; 132 133 134// empty stack if stuff still left 135 136 int s_size = l_stack.length - l_stackp; 137 if (s_size > 0) { 138 int num = (s_size >= len) ? len : s_size; 139 System.arraycopy(l_stack, l_stackp, buf, off, num); 140 off += num; 141 len -= num; 142 l_stackp += num; 143 } 144 145 if (len == 0) { 146 stackp = l_stackp; 147 return off - start; 148 } 149 150 151// loop, filling local buffer until enough data has been decompressed 152 153 main_loop: do { 154 if (end < EXTRA) fill(); 155 156 int bit_in = (got > 0) ? (end - end % l_n_bits) << 3 : 157 (end << 3) - (l_n_bits - 1); 158 159 while (l_bit_pos < bit_in) { 160 // handle 1-byte reads correctly 161 if (len == 0) { 162 n_bits = l_n_bits; 163 maxcode = l_maxcode; 164 maxmaxcode = l_maxmaxcode; 165 bitmask = l_bitmask; 166 oldcode = l_oldcode; 167 finchar = l_finchar; 168 stackp = l_stackp; 169 free_ent = l_free_ent; 170 bit_pos = l_bit_pos; 171 172 return off - start; 173 } 174 175 // check for code-width expansion 176 177 if (l_free_ent > l_maxcode) { 178 int n_bytes = l_n_bits << 3; 179 l_bit_pos = (l_bit_pos - 1) + 180 n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes; 181 182 l_n_bits++; 183 l_maxcode = (l_n_bits == maxbits) ? l_maxmaxcode : 184 (1 << l_n_bits) - 1; 185 186 if (debug) 187 System.err.println("Code-width expanded to " + l_n_bits); 188 189 l_bitmask = (1 << l_n_bits) - 1; 190 l_bit_pos = resetbuf(l_bit_pos); 191 continue main_loop; 192 } 193 194 195 // read next code 196 197 int pos = l_bit_pos >> 3; 198 int code = (((l_data[pos] & 0xFF) | ((l_data[pos + 1] & 0xFF) << 8) | 199 ((l_data[pos + 2] & 0xFF) << 16)) 200 >> (l_bit_pos & 0x7)) & l_bitmask; 201 l_bit_pos += l_n_bits; 202 203 204 // handle first iteration 205 206 if (l_oldcode == -1) { 207 if (code >= 256) 208 throw new IOException("corrupt input: " + code + 209 " > 255"); 210 l_finchar = (byte) (l_oldcode = code); 211 buf[off++] = l_finchar; 212 len--; 213 continue; 214 } 215 216 217 // handle CLEAR code 218 219 if (code == TBL_CLEAR && block_mode) { 220 System.arraycopy(zeros, 0, l_tab_prefix, 0, zeros.length); 221 l_free_ent = TBL_FIRST - 1; 222 223 int n_bytes = l_n_bits << 3; 224 l_bit_pos = (l_bit_pos - 1) + 225 n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes; 226 l_n_bits = INIT_BITS; 227 l_maxcode = (1 << l_n_bits) - 1; 228 l_bitmask = l_maxcode; 229 230 if (debug) System.err.println("Code tables reset"); 231 232 l_bit_pos = resetbuf(l_bit_pos); 233 continue main_loop; 234 } 235 236 237 // setup 238 239 int incode = code; 240 l_stackp = l_stack.length; 241 242 243 // Handle KwK case 244 245 if (code >= l_free_ent) { 246 if (code > l_free_ent) 247 throw new IOException("corrupt input: code=" + code + 248 ", free_ent=" + l_free_ent); 249 250 l_stack[--l_stackp] = l_finchar; 251 code = l_oldcode; 252 } 253 254 255 // Generate output characters in reverse order 256 257 while (code >= 256) { 258 l_stack[--l_stackp] = l_tab_suffix[code]; 259 code = l_tab_prefix[code]; 260 } 261 l_finchar = l_tab_suffix[code]; 262 buf[off++] = l_finchar; 263 len--; 264 265 266 // And put them out in forward order 267 268 s_size = l_stack.length - l_stackp; 269 int num = (s_size >= len) ? len : s_size; 270 System.arraycopy(l_stack, l_stackp, buf, off, num); 271 off += num; 272 len -= num; 273 l_stackp += num; 274 275 276 // generate new entry in table 277 278 if (l_free_ent < l_maxmaxcode) { 279 l_tab_prefix[l_free_ent] = l_oldcode; 280 l_tab_suffix[l_free_ent] = l_finchar; 281 l_free_ent++; 282 } 283 284 285 // Remember previous code 286 287 l_oldcode = incode; 288 289 290 // if output buffer full, then return 291 292 if (len == 0) { 293 n_bits = l_n_bits; 294 maxcode = l_maxcode; 295 bitmask = l_bitmask; 296 oldcode = l_oldcode; 297 finchar = l_finchar; 298 stackp = l_stackp; 299 free_ent = l_free_ent; 300 bit_pos = l_bit_pos; 301 302 return off - start; 303 } 304 } 305 306 l_bit_pos = resetbuf(l_bit_pos); 307 } while (got > 0); 308 309 n_bits = l_n_bits; 310 maxcode = l_maxcode; 311 bitmask = l_bitmask; 312 oldcode = l_oldcode; 313 finchar = l_finchar; 314 stackp = l_stackp; 315 free_ent = l_free_ent; 316 bit_pos = l_bit_pos; 317 318 eof = true; 319 return off - start; 320 } 321 322 323 /** 324 * Moves the unread data in the buffer to the beginning and resets 325 * the pointers. 326 */ 327 private final int resetbuf(int bit_pos) { 328 int pos = bit_pos >> 3; 329 System.arraycopy(data, pos, data, 0, end - pos); 330 end -= pos; 331 return 0; 332 } 333 334 335 private final void fill() throws IOException { 336 got = in.read(data, end, data.length - 1 - end); 337 if (got > 0) end += got; 338 } 339 340 341 public synchronized long skip(long num) throws IOException { 342 byte[] tmp = new byte[(int) num]; 343 int got = read(tmp, 0, (int) num); 344 345 if (got > 0) 346 return (long) got; 347 else 348 return 0L; 349 } 350 351 352 public synchronized int available() throws IOException { 353 if (eof) return 0; 354 355 return in.available(); 356 } 357 358 359 static final int LZW_MAGIC = 0x1f9d; 360 private static final int MAX_BITS = 16; 361 private static final int INIT_BITS = 9; 362 private static final int HDR_MAXBITS = 0x1f; 363 private static final int HDR_EXTENDED = 0x20; 364 private static final int HDR_FREE = 0x40; 365 private static final int HDR_BLOCK_MODE = 0x80; 366 367 private void parse_header() throws IOException { 368// read in and check magic number 369 370 int t = in.read(); 371 if (t < 0) throw new EOFException("Failed to read magic number"); 372 int magic = (t & 0xff) << 8; 373 t = in.read(); 374 if (t < 0) throw new EOFException("Failed to read magic number"); 375 magic += t & 0xff; 376 if (magic != LZW_MAGIC) 377 throw new IOException("Input not in compress format (read " + 378 "magic number 0x" + 379 Integer.toHexString(magic) + ")"); 380 381 382// read in header byte 383 384 int header = in.read(); 385 if (header < 0) throw new EOFException("Failed to read header"); 386 387 block_mode = (header & HDR_BLOCK_MODE) > 0; 388 maxbits = header & HDR_MAXBITS; 389 390 if (maxbits > MAX_BITS) 391 throw new IOException("Stream compressed with " + maxbits + 392 " bits, but can only handle " + MAX_BITS + 393 " bits"); 394 395 if ((header & HDR_EXTENDED) > 0) 396 throw new IOException("Header extension bit set"); 397 398 if ((header & HDR_FREE) > 0) 399 throw new IOException("Header bit 6 set"); 400 401 if (debug) { 402 System.err.println("block mode: " + block_mode); 403 System.err.println("max bits: " + maxbits); 404 } 405 406 407// initialize stuff 408 409 maxmaxcode = 1 << maxbits; 410 n_bits = INIT_BITS; 411 maxcode = (1 << n_bits) - 1; 412 bitmask = maxcode; 413 oldcode = -1; 414 finchar = 0; 415 free_ent = block_mode ? TBL_FIRST : 256; 416 417 tab_prefix = new int[1 << maxbits]; 418 tab_suffix = new byte[1 << maxbits]; 419 stack = new byte[1 << maxbits]; 420 stackp = stack.length; 421 422 for (int idx = 255; idx >= 0; idx--) 423 tab_suffix[idx] = (byte) idx; 424 } 425 426 /** 427 * This stream does not support mark/reset on the stream. 428 * 429 * @return false 430 */ 431 public boolean markSupported() { 432 return false; 433 } 434 435 static public void uncompress( String fileInName, FileOutputStream out) throws IOException { 436 long start = System.currentTimeMillis(); 437 438 InputStream in = new UncompressInputStream( new FileInputStream(fileInName)); 439 440 int total = 0; 441 byte[] buffer = new byte[100000]; 442 while (true) { 443 int bytesRead = in.read(buffer); 444 if (bytesRead == -1) break; 445 out.write(buffer, 0, bytesRead); 446 total += bytesRead; 447 } 448 in.close(); 449 out.close(); 450 451 if (debugTiming) { 452 long end = System.currentTimeMillis(); 453 System.err.println("Decompressed " + total + " bytes"); 454 System.err.println("Time: " + (end - start) / 1000. + " seconds"); 455 } 456 } 457 458 459 private static final boolean debug = false, debugTiming = false; 460 461 public static void main(String args[]) throws Exception { 462 if (args.length != 1) { 463 System.err.println("Usage: UncompressInputStream <file>"); 464 System.exit(1); 465 } 466 467 InputStream in = 468 new UncompressInputStream(new FileInputStream(args[0])); 469 470 byte[] buf = new byte[100000]; 471 int tot = 0; 472 long beg = System.currentTimeMillis(); 473 474 while (true) { 475 int got = in.read(buf); 476 if (got < 0) break; 477 System.out.write(buf, 0, got); 478 tot += got; 479 } 480 481 long end = System.currentTimeMillis(); 482 System.err.println("Decompressed " + tot + " bytes"); 483 System.err.println("Time: " + (end - beg) / 1000. + " seconds"); 484 } 485}