001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021/* 022 * @(#)UncompressInputStream.java 0.3-3 06/05/2001 023 * 024 * This file is part of the HTTPClient package 025 * Copyright (C) 1996-2001 Ronald Tschalar 026 * 027 * This library is free software; you can redistribute it and/or 028 * modify it under the terms of the GNU Lesser General Public 029 * License as published by the Free Software Foundation; either 030 * version 2 of the License, or (at your option) any later version. 031 * 032 * This library is distributed in the hope that it will be useful, 033 * but WITHOUT ANY WARRANTY; without even the implied warranty of 034 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 035 * Lesser General Public License for more details. 036 * 037 * You should have received a copy of the GNU Lesser General Public 038 * License along with this library; if not, write to the Free 039 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, 040 * MA 02111-1307, USA 041 * 042 * For questions, suggestions, bug-reports, enhancement-requests etc. 043 * I may be contacted at: 044 * 045 * ronald@xxxxxxxxxxxxx 046 * 047 * The HTTPClient's home page is located at: 048 * 049 * http://www.innovation.ch/java/HTTPClient/ 050 */ 051 052package org.biojava.nbio.core.util; 053 054import org.slf4j.Logger; 055import org.slf4j.LoggerFactory; 056 057import java.io.*; 058 059 060/** 061 * This class decompresses an input stream containing data compressed with 062 * the unix "compress" utility (LZC, a LZW variant). This code is based 063 * heavily on the <var>unlzw.c</var> code in <var>gzip-1.2.4</var> (written 064 * by Peter Jannesen) and the original compress code. 065 * 066 * 067 * This version has been modified from the original 0.3-3 version by the 068 * Unidata Program Center (support@xxxxxxxxxxxxxxxx) to make the constructor 069 * public and to fix a couple of bugs. 070 * Also: 071 * - markSupported() returns false 072 * - add uncompress() static method 073 * 074 * @version 0.3-3 06/05/2001 075 * @author Ronald Tschalar 076 * @author Unidata Program Center 077 * @author Richard Holland - making LZW_MAGIC package-visible. 078 */ 079public class UncompressInputStream extends FilterInputStream { 080 081 private final static Logger logger = LoggerFactory.getLogger(UncompressInputStream.class); 082 083 /** 084 * @param is the input stream to decompress 085 * @throws IOException if the header is malformed 086 */ 087 public UncompressInputStream(InputStream is) throws IOException { 088 super(is); 089 parse_header(); 090 } 091 092 093 byte[] one = new byte[1]; 094 095 @Override 096public synchronized int read() throws IOException { 097 int b = read(one, 0, 1); 098 if (b == 1) 099 return (one[0] & 0xff); 100 else 101 return -1; 102 } 103 104 105 // string table stuff 106 private static final int TBL_CLEAR = 0x100; 107 private static final int TBL_FIRST = TBL_CLEAR + 1; 108 109 private int[] tab_prefix; 110 private byte[] tab_suffix; 111 private int[] zeros = new int[256]; 112 private byte[] stack; 113 114 // various state 115 private boolean block_mode; 116 private int n_bits; 117 private int maxbits; 118 private int maxmaxcode; 119 private int maxcode; 120 private int bitmask; 121 private int oldcode; 122 private byte finchar; 123 private int stackp; 124 private int free_ent; 125 126 // input buffer 127 private byte[] data = new byte[10000]; 128 private int bit_pos = 0, end = 0, got = 0; 129 private boolean eof = false; 130 private static final int EXTRA = 64; 131 132 133 @Override 134public synchronized int read(byte[] buf, int off, int len) 135 throws IOException { 136 if (eof) return -1; 137 int start = off; 138 139/* Using local copies of various variables speeds things up by as 140 * much as 30% ! 141 */ 142 int[] l_tab_prefix = tab_prefix; 143 byte[] l_tab_suffix = tab_suffix; 144 byte[] l_stack = stack; 145 int l_n_bits = n_bits; 146 int l_maxcode = maxcode; 147 int l_maxmaxcode = maxmaxcode; 148 int l_bitmask = bitmask; 149 int l_oldcode = oldcode; 150 byte l_finchar = finchar; 151 int l_stackp = stackp; 152 int l_free_ent = free_ent; 153 byte[] l_data = data; 154 int l_bit_pos = bit_pos; 155 156 157// empty stack if stuff still left 158 159 int s_size = l_stack.length - l_stackp; 160 if (s_size > 0) { 161 int num = (s_size >= len) ? len : s_size; 162 System.arraycopy(l_stack, l_stackp, buf, off, num); 163 off += num; 164 len -= num; 165 l_stackp += num; 166 } 167 168 if (len == 0) { 169 stackp = l_stackp; 170 return off - start; 171 } 172 173 174// loop, filling local buffer until enough data has been decompressed 175 176 main_loop: do { 177 if (end < EXTRA) fill(); 178 179 int bit_in = (got > 0) ? (end - end % l_n_bits) << 3 : 180 (end << 3) - (l_n_bits - 1); 181 182 while (l_bit_pos < bit_in) { 183 // handle 1-byte reads correctly 184 if (len == 0) { 185 n_bits = l_n_bits; 186 maxcode = l_maxcode; 187 maxmaxcode = l_maxmaxcode; 188 bitmask = l_bitmask; 189 oldcode = l_oldcode; 190 finchar = l_finchar; 191 stackp = l_stackp; 192 free_ent = l_free_ent; 193 bit_pos = l_bit_pos; 194 195 return off - start; 196 } 197 198 // check for code-width expansion 199 200 if (l_free_ent > l_maxcode) { 201 int n_bytes = l_n_bits << 3; 202 l_bit_pos = (l_bit_pos - 1) + 203 n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes; 204 205 l_n_bits++; 206 l_maxcode = (l_n_bits == maxbits) ? l_maxmaxcode : 207 (1 << l_n_bits) - 1; 208 209 logger.debug("Code-width expanded to ", l_n_bits); 210 211 l_bitmask = (1 << l_n_bits) - 1; 212 l_bit_pos = resetbuf(l_bit_pos); 213 continue main_loop; 214 } 215 216 217 // read next code 218 219 int pos = l_bit_pos >> 3; 220 int code = (((l_data[pos] & 0xFF) | ((l_data[pos + 1] & 0xFF) << 8) | 221 ((l_data[pos + 2] & 0xFF) << 16)) 222 >> (l_bit_pos & 0x7)) & l_bitmask; 223 l_bit_pos += l_n_bits; 224 225 226 // handle first iteration 227 228 if (l_oldcode == -1) { 229 if (code >= 256) 230 throw new IOException("corrupt input: " + code + 231 " > 255"); 232 l_finchar = (byte) (l_oldcode = code); 233 buf[off++] = l_finchar; 234 len--; 235 continue; 236 } 237 238 239 // handle CLEAR code 240 241 if (code == TBL_CLEAR && block_mode) { 242 System.arraycopy(zeros, 0, l_tab_prefix, 0, zeros.length); 243 l_free_ent = TBL_FIRST - 1; 244 245 int n_bytes = l_n_bits << 3; 246 l_bit_pos = (l_bit_pos - 1) + 247 n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes; 248 l_n_bits = INIT_BITS; 249 l_maxcode = (1 << l_n_bits) - 1; 250 l_bitmask = l_maxcode; 251 252 logger.debug("Code tables reset"); 253 254 l_bit_pos = resetbuf(l_bit_pos); 255 continue main_loop; 256 } 257 258 259 // setup 260 261 int incode = code; 262 l_stackp = l_stack.length; 263 264 265 // Handle KwK case 266 267 if (code >= l_free_ent) { 268 if (code > l_free_ent) 269 throw new IOException("corrupt input: code=" + code + 270 ", free_ent=" + l_free_ent); 271 272 l_stack[--l_stackp] = l_finchar; 273 code = l_oldcode; 274 } 275 276 277 // Generate output characters in reverse order 278 279 while (code >= 256) { 280 l_stack[--l_stackp] = l_tab_suffix[code]; 281 code = l_tab_prefix[code]; 282 } 283 l_finchar = l_tab_suffix[code]; 284 buf[off++] = l_finchar; 285 len--; 286 287 288 // And put them out in forward order 289 290 s_size = l_stack.length - l_stackp; 291 int num = (s_size >= len) ? len : s_size; 292 System.arraycopy(l_stack, l_stackp, buf, off, num); 293 off += num; 294 len -= num; 295 l_stackp += num; 296 297 298 // generate new entry in table 299 300 if (l_free_ent < l_maxmaxcode) { 301 l_tab_prefix[l_free_ent] = l_oldcode; 302 l_tab_suffix[l_free_ent] = l_finchar; 303 l_free_ent++; 304 } 305 306 307 // Remember previous code 308 309 l_oldcode = incode; 310 311 312 // if output buffer full, then return 313 314 if (len == 0) { 315 n_bits = l_n_bits; 316 maxcode = l_maxcode; 317 bitmask = l_bitmask; 318 oldcode = l_oldcode; 319 finchar = l_finchar; 320 stackp = l_stackp; 321 free_ent = l_free_ent; 322 bit_pos = l_bit_pos; 323 324 return off - start; 325 } 326 } 327 328 l_bit_pos = resetbuf(l_bit_pos); 329 } while (got > 0); 330 331 n_bits = l_n_bits; 332 maxcode = l_maxcode; 333 bitmask = l_bitmask; 334 oldcode = l_oldcode; 335 finchar = l_finchar; 336 stackp = l_stackp; 337 free_ent = l_free_ent; 338 bit_pos = l_bit_pos; 339 340 eof = true; 341 return off - start; 342 } 343 344 345 /** 346 * Moves the unread data in the buffer to the beginning and resets 347 * the pointers. 348 */ 349 private final int resetbuf(int bit_pos) { 350 int pos = bit_pos >> 3; 351 System.arraycopy(data, pos, data, 0, end - pos); 352 end -= pos; 353 return 0; 354 } 355 356 357 private final void fill() throws IOException { 358 got = in.read(data, end, data.length - 1 - end); 359 if (got > 0) end += got; 360 } 361 362 363 @Override 364public synchronized long skip(long num) throws IOException { 365 byte[] tmp = new byte[(int) num]; 366 int got = read(tmp, 0, (int) num); 367 368 if (got > 0) 369 return got; 370 else 371 return 0L; 372 } 373 374 375 @Override 376public synchronized int available() throws IOException { 377 if (eof) return 0; 378 379 return in.available(); 380 } 381 382 383 static final int LZW_MAGIC = 0x1f9d; 384 private static final int MAX_BITS = 16; 385 private static final int INIT_BITS = 9; 386 private static final int HDR_MAXBITS = 0x1f; 387 private static final int HDR_EXTENDED = 0x20; 388 private static final int HDR_FREE = 0x40; 389 private static final int HDR_BLOCK_MODE = 0x80; 390 391 private void parse_header() throws IOException { 392// read in and check magic number 393 394 int t = in.read(); 395 if (t < 0) throw new EOFException("Failed to read magic number"); 396 int magic = (t & 0xff) << 8; 397 t = in.read(); 398 if (t < 0) throw new EOFException("Failed to read magic number"); 399 magic += t & 0xff; 400 if (magic != LZW_MAGIC) 401 throw new IOException("Input not in compress format (read " + 402 "magic number 0x" + 403 Integer.toHexString(magic) + ")"); 404 405 406// read in header byte 407 408 int header = in.read(); 409 if (header < 0) throw new EOFException("Failed to read header"); 410 411 block_mode = (header & HDR_BLOCK_MODE) > 0; 412 maxbits = header & HDR_MAXBITS; 413 414 if (maxbits > MAX_BITS) 415 throw new IOException("Stream compressed with " + maxbits + 416 " bits, but can only handle " + MAX_BITS + 417 " bits"); 418 419 if ((header & HDR_EXTENDED) > 0) 420 throw new IOException("Header extension bit set"); 421 422 if ((header & HDR_FREE) > 0) 423 throw new IOException("Header bit 6 set"); 424 425 logger.debug("block mode: {}", block_mode); 426 logger.debug("max bits: {}", maxbits); 427 428 429// initialize stuff 430 431 maxmaxcode = 1 << maxbits; 432 n_bits = INIT_BITS; 433 maxcode = (1 << n_bits) - 1; 434 bitmask = maxcode; 435 oldcode = -1; 436 finchar = 0; 437 free_ent = block_mode ? TBL_FIRST : 256; 438 439 tab_prefix = new int[1 << maxbits]; 440 tab_suffix = new byte[1 << maxbits]; 441 stack = new byte[1 << maxbits]; 442 stackp = stack.length; 443 444 for (int idx = 255; idx >= 0; idx--) 445 tab_suffix[idx] = (byte) idx; 446 } 447 448 /** 449 * This stream does not support mark/reset on the stream. 450 * 451 * @return false 452 */ 453 @Override 454public boolean markSupported() { 455 return false; 456 } 457 458 static public void uncompress( String fileInName, FileOutputStream out) throws IOException { 459 long start = System.currentTimeMillis(); 460 461 InputStream in = new UncompressInputStream( new FileInputStream(fileInName)); 462 463 // int total = 0; 464 byte[] buffer = new byte[100000]; 465 while (true) { 466 int bytesRead = in.read(buffer); 467 if (bytesRead == -1) break; 468 out.write(buffer, 0, bytesRead); 469 // total += bytesRead; 470 } 471 in.close(); 472 out.close(); 473 474 if (debugTiming) { 475 long end = System.currentTimeMillis(); 476 // logger.debug("Decompressed " + total + " bytes"); 477 logger.warn("Time: {} seconds", (end - start) / 1000); 478 } 479 } 480 481 482 private static final boolean debugTiming = false; 483 484 public static void main(String args[]) throws Exception { 485 if (args.length != 1) { 486 logger.info("Usage: UncompressInputStream <file>"); 487 System.exit(1); 488 } 489 490 InputStream in = 491 new UncompressInputStream(new FileInputStream(args[0])); 492 493 byte[] buf = new byte[100000]; 494 int tot = 0; 495 long beg = System.currentTimeMillis(); 496 497 while (true) { 498 int got = in.read(buf); 499 if (got < 0) break; 500 System.out.write(buf, 0, got); 501 tot += got; 502 } 503 504 long end = System.currentTimeMillis(); 505 logger.info("Decompressed {} bytes", tot); 506 logger.info("Time: {} seconds", (end - beg) / 1000); 507 in.close(); 508 } 509}