001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021/* 022 * @(#)UncompressInputStream.java 0.3-3 06/05/2001 023 * 024 * This file is part of the HTTPClient package 025 * Copyright (C) 1996-2001 Ronald Tschalar 026 * 027 * This library is free software; you can redistribute it and/or 028 * modify it under the terms of the GNU Lesser General Public 029 * License as published by the Free Software Foundation; either 030 * version 2 of the License, or (at your option) any later version. 031 * 032 * This library is distributed in the hope that it will be useful, 033 * but WITHOUT ANY WARRANTY; without even the implied warranty of 034 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 035 * Lesser General Public License for more details. 036 * 037 * You should have received a copy of the GNU Lesser General Public 038 * License along with this library; if not, write to the Free 039 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, 040 * MA 02111-1307, USA 041 * 042 * For questions, suggestions, bug-reports, enhancement-requests etc. 043 * I may be contacted at: 044 * 045 * ronald@xxxxxxxxxxxxx 046 * 047 * The HTTPClient's home page is located at: 048 * 049 * http://www.innovation.ch/java/HTTPClient/ 050 */ 051 052package org.biojava.nbio.core.util; 053 054import org.slf4j.Logger; 055import org.slf4j.LoggerFactory; 056 057import java.io.*; 058 059 060/** 061 * This class decompresses an input stream containing data compressed with 062 * the unix "compress" utility (LZC, a LZW variant). This code is based 063 * heavily on the <var>unlzw.c</var> code in <var>gzip-1.2.4</var> (written 064 * by Peter Jannesen) and the original compress code. 065 * 066 * 067 * This version has been modified from the original 0.3-3 version by the 068 * Unidata Program Center (support@xxxxxxxxxxxxxxxx) to make the constructor 069 * public and to fix a couple of bugs. 070 * Also: 071 * - markSupported() returns false 072 * - add uncompress() static method 073 * 074 * @version 0.3-3 06/05/2001 075 * @author Ronald Tschalar 076 * @author Unidata Program Center 077 * @author Richard Holland - making LZW_MAGIC package-visible. 078 * 079 * @version 0.3-5 2008/01/19 080 * @author Fred Hansen (zweibieren@yahoo.com) 081 * Fixed available() and the EOF condition for mainloop. 082 * Also added some comments. 083 * 084 * @version 1.0 2018/01/08 085 * @author Fred Hansen (zweibieren@yahoo.com) 086 * added uncompress(InputStream,OutputStream) 087 * and called it from main(String[]) 088 * and uncompress(String, FileOutputStream) 089 * normalize indentation 090 * rewrite skip method 091 * amend logging code in uncompress(String, FileOutputStream) 092 */ 093public class UncompressInputStream extends FilterInputStream { 094 private final static Logger logger 095 = LoggerFactory.getLogger(UncompressInputStream.class); 096 097 /** 098 * @param is the input stream to decompress 099 * @throws IOException if the header is malformed 100 */ 101 public UncompressInputStream(InputStream is) throws IOException { 102 super(is); 103 parse_header(); 104 } 105 106 107 @Override 108 public synchronized int read() throws IOException { 109 byte[] one = new byte[1]; 110 int b = read(one, 0, 1); 111 if (b == 1) 112 return (one[0] & 0xff); 113 else 114 return -1; 115 } 116 117 118 // string table stuff 119 private static final int TBL_CLEAR = 0x100; 120 private static final int TBL_FIRST = TBL_CLEAR + 1; 121 122 private int[] tab_prefix; 123 private byte[] tab_suffix; 124 final private int[] zeros = new int[256]; 125 private byte[] stack; 126 127 // various state 128 private boolean block_mode; 129 private int n_bits; 130 private int maxbits; 131 private int maxmaxcode; 132 private int maxcode; 133 private int bitmask; 134 private int oldcode; 135 private byte finchar; 136 private int stackp; 137 private int free_ent; 138 139 /* input buffer 140 The input stream must be considered in chunks 141 Each chunk is of length eight times the current code length. 142 Thus the chunk contains eight codes; NOT on byte boundaries. 143 */ 144 final private byte[] data = new byte[10000]; 145 private int 146 bit_pos = 0, // current bitwise location in bitstream 147 end = 0, // index of next byte to fill in data 148 got = 0; // number of bytes gotten by most recent read() 149 private boolean eof = false; 150 private static final int EXTRA = 64; 151 152 153 @Override 154 public synchronized int read(byte[] buf, int off, int len) 155 throws IOException { 156 if (eof) return -1; 157 int start = off; 158 159 /* Using local copies of various variables speeds things up by as 160 * much as 30% ! 161 */ 162 int[] l_tab_prefix = tab_prefix; 163 byte[] l_tab_suffix = tab_suffix; 164 byte[] l_stack = stack; 165 int l_n_bits = n_bits; 166 int l_maxcode = maxcode; 167 int l_maxmaxcode = maxmaxcode; 168 int l_bitmask = bitmask; 169 int l_oldcode = oldcode; 170 byte l_finchar = finchar; 171 int l_stackp = stackp; 172 int l_free_ent = free_ent; 173 byte[] l_data = data; 174 int l_bit_pos = bit_pos; 175 176 // empty stack if stuff still left 177 int s_size = l_stack.length - l_stackp; 178 if (s_size > 0) { 179 int num = (s_size >= len) ? len : s_size; 180 System.arraycopy(l_stack, l_stackp, buf, off, num); 181 off += num; 182 len -= num; 183 l_stackp += num; 184 } 185 186 if (len == 0) { 187 stackp = l_stackp; 188 return off - start; 189 } 190 191 // loop, filling local buffer until enough data has been decompressed 192 main_loop: do { 193 if (end < EXTRA) fill(); 194 195 int bit_end = (got > 0) 196 ? (end - end % l_n_bits) << 3 // set to a "chunk" boundary 197 : (end << 3) - (l_n_bits - 1); // no more data, set to last code 198 199 while (l_bit_pos < bit_end) { // handle 1-byte reads correctly 200 if (len == 0) { 201 n_bits = l_n_bits; 202 maxcode = l_maxcode; 203 maxmaxcode = l_maxmaxcode; 204 bitmask = l_bitmask; 205 oldcode = l_oldcode; 206 finchar = l_finchar; 207 stackp = l_stackp; 208 free_ent = l_free_ent; 209 bit_pos = l_bit_pos; 210 211 return off - start; 212 } 213 214 // check for code-width expansion 215 216 if (l_free_ent > l_maxcode) { 217 int n_bytes = l_n_bits << 3; 218 l_bit_pos = (l_bit_pos - 1) + 219 n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes; 220 221 l_n_bits++; 222 l_maxcode = (l_n_bits == maxbits) ? l_maxmaxcode : 223 (1 << l_n_bits) - 1; 224 225 logger.debug("Code-width expanded to ", l_n_bits); 226 227 l_bitmask = (1 << l_n_bits) - 1; 228 l_bit_pos = resetbuf(l_bit_pos); 229 continue main_loop; 230 } 231 232 233 // read next code 234 235 int pos = l_bit_pos >> 3; 236 int code = (((l_data[pos] & 0xFF) | ((l_data[pos + 1] & 0xFF) << 8) | 237 ((l_data[pos + 2] & 0xFF) << 16)) 238 >> (l_bit_pos & 0x7)) & l_bitmask; 239 l_bit_pos += l_n_bits; 240 241 242 // handle first iteration 243 244 if (l_oldcode == -1) { 245 if (code >= 256) 246 throw new IOException("corrupt input: " + code + 247 " > 255"); 248 l_finchar = (byte) (l_oldcode = code); 249 buf[off++] = l_finchar; 250 len--; 251 continue; 252 } 253 254 255 // handle CLEAR code 256 257 if (code == TBL_CLEAR && block_mode) { 258 System.arraycopy(zeros, 0, l_tab_prefix, 0, zeros.length); 259 l_free_ent = TBL_FIRST - 1; 260 261 int n_bytes = l_n_bits << 3; 262 l_bit_pos = (l_bit_pos - 1) + 263 n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes; 264 l_n_bits = INIT_BITS; 265 l_maxcode = (1 << l_n_bits) - 1; 266 l_bitmask = l_maxcode; 267 268 logger.debug("Code tables reset"); 269 270 l_bit_pos = resetbuf(l_bit_pos); 271 continue main_loop; 272 } 273 274 275 // setup 276 277 int incode = code; 278 l_stackp = l_stack.length; 279 280 281 // Handle KwK case 282 283 if (code >= l_free_ent) { 284 if (code > l_free_ent) 285 throw new IOException("corrupt input: code=" + code + 286 ", free_ent=" + l_free_ent); 287 288 l_stack[--l_stackp] = l_finchar; 289 code = l_oldcode; 290 } 291 292 293 // Generate output characters in reverse order 294 295 while (code >= 256) { 296 l_stack[--l_stackp] = l_tab_suffix[code]; 297 code = l_tab_prefix[code]; 298 } 299 l_finchar = l_tab_suffix[code]; 300 buf[off++] = l_finchar; 301 len--; 302 303 304 // And put them out in forward order 305 306 s_size = l_stack.length - l_stackp; 307 int num = (s_size >= len) ? len : s_size; 308 System.arraycopy(l_stack, l_stackp, buf, off, num); 309 off += num; 310 len -= num; 311 l_stackp += num; 312 313 314 // generate new entry in table 315 316 if (l_free_ent < l_maxmaxcode) { 317 l_tab_prefix[l_free_ent] = l_oldcode; 318 l_tab_suffix[l_free_ent] = l_finchar; 319 l_free_ent++; 320 } 321 322 323 // Remember previous code 324 325 l_oldcode = incode; 326 327 328 // if output buffer full, then return 329 330 if (len == 0) { 331 n_bits = l_n_bits; 332 maxcode = l_maxcode; 333 bitmask = l_bitmask; 334 oldcode = l_oldcode; 335 finchar = l_finchar; 336 stackp = l_stackp; 337 free_ent = l_free_ent; 338 bit_pos = l_bit_pos; 339 340 return off - start; 341 } 342 } 343 344 l_bit_pos = resetbuf(l_bit_pos); 345 } while 346 // old code: (got>0) fails if code width expands near EOF 347 (got > 0 // usually true 348 || l_bit_pos < (end << 3) - (l_n_bits - 1)); // last few bytes 349 350 n_bits = l_n_bits; 351 maxcode = l_maxcode; 352 bitmask = l_bitmask; 353 oldcode = l_oldcode; 354 finchar = l_finchar; 355 stackp = l_stackp; 356 free_ent = l_free_ent; 357 bit_pos = l_bit_pos; 358 359 eof = true; 360 return off - start; 361 } 362 363 364 /** 365 * Moves the unread data in the buffer to the beginning and resets 366 * the pointers. 367 */ 368 private int resetbuf(int bit_pos) { 369 int pos = bit_pos >> 3; 370 System.arraycopy(data, pos, data, 0, end - pos); 371 end -= pos; 372 return 0; 373 } 374 375 376 private void fill() throws IOException { 377 got = in.read(data, end, data.length - 1 - end); 378 if (got > 0) end += got; 379 } 380 381 382 @Override 383 public synchronized long skip(long num) throws IOException { 384 return Math.max(0, read(new byte[(int) num])); 385 } 386 387 388 @Override 389 public synchronized int available() throws IOException { 390 if (eof) return 0; 391 // the old code was: return in.available(); 392 // it fails because this.read() can return bytes 393 // even after in.available() is zero 394 // -- zweibieren 395 int avail = in.available(); 396 return (avail == 0) ? 1 : avail; 397 } 398 399 400 static final int LZW_MAGIC = 0x1f9d; 401 private static final int MAX_BITS = 16; 402 private static final int INIT_BITS = 9; 403 private static final int HDR_MAXBITS = 0x1f; 404 private static final int HDR_EXTENDED = 0x20; 405 private static final int HDR_FREE = 0x40; 406 private static final int HDR_BLOCK_MODE = 0x80; 407 408 private void parse_header() throws IOException { 409 // read in and check magic number 410 int t = in.read(); 411 if (t < 0) throw new EOFException("Failed to read magic number"); 412 int magic = (t & 0xff) << 8; 413 t = in.read(); 414 if (t < 0) throw new EOFException("Failed to read magic number"); 415 magic += t & 0xff; 416 if (magic != LZW_MAGIC) 417 throw new IOException("Input not in compress format (read " + 418 "magic number 0x" + 419 Integer.toHexString(magic) + ")"); 420 421 // read in header byte 422 int header = in.read(); 423 if (header < 0) throw new EOFException("Failed to read header"); 424 425 block_mode = (header & HDR_BLOCK_MODE) > 0; 426 maxbits = header & HDR_MAXBITS; 427 428 if (maxbits > MAX_BITS) 429 throw new IOException("Stream compressed with " + maxbits + 430 " bits, but can only handle " + MAX_BITS + 431 " bits"); 432 433 if ((header & HDR_EXTENDED) > 0) 434 throw new IOException("Header extension bit set"); 435 436 if ((header & HDR_FREE) > 0) 437 throw new IOException("Header bit 6 set"); 438 439 logger.debug("block mode: {}", block_mode); 440 logger.debug("max bits: {}", maxbits); 441 442 // initialize stuff 443 maxmaxcode = 1 << maxbits; 444 n_bits = INIT_BITS; 445 maxcode = (1 << n_bits) - 1; 446 bitmask = maxcode; 447 oldcode = -1; 448 finchar = 0; 449 free_ent = block_mode ? TBL_FIRST : 256; 450 451 tab_prefix = new int[1 << maxbits]; 452 tab_suffix = new byte[1 << maxbits]; 453 stack = new byte[1 << maxbits]; 454 stackp = stack.length; 455 456 for (int idx = 255; idx >= 0; idx--) 457 tab_suffix[idx] = (byte) idx; 458 } 459 460 /** 461 * This stream does not support mark/reset on the stream. 462 * 463 * @return false 464 */ 465 @Override 466 public boolean markSupported() { 467 return false; 468 } 469 470 /** 471 * Read a named file and uncompress it. 472 * @param fileInName Name of compressed file. 473 * @param out A destination for the result. It is closed after data is sent. 474 * @return number of bytes sent to the output stream, 475 * @throws IOException for any error 476 */ 477 public static long uncompress(String fileInName, FileOutputStream out) 478 throws IOException { 479 long start = System.currentTimeMillis(); 480 long total; 481 try (InputStream fin = new FileInputStream(fileInName)) { 482 total = uncompress(fin, out); 483 } 484 out.close(); 485 486 if (debugTiming) { 487 long end = System.currentTimeMillis(); 488 logger.info("Decompressed {} bytes", total); 489 UncompressInputStream.logger.info("Time: {} seconds", (end - start) / 1000); 490 } 491 return total; 492 } 493 494 /** 495 * Read an input stream and uncompress it to an output stream. 496 * @param in the incoming InputStream. It is NOT closed. 497 * @param out the destination OutputStream. It is NOT closed. 498 * @return number of bytes sent to the output stream 499 * @throws IOException for any error 500 */ 501 public static long uncompress(InputStream in, OutputStream out) 502 throws IOException { 503 UncompressInputStream ucis = new UncompressInputStream(in); 504 long total = 0; 505 byte[] buffer = new byte[100000]; 506 while (true) { 507 int bytesRead = ucis.read(buffer); 508 if (bytesRead == -1) break; 509 out.write(buffer, 0, bytesRead); 510 total += bytesRead; 511 } 512 return total; 513 } 514 515 private static final boolean debugTiming = false; 516 517 518}