1 #define DEBG(x) 2 #define DEBG1(x) 3 /* inflate.c -- Not copyrighted 1992 by Mark Adler 4 version c10p1, 10 January 1993 */ 5 6 /* You can do whatever you like with this source file, though I would 7 prefer that if you modify it and redistribute it that you include 8 comments to that effect with your name and the date. Thank you. 9 [The history has been moved to the file ChangeLog.] 10 */ 11 12 /* 13 Inflate deflated (PKZIP's method 8 compressed) data. The compression 14 method searches for as much of the current string of bytes (up to a 15 length of 258) in the previous 32K bytes. If it doesn't find any 16 matches (of at least length 3), it codes the next byte. Otherwise, it 17 codes the length of the matched string and its distance backwards from 18 the current position. There is a single Huffman code that codes both 19 single bytes (called "literals") and match lengths. A second Huffman 20 code codes the distance information, which follows a length code. Each 21 length or distance code actually represents a base value and a number 22 of "extra" (sometimes zero) bits to get to add to the base value. At 23 the end of each deflated block is a special end-of-block (EOB) literal/ 24 length code. The decoding process is basically: get a literal/length 25 code; if EOB then done; if a literal, emit the decoded byte; if a 26 length then get the distance and emit the referred-to bytes from the 27 sliding window of previously emitted data. 28 29 There are (currently) three kinds of inflate blocks: stored, fixed, and 30 dynamic. The compressor deals with some chunk of data at a time, and 31 decides which method to use on a chunk-by-chunk basis. A chunk might 32 typically be 32K or 64K. If the chunk is uncompressible, then the 33 "stored" method is used. In this case, the bytes are simply stored as 34 is, eight bits per byte, with none of the above coding. The bytes are 35 preceded by a count, since there is no longer an EOB code. 36 37 If the data is compressible, then either the fixed or dynamic methods 38 are used. In the dynamic method, the compressed data is preceded by 39 an encoding of the literal/length and distance Huffman codes that are 40 to be used to decode this block. The representation is itself Huffman 41 coded, and so is preceded by a description of that code. These code 42 descriptions take up a little space, and so for small blocks, there is 43 a predefined set of codes, called the fixed codes. The fixed method is 44 used if the block codes up smaller that way (usually for quite small 45 chunks), otherwise the dynamic method is used. In the latter case, the 46 codes are customized to the probabilities in the current block, and so 47 can code it much better than the pre-determined fixed codes. 48 49 The Huffman codes themselves are decoded using a mutli-level table 50 lookup, in order to maximize the speed of decoding plus the speed of 51 building the decoding tables. See the comments below that precede the 52 lbits and dbits tuning parameters. 53 */ 54 55 56 /* 57 Notes beyond the 1.93a appnote.txt: 58 59 1. Distance pointers never point before the beginning of the output 60 stream. 61 2. Distance pointers can point back across blocks, up to 32k away. 62 3. There is an implied maximum of 7 bits for the bit length table and 63 15 bits for the actual data. 64 4. If only one code exists, then it is encoded using one bit. (Zero 65 would be more efficient, but perhaps a little confusing.) If two 66 codes exist, they are coded using one bit each (0 and 1). 67 5. There is no way of sending zero distance codes--a dummy must be 68 sent if there are none. (History: a pre 2.0 version of PKZIP would 69 store blocks with no distance codes, but this was discovered to be 70 too harsh a criterion.) Valid only for 1.93a. 2.04c does allow 71 zero distance codes, which is sent as one code of zero bits in 72 length. 73 6. There are up to 286 literal/length codes. Code 256 represents the 74 end-of-block. Note however that the static length tree defines 75 288 codes just to fill out the Huffman codes. Codes 286 and 287 76 cannot be used though, since there is no length base or extra bits 77 defined for them. Similarily, there are up to 30 distance codes. 78 However, static trees define 32 codes (all 5 bits) to fill out the 79 Huffman codes, but the last two had better not show up in the data. 80 7. Unzip can check dynamic Huffman blocks for complete code sets. 81 The exception is that a single code would not be complete (see #4). 82 8. The five bits following the block type is really the number of 83 literal codes sent minus 257. 84 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits 85 (1+6+6). Therefore, to output three times the length, you output 86 three codes (1+1+1), whereas to output four times the same length, 87 you only need two codes (1+3). Hmm. 88 10. In the tree reconstruction algorithm, Code = Code + Increment 89 only if BitLength(i) is not zero. (Pretty obvious.) 90 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) 91 12. Note: length code 284 can represent 227-258, but length code 285 92 really is 258. The last length deserves its own, short code 93 since it gets used a lot in very redundant files. The length 94 258 is special since 258 - 3 (the min match length) is 255. 95 13. The literal/length and distance code bit lengths are read as a 96 single stream of lengths. It is possible (and advantageous) for 97 a repeat code (16, 17, or 18) to go across the boundary between 98 the two sets of lengths. 99 */ 100 101 #ifndef lint 102 static char rcsid[] = "$Id: inflate.c,v 0.10 1993/02/04 13:21:06 jloup Exp $"; 103 #endif 104 105 /* #include "tailor.h" */ 106 #include "gzip.h" 107 #define slide window 108 109 #if defined(STDC_HEADERS) || defined(HAVE_STDLIB_H) 110 # include <sys/types.h> 111 # include <stdlib.h> 112 #endif 113 114 /* Huffman code lookup table entry--this entry is four bytes for machines 115 that have 16-bit pointers (e.g. PC's in the small or medium model). 116 Valid extra bits are 0..13. e == 15 is EOB (end of block), e == 16 117 means that v is a literal, 16 < e < 32 means that v is a pointer to 118 the next table, which codes e - 16 bits, and lastly e == 99 indicates 119 an unused code. If a code with e == 99 is looked up, this implies an 120 error in the data. */ 121 struct huft { 122 uch e; /* number of extra bits or operation */ 123 uch b; /* number of bits in this code or subcode */ 124 union { 125 ush n; /* literal, length base, or distance base */ 126 struct huft *t; /* pointer to next level of table */ 127 } v; 128 }; 129 130 131 /* Function prototypes */ 132 int huft_build OF((unsigned *, unsigned, unsigned, ush *, ush *, 133 struct huft **, int *)); 134 int huft_free OF((struct huft *)); 135 int inflate_codes OF((struct huft *, struct huft *, int, int)); 136 int inflate_stored OF((void)); 137 int inflate_fixed OF((void)); 138 int inflate_dynamic OF((void)); 139 int inflate_block OF((int *)); 140 int inflate OF((void)); 141 142 143 /* The inflate algorithm uses a sliding 32K byte window on the uncompressed 144 stream to find repeated byte strings. This is implemented here as a 145 circular buffer. The index is updated simply by incrementing and then 146 and'ing with 0x7fff (32K-1). */ 147 /* It is left to other modules to supply the 32K area. It is assumed 148 to be usable as if it were declared "uch slide[32768];" or as just 149 "uch *slide;" and then malloc'ed in the latter case. The definition 150 must be in unzip.h, included above. */ 151 /* unsigned wp; current position in slide */ 152 #define wp outcnt 153 #define flush_output(w) (wp=(w),flush_window()) 154 155 /* Tables for deflate from PKZIP's appnote.txt. */ 156 static unsigned border[] = { /* Order of the bit length code lengths */ 157 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; 158 static ush cplens[] = { /* Copy lengths for literal codes 257..285 */ 159 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 160 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; 161 /* note: see note #13 above about the 258 in this list. */ 162 static ush cplext[] = { /* Extra bits for literal codes 257..285 */ 163 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 164 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99}; /* 99==invalid */ 165 static ush cpdist[] = { /* Copy offsets for distance codes 0..29 */ 166 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 167 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 168 8193, 12289, 16385, 24577}; 169 static ush cpdext[] = { /* Extra bits for distance codes */ 170 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 171 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 172 12, 12, 13, 13}; 173 174 175 176 /* Macros for inflate() bit peeking and grabbing. 177 The usage is: 178 179 NEEDBITS(j) 180 x = b & mask_bits[j]; 181 DUMPBITS(j) 182 183 where NEEDBITS makes sure that b has at least j bits in it, and 184 DUMPBITS removes the bits from b. The macros use the variable k 185 for the number of bits in b. Normally, b and k are register 186 variables for speed, and are initialized at the begining of a 187 routine that uses these macros from a global bit buffer and count. 188 189 If we assume that EOB will be the longest code, then we will never 190 ask for bits with NEEDBITS that are beyond the end of the stream. 191 So, NEEDBITS should not read any more bytes than are needed to 192 meet the request. Then no bytes need to be "returned" to the buffer 193 at the end of the last block. 194 195 However, this assumption is not true for fixed blocks--the EOB code 196 is 7 bits, but the other literal/length codes can be 8 or 9 bits. 197 (The EOB code is shorter than other codes becuase fixed blocks are 198 generally short. So, while a block always has an EOB, many other 199 literal/length codes have a significantly lower probability of 200 showing up at all.) However, by making the first table have a 201 lookup of seven bits, the EOB code will be found in that first 202 lookup, and so will not require that too many bits be pulled from 203 the stream. 204 */ 205 206 ulg bb; /* bit buffer */ 207 unsigned bk; /* bits in bit buffer */ 208 209 ush mask_bits[] = { 210 0x0000, 211 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, 212 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff 213 }; 214 215 #ifdef CRYPT 216 uch cc; 217 # define NEXTBYTE() \ 218 (decrypt ? (cc = get_byte(), zdecode(cc), cc) : get_byte()) 219 #else 220 # define NEXTBYTE() (uch)get_byte() 221 #endif 222 #define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}} 223 #define DUMPBITS(n) {b>>=(n);k-=(n);} 224 225 226 /* 227 Huffman code decoding is performed using a multi-level table lookup. 228 The fastest way to decode is to simply build a lookup table whose 229 size is determined by the longest code. However, the time it takes 230 to build this table can also be a factor if the data being decoded 231 is not very long. The most common codes are necessarily the 232 shortest codes, so those codes dominate the decoding time, and hence 233 the speed. The idea is you can have a shorter table that decodes the 234 shorter, more probable codes, and then point to subsidiary tables for 235 the longer codes. The time it costs to decode the longer codes is 236 then traded against the time it takes to make longer tables. 237 238 This results of this trade are in the variables lbits and dbits 239 below. lbits is the number of bits the first level table for literal/ 240 length codes can decode in one step, and dbits is the same thing for 241 the distance codes. Subsequent tables are also less than or equal to 242 those sizes. These values may be adjusted either when all of the 243 codes are shorter than that, in which case the longest code length in 244 bits is used, or when the shortest code is *longer* than the requested 245 table size, in which case the length of the shortest code in bits is 246 used. 247 248 There are two different values for the two tables, since they code a 249 different number of possibilities each. The literal/length table 250 codes 286 possible values, or in a flat code, a little over eight 251 bits. The distance table codes 30 possible values, or a little less 252 than five bits, flat. The optimum values for speed end up being 253 about one bit more than those, so lbits is 8+1 and dbits is 5+1. 254 The optimum values may differ though from machine to machine, and 255 possibly even between compilers. Your mileage may vary. 256 */ 257 258 259 int lbits = 9; /* bits in base literal/length lookup table */ 260 int dbits = 6; /* bits in base distance lookup table */ 261 262 263 /* If BMAX needs to be larger than 16, then h and x[] should be ulg. */ 264 #define BMAX 16 /* maximum bit length of any code (16 for explode) */ 265 #define N_MAX 288 /* maximum number of codes in any set */ 266 267 268 unsigned hufts; /* track memory usage */ 269 270 271 int huft_build(b, n, s, d, e, t, m) /* */ 272 unsigned *b; /* code lengths in bits (all assumed <= BMAX) */ 273 unsigned n; /* number of codes (assumed <= N_MAX) */ 274 unsigned s; /* number of simple-valued codes (0..s-1) */ 275 ush *d; /* list of base values for non-simple codes */ 276 ush *e; /* list of extra bits for non-simple codes */ 277 struct huft **t; /* result: starting table */ 278 int *m; /* maximum lookup bits, returns actual */ 279 /* Given a list of code lengths and a maximum table size, make a set of 280 tables to decode that set of codes. Return zero on success, one if 281 the given code set is incomplete (the tables are still built in this 282 case), two if the input is invalid (all zero length codes or an 283 oversubscribed set of lengths), and three if not enough memory. */ 284 { 285 unsigned a; /* counter for codes of length k */ 286 unsigned c[BMAX+1]; /* bit length count table */ 287 unsigned f; /* i repeats in table every f entries */ 288 int g; /* maximum code length */ 289 int h; /* table level */ 290 register unsigned i; /* counter, current code */ 291 register unsigned j; /* counter */ 292 register int k; /* number of bits in current code */ 293 int l; /* bits per table (returned in m) */ 294 register unsigned *p; /* pointer into c[], b[], or v[] */ 295 register struct huft *q; /* points to current table */ 296 struct huft r; /* table entry for structure assignment */ 297 struct huft *u[BMAX]; /* table stack */ 298 unsigned v[N_MAX]; /* values in order of bit length */ 299 register int w; /* bits before this table == (l * h) */ 300 unsigned x[BMAX+1]; /* bit offsets, then code stack */ 301 unsigned *xp; /* pointer into x */ 302 int y; /* number of dummy codes added */ 303 unsigned z; /* number of entries in current table */ 304 305 DEBG("huft1 "); 306 307 /* Generate counts for each bit length */ 308 memzero(c, sizeof(c)); 309 p = b; i = n; 310 do { 311 Tracecv(*p, (stderr, (n-i >= ' ' && n-i <= '~' ? "%c %d\n" : "0x%x %d\n"), 312 n-i, *p)); 313 c[*p++]++; /* assume all entries <= BMAX */ 314 } while (--i); 315 if (c[0] == n) /* null input--all zero length codes */ 316 { 317 *t = (struct huft *)NULL; 318 *m = 0; 319 return 0; 320 } 321 322 DEBG("huft2 "); 323 324 /* Find minimum and maximum length, bound *m by those */ 325 l = *m; 326 for (j = 1; j <= BMAX; j++) 327 if (c[j]) 328 break; 329 k = j; /* minimum code length */ 330 if ((unsigned)l < j) 331 l = j; 332 for (i = BMAX; i; i--) 333 if (c[i]) 334 break; 335 g = i; /* maximum code length */ 336 if ((unsigned)l > i) 337 l = i; 338 *m = l; 339 340 DEBG("huft3 "); 341 342 /* Adjust last length count to fill out codes, if needed */ 343 for (y = 1 << j; j < i; j++, y <<= 1) 344 if ((y -= c[j]) < 0) 345 return 2; /* bad input: more codes than bits */ 346 if ((y -= c[i]) < 0) 347 return 2; 348 c[i] += y; 349 350 DEBG("huft4 "); 351 352 /* Generate starting offsets into the value table for each length */ 353 x[1] = j = 0; 354 p = c + 1; xp = x + 2; 355 while (--i) { /* note that i == g from above */ 356 *xp++ = (j += *p++); 357 } 358 359 DEBG("huft5 "); 360 361 /* Make a table of values in order of bit lengths */ 362 p = b; i = 0; 363 do { 364 if ((j = *p++) != 0) 365 v[x[j]++] = i; 366 } while (++i < n); 367 368 DEBG("h6 "); 369 370 /* Generate the Huffman codes and for each, make the table entries */ 371 x[0] = i = 0; /* first Huffman code is zero */ 372 p = v; /* grab values in bit order */ 373 h = -1; /* no tables yet--level -1 */ 374 w = -l; /* bits decoded == (l * h) */ 375 u[0] = (struct huft *)NULL; /* just to keep compilers happy */ 376 q = (struct huft *)NULL; /* ditto */ 377 z = 0; /* ditto */ 378 DEBG("h6a "); 379 380 /* go through the bit lengths (k already is bits in shortest code) */ 381 for (; k <= g; k++) 382 { 383 DEBG("h6b "); 384 a = c[k]; 385 while (a--) 386 { 387 DEBG("h6b1 "); 388 /* here i is the Huffman code of length k bits for value *p */ 389 /* make tables up to required level */ 390 while (k > w + l) 391 { 392 DEBG1("1 "); 393 h++; 394 w += l; /* previous table always l bits */ 395 396 /* compute minimum size table less than or equal to l bits */ 397 z = (z = g - w) > (unsigned)l ? l : z; /* upper limit on table size */ 398 if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ 399 { /* too few codes for k-w bit table */ 400 DEBG1("2 "); 401 f -= a + 1; /* deduct codes from patterns left */ 402 xp = c + k; 403 while (++j < z) /* try smaller tables up to z bits */ 404 { 405 if ((f <<= 1) <= *++xp) 406 break; /* enough codes to use up j bits */ 407 f -= *xp; /* else deduct codes from patterns */ 408 } 409 } 410 DEBG1("3 "); 411 z = 1 << j; /* table entries for j-bit table */ 412 413 /* allocate and link in new table */ 414 if ((q = (struct huft *)malloc((z + 1)*sizeof(struct huft))) == 415 (struct huft *)NULL) 416 { 417 DEBG1("31 "); 418 error("malloc failed\n"); 419 if (h) 420 huft_free(u[0]); 421 return 3; /* not enough memory */ 422 } 423 DEBG1("4 "); 424 hufts += z + 1; /* track memory usage */ 425 *t = q + 1; /* link to list for huft_free() */ 426 *(t = &(q->v.t)) = (struct huft *)NULL; 427 u[h] = ++q; /* table starts after link */ 428 429 DEBG1("5 "); 430 /* connect to last table, if there is one */ 431 if (h) 432 { 433 x[h] = i; /* save pattern for backing up */ 434 r.b = (uch)l; /* bits to dump before this table */ 435 r.e = (uch)(16 + j); /* bits in this table */ 436 r.v.t = q; /* pointer to this table */ 437 j = i >> (w - l); /* (get around Turbo C bug) */ 438 u[h-1][j] = r; /* connect to last table */ 439 } 440 DEBG1("6 "); 441 } 442 DEBG("h6c "); 443 444 /* set up table entry in r */ 445 r.b = (uch)(k - w); 446 if (p >= v + n) 447 r.e = 99; /* out of values--invalid code */ 448 else if (*p < s) 449 { 450 r.e = (uch)(*p < 256 ? 16 : 15); /* 256 is end-of-block code */ 451 r.v.n = *p++; /* simple code is just the value */ 452 } 453 else 454 { 455 r.e = (uch)e[*p - s]; /* non-simple--look up in lists */ 456 r.v.n = d[*p++ - s]; 457 } 458 DEBG("h6d "); 459 460 /* fill code-like entries with r */ 461 f = 1 << (k - w); 462 for (j = i >> w; j < z; j += f) 463 q[j] = r; 464 465 /* backwards increment the k-bit code i */ 466 for (j = 1 << (k - 1); i & j; j >>= 1) 467 i ^= j; 468 i ^= j; 469 470 /* backup over finished tables */ 471 while ((i & ((1 << w) - 1)) != x[h]) 472 { 473 h--; /* don't need to update q */ 474 w -= l; 475 } 476 DEBG("h6e "); 477 } 478 DEBG("h6f "); 479 } 480 481 DEBG("huft7 "); 482 483 /* Return true (1) if we were given an incomplete table */ 484 return y != 0 && g != 1; 485 } 486 487 488 489 int huft_free(t) /* */ 490 struct huft *t; /* table to free */ 491 /* Free the malloc'ed tables built by huft_build(), which makes a linked 492 list of the tables it made, with the links in a dummy first entry of 493 each table. */ 494 { 495 register struct huft *p, *q; 496 497 498 /* Go through linked list, freeing from the malloced (t[-1]) address. */ 499 p = t; 500 while (p != (struct huft *)NULL) 501 { 502 q = (--p)->v.t; 503 free(p); 504 p = q; 505 } 506 return 0; 507 } 508 509 510 int inflate_codes(tl, td, bl, bd) /* */ 511 struct huft *tl, *td; /* literal/length and distance decoder tables */ 512 int bl, bd; /* number of bits decoded by tl[] and td[] */ 513 /* inflate (decompress) the codes in a deflated (compressed) block. 514 Return an error code or zero if it all goes ok. */ 515 { 516 register unsigned e; /* table entry flag/number of extra bits */ 517 unsigned n, d; /* length and index for copy */ 518 unsigned w; /* current window position */ 519 struct huft *t; /* pointer to table entry */ 520 unsigned ml, md; /* masks for bl and bd bits */ 521 register ulg b; /* bit buffer */ 522 register unsigned k; /* number of bits in bit buffer */ 523 524 525 /* make local copies of globals */ 526 b = bb; /* initialize bit buffer */ 527 k = bk; 528 w = wp; /* initialize window position */ 529 530 /* inflate the coded data */ 531 ml = mask_bits[bl]; /* precompute masks for speed */ 532 md = mask_bits[bd]; 533 for (;;) /* do until end of block */ 534 { 535 NEEDBITS((unsigned)bl) 536 if ((e = (t = tl + ((unsigned)b & ml))->e) > 16) 537 do { 538 if (e == 99) 539 return 1; 540 DUMPBITS(t->b) 541 e -= 16; 542 NEEDBITS(e) 543 } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16); 544 DUMPBITS(t->b) 545 if (e == 16) /* then it's a literal */ 546 { 547 slide[w++] = (uch)t->v.n; 548 Tracevv((stderr, "%c", slide[w-1])); 549 if (w == WSIZE) 550 { 551 flush_output(w); 552 w = 0; 553 } 554 } 555 else /* it's an EOB or a length */ 556 { 557 /* exit if end of block */ 558 if (e == 15) 559 break; 560 561 /* get length of block to copy */ 562 NEEDBITS(e) 563 n = t->v.n + ((unsigned)b & mask_bits[e]); 564 DUMPBITS(e); 565 566 /* decode distance of block to copy */ 567 NEEDBITS((unsigned)bd) 568 if ((e = (t = td + ((unsigned)b & md))->e) > 16) 569 do { 570 if (e == 99) 571 return 1; 572 DUMPBITS(t->b) 573 e -= 16; 574 NEEDBITS(e) 575 } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16); 576 DUMPBITS(t->b) 577 NEEDBITS(e) 578 d = w - t->v.n - ((unsigned)b & mask_bits[e]); 579 DUMPBITS(e) 580 Tracevv((stderr,"\\[%d,%d]", w-d, n)); 581 582 /* do the copy */ 583 do { 584 n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e); 585 #if !defined(NOMEMCPY) && !defined(DEBUG) 586 if (w - d >= e) /* (this test assumes unsigned comparison) */ 587 { 588 memcpy(slide + w, slide + d, e); 589 w += e; 590 d += e; 591 } 592 else /* do it slow to avoid memcpy() overlap */ 593 #endif /* !NOMEMCPY */ 594 do { 595 slide[w++] = slide[d++]; 596 Tracevv((stderr, "%c", slide[w-1])); 597 } while (--e); 598 if (w == WSIZE) 599 { 600 flush_output(w); 601 w = 0; 602 } 603 } while (n); 604 } 605 } 606 607 608 /* restore the globals from the locals */ 609 wp = w; /* restore global window pointer */ 610 bb = b; /* restore global bit buffer */ 611 bk = k; 612 613 /* done */ 614 return 0; 615 } 616 617 618 619 int inflate_stored() /* */ 620 /* "decompress" an inflated type 0 (stored) block. */ 621 { 622 unsigned n; /* number of bytes in block */ 623 unsigned w; /* current window position */ 624 register ulg b; /* bit buffer */ 625 register unsigned k; /* number of bits in bit buffer */ 626 627 DEBG("<stor"); 628 629 /* make local copies of globals */ 630 b = bb; /* initialize bit buffer */ 631 k = bk; 632 w = wp; /* initialize window position */ 633 634 635 /* go to byte boundary */ 636 n = k & 7; 637 DUMPBITS(n); 638 639 640 /* get the length and its complement */ 641 NEEDBITS(16) 642 n = ((unsigned)b & 0xffff); 643 DUMPBITS(16) 644 NEEDBITS(16) 645 if (n != (unsigned)((~b) & 0xffff)) 646 return 1; /* error in compressed data */ 647 DUMPBITS(16) 648 649 650 /* read and output the compressed data */ 651 while (n--) 652 { 653 NEEDBITS(8) 654 slide[w++] = (uch)b; 655 if (w == WSIZE) 656 { 657 flush_output(w); 658 w = 0; 659 } 660 DUMPBITS(8) 661 } 662 663 664 /* restore the globals from the locals */ 665 wp = w; /* restore global window pointer */ 666 bb = b; /* restore global bit buffer */ 667 bk = k; 668 669 DEBG(">"); 670 return 0; 671 } 672 673 674 675 int inflate_fixed() /* */ 676 /* decompress an inflated type 1 (fixed Huffman codes) block. We should 677 either replace this with a custom decoder, or at least precompute the 678 Huffman tables. */ 679 { 680 int i; /* temporary variable */ 681 struct huft *tl; /* literal/length code table */ 682 struct huft *td; /* distance code table */ 683 int bl; /* lookup bits for tl */ 684 int bd; /* lookup bits for td */ 685 unsigned l[288]; /* length list for huft_build */ 686 687 DEBG("<fix"); 688 689 /* set up literal table */ 690 for (i = 0; i < 144; i++) 691 l[i] = 8; 692 for (; i < 256; i++) 693 l[i] = 9; 694 for (; i < 280; i++) 695 l[i] = 7; 696 for (; i < 288; i++) /* make a complete, but wrong code set */ 697 l[i] = 8; 698 bl = 7; 699 if ((i = huft_build(l, 288, 257, cplens, cplext, &tl, &bl)) != 0) 700 return i; 701 702 703 /* set up distance table */ 704 for (i = 0; i < 30; i++) /* make an incomplete code set */ 705 l[i] = 5; 706 bd = 5; 707 if ((i = huft_build(l, 30, 0, cpdist, cpdext, &td, &bd)) > 1) 708 { 709 huft_free(tl); 710 711 DEBG(">"); 712 return i; 713 } 714 715 716 /* decompress until an end-of-block code */ 717 if (inflate_codes(tl, td, bl, bd)) 718 return 1; 719 720 721 /* free the decoding tables, return */ 722 huft_free(tl); 723 huft_free(td); 724 return 0; 725 } 726 727 728 729 int inflate_dynamic() /* */ 730 /* decompress an inflated type 2 (dynamic Huffman codes) block. */ 731 { 732 int i; /* temporary variables */ 733 unsigned j; 734 unsigned l; /* last length */ 735 unsigned m; /* mask for bit lengths table */ 736 unsigned n; /* number of lengths to get */ 737 struct huft *tl; /* literal/length code table */ 738 struct huft *td; /* distance code table */ 739 int bl; /* lookup bits for tl */ 740 int bd; /* lookup bits for td */ 741 unsigned nb; /* number of bit length codes */ 742 unsigned nl; /* number of literal/length codes */ 743 unsigned nd; /* number of distance codes */ 744 #ifdef PKZIP_BUG_WORKAROUND 745 unsigned ll[288+32]; /* literal/length and distance code lengths */ 746 #else 747 unsigned ll[286+30]; /* literal/length and distance code lengths */ 748 #endif 749 register ulg b; /* bit buffer */ 750 register unsigned k; /* number of bits in bit buffer */ 751 752 DEBG("<dyn"); 753 754 /* make local bit buffer */ 755 b = bb; 756 k = bk; 757 758 759 /* read in table lengths */ 760 NEEDBITS(5) 761 nl = 257 + ((unsigned)b & 0x1f); /* number of literal/length codes */ 762 DUMPBITS(5) 763 NEEDBITS(5) 764 nd = 1 + ((unsigned)b & 0x1f); /* number of distance codes */ 765 DUMPBITS(5) 766 NEEDBITS(4) 767 nb = 4 + ((unsigned)b & 0xf); /* number of bit length codes */ 768 DUMPBITS(4) 769 #ifdef PKZIP_BUG_WORKAROUND 770 if (nl > 288 || nd > 32) 771 #else 772 if (nl > 286 || nd > 30) 773 #endif 774 return 1; /* bad lengths */ 775 776 DEBG("dyn1 "); 777 778 /* read in bit-length-code lengths */ 779 for (j = 0; j < nb; j++) 780 { 781 NEEDBITS(3) 782 ll[border[j]] = (unsigned)b & 7; 783 DUMPBITS(3) 784 } 785 for (; j < 19; j++) 786 ll[border[j]] = 0; 787 788 DEBG("dyn2 "); 789 790 /* build decoding table for trees--single level, 7 bit lookup */ 791 bl = 7; 792 if ((i = huft_build(ll, 19, 19, NULL, NULL, &tl, &bl)) != 0) 793 { 794 if (i == 1) 795 huft_free(tl); 796 return i; /* incomplete code set */ 797 } 798 799 DEBG("dyn3 "); 800 801 /* read in literal and distance code lengths */ 802 n = nl + nd; 803 m = mask_bits[bl]; 804 i = l = 0; 805 while ((unsigned)i < n) 806 { 807 NEEDBITS((unsigned)bl) 808 j = (td = tl + ((unsigned)b & m))->b; 809 DUMPBITS(j) 810 j = td->v.n; 811 if (j < 16) /* length of code in bits (0..15) */ 812 ll[i++] = l = j; /* save last length in l */ 813 else if (j == 16) /* repeat last length 3 to 6 times */ 814 { 815 NEEDBITS(2) 816 j = 3 + ((unsigned)b & 3); 817 DUMPBITS(2) 818 if ((unsigned)i + j > n) 819 return 1; 820 while (j--) 821 ll[i++] = l; 822 } 823 else if (j == 17) /* 3 to 10 zero length codes */ 824 { 825 NEEDBITS(3) 826 j = 3 + ((unsigned)b & 7); 827 DUMPBITS(3) 828 if ((unsigned)i + j > n) 829 return 1; 830 while (j--) 831 ll[i++] = 0; 832 l = 0; 833 } 834 else /* j == 18: 11 to 138 zero length codes */ 835 { 836 NEEDBITS(7) 837 j = 11 + ((unsigned)b & 0x7f); 838 DUMPBITS(7) 839 if ((unsigned)i + j > n) 840 return 1; 841 while (j--) 842 ll[i++] = 0; 843 l = 0; 844 } 845 } 846 847 DEBG("dyn4 "); 848 849 /* free decoding table for trees */ 850 huft_free(tl); 851 852 DEBG("dyn5 "); 853 854 /* restore the global bit buffer */ 855 bb = b; 856 bk = k; 857 858 DEBG("dyn5a "); 859 860 /* build the decoding tables for literal/length and distance codes */ 861 bl = lbits; 862 if ((i = huft_build(ll, nl, 257, cplens, cplext, &tl, &bl)) != 0) 863 { 864 DEBG("dyn5b "); 865 if (i == 1) { 866 error(" incomplete literal tree\n"); 867 huft_free(tl); 868 } 869 return i; /* incomplete code set */ 870 } 871 DEBG("dyn5c "); 872 bd = dbits; 873 if ((i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &td, &bd)) != 0) 874 { 875 DEBG("dyn5d "); 876 if (i == 1) { 877 error(" incomplete distance tree\n"); 878 #ifdef PKZIP_BUG_WORKAROUND 879 i = 0; 880 } 881 #else 882 huft_free(td); 883 } 884 huft_free(tl); 885 return i; /* incomplete code set */ 886 #endif 887 } 888 889 DEBG("dyn6 "); 890 891 /* decompress until an end-of-block code */ 892 if (inflate_codes(tl, td, bl, bd)) 893 return 1; 894 895 DEBG("dyn7 "); 896 897 /* free the decoding tables, return */ 898 huft_free(tl); 899 huft_free(td); 900 901 DEBG(">"); 902 return 0; 903 } 904 905 906 907 int inflate_block(e) /* */ 908 int *e; /* last block flag */ 909 /* decompress an inflated block */ 910 { 911 unsigned t; /* block type */ 912 register ulg b; /* bit buffer */ 913 register unsigned k; /* number of bits in bit buffer */ 914 915 DEBG("<blk"); 916 917 /* make local bit buffer */ 918 b = bb; 919 k = bk; 920 921 922 /* read in last block bit */ 923 NEEDBITS(1) 924 *e = (int)b & 1; 925 DUMPBITS(1) 926 927 928 /* read in block type */ 929 NEEDBITS(2) 930 t = (unsigned)b & 3; 931 DUMPBITS(2) 932 933 934 /* restore the global bit buffer */ 935 bb = b; 936 bk = k; 937 938 /* inflate that block type */ 939 if (t == 2) 940 return inflate_dynamic(); 941 if (t == 0) 942 return inflate_stored(); 943 if (t == 1) 944 return inflate_fixed(); 945 946 DEBG(">"); 947 948 /* bad block type */ 949 return 2; 950 } 951 952 953 954 int inflate() /* */ 955 /* decompress an inflated entry */ 956 { 957 int e; /* last block flag */ 958 int r; /* result code */ 959 unsigned h; /* maximum struct huft's malloc'ed */ 960 961 962 /* initialize window, bit buffer */ 963 wp = 0; 964 bk = 0; 965 bb = 0; 966 967 968 /* decompress until the last block */ 969 h = 0; 970 do { 971 hufts = 0; 972 if ((r = inflate_block(&e)) != 0) 973 return r; 974 if (hufts > h) 975 h = hufts; 976 } while (!e); 977 978 /* Undo too much lookahead. The next read will be byte aligned so we 979 * can discard unused bits in the last meaningful byte. 980 */ 981 while (bk >= 8) { 982 bk -= 8; 983 inptr--; 984 } 985 986 /* flush out slide */ 987 flush_output(wp); 988 989 990 /* return success */ 991 #ifdef DEBUG 992 fprintf(stderr, "<%u> ", h); 993 #endif /* DEBUG */ 994 return 0; 995 }