2 /*-------------------------------------------------------------*/
3 /*--- Decompression machinery ---*/
4 /*--- decompress.c ---*/
5 /*-------------------------------------------------------------*/
7 /* ------------------------------------------------------------------
8 This file is part of bzip2/libbzip2, a program and library for
9 lossless, block-sorting data compression.
11 bzip2/libbzip2 version 1.0.6 of 6 September 2010
12 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
17 This program is released under the terms of the license contained
19 ------------------------------------------------------------------ */
22 #include "bzlib_private.h"
25 /*---------------------------------------------------*/
/* Build s->seqToUnseq[]: scan the 256 possible byte values and record
   each in-use value at the next free slot of the map.
   NOTE(review): this listing is elided — the original lines 28-30, 32
   and 34-38 (the s->nInUse reset, the inUse[i] test, the nInUse++
   increment and the closing brace) are not visible here, so only the
   assignment below is established by this fragment. */
27 void makeMaps_d ( DState* s )
31 for (i = 0; i < 256; i++)
33 s->seqToUnseq[s->nInUse] = i;
39 /*---------------------------------------------------*/
/* RETURN(rrr): record the result code and jump to the state-save
   epilogue of BZ2_decompress.  NOTE(review): the "#define RETURN(rrr)"
   line itself (orig. line 40) is missing from this elided listing;
   only the macro body survives below. */
41 { retVal = rrr; goto save_state_and_return; };
/* GET_BITS(lll,vvv,nnn): read nnn bits of input into vvv.  The embedded
   "case lll:" label makes BZ2_decompress resumable coroutine-style: if
   avail_in hits zero the macro suspends via RETURN(BZ_OK), and the next
   call re-enters the switch at exactly this label.  Visible effects:
   bytes are shifted into s->bsBuff MSB-first and the 64-bit total_in
   counter (lo32/hi32 pair) is maintained by hand.
   NOTE(review): several continuation lines (orig. 45, 47-48, 50-53, 55,
   57, 59-60, 65-66) are elided from this listing. */
43 #define GET_BITS(lll,vvv,nnn) \
44 case lll: s->state = lll; \
46 if (s->bsLive >= nnn) { \
49 (s->bsLive-nnn)) & ((1 << nnn)-1); \
54 if (s->strm->avail_in == 0) RETURN(BZ_OK); \
56 = (s->bsBuff << 8) | \
58 (*((UChar*)(s->strm->next_in)))); \
61 s->strm->avail_in--; \
62 s->strm->total_in_lo32++; \
63 if (s->strm->total_in_lo32 == 0) \
64 s->strm->total_in_hi32++; \
/* UNGET_BITS(nnn): push nnn bits back into the bit buffer
   (body elided in this listing). */
67 #define UNGET_BITS(nnn) \
/* GET_UCHAR / GET_BIT: 8-bit and 1-bit convenience wrappers over
   GET_BITS (bodies elided in this listing). */
70 #define GET_UCHAR(lll,uuu) \
73 #define GET_BIT(lll,uuu) \
76 /*---------------------------------------------------*/
/* GET_MTF_VAL(label1,label2,lval): decode one Huffman-coded MTF symbol
   into lval.  Every BZ_G_SIZE symbols the next selector is consumed to
   choose which of the group Huffman tables (limit/base/perm, canonical
   Huffman) to decode with.  NOTE(review): the gHufCode fast-path table
   below is not present in stock bzip2 1.0.6 — presumably a local
   speedup; verify its construction elsewhere in this tree.  Lines
   orig. 78, 80, 90-92, 97-98, 102, 105 and 110-113 are elided. */
77 #define GET_MTF_VAL(label1,label2,lval) \
79 if (groupPos == 0) { \
81 if (groupNo >= nSelectors) \
82 RETURN(BZ_DATA_ERROR); \
83 groupPos = BZ_G_SIZE; \
84 gSel = s->selector[groupNo]; \
85 gMinlen = s->minLens[gSel]; \
86 gLimit = &(s->limit[gSel][0]); \
87 gPerm = &(s->perm[gSel][0]); \
88 gBase = &(s->base[gSel][0]); \
89 gHufCode = &(s->hufcode[gSel][0]); \
/* Fast path: a nonzero table entry gives the decoded symbol in its low
   9 bits and the number of over-read bits to push back in bits >= 10. */
93 GET_BITS(label1, zvec, zn); \
94 if (gHufCode[zvec]) { \
95 UNGET_BITS(gHufCode[zvec] >> 10); \
96 lval = gHufCode[zvec] & 511; \
/* Slow path: lengthen the code one bit at a time until it falls at or
   below gLimit[zn]; any code longer than 20 bits is a data error. */
99 if (zn > 20 /* the longest code */) \
100 RETURN(BZ_DATA_ERROR); \
101 if (zvec <= gLimit[zn]) break; \
103 GET_BIT(label2, zj); \
104 zvec = (zvec << 1) | zj; \
/* Bounds-check the perm[] index before indexing (crafted input could
   otherwise drive it out of range). */
106 if (zvec - gBase[zn] < 0 \
107 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
108 RETURN(BZ_DATA_ERROR); \
109 lval = gPerm[zvec - gBase[zn]]; \
114 /*---------------------------------------------------*/
/* Run the bzip2 decompression state machine for one call.  Structured
   as a coroutine: the GET_* macros embed 'case' labels in a large
   switch (its header is elided from this listing), so the function can
   suspend with BZ_OK whenever input runs out and resume at the exact
   bit-read that stalled, restoring all working locals from s->save_*
   on entry and writing them back at save_state_and_return.  Result
   codes (BZ_OK, BZ_STREAM_END, BZ_DATA_ERROR*, BZ_MEM_ERROR) are
   delivered via the RETURN() macro.
   NOTE(review): this listing is heavily elided — local declarations,
   the switch statement, loop braces and many statements are missing —
   so the comments below describe only what the visible lines show. */
115 Int32 BZ2_decompress ( DState* s )
119 Int32 minLen, maxLen;
120 bz_stream* strm = s->strm;
122 /* stuff that needs to be saved/restored */
/* Fresh stream: zero the save area so the first restore is benign. */
149 if (s->state == BZ_X_MAGIC_1) {
150 /*initialise the save area*/
154 s->save_alphaSize = 0;
156 s->save_nSelectors = 0;
159 s->save_groupPos = 0;
161 s->save_nblockMAX = 0;
172 s->save_gLimit = NULL;
173 s->save_gBase = NULL;
174 s->save_gPerm = NULL;
175 s->save_gHufCode = NULL;
/* Re-entry: reload every working local from the save area. */
178 /*restore from the save area*/
182 alphaSize = s->save_alphaSize;
183 nGroups = s->save_nGroups;
184 nSelectors = s->save_nSelectors;
186 groupNo = s->save_groupNo;
187 groupPos = s->save_groupPos;
188 nextSym = s->save_nextSym;
189 nblockMAX = s->save_nblockMAX;
190 nblock = s->save_nblock;
199 gMinlen = s->save_gMinlen;
200 gLimit = s->save_gLimit;
201 gBase = s->save_gBase;
202 gPerm = s->save_gPerm;
203 gHufCode = s->save_gHufCode;
/* Stream header: "BZh" followed by an ASCII digit '1'..'9' giving the
   block size in units of 100k. */
209 GET_UCHAR(BZ_X_MAGIC_1, uc);
210 if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
212 GET_UCHAR(BZ_X_MAGIC_2, uc);
213 if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
215 GET_UCHAR(BZ_X_MAGIC_3, uc)
216 if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
218 GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
219 if (s->blockSize100k < (BZ_HDR_0 + 1) ||
220 s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
221 s->blockSize100k -= BZ_HDR_0;
/* Allocate the block working storage: ll16/ll4 for the low-memory
   decoder, or the single tt[] array for the fast decoder. */
223 if (s->smallDecompress) {
224 s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
226 ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar)
228 if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
230 s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
231 if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
/* Block header: 6-byte magic 0x314159265359 ("pi") starts a compressed
   block; first byte 0x17 means the 0x177245385090 end-of-stream
   trailer instead (handled at endhdr_2). */
234 GET_UCHAR(BZ_X_BLKHDR_1, uc);
236 if (uc == 0x17) goto endhdr_2;
237 if (uc != 0x31) RETURN(BZ_DATA_ERROR);
238 GET_UCHAR(BZ_X_BLKHDR_2, uc);
239 if (uc != 0x41) RETURN(BZ_DATA_ERROR);
240 GET_UCHAR(BZ_X_BLKHDR_3, uc);
241 if (uc != 0x59) RETURN(BZ_DATA_ERROR);
242 GET_UCHAR(BZ_X_BLKHDR_4, uc);
243 if (uc != 0x26) RETURN(BZ_DATA_ERROR);
244 GET_UCHAR(BZ_X_BLKHDR_5, uc);
245 if (uc != 0x53) RETURN(BZ_DATA_ERROR);
246 GET_UCHAR(BZ_X_BLKHDR_6, uc);
247 if (uc != 0x59) RETURN(BZ_DATA_ERROR);
250 if (s->verbosity >= 2)
251 VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
/* Stored block CRC: 32 bits, assembled MSB-first. */
253 s->storedBlockCRC = 0;
254 GET_UCHAR(BZ_X_BCRC_1, uc);
255 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
256 GET_UCHAR(BZ_X_BCRC_2, uc);
257 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
258 GET_UCHAR(BZ_X_BCRC_3, uc);
259 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
260 GET_UCHAR(BZ_X_BCRC_4, uc);
261 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
263 GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
/* origPtr: 24-bit BWT row index of the original string, MSB-first;
   sanity-bounded against the maximum possible block size. */
266 GET_UCHAR(BZ_X_ORIGPTR_1, uc);
267 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
268 GET_UCHAR(BZ_X_ORIGPTR_2, uc);
269 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
270 GET_UCHAR(BZ_X_ORIGPTR_3, uc);
271 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
274 RETURN(BZ_DATA_ERROR);
275 if (s->origPtr > 10 + 100000*s->blockSize100k)
276 RETURN(BZ_DATA_ERROR);
/* Two-level used-byte map: 16 group bits, then 16 bits per set group. */
278 /*--- Receive the mapping table ---*/
279 for (i = 0; i < 16; i++) {
280 GET_BIT(BZ_X_MAPPING_1, uc);
282 s->inUse16[i] = True; else
283 s->inUse16[i] = False;
286 for (i = 0; i < 256; i++) s->inUse[i] = False;
288 for (i = 0; i < 16; i++)
290 for (j = 0; j < 16; j++) {
291 GET_BIT(BZ_X_MAPPING_2, uc);
292 if (uc == 1) s->inUse[i * 16 + j] = True;
295 if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
296 alphaSize = s->nInUse+2;
298 /*--- Now the selectors ---*/
299 GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
300 if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR);
301 GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
/* NOTE(review): upstream bzip2 1.0.7 additionally rejects
   nSelectors > BZ_MAX_SELECTORS here (CVE-2019-12900: without the
   upper bound, the s->selectorMtf[i] store below can write past the
   array for crafted input).  The check is not visible in this elided
   listing — confirm this tree carries that fix. */
302 if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
303 for (i = 0; i < nSelectors; i++) {
306 GET_BIT(BZ_X_SELECTOR_3, uc);
309 if (j >= nGroups) RETURN(BZ_DATA_ERROR);
311 s->selectorMtf[i] = j;
/* Selectors are themselves MTF-coded; undo with a small pos[] list. */
314 /*--- Undo the MTF values for the selectors. ---*/
316 UChar pos[BZ_N_GROUPS], tmp, v;
317 for (v = 0; v < nGroups; v++) pos[v] = v;
319 for (i = 0; i < nSelectors; i++) {
320 v = s->selectorMtf[i];
322 while (v > 0) { pos[v] = pos[v-1]; v--; }
324 s->selector[i] = tmp;
/* Code lengths: delta-coded per symbol, each clamped to 1..20. */
328 /*--- Now the coding tables ---*/
329 for (t = 0; t < nGroups; t++) {
330 GET_BITS(BZ_X_CODING_1, curr, 5);
331 for (i = 0; i < alphaSize; i++) {
333 if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
334 GET_BIT(BZ_X_CODING_2, uc);
336 GET_BIT(BZ_X_CODING_3, uc);
337 if (uc == 0) curr++; else curr--;
343 /*--- Create the Huffman decoding tables ---*/
344 for (t = 0; t < nGroups; t++) {
347 for (i = 0; i < alphaSize; i++) {
348 if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
349 if (s->len[t][i] < minLen) minLen = s->len[t][i];
351 BZ2_hbCreateDecodeTables (
357 minLen, maxLen, alphaSize
359 s->minLens[t] = minLen;
/* Main MTF/RLE decode loop: reconstruct the BWT-transformed block into
   ll16[] (small) or tt[] (fast), while accumulating the per-byte
   frequency table unzftab[]. */
362 /*--- Now the MTF values ---*/
365 nblockMAX = 100000 * s->blockSize100k;
369 for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
/* Initialise the cache-friendly segmented MTF list (mtfa/mtfbase). */
375 for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
376 for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
377 s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
380 s->mtfbase[ii] = kk + 1;
383 /*-- end MTF init --*/
386 GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
390 if (nextSym == EOB) break;
/* RUNA/RUNB pairs form a bijective base-2 run length 'es' for a run of
   the byte currently at MTF position 0. */
392 if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
397 /* Check that N doesn't get too big, so that es doesn't
398 go negative. The maximum value that can be
399 RUNA/RUNB encoded is equal to the block size (post
400 the initial RLE), viz, 900k, so bounding N at 2
401 million should guard against overflow without
402 rejecting any legitimate inputs. */
403 if (N >= 2*1024*1024) RETURN(BZ_DATA_ERROR);
404 if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
405 if (nextSym == BZ_RUNB) es = es + (1+1) * N;
407 GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
409 while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
/* Emit the run: 'es' copies of the MTF-front byte, bounds-checked
   against nblockMAX on every store. */
412 uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
413 s->unzftab[uc] += es;
415 if (s->smallDecompress)
417 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
418 s->ll16[nblock] = (UInt16)uc;
424 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
425 s->tt[nblock] = (UInt32)uc;
/* Ordinary symbol: move entry nextSym-1 of the MTF list to the front.
   Two visible strategies below — an unrolled in-segment shuffle via
   ppx, and a general segmented move; the elided lines presumably
   select between them. */
434 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
436 /*-- uc = MTF ( nextSym-1 ) --*/
439 unsigned char *ppx = s->mtfa + s->mtfbase[0];
443 if (nextSym >= MTFL_SIZE) {
444 lno = nextSym / MTFL_SIZE;
445 nextSym %= MTFL_SIZE;
446 ppx = s->mtfa + s->mtfbase[lno];
/* Unrolled shift-by-one inside a single MTFL_SIZE segment; the case
   labels fall through deliberately so exactly nextSym slots move. */
450 case 9: ppx[9] = ppx[10];
451 case 10: ppx[10] = ppx[11];
452 case 11: ppx[11] = ppx[12];
453 case 12: ppx[12] = ppx[13];
454 case 13: ppx[13] = ppx[14];
455 case 14: ppx[14] = ppx[15];
458 case 8: ppx[8] = ppx[7];
459 case 7: ppx[7] = ppx[6];
460 case 6: ppx[6] = ppx[5];
461 case 5: ppx[5] = ppx[4];
462 case 4: ppx[4] = ppx[3];
463 case 3: ppx[3] = ppx[2];
464 case 2: ppx[2] = ppx[1];
465 case 1: ppx[1] = ppx[0];
/* Ripple the displaced byte down through lower segments. */
473 s->mtfa[s->mtfbase[lno]] = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
/* When segment 0's base hits the start of mtfa[], recompact the whole
   list back to the top of the array. */
477 if (s->mtfbase[0] == 0) {
481 for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
482 for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
483 s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
486 s->mtfbase[ii] = kk + 1;
490 s->mtfa[s->mtfbase[0]] = uc;
/* General-case MTF move (segmented list, arbitrary distance nn). */
494 Int32 ii, jj, kk, pp, lno, off;
496 nn = (UInt32)(nextSym - 1);
498 if (nn < MTFL_SIZE) {
499 /* avoid general-case expense */
504 s->mtfa[(z) ] = s->mtfa[(z)-1];
505 s->mtfa[(z)-1] = s->mtfa[(z)-2];
506 s->mtfa[(z)-2] = s->mtfa[(z)-3];
507 s->mtfa[(z)-3] = s->mtfa[(z)-4];
511 s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
/* Shift within the target segment, then ripple across segments. */
516 lno = nn / MTFL_SIZE;
517 off = nn % MTFL_SIZE;
518 pp = s->mtfbase[lno] + off;
520 while (pp > s->mtfbase[lno]) {
521 s->mtfa[pp] = s->mtfa[pp-1]; pp--;
526 s->mtfa[s->mtfbase[lno]]
527 = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
531 s->mtfa[s->mtfbase[0]] = uc;
532 if (s->mtfbase[0] == 0) {
534 for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
535 for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
536 s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
539 s->mtfbase[ii] = kk + 1;
545 /*-- end uc = MTF ( nextSym-1 ) --*/
/* Store the decoded byte and count it. */
547 s->unzftab[s->seqToUnseq[uc]]++;
548 if (s->smallDecompress)
549 s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
550 s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]);
553 GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
558 /* Now we know what nblock is, we can do a better sanity
   check on s->origPtr. */
561 if (s->origPtr < 0 || s->origPtr >= nblock)
562 RETURN(BZ_DATA_ERROR);
/* Build the cumulative frequency table cftab[] from unzftab[], with
   defensive range and monotonicity checks (crafted input could
   otherwise corrupt the inverse-BWT indexing below). */
564 /*-- Set up cftab to facilitate generation of T^(-1) --*/
565 /* Check: unzftab entries in range. */
566 for (i = 0; i <= 255; i++) {
567 if (s->unzftab[i] < 0 || s->unzftab[i] > nblock)
568 RETURN(BZ_DATA_ERROR);
570 /* Actually generate cftab. */
572 for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
573 for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
574 /* Check: cftab entries in range. */
575 for (i = 0; i <= 256; i++) {
576 if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
577 /* s->cftab[i] can legitimately be == nblock */
578 RETURN(BZ_DATA_ERROR);
581 /* Check: cftab entries non-descending. */
582 for (i = 1; i <= 256; i++) {
583 if (s->cftab[i-1] > s->cftab[i]) {
584 RETURN(BZ_DATA_ERROR);
/* Hand off to the run-length/derandomise output stage. */
588 s->state_out_len = 0;
590 BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
591 s->state = BZ_X_OUTPUT;
592 if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
/* Inverse BWT setup.  Small decoder: build T then invert it in place
   via pointer reversal (GET_LL/SET_LL pack indices into ll16+ll4). */
594 if (s->smallDecompress) {
596 /*-- Make a copy of cftab, used in generation of T --*/
597 for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
599 /*-- compute the T vector --*/
600 for (i = 0; i < nblock; i++) {
601 uc = (UChar)(s->ll16[i]);
602 SET_LL(i, s->cftabCopy[uc]);
606 /*-- Compute T^(-1) by pointer reversal on T --*/
610 Int32 tmp = GET_LL(j);
615 while (i != s->origPtr);
617 s->tPos = s->origPtr;
619 if (s->blockRandomised) {
621 BZ_GET_SMALL(s->k0); s->nblock_used++;
622 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
624 BZ_GET_SMALL(s->k0); s->nblock_used++;
/* Fast decoder: each tt[] word holds the byte in its low 8 bits and
   the successor index in the upper 24. */
629 /*-- compute the T^(-1) vector --*/
630 for (i = 0; i < nblock; i++) {
631 uc = (UChar)(s->tt[i] & 0xff);
632 s->tt[s->cftab[uc]] |= (i << 8);
636 s->tPos = s->tt[s->origPtr] >> 8;
638 if (s->blockRandomised) {
640 BZ_GET_FAST(s->k0); s->nblock_used++;
641 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
643 BZ_GET_FAST(s->k0); s->nblock_used++;
/* End-of-stream trailer: remaining bytes of the 0x177245385090 magic
   (the first byte, 0x17, was consumed at the block-header check), then
   the 32-bit combined CRC for the whole stream. */
654 GET_UCHAR(BZ_X_ENDHDR_2, uc);
655 if (uc != 0x72) RETURN(BZ_DATA_ERROR);
656 GET_UCHAR(BZ_X_ENDHDR_3, uc);
657 if (uc != 0x45) RETURN(BZ_DATA_ERROR);
658 GET_UCHAR(BZ_X_ENDHDR_4, uc);
659 if (uc != 0x38) RETURN(BZ_DATA_ERROR);
660 GET_UCHAR(BZ_X_ENDHDR_5, uc);
661 if (uc != 0x50) RETURN(BZ_DATA_ERROR);
662 GET_UCHAR(BZ_X_ENDHDR_6, uc);
663 if (uc != 0x90) RETURN(BZ_DATA_ERROR);
665 s->storedCombinedCRC = 0;
666 GET_UCHAR(BZ_X_CCRC_1, uc);
667 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
668 GET_UCHAR(BZ_X_CCRC_2, uc);
669 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
670 GET_UCHAR(BZ_X_CCRC_3, uc);
671 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
672 GET_UCHAR(BZ_X_CCRC_4, uc);
673 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
675 s->state = BZ_X_IDLE;
676 RETURN(BZ_STREAM_END);
/* Any state value not handled by a case label is a logic error. */
678 default: AssertH ( False, 4001 );
681 AssertH ( False, 4002 );
/* Coroutine epilogue: persist every working local so the next call can
   resume exactly where this one suspended. */
683 save_state_and_return:
688 s->save_alphaSize = alphaSize;
689 s->save_nGroups = nGroups;
690 s->save_nSelectors = nSelectors;
692 s->save_groupNo = groupNo;
693 s->save_groupPos = groupPos;
694 s->save_nextSym = nextSym;
695 s->save_nblockMAX = nblockMAX;
696 s->save_nblock = nblock;
705 s->save_gMinlen = gMinlen;
706 s->save_gLimit = gLimit;
707 s->save_gBase = gBase;
708 s->save_gPerm = gPerm;
709 s->save_gHufCode = gHufCode;
715 /*-------------------------------------------------------------*/
716 /*--- end decompress.c ---*/
717 /*-------------------------------------------------------------*/