From ac517b0f7cabb8d7ba87f97b7a2638444588570a Mon Sep 17 00:00:00 2001 From: Anas Nashif Date: Tue, 30 Oct 2012 13:40:00 -0700 Subject: [PATCH] faster --- bzlib_private.h | 7 +++-- decompress.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++-------- huffman.c | 19 +++++++++-- 3 files changed, 106 insertions(+), 17 deletions(-) diff --git a/bzlib_private.h b/bzlib_private.h index 5d0217f..2d46a30 100644 --- a/bzlib_private.h +++ b/bzlib_private.h @@ -340,6 +340,7 @@ BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 ); #define MTFA_SIZE 4096 #define MTFL_SIZE 16 +#define HUFCODE_SIZE 10 /*-- Structure holding all the decompression-side stuff. --*/ @@ -407,6 +408,7 @@ typedef Int32 base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; Int32 perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; Int32 minLens[BZ_N_GROUPS]; + Int16 hufcode[BZ_N_GROUPS][1 << HUFCODE_SIZE]; /* save area for scalars in the main decompress code */ Int32 save_i; @@ -433,6 +435,7 @@ typedef Int32* save_gLimit; Int32* save_gBase; Int32* save_gPerm; + Int16* save_gHufCode; } DState; @@ -488,8 +491,8 @@ extern Int32 BZ2_decompress ( DState* ); extern void -BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*, - Int32, Int32, Int32 ); +BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, Int16 *, + UChar*, Int32, Int32, Int32 ); #endif diff --git a/decompress.c b/decompress.c index 311f566..5fd7bee 100644 --- a/decompress.c +++ b/decompress.c @@ -64,6 +64,9 @@ void makeMaps_d ( DState* s ) s->strm->total_in_hi32++; \ } +#define UNGET_BITS(nnn) \ + s->bsLive += nnn + #define GET_UCHAR(lll,uuu) \ GET_BITS(lll,uuu,8) @@ -83,23 +86,29 @@ void makeMaps_d ( DState* s ) gLimit = &(s->limit[gSel][0]); \ gPerm = &(s->perm[gSel][0]); \ gBase = &(s->base[gSel][0]); \ + gHufCode = &(s->hufcode[gSel][0]); \ } \ groupPos--; \ - zn = gMinlen; \ + zn = HUFCODE_SIZE; \ GET_BITS(label1, zvec, zn); \ - while (1) { \ - if (zn > 20 /* the longest code */) \ - RETURN(BZ_DATA_ERROR); \ - if (zvec <= gLimit[zn]) break; \ - zn++; \ - GET_BIT(label2, zj); \ - zvec = (zvec << 1) | zj; \ - }; \ - if (zvec - gBase[zn] < 0 \ + if (gHufCode[zvec]) { \ + UNGET_BITS(gHufCode[zvec] >> 10); \ + lval = gHufCode[zvec] & 511; \ + } else { \ + while (1) { \ + if (zn > 20 /* the longest code */) \ + RETURN(BZ_DATA_ERROR); \ + if (zvec <= gLimit[zn]) break; \ + zn++; \ + GET_BIT(label2, zj); \ + zvec = (zvec << 1) | zj; \ + }; \ + if (zvec - gBase[zn] < 0 \ || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \ - RETURN(BZ_DATA_ERROR); \ - lval = gPerm[zvec - gBase[zn]]; \ -} + RETURN(BZ_DATA_ERROR); \ + lval = gPerm[zvec - gBase[zn]]; \ + } \ +} \ /*---------------------------------------------------*/ @@ -135,6 +144,7 @@ Int32 BZ2_decompress ( DState* s ) Int32* gLimit; Int32* gBase; Int32* gPerm; + Int16* gHufCode; if (s->state == BZ_X_MAGIC_1) { /*initialise the save area*/ @@ -162,6 +172,7 @@ Int32 BZ2_decompress ( DState* s ) s->save_gLimit = NULL; s->save_gBase = NULL; s->save_gPerm = NULL; + s->save_gHufCode = NULL; } /*restore from the save area*/ @@ -189,6 +200,7 @@ Int32 BZ2_decompress ( DState* s ) gLimit = s->save_gLimit; gBase = s->save_gBase; gPerm = s->save_gPerm; + gHufCode = s->save_gHufCode; retVal = BZ_OK; @@ -340,6 +352,7 @@ Int32 BZ2_decompress ( DState* s ) &(s->limit[t][0]), &(s->base[t][0]), &(s->perm[t][0]), + &(s->hufcode[t][0]), &(s->len[t][0]), minLen, maxLen, alphaSize ); @@ -421,6 +434,62 @@ Int32 BZ2_decompress ( DState* s ) if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); /*-- uc = MTF ( nextSym-1 ) --*/ +#if MTFL_SIZE == 16 + { + unsigned char *ppx = s->mtfa + s->mtfbase[0]; + int lno = 0; + + nextSym--; + if (nextSym >= MTFL_SIZE) { + lno = nextSym / MTFL_SIZE; + nextSym %= MTFL_SIZE; + ppx = s->mtfa + s->mtfbase[lno]; + } + uc = ppx[nextSym]; + switch(nextSym) { + case 9: ppx[9] = ppx[10]; + case 10: ppx[10] = ppx[11]; + case 11: ppx[11] = ppx[12]; + case 12: ppx[12] = ppx[13]; + case 13: ppx[13] = ppx[14]; + case 14: ppx[14] = ppx[15]; + case 15: goto copy; + + case 8: ppx[8] = ppx[7]; + case 7: ppx[7] = ppx[6]; + case 6: ppx[6] = ppx[5]; + case 5: ppx[5] = ppx[4]; + case 4: ppx[4] = ppx[3]; + case 3: ppx[3] = ppx[2]; + case 2: ppx[2] = ppx[1]; + case 1: ppx[1] = ppx[0]; + default: break; + } + if (lno) { + s->mtfbase[lno]++; + copy: + while (lno > 0) { + s->mtfbase[lno]--; + s->mtfa[s->mtfbase[lno]] = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1]; + lno--; + } + s->mtfbase[0]--; + if (s->mtfbase[0] == 0) { + int ii, jj, kk; + s->mtfa[0] = uc; + kk = MTFA_SIZE-1; + for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) { + for (jj = MTFL_SIZE-1; jj >= 0; jj--) { + s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj]; + kk--; + } + s->mtfbase[ii] = kk + 1; + } + } + } + s->mtfa[s->mtfbase[0]] = uc; + } +#else { Int32 ii, jj, kk, pp, lno, off; UInt32 nn; @@ -472,6 +541,7 @@ Int32 BZ2_decompress ( DState* s ) } } } +#endif /*-- end uc = MTF ( nextSym-1 ) --*/ s->unzftab[s->seqToUnseq[uc]]++; @@ -636,6 +706,7 @@ Int32 BZ2_decompress ( DState* s ) s->save_gLimit = gLimit; s->save_gBase = gBase; s->save_gPerm = gPerm; + s->save_gHufCode = gHufCode; return retVal; } diff --git a/huffman.c b/huffman.c index 2283fdb..aec358b 100644 --- a/huffman.c +++ b/huffman.c @@ -170,13 +170,16 @@ void BZ2_hbAssignCodes ( Int32 *code, void BZ2_hbCreateDecodeTables ( Int32 *limit, Int32 *base, Int32 *perm, + Int16 *hufcode, UChar *length, Int32 minLen, Int32 maxLen, Int32 alphaSize ) { - Int32 pp, i, j, vec; + Int32 pp, i, j, vec, k, vec2; + for (i = 0; i < (1 << HUFCODE_SIZE); i++) + hufcode[i] = 0; pp = 0; for (i = minLen; i <= maxLen; i++) for (j = 0; j < alphaSize; j++) @@ -187,16 +190,28 @@ void BZ2_hbCreateDecodeTables ( Int32 *limit, for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; - for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0; + for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = -1; vec = 0; for (i = minLen; i <= maxLen; i++) { + if (i <= HUFCODE_SIZE) { + for (j = base[i]; j < base[i + 1]; j++) { + vec2 = (vec + j - base[i]) << (HUFCODE_SIZE - i); + k = (1 << (HUFCODE_SIZE - i)); + if (vec2 + k > (1 << HUFCODE_SIZE)) + k = (1 << HUFCODE_SIZE) - vec2; + for (; --k >= 0; vec2++) + hufcode[vec2] = perm[j] | 512 | (HUFCODE_SIZE - i) << 10; + } + } vec += (base[i+1] - base[i]); limit[i] = vec-1; vec <<= 1; } for (i = minLen + 1; i <= maxLen; i++) base[i] = ((limit[i-1] + 1) << 1) - base[i]; + limit[maxLen + 1] = 0x7fffffff; /* make it terminate */ + base[maxLen + 1] = 0; } -- 2.7.4