From b3628cb0c51bb93f681707b2d0cdbce5ccdae818 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Mon, 12 Feb 2018 12:09:38 -0500 Subject: [PATCH] Avoid Resetting the Context When Possible --- lib/lz4.c | 61 ++++++++++++++++++++++++++++++++++++++---------------- lib/lz4frame.c | 21 +++++++++++++++++-- tests/framebench.c | 2 +- 3 files changed, 63 insertions(+), 21 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 46935f8..d558b3b 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -538,7 +538,20 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( const U32 acceleration) { const BYTE* ip = (const BYTE*) source; - const BYTE* base; + + /* If the table hasn't been used, it's guaranteed to be zeroed out, and is + * therefore safe to use no matter what mode we're in. Otherwise, we figure + * out if it's safe to leave as is or whether it needs to be reset. + */ + const int resetTable = cctx->tableType != unusedTable && ( + cctx->tableType != tableType || + (tableType == byU16 && + cctx->currentOffset + inputSize >= 0xFFFFU) || + tableType == byPtr); + + size_t currentOffset = ((tableType == byU32 || tableType == byU16) && + !resetTable) ? cctx->currentOffset : 0; + const BYTE* base = (const BYTE*) source - currentOffset; const BYTE* lowLimit; const BYTE* const lowRefLimit = ip - cctx->dictSize; const BYTE* const dictionary = cctx->dictionary; @@ -556,17 +569,18 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( U32 forwardH; + /* TODO: resurrect dictIssue check */ + (void)dictIssue; + /* Init conditions */ if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported inputSize, too large (or negative) */ switch(dict) { case noDict: default: - base = (const BYTE*)source; lowLimit = (const BYTE*)source; break; case withPrefix64k: - base = (const BYTE*)source - cctx->currentOffset; lowLimit = (const BYTE*)source - cctx->dictSize; break; case usingExtDict: @@ -575,6 +589,13 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( break; } if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0; /* Size too large (not within 64K limit) */ + + if (resetTable) { + DEBUGLOG(4, "Resetting table in %p", cctx); + MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE); + cctx->currentOffset = 0; + } + if (inputSizehashTable, tableType, base); - } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0) + } while ( match < lowRefLimit || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip)) || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) ); } @@ -700,7 +721,7 @@ _next_match: lowLimit = (const BYTE*)source; } } LZ4_putPosition(ip, cctx->hashTable, tableType, base); - if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1) + if ( match >= lowRefLimit && (match+MAX_DISTANCE>=ip) && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) ) { token=op++; *token=0; goto _next_match; } @@ -730,6 +751,8 @@ _last_literals: retval = (((char*)op)-dest); _clean_up: + cctx->currentOffset += (U32)inputSize; + cctx->dictSize += (U32)inputSize; cctx->tableType = tableType; /* End */ @@ -767,14 +790,16 @@ int LZ4_compress_fast_safeExtState(void* state, const char* source, char* dest, int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) { + int result; #if (LZ4_HEAPMODE) - void* ctxPtr = ALLOCATOR(1, sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + LZ4_stream_t* ctxPtr = ALLOCATOR(1, sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + ctxPtr->internal_donotuse.currentOffset = 0; #else LZ4_stream_t ctx; - void* const ctxPtr = &ctx; + LZ4_stream_t* const ctxPtr = &ctx; #endif - - int const result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); + LZ4_resetStream(ctxPtr); + result = LZ4_compress_fast_safeExtState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); #if (LZ4_HEAPMODE) FREEMEM(ctxPtr); @@ -1005,20 +1030,23 @@ LZ4_stream_t* LZ4_createStream(void) { LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64); LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ + DEBUGLOG(4, "LZ4_createStream %p", lz4s); LZ4_resetStream(lz4s); return lz4s; } void LZ4_resetStream (LZ4_stream_t* LZ4_stream) { - DEBUGLOG(4, "LZ4_resetStream"); + DEBUGLOG(5, "LZ4_resetStream %p", LZ4_stream); MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t)); + LZ4_stream->internal_donotuse.currentOffset = 0; LZ4_stream->internal_donotuse.tableType = unusedTable; } int LZ4_freeStream (LZ4_stream_t* LZ4_stream) { if (!LZ4_stream) return 0; /* support free on NULL */ + DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream); FREEMEM(LZ4_stream); return (0); } @@ -1032,6 +1060,8 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) const BYTE* const dictEnd = p + dictSize; const BYTE* base; + DEBUGLOG(4, "LZ4_loadDict %p", LZ4_dict); + if ((dict->initCheck) || (dict->tableType != byU32 && dict->tableType != unusedTable) || (dict->currentOffset > 1 GB)) /* Uninitialized structure, or reuse overflow */ @@ -1066,6 +1096,7 @@ static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src) U32 const delta = LZ4_dict->currentOffset - 64 KB; const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; int i; + DEBUGLOG(4, "LZ4_renormDictT %p", LZ4_dict); for (i=0; ihashTable[i] < delta) LZ4_dict->hashTable[i]=0; else LZ4_dict->hashTable[i] -= delta; @@ -1100,14 +1131,10 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, ch /* prefix mode : source data follows dictionary */ if (dictEnd == (const BYTE*)source) { - int result; if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) - result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration); + return LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration); else - result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration); - streamPtr->dictSize += (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; - return result; + return LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration); } /* external dictionary mode */ @@ -1118,7 +1145,6 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, ch result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration); streamPtr->dictionary = (const BYTE*)source; streamPtr->dictSize = (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; return result; } } @@ -1139,7 +1165,6 @@ int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* streamPtr->dictionary = (const BYTE*)source; streamPtr->dictSize = (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; return result; } diff --git a/lib/lz4frame.c b/lib/lz4frame.c index a394d1f..bf70b5c 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -354,6 +354,7 @@ size_t LZ4F_compressFrame_usingCDict(void* dstBuffer, size_t dstCapacity, prefs.frameInfo.blockMode = LZ4F_blockIndependent; /* only one block => no need for inter-block link */ if (prefs.compressionLevel < LZ4HC_CLEVEL_MIN) { + LZ4_resetStream(&lz4ctx); cctxI.lz4CtxPtr = &lz4ctx; cctxI.lz4CtxLevel = 1; } /* fast compression context pre-created on stack */ @@ -521,7 +522,14 @@ size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr, cctxPtr->lz4CtxPtr = (void*)LZ4_createStreamHC(); if (cctxPtr->lz4CtxPtr == NULL) return err0r(LZ4F_ERROR_allocation_failed); cctxPtr->lz4CtxLevel = ctxTypeID; - } } + } else if (cctxPtr->lz4CtxLevel != ctxTypeID) { + /* otherwise, we must be transitioning from HC -> LZ4. + * In that case, avoid reallocating, since a LZ4 ctx + * fits in an HC ctx. Just reset. */ + LZ4_resetStream((LZ4_stream_t *) cctxPtr->lz4CtxPtr); + cctxPtr->lz4CtxLevel = ctxTypeID; + } + } /* Buffer Management */ if (cctxPtr->prefs.frameInfo.blockSizeID == 0) @@ -654,11 +662,20 @@ static size_t LZ4F_makeBlock(void* dst, const void* src, size_t srcSize, static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict) { int const acceleration = (level < -1) ? -level : 1; + LZ4_stream_t_internal* internal_ctx = &((LZ4_stream_t*)ctx)->internal_donotuse; + assert(!internal_ctx->initCheck); + if (internal_ctx->currentOffset > 1 GB) { + /* Init the context */ + LZ4_resetStream((LZ4_stream_t*)ctx); + } + /* Clear any local dictionary */ + internal_ctx->dictionary = NULL; + internal_ctx->dictSize = 0; if (cdict) { memcpy(ctx, cdict->fastCtx, sizeof(*cdict->fastCtx)); return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration); } - return LZ4_compress_fast_extState(ctx, src, dst, srcSize, dstCapacity, acceleration); + return LZ4_compress_fast_safeExtState(ctx, src, dst, srcSize, dstCapacity, acceleration); } static int LZ4F_compressBlock_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict) diff --git a/tests/framebench.c b/tests/framebench.c index 8dcfa41..21c3704 100644 --- a/tests/framebench.c +++ b/tests/framebench.c @@ -118,7 +118,7 @@ size_t compress_extState(bench_params_t *p) { char *oend = obuf + osize; size_t oused; - oused = LZ4_compress_fast_extState(ctx, ibuf + ((iter * 2654435761U) % num_ibuf) * isize, obuf, isize, oend - obuf, 0); + oused = LZ4_compress_fast_safeExtState(ctx, ibuf + ((iter * 2654435761U) % num_ibuf) * isize, obuf, isize, oend - obuf, 0); obuf += oused; return obuf - p->obuf; -- 2.7.4