From a8a5dfd426cec6e3ac6c689a314aff5817de9ce3 Mon Sep 17 00:00:00 2001
From: Yann Collet
Date: Fri, 20 Apr 2018 18:09:51 -0700
Subject: [PATCH] fixed clang performance in lz4_fast

The simple change from
`matchIndex+MAX_DISTANCE < current`
to
`current - matchIndex > MAX_DISTANCE`
is enough to cause a 10% performance drop under clang.
Quite massive.
(I had missed it because my attention was focused on gcc performance
at the time.)

The second form is more robust, because it also survives a situation
where `matchIndex > current` due to overflow.
The first form requires `matchIndex` to never overflow,
hence the added `assert()` conditions.

The only case where such an overflow can happen is dictCtx compression,
when the dictionary context is not initialized before loading the
dictionary. So it is enough to always initialize the context while
loading the dictionary.
---
 lib/lz4.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/lib/lz4.c b/lib/lz4.c
index bb6b619..3ca1126 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -777,7 +777,8 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
             LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
 
             if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) continue;    /* match outside of valid area */
-            if ((tableType != byU16) && (current - matchIndex > MAX_DISTANCE)) continue;   /* too far - note: works even if matchIndex overflows */
+            assert(matchIndex < current);
+            if ((tableType != byU16) && (matchIndex+MAX_DISTANCE < current)) continue;   /* too far */
             if (tableType == byU16) assert((current - matchIndex) <= MAX_DISTANCE);     /* too_far presumed impossible with byU16 */
 
             if (LZ4_read32(match) == LZ4_read32(ip)) {
@@ -918,8 +919,9 @@ _next_match:
             match = base + matchIndex;
         }
         LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+        assert(matchIndex < current);
         if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
-          && ((tableType==byU16) ? 1 : (current - matchIndex <= MAX_DISTANCE))
+          && ((tableType==byU16) ? 1 : (matchIndex+MAX_DISTANCE >= current))
           && (LZ4_read32(match) == LZ4_read32(ip)) ) {
             token=op++;
             *token=0;
@@ -1304,7 +1306,11 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
 
     DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
 
-    LZ4_prepareTable(dict, 0, tableType);
+    /* It's necessary to reset the context,
+     * and not just continue it with prepareTable()
+     * to avoid any risk of generating overflowing matchIndex
+     * when compressing using this dictionary */
+    LZ4_resetStream(LZ4_dict);
 
     /* We always increment the offset by 64 KB, since, if the dict is longer,
      * we truncate it to the last 64k, and if it's shorter, we still want to
-- 
2.7.4
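
For context, a minimal standalone sketch of the overflow argument in the
commit message; it is not part of the patch. The concrete values of
`current` and `matchIndex`, and the local MAX_DISTANCE definition (which
mirrors LZ4's 64 KB window), are assumptions chosen purely for illustration.

/* Illustration: why `current - matchIndex > MAX_DISTANCE` survives a bogus
 * matchIndex > current, while `matchIndex + MAX_DISTANCE < current` does not.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_DISTANCE 65535U   /* assumed here; stands in for LZ4's 64 KB window */

int main(void)
{
    uint32_t current    = 100;          /* early position in the input */
    uint32_t matchIndex = 0x80000000U;  /* stale index from an uninitialized table */

    /* Subtraction form: the unsigned subtraction wraps around to a huge
     * distance, so the bogus match is correctly rejected as "too far".
     * Prints 1. */
    printf("subtraction form rejects: %u\n",
           (unsigned)(current - matchIndex > MAX_DISTANCE));

    /* Addition form: matchIndex + MAX_DISTANCE is still far larger than
     * current, so the "too far" test does not trigger and the bogus match
     * slips through. Prints 0. */
    printf("addition form rejects:    %u\n",
           (unsigned)(matchIndex + MAX_DISTANCE < current));

    return 0;
}

The addition form is the one the patch restores for speed; the added
assert() conditions and the context reset in LZ4_loadDict() are what rule
out the overflowing-matchIndex case that would otherwise slip through.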