2-stages LZ4_count
author: Yann Collet <cyan@fb.com>
Mon, 6 Nov 2017 23:42:50 +0000 (15:42 -0800)
committer: Yann Collet <cyan@fb.com>
Mon, 6 Nov 2017 23:42:50 +0000 (15:42 -0800)
Separate the first comparison from the rest of the compare loop,
so that it gets its own branch and therefore its own dedicated branch prediction.

measured a 3-4% compression speed improvement.

lib/lz4.c

index 64a2e82..ff6496c 100644 (file)
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -407,7 +407,15 @@ static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLi
 {
     const BYTE* const pStart = pIn;
 
-    while (likely(pIn<pInLimit-(STEPSIZE-1))) {
+    if (likely(pIn < pInLimit-(STEPSIZE-1))) {
+        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) {
+            pIn+=STEPSIZE; pMatch+=STEPSIZE;
+        } else {
+            return LZ4_NbCommonBytes(diff);
+    }   }
+
+    while (likely(pIn < pInLimit-(STEPSIZE-1))) {
         reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
         if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
         pIn += LZ4_NbCommonBytes(diff);