Slightly improved compression speed
author yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd>
Mon, 21 Nov 2011 23:56:21 +0000 (23:56 +0000)
committer yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd>
Mon, 21 Nov 2011 23:56:21 +0000 (23:56 +0000)
git-svn-id: https://lz4.googlecode.com/svn/trunk@41 650e7d94-2a16-8b24-b05c-7c0b3f6821cd

lz4.c

diff --git a/lz4.c b/lz4.c
index 1cf04d8..92fe61a 100644 (file)
--- a/lz4.c
+++ b/lz4.c
@@ -160,6 +160,7 @@ int LZ4_compressCtx(void** ctx,
 \r
        BYTE* op = (BYTE*) dest;\r
        \r
+       const size_t DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };\r
        int len, length;\r
        const int skipStrength = SKIPSTRENGTH;\r
        U32 forwardH;\r
@@ -227,13 +228,16 @@ _next_match:
                // Start Counting\r
                ip+=MINMATCH; ref+=MINMATCH;   // MinMatch verified\r
                anchor = ip;\r
-               while (A32(ref) == A32(ip))\r
+               while (ip<matchlimit-3)\r
                {\r
-                       ip+=4; ref+=4;\r
-                       if (ip>matchlimit-4) { ref -= ip - (matchlimit-3); ip = matchlimit-3; break; }\r
+                       int diff = A32(ref) ^ A32(ip);\r
+                       if (!diff) { ip+=4; ref+=4; continue; }\r
+                       ip += DeBruijnBytePos[((U32)((diff & -diff) * 0x077CB531U)) >> 27];\r
+                       goto _endCount;\r
                }\r
-               if (A16(ref) == A16(ip)) { ip+=2; ref+=2; }\r
-               if (*ref == *ip) ip++;\r
+               if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }\r
+               if ((ip<matchlimit) && (*ref == *ip)) ip++;\r
+_endCount:\r
                len = (ip - anchor);\r
                \r
                // Encode MatchLength\r
@@ -298,6 +302,7 @@ int LZ4_compress64kCtx(void** ctx,
 \r
        BYTE* op = (BYTE*) dest;\r
        \r
+       const size_t DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };\r
        int len, length;\r
        const int skipStrength = SKIPSTRENGTH;\r
        U32 forwardH;\r
@@ -366,9 +371,9 @@ _next_match:
                anchor = ip;\r
                while (ip<matchlimit-3)\r
                {\r
-                       if (A32(ref) == A32(ip)) { ip+=4; ref+=4; continue; }\r
-                       if (A16(ref) == A16(ip)) { ip+=2; ref+=2; }\r
-                       if (*ref == *ip) ip++;\r
+                       int diff = A32(ref) ^ A32(ip);\r
+                       if (!diff) { ip+=4; ref+=4; continue; }\r
+                       ip += DeBruijnBytePos[((U32)((diff & -diff) * 0x077CB531U)) >> 27];\r
                        goto _endCount;\r
                }\r
                if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }\r