Greatly improved compression and decompression speed, at the expense of some compression ratio

author yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd>

Sun, 5 Jun 2011 21:23:42 +0000 (21:23 +0000)

committer yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd>

Sun, 5 Jun 2011 21:23:42 +0000 (21:23 +0000)
author yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd>
Sun, 5 Jun 2011 21:23:42 +0000 (21:23 +0000)
committer yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd>
Sun, 5 Jun 2011 21:23:42 +0000 (21:23 +0000)
diff --git a/lz4.c b/lz4.c

index fbf89c0..8608ea1 100644 (file)
--- a/lz4.c
+++ b/lz4.c
@@ -36,13 +36,13 @@
  \r
  \r
  //**************************************\r
-// Performance parameter                <---------------------------------------------------------\r
+// Performance parameter               \r
  //**************************************\r
  // Lowering this value reduce memory usage\r
  // It may also improve speed, especially if you reach L1 cache size (32KB for Intel, 64KB for AMD)\r
  // Expanding memory usage typically improves compression ratio\r
  // Memory usage formula : N->2^(N+2) Bytes (examples : 17 -> 512KB ; 12 -> 16KB)\r
-#define HASH_LOG 17                        \r
+#define HASH_LOG 12\r
  \r
  \r
  //**************************************\r
@@ -125,6 +125,7 @@ int LZ4_compressCtx(void** ctx,
  \r
         BYTE    *ip = (BYTE*) source,      /* input pointer */ \r
                         *anchor = (BYTE*) source,\r
+                       *incompressible = anchor + INCOMPRESSIBLE,\r
                         *iend = (BYTE*) source + isize,\r
                         *ilimit = iend - MINMATCH - 1;\r
  \r
@@ -134,7 +135,6 @@ int LZ4_compressCtx(void** ctx,
         \r
         int             len, length, sequence, h;\r
         U32             step=1;\r
-       S32             limit=INCOMPRESSIBLE;\r
  \r
  \r
         // Init \r
@@ -154,23 +154,23 @@ int LZ4_compressCtx(void** ctx,
                 ref = HashTable[h];\r
                 HashTable[h] = ip;\r
  \r
-               // Check Min Match\r
-               if (( ((ip-ref) >> MAXD_LOG) != 0) || (*(U32*)ref != sequence))\r
+               // Min Match\r
+               if (( ((ip-ref) >> MAXD_LOG)) || (*(U32*)ref != sequence))\r
                 { \r
-                       if (ip-anchor>limit) { limit <<= 1; step += 1 + (step>>2); }\r
-                       ip += step; \r
+                       if (ip>incompressible) { incompressible += INCOMPRESSIBLE << (step >> 1); step++; }\r
+                       ip+=step; \r
                         continue; \r
-               }       \r
+               }\r
+               step=1;\r
  \r
-               // catch up\r
-               if (step>1) { HashTable[h] = ref; ip -= (step-1); step=1; continue; }\r
-               limit = INCOMPRESSIBLE; \r
+               // Catch up\r
+               while ((ip>anchor) && (*(ip-1)==*(ref-1))) { ip--; ref--; }  \r
  \r
                 // Encode Literal length\r
-               len = length = ip - anchor;\r
+               length = ip - anchor;\r
                 orun = op++;\r
-               if (len>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len-=RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } \r
-               else *orun = (len<<ML_BITS);\r
+               if (length>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } \r
+               else *orun = (length<<ML_BITS);\r
  \r
                 // Copy Literals\r
                 l_end = op + length;\r
@@ -183,7 +183,16 @@ int LZ4_compressCtx(void** ctx,
                 // Start Counting\r
                 ip+=MINMATCH;  ref+=MINMATCH;   // MinMatch verified\r
                 anchor = ip;\r
-               while ((ip<iend) && (*ref == *ip)) { ip++; ref++; }   // Ends at *ip!=*ref\r
+               while (ip<(iend-3))\r
+               {\r
+                       if (*(U32*)ref == *(U32*)ip) { ip+=4; ref+=4; continue; }   \r
+                       if (*(U16*)ref == *(U16*)ip) { ip+=2; ref+=2; }\r
+                       if (*ref == *ip) ip++;\r
+                       goto _endCount;\r
+               }\r
+               if ((ip<(iend-1)) && (*(U16*)ref == *(U16*)ip)) { ip+=2; ref+=2; }\r
+               if ((ip<iend) && (*ref == *ip)) ip++;\r
+_endCount:\r
                 len = (ip - anchor);\r
                 \r
                 // Encode MatchLength\r
@@ -192,6 +201,7 @@ int LZ4_compressCtx(void** ctx,
  \r
                 // Prepare next loop\r
                 anchor = ip; \r
+               incompressible = anchor + INCOMPRESSIBLE;\r
         }\r
  \r
         // Encode Last Literals\r
@@ -222,7 +232,7 @@ int LZ4_uncompress(char* source,
         BYTE    *ip = (BYTE*) source;\r
  \r
         BYTE    *op = (BYTE*) dest, \r
-                       *oend=(BYTE*) dest + osize,\r
+                       *oend= op + osize,\r
                         *ref, *cpy,\r
                         runcode;\r
         \r
diff --git a/lz4.h b/lz4.h

index 031e8ee..429a20a 100644 (file)
--- a/lz4.h
+++ b/lz4.h
@@ -66,9 +66,9 @@ int LZ4_uncompress_unknownOutputSize (char* source, char* dest, int isize, int m
  LZ4_uncompress :\r
         return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)\r
                          If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction\r
-                        This version never writes beyond dest + osize, and is therefore protected against malicious data packets\r
+                        This version never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets\r
         note 1 : isize is the input size, therefore the compressed size\r
-       note 2 : destination buffer must be already allocated\r
+       note 2 : destination buffer must already be allocated, with at least maxOutputSize bytes\r
         note 3 : this version is slower by up to 10%, and is therefore not recommended for general use\r
  */\r
  \r
@@ -88,20 +88,21 @@ LZ4_compressCtx :
  */\r
  \r
  \r
-//****************************\r
-// Deprecated Functions\r
-//****************************\r
+//*********************************\r
+// Faster Decoding function\r
+//*********************************\r
  \r
-int LZ4_decode   (char* source, char* dest, int isize);\r
+#define LZ4_uncompress_fast LZ4_decode\r
+int LZ4_decode (char* source, char* dest, int isize);\r
  \r
  /*\r
-LZ4_decode :\r
+LZ4_decode : This version is the fastest one, besting LZ4_uncompress by a few %.\r
         return : the number of bytes in decoded buffer dest\r
         note 1 : isize is the input size, therefore the compressed size\r
         note 2 : destination buffer must be already allocated. \r
                         The program calling the decoder must know in advance the size of decoded stream to properly allocate the destination buffer\r
                         The destination buffer size must be at least "decompressedSize + 3 Bytes"\r
-                       This version is unprotected against malicious data packets designed to create buffer overflow errors.\r
+                       This version is **unprotected** against malicious data packets designed to create buffer overflow errors.\r
                         It is therefore deprecated, but still present in this version for compatibility.\r
  */\r
  \r
author	yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd>
	Sun, 5 Jun 2011 21:23:42 +0000 (21:23 +0000)
committer	yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd>
	Sun, 5 Jun 2011 21:23:42 +0000 (21:23 +0000)