partial search, while preserving compression ratio
author Yann Collet <cyan@fb.com>
Thu, 2 Nov 2017 22:37:18 +0000 (15:37 -0700)
committer Yann Collet <cyan@fb.com>
Thu, 2 Nov 2017 22:37:18 +0000 (15:37 -0700)
tag interesting places

lib/lz4hc.c
lib/lz4opt.h

index 884f5d7..44e0b0a 100644 (file)
@@ -363,14 +363,14 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
 #if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2)
     static const BYTE* start = NULL;
     static U32 totalCost = 0;
-    U32 const pos = (U32)(*anchor - start);
+    U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start);
     U32 const ll = (U32)(*ip - *anchor);
     U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
     U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
     U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
     if (start==NULL) start = *anchor;  /* only works for single segment */
     //g_debuglog_enable = (pos >= 112705) & (pos <= 112760);
-    DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u / %u",
+    DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
                 pos,
                 (U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
                 cost, totalCost);
index 1e696f9..37cc73a 100644 (file)
@@ -46,6 +46,7 @@ typedef struct {
     int off;
     int mlen;
     int litlen;
+    int toSearch;
 } LZ4HC_optimal_t;
 
 
@@ -278,6 +279,7 @@ static int LZ4HC_compress_optimal (
                 opt[rPos].off = 0;
                 opt[rPos].litlen = llen + rPos;
                 opt[rPos].price = cost;
+                opt[rPos].toSearch = 1;
                 DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
                             rPos, cost, opt[rPos].litlen);
         }   }
@@ -294,16 +296,21 @@ static int LZ4HC_compress_optimal (
                     opt[mlen].off = offset;
                     opt[mlen].litlen = llen;
                     opt[mlen].price = cost;
+                    opt[mlen].toSearch = (((mlen - 18) % 255) == 0);
                     DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
                                 mlen, cost, mlen);
         }   }   }
         last_match_pos = matches[nb_matches_initial-1].len;
+        opt[last_match_pos-2].toSearch = 1;
+        opt[last_match_pos-1].toSearch = 1;
+        opt[last_match_pos].toSearch = 1;
         {   int addLit;
             for (addLit = 1; addLit <= 3; addLit ++) {
                 opt[last_match_pos+addLit].mlen = 1; /* literal */
                 opt[last_match_pos+addLit].off = 0;
                 opt[last_match_pos+addLit].litlen = addLit;
                 opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+                opt[last_match_pos+addLit].toSearch = 1;
                 DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
                             last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
         }   }
@@ -314,6 +321,7 @@ static int LZ4HC_compress_optimal (
             int nb_matches;
 
             if (curPtr >= mflimit) break;
+            if (opt[cur].toSearch == 0) continue;
 
             DEBUGLOG(7, "search at rPos:%u", cur);
             //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate);
@@ -341,6 +349,7 @@ static int LZ4HC_compress_optimal (
                         opt[pos].off = 0;
                         opt[pos].litlen = baseLitlen+litlen;
                         opt[pos].price = price;
+                        opt[pos].toSearch = 1;
                         DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)",
                                     pos, price, opt[pos].litlen);
             }   }   }
@@ -380,14 +389,19 @@ static int LZ4HC_compress_optimal (
                             opt[pos].off = offset;
                             opt[pos].litlen = ll;
                             opt[pos].price = price;
+                            opt[pos].toSearch = (((ml-18) % 255) == 0);
             }   }   }   }
             /* complete following positions with literals */
+            opt[last_match_pos-2].toSearch = 1;
+            opt[last_match_pos-1].toSearch = 1;
+            opt[last_match_pos].toSearch = 1;
             {   int addLit;
                 for (addLit = 1; addLit <= 3; addLit ++) {
                     opt[last_match_pos+addLit].mlen = 1; /* literal */
                     opt[last_match_pos+addLit].off = 0;
                     opt[last_match_pos+addLit].litlen = addLit;
                     opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+                    opt[last_match_pos+addLit].toSearch = 1;
                     DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
             }   }
         }  /* for (cur = 1; cur <= last_match_pos; cur++) */