integrated chain swapper into HC match finder
authorYann Collet <cyan@fb.com>
Sat, 5 May 2018 02:13:33 +0000 (19:13 -0700)
committerYann Collet <cyan@fb.com>
Sat, 5 May 2018 02:13:33 +0000 (19:13 -0700)
slower than expected
Pattern analyzer and Chain Swapper
work slower when both activated.
Reasons unclear.

lib/lz4hc.c

index 6c50db5..c58cd9d 100644 (file)
@@ -227,6 +227,7 @@ LZ4HC_InsertAndGetWiderMatch (
     const BYTE* const dictBase = hc4->dictBase;
     int const lookBackLength = (int)(ip-iLowLimit);
     int nbAttempts = maxNbAttempts;
+    int matchChainPos = 0;
     U32 const pattern = LZ4_read32(ip);
     U32 matchIndex;
     U32 dictMatchIndex;
@@ -241,31 +242,30 @@ LZ4HC_InsertAndGetWiderMatch (
                 matchIndex, lowestMatchIndex);
 
     while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) {
-        DEBUGLOG(7, "remaining attempts : %i", nbAttempts);
+        int mlt=0;
         nbAttempts--;
         assert(matchIndex < ipIndex);
         if (favorDecSpeed && (ipIndex - matchIndex < 8)) {
             /* do nothing */
-        } else if (matchIndex >= dictLimit) {
+        } else if (matchIndex >= dictLimit) {   /* within current Prefix */
             const BYTE* const matchPtr = base + matchIndex;
             assert(matchPtr >= lowPrefixPtr);
             assert(matchPtr < ip);
             assert(longest >= 1);
             if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) {
                 if (LZ4_read32(matchPtr) == pattern) {
-                    int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
                     int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0;
+                    mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
                     mlt -= back;
                     if (mlt > longest) {
                         longest = mlt;
-                        *matchpos = matchPtr+back;
-                        *startpos = ip+back;
+                        *matchpos = matchPtr + back;
+                        *startpos = ip + back;
             }   }   }
-        } else {   /* matchIndex < dictLimit */
+        } else {   /* lowestMatchIndex <= matchIndex < dictLimit */
             const BYTE* const matchPtr = dictBase + matchIndex;
             if (LZ4_read32(matchPtr) == pattern) {
                 const BYTE* const dictStart = dictBase + hc4->lowLimit;
-                int mlt;
                 int back = 0;
                 const BYTE* vLimit = ip + (dictLimit - matchIndex);
                 if (vLimit > iHighLimit) vLimit = iHighLimit;
@@ -280,9 +280,10 @@ LZ4HC_InsertAndGetWiderMatch (
                     *startpos = ip + back;
         }   }   }
 
-        {   U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex);
-            matchIndex -= nextOffset;
-            if (patternAnalysis && nextOffset==1) {
+        {   U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex);
+            if (1 && patternAnalysis && distNextMatch==1 && matchChainPos==0) {
+                U32 const matchCandidateIdx = matchIndex-1;
+                //static U64 g_nbPatternAnalysis = 0; g_nbPatternAnalysis++; if (g_nbPatternAnalysis % (((g_nbPatternAnalysis/1000000)+1)*100)==0) DEBUGLOG(2, "g_nbPatternAnalysis=%llu ", g_nbPatternAnalysis);
                 /* may be a repeated pattern */
                 if (repeat == rep_untested) {
                     if ( ((pattern & 0xFFFF) == (pattern >> 16))
@@ -293,21 +294,48 @@ LZ4HC_InsertAndGetWiderMatch (
                         repeat = rep_not;
                 }   }
                 if ( (repeat == rep_confirmed)
-                  && (matchIndex >= dictLimit) ) {   /* same segment only */
-                    const BYTE* const matchPtr = base + matchIndex;
+                  && (matchCandidateIdx >= dictLimit) ) {   /* same segment only */
+                    const BYTE* const matchPtr = base + matchCandidateIdx;
                     if (LZ4_read32(matchPtr) == pattern) {  /* good candidate */
                         size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
                         const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE;
                         size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern);
                         size_t const currentSegmentLength = backLength + forwardPatternLength;
+                        //static U64 g_nbPatternAnalysis_confirmed = 0; g_nbPatternAnalysis_confirmed++; if (g_nbPatternAnalysis_confirmed%(((g_nbPatternAnalysis_confirmed/100000)+1)*100)==0) DEBUGLOG(2, "g_nbPatternAnalysis_confirmed=%llu ", g_nbPatternAnalysis_confirmed);
 
                         if ( (currentSegmentLength >= srcPatternLength)   /* current pattern segment large enough to contain full srcPatternLength */
                           && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
-                            matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength;  /* best position, full pattern, might be followed by more match */
+                            matchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength;  /* best position, full pattern, might be followed by more match */
                         } else {
-                            matchIndex -= (U32)backLength;   /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */
+                            matchIndex = matchCandidateIdx - (U32)backLength;   /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */
                         }
-        }   }   }   }
+                        matchChainPos = 0;
+                        continue;
+                }   }
+        }   }
+
+        if (mlt == longest) {  /* better match => select a better chain */
+            assert(longest >= MINMATCH);
+            if (1 && matchIndex + longest <= ipIndex) {
+                U32 distanceToNextMatch = 1;
+                int pos;
+                //static U64 g_nbChainSwapper = 0; g_nbChainSwapper++; if (g_nbChainSwapper%(((g_nbChainSwapper/100000)+1)*100)==0) DEBUGLOG(2, "g_nbChainSwapper=%llu ", g_nbChainSwapper);
+                for (pos = matchChainPos; pos <= longest - MINMATCH; pos++) {
+                    U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos);
+                    if (candidateDist > distanceToNextMatch) {
+                        distanceToNextMatch = candidateDist;
+                        matchChainPos = pos;
+                    }
+                }
+                if (distanceToNextMatch > matchIndex) break;   /* avoid overflow */
+                matchIndex -= distanceToNextMatch;
+                continue;
+            }
+        }
+
+        /* follow current chain */
+        matchIndex -= DELTANEXTU16(chainTable, matchIndex+matchChainPos);
+
     }  /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */
 
     if (dict == usingDictCtx && nbAttempts && ipIndex - lowestMatchIndex < MAX_DISTANCE) {
@@ -339,6 +367,7 @@ LZ4HC_InsertAndGetWiderMatch (
             }
         }
     }
+
     return longest;
 }
 
@@ -1099,7 +1128,7 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4,
     int nbAttempts = maxNbAttempts;
     U32 const pattern = LZ4_read32(ip);
     U32 matchIndex;
-    int fromMStart = 0;
+    int matchChainPos = 0;
 
     DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
     /* First Match */
@@ -1109,13 +1138,11 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4,
                 matchIndex, lowestMatchIndex);
 
     /* find first match */
-    while ( (longest <= MINMATCH)
+    while ( 1 && (longest < MINMATCH)
       && (matchIndex>=lowestMatchIndex)
       && (nbAttempts) ) {
         const BYTE* const matchPtr = base + matchIndex;
         assert(matchIndex < ipIndex);
-        assert(matchIndex >= dictLimit);
-        assert(matchPtr >= lowPrefixPtr);
         assert(matchPtr < ip);
         assert(longest >= 1);
         nbAttempts--;
@@ -1124,7 +1151,6 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4,
             if (mlt > longest) {
                 longest = mlt;
                 *matchpos = matchPtr;
-                break;
         }   }
 
         {   U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex);
@@ -1132,39 +1158,44 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4,
         }
     }  /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */
 
-    assert(longest > MINMATCH);
-    while ((matchIndex-fromMStart>=lowestMatchIndex) && (nbAttempts)) {
-        const BYTE* const matchPtr = base + matchIndex - fromMStart;
+    /* search longer matches */
+    while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) {
+        const BYTE* const matchPtr = base + matchIndex;
+        int mlt = 0;
         assert(matchIndex < ipIndex);
-        assert(matchIndex >= dictLimit);
-        assert(matchPtr >= lowPrefixPtr);
         assert(matchPtr < ip);
-        assert(longest >= 1);
+        assert(longest >= MINMATCH);
         nbAttempts--;
         if (LZ4_read16(ip + longest - 1) == LZ4_read16(matchPtr + longest - 1)) {
             if (LZ4_read32(matchPtr) == pattern) {
-                int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+                mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
                 if (mlt > longest) {
                     longest = mlt;
                     *matchpos = matchPtr;
-                    matchIndex -= fromMStart;   /* beginning of match */
-                    if (1 && matchIndex + longest <= ipIndex) {
-                        U32 distanceToNextMatch = 1;
-                        int pos;
-                        for (pos = 0; pos <= longest - MINMATCH; pos++) {
-                            U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos);
-                            if (candidateDist > distanceToNextMatch) {
-                                distanceToNextMatch = candidateDist;
-                                fromMStart = pos;
-                            }
-                        }
-                        if (distanceToNextMatch > matchIndex) break;   /* avoid overflow */
-                        matchIndex -= distanceToNextMatch - fromMStart;
-                        continue;
+                }
+            }
+        }
+
+        /* search more promising chain */
+        if (mlt == longest) {
+            if (1 && matchIndex + longest <= ipIndex) {
+                U32 distanceToNextMatch = 1;
+                int pos;
+                for (pos = matchChainPos; pos <= longest - MINMATCH; pos++) {
+                    U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos);
+                    if (candidateDist > distanceToNextMatch) {
+                        distanceToNextMatch = candidateDist;
+                        matchChainPos = pos;
                     }
-        }   }   }
+                }
+                if (distanceToNextMatch > matchIndex) break;   /* avoid overflow */
+                matchIndex -= distanceToNextMatch;
+                continue;
+            }
+        }
 
-        {   U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex);
+        /* continue current chain */
+        {   U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex+matchChainPos);
             matchIndex -= nextOffset;
         }
     }  /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */
@@ -1190,8 +1221,8 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
     /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
      * but this won't be the case here, as we define iLowLimit==ip,
      * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
-    //int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /* patternAnalysis */, dict, favorDecSpeed);
-    int matchLength = LZ4HC_FindLongestMatch(ctx, ip, iHighLimit, minLen, &matchPtr, nbSearches); (void)dict;
+    int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /* patternAnalysis */, dict, favorDecSpeed);
+    //int matchLength = LZ4HC_FindLongestMatch(ctx, ip, iHighLimit, minLen, &matchPtr, nbSearches); (void)dict;
     if (matchLength <= minLen) return match;
     if (favorDecSpeed) {
         if ((matchLength>18) & (matchLength<=36)) matchLength=18;   /* favor shortcut */