ss_bsdiff: Change the search function 64/280164/3
author Mateusz Moscicki <m.moscicki2@partner.samsung.com>
Wed, 24 Aug 2022 15:17:49 +0000 (17:17 +0200)
committer Karol Lewandowski <k.lewandowsk@samsung.com>
Thu, 25 Aug 2022 20:20:23 +0000 (22:20 +0200)
This commit uses the provided sa_search() function to look for patterns,
instead of using its own binary-search implementation.

The change is inspired by:

    https://android.googlesource.com/platform/external/bsdiff/+/refs/heads/master/suffix_array_index.cc

Change-Id: I49b3c4dd12c11b81157e030851bb8e4e48d1f6b2

bsdiff/ss_bsdiff.c

index 76fbe41..d892de2 100644 (file)
@@ -146,23 +146,24 @@ static off_t matchlen(u_char *old, off_t oldsize, u_char *new, off_t newsize)
 static off_t search(saidx_t *I, u_char *old, off_t oldsize,
                u_char *new, off_t newsize, off_t st, off_t en, off_t *pos)
 {
-       off_t x, y;
-       while (en - st >= 2) {
-               x = st + (en - st) / 2;
-               if (memcmp(old + I[x], new, MIN(oldsize - I[x], newsize)) < 0)
-                       st = x;
-               else
-                       en = x;
-       }
+       off_t x,y;
+       int left = 0;
 
-       x = matchlen(old + I[st], oldsize - I[st], new, newsize);
-       y = matchlen(old + I[en], oldsize - I[en], new, newsize);
+       int count = sa_search(old, oldsize, new, newsize, I, en, &left);
+       if (count > 0) {
+               *pos = I[left];
+               return newsize;
+       }
 
-       if (x > y) {
-               *pos = I[st];
+       if (left > 0) {
+               x = matchlen(old + I[left - 1], oldsize - I[left - 1], new, newsize);
+       }
+       y = matchlen(old + I[left], oldsize - I[left], new, newsize);
+       if(left > 0 && x > y) {
+               *pos = I[left - 1];
                return x;
        } else {
-               *pos = I[en];
+               *pos = I[left];
                return y;
        }
 }
@@ -370,11 +371,10 @@ int Function(int offset_oldscore)
                        prev_oldscore = oldscore;
                        prev_pos = pos;
                        len = search(data.I, data.old, data.oldsize, data.new[thread_num] + scan, end - scan,
-                                       len, data.oldsize, &pos); // Passing parameter as len instead of 0 for ramdisk.img etc taking long time
+                                       0, data.oldsize, &pos);
 
-                       for (; scsc < scan + len; scsc++)
-                               if ((scsc + lastoffset < data.oldsize) &&
-                                               (data.old[scsc + lastoffset] == data.new[thread_num][scsc]))
+                       for (; scsc < scan + len && scsc + lastoffset < data.oldsize; scsc++)
+                               if (data.old[scsc + lastoffset] == data.new[thread_num][scsc])
                                        oldscore++;
 #ifdef TIME_LIMIT_CHECK
                        if (offset_oldscore > 4) // when offset_oldscore is 4 and less we have to make sure diff is created no mater what, so we can't timeout
@@ -405,9 +405,10 @@ int Function(int offset_oldscore)
                        const size_t fuzz = 8;
                        if (prev_len - fuzz <= len && len <= prev_len &&
                            prev_oldscore - fuzz <= oldscore &&
+                           oldscore <= prev_oldscore &&
                            prev_pos <= pos && pos <= prev_pos + fuzz &&
                            oldscore <= len && len <= oldscore + fuzz) {
-                           num_less_than_eight++;
+                               num_less_than_eight++;
                        } else {
                                num_less_than_eight = 0;
                        }