erofs-utils: lib: use xxh64() for faster filtering first for dedupe
authorGao Xiang <hsiangkao@linux.alibaba.com>
Fri, 22 Sep 2023 18:30:53 +0000 (02:30 +0800)
committerGao Xiang <hsiangkao@linux.alibaba.com>
Thu, 19 Oct 2023 23:44:06 +0000 (07:44 +0800)
Let's check if xxh64 equals when rolling back on global compressed
deduplication.

As a result, it could decrease time by 26.4% (6m57.990s -> 5m7.755s)
on a dataset with "-Ededupe -C8192".

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230922183055.1583756-1-hsiangkao@linux.alibaba.com
include/erofs/defs.h
include/erofs/xxhash.h [deleted file]
lib/Makefile.am
lib/dedupe.c
lib/xattr.c
lib/xxhash.c
lib/xxhash.h [new file with mode: 0644]

index fefa7e7bd708eb69813239c9b967e753f6463b0a..e7384a143f9b27e046dec2ec9830bf26b9c8d930 100644 (file)
@@ -204,6 +204,11 @@ static inline void put_unaligned_le32(u32 val, void *p)
        __put_unaligned_t(__le32, cpu_to_le32(val), p);
 }
 
+static inline u32 get_unaligned_le64(const void *p)
+{
+       return le64_to_cpu(__get_unaligned_t(__le64, p));
+}
+
 /**
  * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value
  * @n - parameter
diff --git a/include/erofs/xxhash.h b/include/erofs/xxhash.h
deleted file mode 100644 (file)
index 5441209..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0+ */
-#ifndef __EROFS_XXHASH_H
-#define __EROFS_XXHASH_H
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#include <stdint.h>
-
-/**
- * xxh32() - calculate the 32-bit hash of the input with a given seed.
- *
- * @input:  The data to hash.
- * @length: The length of the data to hash.
- * @seed:   The seed can be used to alter the result predictably.
- *
- * Return:  The 32-bit hash of the data.
- */
-uint32_t xxh32(const void *input, size_t length, uint32_t seed);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
index 483d4108cf49e852abee5265bff632f4f5083cdf..470e09583f7391025abfddc3923ec744b1ea742a 100644 (file)
@@ -25,9 +25,9 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/include/erofs/xattr.h \
       $(top_srcdir)/include/erofs/compress_hints.h \
       $(top_srcdir)/include/erofs/fragments.h \
-      $(top_srcdir)/include/erofs/xxhash.h \
       $(top_srcdir)/include/erofs/rebuild.h \
-      $(top_srcdir)/lib/liberofs_private.h
+      $(top_srcdir)/lib/liberofs_private.h \
+      $(top_srcdir)/lib/xxhash.h
 
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
index 17da452c4f8f6a27552c15539059d1d0594ab7f4..2f86f8d2337fb06977f64c9b70ad36a2533cb566 100644 (file)
@@ -6,6 +6,7 @@
 #include "erofs/print.h"
 #include "rb_tree.h"
 #include "rolling_hash.h"
+#include "xxhash.h"
 #include "sha256.h"
 
 unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
@@ -66,6 +67,7 @@ static struct rb_tree *dedupe_tree, *dedupe_subtree;
 struct z_erofs_dedupe_item {
        long long       hash;
        u8              prefix_sha256[32];
+       u64             prefix_xxh64;
 
        erofs_blk_t     compressed_blkaddr;
        unsigned int    compressed_blks;
@@ -102,6 +104,7 @@ int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx)
        for (; cur >= ctx->start; --cur) {
                struct z_erofs_dedupe_item *e;
                unsigned int extra;
+               u64 xxh64_csum;
                u8 sha256[32];
 
                if (initial) {
@@ -120,6 +123,10 @@ int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx)
                                continue;
                }
 
+               xxh64_csum = xxh64(cur, window_size, 0);
+               if (e->prefix_xxh64 != xxh64_csum)
+                       continue;
+
                erofs_sha256(cur, window_size, sha256);
                if (memcmp(sha256, e->prefix_sha256, sizeof(sha256)))
                        continue;
@@ -155,6 +162,8 @@ int z_erofs_dedupe_insert(struct z_erofs_inmem_extent *e,
 
        di->original_length = e->length;
        erofs_sha256(original_data, window_size, di->prefix_sha256);
+
+       di->prefix_xxh64 = xxh64(original_data, window_size, 0);
        di->hash = erofs_rolling_hash_init(original_data,
                        window_size, true);
        memcpy(di->extra_data, original_data + window_size,
index 6c8ebf4a61816bc937b65e3a32346bdb41692db0..77c8c3a4492bb385576292b869f4f357fabcef75 100644 (file)
@@ -18,7 +18,7 @@
 #include "erofs/cache.h"
 #include "erofs/io.h"
 #include "erofs/fragments.h"
-#include "erofs/xxhash.h"
+#include "xxhash.h"
 #include "liberofs_private.h"
 
 #ifndef XATTR_SYSTEM_PREFIX
index 7289c77fc2ea0850ac00829cc2001e7ef8bc084a..2768375da31a458211f8721ed2487f19f286642f 100644 (file)
  * - xxHash homepage: https://cyan4973.github.io/xxHash/
  * - xxHash source repository: https://github.com/Cyan4973/xxHash
  */
-
 #include "erofs/defs.h"
-#include "erofs/xxhash.h"
+#include "xxhash.h"
 
 /*-*************************************
  * Macros
  **************************************/
 #define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r)))
+#define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r)))
 
 /*-*************************************
  * Constants
@@ -61,6 +61,12 @@ static const uint32_t PRIME32_3 = 3266489917U;
 static const uint32_t PRIME32_4 =  668265263U;
 static const uint32_t PRIME32_5 =  374761393U;
 
+static const uint64_t PRIME64_1 = 11400714785074694791ULL;
+static const uint64_t PRIME64_2 = 14029467366897019727ULL;
+static const uint64_t PRIME64_3 =  1609587929392839161ULL;
+static const uint64_t PRIME64_4 =  9650029242287828579ULL;
+static const uint64_t PRIME64_5 =  2870177450012600261ULL;
+
 /*-***************************
  * Simple Hash Functions
  ****************************/
@@ -124,3 +130,85 @@ uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
 
        return h32;
 }
+
+static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
+{
+       acc += input * PRIME64_2;
+       acc = xxh_rotl64(acc, 31);
+       acc *= PRIME64_1;
+       return acc;
+}
+
+static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val)
+{
+       val = xxh64_round(0, val);
+       acc ^= val;
+       acc = acc * PRIME64_1 + PRIME64_4;
+       return acc;
+}
+
+uint64_t xxh64(const void *input, const size_t len, const uint64_t seed)
+{
+       const uint8_t *p = (const uint8_t *)input;
+       const uint8_t *const b_end = p + len;
+       uint64_t h64;
+
+       if (len >= 32) {
+               const uint8_t *const limit = b_end - 32;
+               uint64_t v1 = seed + PRIME64_1 + PRIME64_2;
+               uint64_t v2 = seed + PRIME64_2;
+               uint64_t v3 = seed + 0;
+               uint64_t v4 = seed - PRIME64_1;
+
+               do {
+                       v1 = xxh64_round(v1, get_unaligned_le64(p));
+                       p += 8;
+                       v2 = xxh64_round(v2, get_unaligned_le64(p));
+                       p += 8;
+                       v3 = xxh64_round(v3, get_unaligned_le64(p));
+                       p += 8;
+                       v4 = xxh64_round(v4, get_unaligned_le64(p));
+                       p += 8;
+               } while (p <= limit);
+
+               h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
+                       xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
+               h64 = xxh64_merge_round(h64, v1);
+               h64 = xxh64_merge_round(h64, v2);
+               h64 = xxh64_merge_round(h64, v3);
+               h64 = xxh64_merge_round(h64, v4);
+
+       } else {
+               h64  = seed + PRIME64_5;
+       }
+
+       h64 += (uint64_t)len;
+
+       while (p + 8 <= b_end) {
+               const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
+
+               h64 ^= k1;
+               h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
+               p += 8;
+       }
+
+       if (p + 4 <= b_end) {
+               h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
+               h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+               p += 4;
+       }
+
+       while (p < b_end) {
+               h64 ^= (*p) * PRIME64_5;
+               h64 = xxh_rotl64(h64, 11) * PRIME64_1;
+               p++;
+       }
+
+       h64 ^= h64 >> 33;
+       h64 *= PRIME64_2;
+       h64 ^= h64 >> 29;
+       h64 *= PRIME64_3;
+       h64 ^= h64 >> 32;
+
+       return h64;
+}
diff --git a/lib/xxhash.h b/lib/xxhash.h
new file mode 100644 (file)
index 0000000..723c3a5
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0+ */
+#ifndef __EROFS_LIB_XXHASH_H
+#define __EROFS_LIB_XXHASH_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <stdint.h>
+
+/*
+ * xxh32() - calculate the 32-bit hash of the input with a given seed.
+ *
+ * @input:  The data to hash.
+ * @length: The length of the data to hash.
+ * @seed:   The seed can be used to alter the result predictably.
+ *
+ * Return:  The 32-bit hash of the data.
+ */
+uint32_t xxh32(const void *input, size_t length, uint32_t seed);
+
+/*
+ * xxh64() - calculate the 64-bit hash of the input with a given seed.
+ *
+ * @input:  The data to hash.
+ * @length: The length of the data to hash.
+ * @seed:   The seed can be used to alter the result predictably.
+ *
+ * This function runs 2x faster on 64-bit systems, but slower on 32-bit systems.
+ *
+ * Return:  The 64-bit hash of the data.
+ */
+uint64_t xxh64(const void *input, const size_t len, const uint64_t seed);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif