From 672a4fb4ea99cb54d4099c3df8431a1edbdc5224 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sat, 23 Sep 2023 02:30:53 +0800 Subject: [PATCH] erofs-utils: lib: use xxh64() for faster filtering first for dedupe Let's check if xxh64 equals when rolling back on global compressed deduplication. As a result, it could decrease time by 26.4% (6m57.990s -> 5m7.755s) on a dataset with "-Ededupe -C8192". Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20230922183055.1583756-1-hsiangkao@linux.alibaba.com --- include/erofs/defs.h | 5 +++ include/erofs/xxhash.h | 27 ------------- lib/Makefile.am | 4 +- lib/dedupe.c | 9 +++++ lib/xattr.c | 2 +- lib/xxhash.c | 92 +++++++++++++++++++++++++++++++++++++++++- lib/xxhash.h | 40 ++++++++++++++++++ 7 files changed, 147 insertions(+), 32 deletions(-) delete mode 100644 include/erofs/xxhash.h create mode 100644 lib/xxhash.h diff --git a/include/erofs/defs.h b/include/erofs/defs.h index fefa7e7..e7384a1 100644 --- a/include/erofs/defs.h +++ b/include/erofs/defs.h @@ -204,6 +204,11 @@ static inline void put_unaligned_le32(u32 val, void *p) __put_unaligned_t(__le32, cpu_to_le32(val), p); } +static inline u32 get_unaligned_le64(const void *p) +{ + return le64_to_cpu(__get_unaligned_t(__le64, p)); +} + /** * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value * @n - parameter diff --git a/include/erofs/xxhash.h b/include/erofs/xxhash.h deleted file mode 100644 index 5441209..0000000 --- a/include/erofs/xxhash.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0+ */ -#ifndef __EROFS_XXHASH_H -#define __EROFS_XXHASH_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#include - -/** - * xxh32() - calculate the 32-bit hash of the input with a given seed. - * - * @input: The data to hash. - * @length: The length of the data to hash. - * @seed: The seed can be used to alter the result predictably. - * - * Return: The 32-bit hash of the data. - */ -uint32_t xxh32(const void *input, size_t length, uint32_t seed); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/lib/Makefile.am b/lib/Makefile.am index 483d410..470e095 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -25,9 +25,9 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \ $(top_srcdir)/include/erofs/xattr.h \ $(top_srcdir)/include/erofs/compress_hints.h \ $(top_srcdir)/include/erofs/fragments.h \ - $(top_srcdir)/include/erofs/xxhash.h \ $(top_srcdir)/include/erofs/rebuild.h \ - $(top_srcdir)/lib/liberofs_private.h + $(top_srcdir)/lib/liberofs_private.h \ + $(top_srcdir)/lib/xxhash.h noinst_HEADERS += compressor.h liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \ diff --git a/lib/dedupe.c b/lib/dedupe.c index 17da452..2f86f8d 100644 --- a/lib/dedupe.c +++ b/lib/dedupe.c @@ -6,6 +6,7 @@ #include "erofs/print.h" #include "rb_tree.h" #include "rolling_hash.h" +#include "xxhash.h" #include "sha256.h" unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2, @@ -66,6 +67,7 @@ static struct rb_tree *dedupe_tree, *dedupe_subtree; struct z_erofs_dedupe_item { long long hash; u8 prefix_sha256[32]; + u64 prefix_xxh64; erofs_blk_t compressed_blkaddr; unsigned int compressed_blks; @@ -102,6 +104,7 @@ int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx) for (; cur >= ctx->start; --cur) { struct z_erofs_dedupe_item *e; unsigned int extra; + u64 xxh64_csum; u8 sha256[32]; if (initial) { @@ -120,6 +123,10 @@ int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx) continue; } + xxh64_csum = xxh64(cur, window_size, 0); + if (e->prefix_xxh64 != xxh64_csum) + continue; + erofs_sha256(cur, window_size, sha256); if (memcmp(sha256, e->prefix_sha256, sizeof(sha256))) continue; @@ -155,6 +162,8 @@ int z_erofs_dedupe_insert(struct z_erofs_inmem_extent *e, di->original_length = e->length; erofs_sha256(original_data, window_size, di->prefix_sha256); + + di->prefix_xxh64 = xxh64(original_data, window_size, 0); di->hash = erofs_rolling_hash_init(original_data, window_size, true); memcpy(di->extra_data, original_data + window_size, diff --git a/lib/xattr.c b/lib/xattr.c index 6c8ebf4..77c8c3a 100644 --- a/lib/xattr.c +++ b/lib/xattr.c @@ -18,7 +18,7 @@ #include "erofs/cache.h" #include "erofs/io.h" #include "erofs/fragments.h" -#include "erofs/xxhash.h" +#include "xxhash.h" #include "liberofs_private.h" #ifndef XATTR_SYSTEM_PREFIX diff --git a/lib/xxhash.c b/lib/xxhash.c index 7289c77..2768375 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -43,14 +43,14 @@ * - xxHash homepage: https://cyan4973.github.io/xxHash/ * - xxHash source repository: https://github.com/Cyan4973/xxHash */ - #include "erofs/defs.h" -#include "erofs/xxhash.h" +#include "xxhash.h" /*-************************************* * Macros **************************************/ #define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r))) +#define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r))) /*-************************************* * Constants @@ -61,6 +61,12 @@ static const uint32_t PRIME32_3 = 3266489917U; static const uint32_t PRIME32_4 = 668265263U; static const uint32_t PRIME32_5 = 374761393U; +static const uint64_t PRIME64_1 = 11400714785074694791ULL; +static const uint64_t PRIME64_2 = 14029467366897019727ULL; +static const uint64_t PRIME64_3 = 1609587929392839161ULL; +static const uint64_t PRIME64_4 = 9650029242287828579ULL; +static const uint64_t PRIME64_5 = 2870177450012600261ULL; + /*-*************************** * Simple Hash Functions ****************************/ @@ -124,3 +130,85 @@ uint32_t xxh32(const void *input, const size_t len, const uint32_t seed) return h32; } + +static uint64_t xxh64_round(uint64_t acc, const uint64_t input) +{ + acc += input * PRIME64_2; + acc = xxh_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val) +{ + val = xxh64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +uint64_t xxh64(const void *input, const size_t len, const uint64_t seed) +{ + const uint8_t *p = (const uint8_t *)input; + const uint8_t *const b_end = p + len; + uint64_t h64; + + if (len >= 32) { + const uint8_t *const limit = b_end - 32; + uint64_t v1 = seed + PRIME64_1 + PRIME64_2; + uint64_t v2 = seed + PRIME64_2; + uint64_t v3 = seed + 0; + uint64_t v4 = seed - PRIME64_1; + + do { + v1 = xxh64_round(v1, get_unaligned_le64(p)); + p += 8; + v2 = xxh64_round(v2, get_unaligned_le64(p)); + p += 8; + v3 = xxh64_round(v3, get_unaligned_le64(p)); + p += 8; + v4 = xxh64_round(v4, get_unaligned_le64(p)); + p += 8; + } while (p <= limit); + + h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) + + xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18); + h64 = xxh64_merge_round(h64, v1); + h64 = xxh64_merge_round(h64, v2); + h64 = xxh64_merge_round(h64, v3); + h64 = xxh64_merge_round(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (uint64_t)len; + + while (p + 8 <= b_end) { + const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p)); + + h64 ^= k1; + h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4; + p += 8; + } + + if (p + 4 <= b_end) { + h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1; + h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p += 4; + } + + while (p < b_end) { + h64 ^= (*p) * PRIME64_5; + h64 = xxh_rotl64(h64, 11) * PRIME64_1; + p++; + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} diff --git a/lib/xxhash.h b/lib/xxhash.h new file mode 100644 index 0000000..723c3a5 --- /dev/null +++ b/lib/xxhash.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0+ */ +#ifndef __EROFS_LIB_XXHASH_H +#define __EROFS_LIB_XXHASH_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include + +/* + * xxh32() - calculate the 32-bit hash of the input with a given seed. + * + * @input: The data to hash. + * @length: The length of the data to hash. + * @seed: The seed can be used to alter the result predictably. + * + * Return: The 32-bit hash of the data. + */ +uint32_t xxh32(const void *input, size_t length, uint32_t seed); + +/* + * xxh64() - calculate the 64-bit hash of the input with a given seed. + * + * @input: The data to hash. + * @length: The length of the data to hash. + * @seed: The seed can be used to alter the result predictably. + * + * This function runs 2x faster on 64-bit systems, but slower on 32-bit systems. + * + * Return: The 64-bit hash of the data. + */ +uint64_t xxh64(const void *input, const size_t len, const uint64_t seed); + +#ifdef __cplusplus +} +#endif + +#endif -- 2.34.1