erofs-utils: add xxh32 library
authorJingbo Xu <jefflexu@linux.alibaba.com>
Tue, 29 Aug 2023 14:55:02 +0000 (22:55 +0800)
committerGao Xiang <hsiangkao@linux.alibaba.com>
Tue, 29 Aug 2023 17:26:42 +0000 (01:26 +0800)
Add xxh32 library which could be used by following xattr bloom filter
feature.

Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230829145504.93567-2-jefflexu@linux.alibaba.com
[ Gao Xiang: use `BSD-2-Clause OR GPL-2.0+` for "erofs/xxhash.h". ]
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
include/erofs/xxhash.h [new file with mode: 0644]
lib/Makefile.am
lib/xxhash.c [new file with mode: 0644]

diff --git a/include/erofs/xxhash.h b/include/erofs/xxhash.h
new file mode 100644 (file)
index 0000000..5441209
--- /dev/null
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0+ */
+#ifndef __EROFS_XXHASH_H
+#define __EROFS_XXHASH_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <stdint.h>
+
+/**
+ * xxh32() - calculate the 32-bit hash of the input with a given seed.
+ *
+ * @input:  The data to hash.
+ * @length: The length of the data to hash.
+ * @seed:   The seed can be used to alter the result predictably.
+ *
+ * Return:  The 32-bit hash of the data.
+ */
+uint32_t xxh32(const void *input, size_t length, uint32_t seed);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
index 7a5dc03a8f1a3301811abf5d4bf20b01a0aac4d9..3e0951647173e435ed1a09539e2f7761430d2b60 100644 (file)
@@ -24,6 +24,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/include/erofs/xattr.h \
       $(top_srcdir)/include/erofs/compress_hints.h \
       $(top_srcdir)/include/erofs/fragments.h \
+      $(top_srcdir)/include/erofs/xxhash.h \
       $(top_srcdir)/lib/liberofs_private.h
 
 noinst_HEADERS += compressor.h
@@ -31,7 +32,7 @@ liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
                      namei.c data.c compress.c compressor.c zmap.c decompress.c \
                      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
                      fragments.c rb_tree.c dedupe.c uuid_unparse.c uuid.c tar.c \
-                     block_list.c
+                     block_list.c xxhash.c
 
 liberofs_la_CFLAGS = -Wall ${libuuid_CFLAGS} -I$(top_srcdir)/include
 if ENABLE_LZ4
diff --git a/lib/xxhash.c b/lib/xxhash.c
new file mode 100644 (file)
index 0000000..7289c77
--- /dev/null
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only
+/*
+ * The xxhash is copied from the linux kernel at:
+ *     https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/xxhash.c
+ *
+ * The original copyright is:
+ *
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (C) 2012-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following disclaimer
+ *     in the documentation and/or other materials provided with the
+ *     distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at:
+ * - xxHash homepage: https://cyan4973.github.io/xxHash/
+ * - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+#include "erofs/defs.h"
+#include "erofs/xxhash.h"
+
+/*-*************************************
+ * Macros
+ **************************************/
+#define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r)))
+
+/*-*************************************
+ * Constants
+ **************************************/
+static const uint32_t PRIME32_1 = 2654435761U;
+static const uint32_t PRIME32_2 = 2246822519U;
+static const uint32_t PRIME32_3 = 3266489917U;
+static const uint32_t PRIME32_4 =  668265263U;
+static const uint32_t PRIME32_5 =  374761393U;
+
+/*-***************************
+ * Simple Hash Functions
+ ****************************/
+static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
+{
+       seed += input * PRIME32_2;
+       seed = xxh_rotl32(seed, 13);
+       seed *= PRIME32_1;
+       return seed;
+}
+
+uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
+{
+       const uint8_t *p = (const uint8_t *)input;
+       const uint8_t *b_end = p + len;
+       uint32_t h32;
+
+       if (len >= 16) {
+               const uint8_t *const limit = b_end - 16;
+               uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+               uint32_t v2 = seed + PRIME32_2;
+               uint32_t v3 = seed + 0;
+               uint32_t v4 = seed - PRIME32_1;
+
+               do {
+                       v1 = xxh32_round(v1, get_unaligned_le32(p));
+                       p += 4;
+                       v2 = xxh32_round(v2, get_unaligned_le32(p));
+                       p += 4;
+                       v3 = xxh32_round(v3, get_unaligned_le32(p));
+                       p += 4;
+                       v4 = xxh32_round(v4, get_unaligned_le32(p));
+                       p += 4;
+               } while (p <= limit);
+
+               h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +
+                       xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);
+       } else {
+               h32 = seed + PRIME32_5;
+       }
+
+       h32 += (uint32_t)len;
+
+       while (p + 4 <= b_end) {
+               h32 += get_unaligned_le32(p) * PRIME32_3;
+               h32 = xxh_rotl32(h32, 17) * PRIME32_4;
+               p += 4;
+       }
+
+       while (p < b_end) {
+               h32 += (*p) * PRIME32_5;
+               h32 = xxh_rotl32(h32, 11) * PRIME32_1;
+               p++;
+       }
+
+       h32 ^= h32 >> 15;
+       h32 *= PRIME32_2;
+       h32 ^= h32 >> 13;
+       h32 *= PRIME32_3;
+       h32 ^= h32 >> 16;
+
+       return h32;
+}