From 7f20e2a8d2ba73a8b05742903f4ae42afec82bff Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet
Date: Tue, 14 Mar 2023 12:46:21 +0000
Subject: [PATCH] [libc][NFC] Move memcmp implementations in subfolders

---
 libc/src/string/memory_utils/CMakeLists.txt       |   6 +-
 .../memory_utils/aarch64/memcmp_implementations.h |  79 ++++++++++++++
 .../string/memory_utils/memcmp_implementations.h  | 120 ++-------------------
 .../memory_utils/x86_64/memcmp_implementations.h  | 110 +++++++++++++++++++
 utils/bazel/llvm-project-overlay/libc/BUILD.bazel |   6 +-
 5 files changed, 206 insertions(+), 115 deletions(-)
 create mode 100644 libc/src/string/memory_utils/aarch64/memcmp_implementations.h
 create mode 100644 libc/src/string/memory_utils/x86_64/memcmp_implementations.h

diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
index 5210958..30123b0 100644
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -2,12 +2,12 @@
 add_header_library(
   memory_utils
   HDRS
+    aarch64/memcmp_implementations.h
+    aarch64/memcpy_implementations.h
     bcmp_implementations.h
     bzero_implementations.h
     memcmp_implementations.h
     memcpy_implementations.h
-    aarch64/memcpy_implementations.h
-    x86_64/memcpy_implementations.h
     memmove_implementations.h
     memset_implementations.h
     op_aarch64.h
@@ -15,6 +15,8 @@ add_header_library(
     op_generic.h
     op_x86.h
     utils.h
+    x86_64/memcmp_implementations.h
+    x86_64/memcpy_implementations.h
   DEPS
     libc.src.__support.CPP.bit
     libc.src.__support.CPP.cstddef
diff --git a/libc/src/string/memory_utils/aarch64/memcmp_implementations.h b/libc/src/string/memory_utils/aarch64/memcmp_implementations.h
new file mode 100644
index 0000000..2c9308d
--- /dev/null
+++ b/libc/src/string/memory_utils/aarch64/memcmp_implementations.h
@@ -0,0 +1,79 @@
+//===-- Memcmp implementation for aarch64 -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_MEMCMP_IMPLEMENTATIONS_H
+#define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_MEMCMP_IMPLEMENTATIONS_H
+
+#include "src/__support/macros/config.h"       // LIBC_INLINE
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/string/memory_utils/op_aarch64.h"
+#include "src/string/memory_utils/op_generic.h"
+#include "src/string/memory_utils/utils.h" // MemcmpReturnType
+
+namespace __llvm_libc {
+
+// Generic fallback for `count > 16` (the dispatcher below handles smaller
+// sizes). For `count >= 384` the first 16-byte block is compared before
+// `align_to_next_boundary<16, Arg::P1>` is called and the 16-byte loop runs.
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
+  if (LIBC_UNLIKELY(count >= 384)) {
+    if (auto value = generic::Memcmp<16>::block(p1, p2))
+      return value;
+    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
+  }
+  return generic::Memcmp<16>::loop_and_tail(p1, p2, count);
+}
+
+// Helper used when `aarch64::kNeon` is set, for `count > 16`. Counts of 128
+// and above compare the first 16-byte block, call `align_to_next_boundary`
+// and run the 32-byte loop; smaller counts use 16- and 32-byte operations.
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
+  if (LIBC_UNLIKELY(count >= 128)) { // [128, ∞]
+    if (auto value = generic::Memcmp<16>::block(p1, p2))
+      return value;
+    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
+    return generic::Memcmp<32>::loop_and_tail(p1, p2, count);
+  }
+  if (generic::Bcmp<16>::block(p1, p2)) // [16, 16]
+    return generic::Memcmp<16>::block(p1, p2);
+  if (count < 32) // [17, 31]
+    return generic::Memcmp<16>::tail(p1, p2, count);
+  if (generic::Bcmp<16>::block(p1 + 16, p2 + 16)) // [32, 32]
+    return generic::Memcmp<16>::block(p1 + 16, p2 + 16);
+  if (count < 64) // [33, 63]
+    return generic::Memcmp<32>::tail(p1, p2, count);
+  // [64, 127]
+  return generic::Memcmp<16>::loop_and_tail(p1 + 32, p2 + 32, count - 32);
+}
+
+// Entry point for aarch64. Counts up to 16 are handled here with fixed-size
+// generic operations; larger counts go to the NEON helper when
+// `aarch64::kNeon` is set and to the generic helper otherwise.
+LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2,
+                                                   size_t count) {
+  if (count == 0)
+    return MemcmpReturnType::ZERO();
+  if (count == 1)
+    return generic::Memcmp<1>::block(p1, p2);
+  if (count == 2)
+    return generic::Memcmp<2>::block(p1, p2);
+  if (count == 3)
+    return generic::Memcmp<3>::block(p1, p2);
+  if (count <= 8)
+    return generic::Memcmp<4>::head_tail(p1, p2, count);
+  if (count <= 16)
+    return generic::Memcmp<8>::head_tail(p1, p2, count);
+  if constexpr (aarch64::kNeon)
+    return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
+  else
+    return inline_memcmp_generic_gt16(p1, p2, count);
+}
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_MEMCMP_IMPLEMENTATIONS_H
diff --git a/libc/src/string/memory_utils/memcmp_implementations.h b/libc/src/string/memory_utils/memcmp_implementations.h
index 01c524d..9ae994f 100644
--- a/libc/src/string/memory_utils/memcmp_implementations.h
+++ b/libc/src/string/memory_utils/memcmp_implementations.h
@@ -12,15 +12,17 @@
 #include "src/__support/common.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY LIBC_LOOP_NOUNROLL
 #include "src/__support/macros/properties/architectures.h"
-#include "src/string/memory_utils/op_aarch64.h"
-#include "src/string/memory_utils/op_builtin.h"
-#include "src/string/memory_utils/op_generic.h"
-#include "src/string/memory_utils/op_x86.h"
-#include "src/string/memory_utils/utils.h"
 
 #include <stddef.h> // size_t
 
+#if defined(LIBC_TARGET_ARCH_IS_X86)
+#include "src/string/memory_utils/x86_64/memcmp_implementations.h"
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#include "src/string/memory_utils/aarch64/memcmp_implementations.h"
+#endif
+
 namespace __llvm_libc {
+
 [[maybe_unused]] LIBC_INLINE MemcmpReturnType
 inline_memcmp_embedded_tiny(CPtr p1, CPtr p2, size_t count) {
   LIBC_LOOP_NOUNROLL
@@ -30,115 +32,11 @@ inline_memcmp_embedded_tiny(CPtr p1, CPtr p2, size_t count) {
   return MemcmpReturnType::ZERO();
 }
 
-#if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
-[[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
-  if (LIBC_UNLIKELY(count >= 384)) {
-    if (auto value = generic::Memcmp<16>::block(p1, p2))
-      return value;
-    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
-  }
-  return generic::Memcmp<16>::loop_and_tail(p1, p2, count);
-}
-#endif // defined(LIBC_TARGET_ARCH_IS_X86) ||
-       // defined(LIBC_TARGET_ARCH_IS_AARCH64)
-
-#if defined(LIBC_TARGET_ARCH_IS_X86)
-[[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_x86_sse2_gt16(CPtr p1, CPtr p2, size_t count) {
-  if (LIBC_UNLIKELY(count >= 384)) {
-    if (auto value = x86::sse2::Memcmp<16>::block(p1, p2))
-      return value;
-    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
-  }
-  return x86::sse2::Memcmp<16>::loop_and_tail(p1, p2, count);
-}
-
-[[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_x86_avx2_gt16(CPtr p1, CPtr p2, size_t count) {
-  if (count <= 32)
-    return x86::sse2::Memcmp<16>::head_tail(p1, p2, count);
-  if (count <= 64)
-    return x86::avx2::Memcmp<32>::head_tail(p1, p2, count);
-  if (count <= 128)
-    return x86::avx2::Memcmp<64>::head_tail(p1, p2, count);
-  if (LIBC_UNLIKELY(count >= 384)) {
-    if (auto value = x86::avx2::Memcmp<32>::block(p1, p2))
-      return value;
-    align_to_next_boundary<32, Arg::P1>(p1, p2, count);
-  }
-  return x86::avx2::Memcmp<32>::loop_and_tail(p1, p2, count);
-}
-
-[[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
-  if (count <= 32)
-    return x86::sse2::Memcmp<16>::head_tail(p1, p2, count);
-  if (count <= 64)
-    return x86::avx2::Memcmp<32>::head_tail(p1, p2, count);
-  if (count <= 128)
-    return x86::avx512bw::Memcmp<64>::head_tail(p1, p2, count);
-  if (LIBC_UNLIKELY(count >= 384)) {
-    if (auto value = x86::avx512bw::Memcmp<64>::block(p1, p2))
-      return value;
-    align_to_next_boundary<64, Arg::P1>(p1, p2, count);
-  }
-  return x86::avx512bw::Memcmp<64>::loop_and_tail(p1, p2, count);
-}
-
-#endif // defined(LIBC_TARGET_ARCH_IS_X86)
-
-#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
-[[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
-  if (LIBC_UNLIKELY(count >= 128)) { // [128, ∞]
-    if (auto value = generic::Memcmp<16>::block(p1, p2))
-      return value;
-    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
-    return generic::Memcmp<32>::loop_and_tail(p1, p2, count);
-  }
-  if (generic::Bcmp<16>::block(p1, p2)) // [16, 16]
-    return generic::Memcmp<16>::block(p1, p2);
-  if (count < 32) // [17, 31]
-    return generic::Memcmp<16>::tail(p1, p2, count);
-  if (generic::Bcmp<16>::block(p1 + 16, p2 + 16)) // [32, 32]
-    return generic::Memcmp<16>::block(p1 + 16, p2 + 16);
-  if (count < 64) // [33, 63]
-    return generic::Memcmp<32>::tail(p1, p2, count);
-  // [64, 127]
-  return generic::Memcmp<16>::loop_and_tail(p1 + 32, p2 + 32, count - 32);
-}
-#endif // defined(LIBC_TARGET_ARCH_IS_AARCH64)
-
 LIBC_INLINE MemcmpReturnType inline_memcmp(CPtr p1, CPtr p2, size_t count) {
-#if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
-  if (count == 0)
-    return MemcmpReturnType::ZERO();
-  if (count == 1)
-    return generic::Memcmp<1>::block(p1, p2);
-  if (count == 2)
-    return generic::Memcmp<2>::block(p1, p2);
-  if (count == 3)
-    return generic::Memcmp<3>::block(p1, p2);
-  if (count <= 8)
-    return generic::Memcmp<4>::head_tail(p1, p2, count);
-  if (count <= 16)
-    return generic::Memcmp<8>::head_tail(p1, p2, count);
 #if defined(LIBC_TARGET_ARCH_IS_X86)
-  if constexpr (x86::kAvx512BW)
-    return inline_memcmp_x86_avx512bw_gt16(p1, p2, count);
-  else if constexpr (x86::kAvx2)
-    return inline_memcmp_x86_avx2_gt16(p1, p2, count);
-  else if constexpr (x86::kSse2)
-    return inline_memcmp_x86_sse2_gt16(p1, p2, count);
-  else
-    return inline_memcmp_generic_gt16(p1, p2, count);
+  return inline_memcmp_x86(p1, p2, count);
 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
-  if constexpr (aarch64::kNeon)
-    return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
-  else
-    return inline_memcmp_generic_gt16(p1, p2, count);
-#endif
+  return inline_memcmp_aarch64(p1, p2, count);
 #else
   return inline_memcmp_embedded_tiny(p1, p2, count);
 #endif
diff --git a/libc/src/string/memory_utils/x86_64/memcmp_implementations.h b/libc/src/string/memory_utils/x86_64/memcmp_implementations.h
new file mode 100644
index 0000000..26de1d9
--- /dev/null
+++ b/libc/src/string/memory_utils/x86_64/memcmp_implementations.h
@@ -0,0 +1,110 @@
+//===-- Memcmp implementation for x86_64 ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LIBC_SRC_STRING_MEMORY_UTILS_X86_64_MEMCMP_IMPLEMENTATIONS_H
+#define LIBC_SRC_STRING_MEMORY_UTILS_X86_64_MEMCMP_IMPLEMENTATIONS_H
+
+#include "src/__support/macros/config.h"       // LIBC_INLINE
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/string/memory_utils/op_generic.h"
+#include "src/string/memory_utils/op_x86.h"
+#include "src/string/memory_utils/utils.h" // MemcmpReturnType
+
+namespace __llvm_libc {
+
+// Generic fallback for `count > 16` (the dispatcher below handles smaller
+// sizes). For `count >= 384` the first 16-byte block is compared before
+// `align_to_next_boundary<16, Arg::P1>` is called and the 16-byte loop runs.
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
+  if (LIBC_UNLIKELY(count >= 384)) {
+    if (auto value = generic::Memcmp<16>::block(p1, p2))
+      return value;
+    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
+  }
+  return generic::Memcmp<16>::loop_and_tail(p1, p2, count);
+}
+
+// SSE2 helper for `count > 16`. Same strategy as the generic fallback above,
+// using `x86::sse2::Memcmp<16>`.
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_x86_sse2_gt16(CPtr p1, CPtr p2, size_t count) {
+  if (LIBC_UNLIKELY(count >= 384)) {
+    if (auto value = x86::sse2::Memcmp<16>::block(p1, p2))
+      return value;
+    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
+  }
+  return x86::sse2::Memcmp<16>::loop_and_tail(p1, p2, count);
+}
+
+// AVX2 helper for `count > 16`. Counts up to 128 are resolved by one
+// `head_tail` call of increasing width; larger counts run the 32-byte loop,
+// preceded by a block compare and `align_to_next_boundary` if `count >= 384`.
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_x86_avx2_gt16(CPtr p1, CPtr p2, size_t count) {
+  if (count <= 32)
+    return x86::sse2::Memcmp<16>::head_tail(p1, p2, count);
+  if (count <= 64)
+    return x86::avx2::Memcmp<32>::head_tail(p1, p2, count);
+  if (count <= 128)
+    return x86::avx2::Memcmp<64>::head_tail(p1, p2, count);
+  if (LIBC_UNLIKELY(count >= 384)) {
+    if (auto value = x86::avx2::Memcmp<32>::block(p1, p2))
+      return value;
+    align_to_next_boundary<32, Arg::P1>(p1, p2, count);
+  }
+  return x86::avx2::Memcmp<32>::loop_and_tail(p1, p2, count);
+}
+
+// AVX512BW helper for `count > 16`. Same structure as the AVX2 helper, but
+// counts above 64 use `x86::avx512bw::Memcmp<64>` and
+// `align_to_next_boundary<64, Arg::P1>`.
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
+  if (count <= 32)
+    return x86::sse2::Memcmp<16>::head_tail(p1, p2, count);
+  if (count <= 64)
+    return x86::avx2::Memcmp<32>::head_tail(p1, p2, count);
+  if (count <= 128)
+    return x86::avx512bw::Memcmp<64>::head_tail(p1, p2, count);
+  if (LIBC_UNLIKELY(count >= 384)) {
+    if (auto value = x86::avx512bw::Memcmp<64>::block(p1, p2))
+      return value;
+    align_to_next_boundary<64, Arg::P1>(p1, p2, count);
+  }
+  return x86::avx512bw::Memcmp<64>::loop_and_tail(p1, p2, count);
+}
+
+// Entry point for x86. Counts up to 16 are handled here with fixed-size
+// generic operations; larger counts go to the first helper whose flag is set,
+// checked in the order `kAvx512BW`, `kAvx2`, `kSse2`, else the generic helper.
+LIBC_INLINE MemcmpReturnType inline_memcmp_x86(CPtr p1, CPtr p2, size_t count) {
+
+  if (count == 0)
+    return MemcmpReturnType::ZERO();
+  if (count == 1)
+    return generic::Memcmp<1>::block(p1, p2);
+  if (count == 2)
+    return generic::Memcmp<2>::block(p1, p2);
+  if (count == 3)
+    return generic::Memcmp<3>::block(p1, p2);
+  if (count <= 8)
+    return generic::Memcmp<4>::head_tail(p1, p2, count);
+  if (count <= 16)
+    return generic::Memcmp<8>::head_tail(p1, p2, count);
+  if constexpr (x86::kAvx512BW)
+    return inline_memcmp_x86_avx512bw_gt16(p1, p2, count);
+  else if constexpr (x86::kAvx2)
+    return inline_memcmp_x86_avx2_gt16(p1, p2, count);
+  else if constexpr (x86::kSse2)
+    return inline_memcmp_x86_sse2_gt16(p1, p2, count);
+  else
+    return inline_memcmp_generic_gt16(p1, p2, count);
+}
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_MEMORY_UTILS_X86_64_MEMCMP_IMPLEMENTATIONS_H
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index ab797c3..23c288d 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -1487,16 +1487,18 @@ libc_support_library(
         "src/string/memory_utils/utils.h",
     ],
     textual_hdrs = [
+        "src/string/memory_utils/aarch64/memcmp_implementations.h",
+        "src/string/memory_utils/aarch64/memcpy_implementations.h",
         "src/string/memory_utils/bcmp_implementations.h",
         "src/string/memory_utils/bzero_implementations.h",
         "src/string/memory_utils/memcmp_implementations.h",
         "src/string/memory_utils/memcpy_implementations.h",
-        "src/string/memory_utils/aarch64/memcpy_implementations.h",
-        "src/string/memory_utils/x86_64/memcpy_implementations.h",
         "src/string/memory_utils/memmove_implementations.h",
         "src/string/memory_utils/memset_implementations.h",
         "src/string/memory_utils/strcmp_implementations.h",
         "src/string/memory_utils/strstr_implementations.h",
+        "src/string/memory_utils/x86_64/memcmp_implementations.h",
+        "src/string/memory_utils/x86_64/memcpy_implementations.h",
     ],
     deps = [
         ":__support_common",
-- 
2.7.4