From ab45c1f21f63bfd0acb9e27a626ab33659918868 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Mon, 14 Jun 2021 09:33:39 +0000 Subject: [PATCH] Revert "[libc] Add a set of elementary operations" This reverts commit e63f27a3cf8129cb66b8350ad50bf19633554a6b. --- libc/src/string/CMakeLists.txt | 2 +- libc/src/string/aarch64/memcpy.cpp | 31 +- libc/src/string/memcpy.cpp | 25 +- libc/src/string/memory_utils/CMakeLists.txt | 3 +- libc/src/string/memory_utils/elements.h | 475 --------------------- libc/src/string/memory_utils/elements_x86.h | 151 ------- libc/src/string/memory_utils/memcpy_utils.h | 140 ++++++ libc/src/string/memory_utils/memset_utils.h | 83 +++- libc/src/string/x86_64/memcpy.cpp | 35 +- libc/test/src/string/memory_utils/CMakeLists.txt | 5 +- .../test/src/string/memory_utils/elements_test.cpp | 103 ----- .../src/string/memory_utils/memcpy_utils_test.cpp | 336 +++++++++++++++ .../src/string/memory_utils/memory_access_test.cpp | 231 ---------- 13 files changed, 584 insertions(+), 1036 deletions(-) delete mode 100644 libc/src/string/memory_utils/elements.h delete mode 100644 libc/src/string/memory_utils/elements_x86.h create mode 100644 libc/src/string/memory_utils/memcpy_utils.h delete mode 100644 libc/test/src/string/memory_utils/elements_test.cpp create mode 100644 libc/test/src/string/memory_utils/memcpy_utils_test.cpp delete mode 100644 libc/test/src/string/memory_utils/memory_access_test.cpp diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 4dd8ee0..f7a0406 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -194,7 +194,7 @@ function(add_implementation name impl_name) SRCS ${ADD_IMPL_SRCS} HDRS ${ADD_IMPL_HDRS} DEPENDS ${ADD_IMPL_DEPENDS} - COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS} "SHELL:-mllvm -combiner-global-alias-analysis" + COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS} ) get_fq_target_name(${impl_name} fq_target_name) set_target_properties(${fq_target_name} PROPERTIES REQUIRE_CPU_FEATURES "${ADD_IMPL_REQUIRE}") diff --git a/libc/src/string/aarch64/memcpy.cpp b/libc/src/string/aarch64/memcpy.cpp index 1a1fbbc..78988ec 100644 --- a/libc/src/string/aarch64/memcpy.cpp +++ b/libc/src/string/aarch64/memcpy.cpp @@ -8,19 +8,10 @@ #include "src/string/memcpy.h" #include "src/__support/common.h" -#include "src/string/memory_utils/elements.h" +#include "src/string/memory_utils/memcpy_utils.h" namespace __llvm_libc { -using _1 = scalar::UINT8; -using _2 = scalar::UINT16; -using _3 = Chained; -using _4 = scalar::UINT32; -using _8 = scalar::UINT64; -using _16 = Repeated; -using _32 = Repeated; -using _64 = Repeated; - // Design rationale // ================ // @@ -46,24 +37,24 @@ static void memcpy_aarch64(char *__restrict dst, const char *__restrict src, if (count == 0) return; if (count == 1) - return Copy<_1>(dst, src); + return CopyBlock<1>(dst, src); if (count == 2) - return Copy<_2>(dst, src); + return CopyBlock<2>(dst, src); if (count == 3) - return Copy<_3>(dst, src); + return CopyBlock<3>(dst, src); if (count == 4) - return Copy<_4>(dst, src); + return CopyBlock<4>(dst, src); if (count < 8) - return Copy>(dst, src, count); + return CopyBlockOverlap<4>(dst, src, count); if (count < 16) - return Copy>(dst, src, count); + return CopyBlockOverlap<8>(dst, src, count); if (count < 32) - return Copy>(dst, src, count); + return CopyBlockOverlap<16>(dst, src, count); if (count < 64) - return Copy>(dst, src, count); + return CopyBlockOverlap<32>(dst, src, count); if (count < 128) - return Copy>(dst, src, count); - return Copy::Then>>(dst, src, count); + return CopyBlockOverlap<64>(dst, src, count); + return CopySrcAlignedBlocks<64, 16>(dst, src, count); } LLVM_LIBC_FUNCTION(void *, memcpy, diff --git a/libc/src/string/memcpy.cpp b/libc/src/string/memcpy.cpp index 5e70e00..e050d7f 100644 --- a/libc/src/string/memcpy.cpp +++ b/libc/src/string/memcpy.cpp @@ -8,7 +8,7 @@ #include "src/string/memcpy.h" #include "src/__support/common.h" -#include "src/string/memory_utils/elements.h" +#include "src/string/memory_utils/memcpy_utils.h" namespace __llvm_libc { @@ -32,30 +32,27 @@ namespace __llvm_libc { // with little change on the code side. static void memcpy_impl(char *__restrict dst, const char *__restrict src, size_t count) { - // Use scalar strategies (_1, _2, _3 ...) - using namespace __llvm_libc::scalar; - if (count == 0) return; if (count == 1) - return Copy<_1>(dst, src); + return CopyBlock<1>(dst, src); if (count == 2) - return Copy<_2>(dst, src); + return CopyBlock<2>(dst, src); if (count == 3) - return Copy<_3>(dst, src); + return CopyBlock<3>(dst, src); if (count == 4) - return Copy<_4>(dst, src); + return CopyBlock<4>(dst, src); if (count < 8) - return Copy>(dst, src, count); + return CopyBlockOverlap<4>(dst, src, count); if (count < 16) - return Copy>(dst, src, count); + return CopyBlockOverlap<8>(dst, src, count); if (count < 32) - return Copy>(dst, src, count); + return CopyBlockOverlap<16>(dst, src, count); if (count < 64) - return Copy>(dst, src, count); + return CopyBlockOverlap<32>(dst, src, count); if (count < 128) - return Copy>(dst, src, count); - return Copy::Then>>(dst, src, count); + return CopyBlockOverlap<64>(dst, src, count); + return CopySrcAlignedBlocks<32>(dst, src, count); } LLVM_LIBC_FUNCTION(void *, memcpy, diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt index 4a550e5..327031a 100644 --- a/libc/src/string/memory_utils/CMakeLists.txt +++ b/libc/src/string/memory_utils/CMakeLists.txt @@ -2,5 +2,6 @@ add_header_library( memory_utils HDRS utils.h - elements.h + memcpy_utils.h + memset_utils.h ) diff --git a/libc/src/string/memory_utils/elements.h b/libc/src/string/memory_utils/elements.h deleted file mode 100644 index 90d935e..0000000 --- a/libc/src/string/memory_utils/elements.h +++ /dev/null @@ -1,475 +0,0 @@ -//===-- Elementary operations to compose memory primitives ----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H -#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H - -#include // size_t -#include // uint8_t, uint16_t, uint32_t, uint64_t - -#include "src/__support/endian.h" -#include "src/string/memory_utils/utils.h" - -namespace __llvm_libc { - -// Elementary Operations -// -------------------------------- -// We define abstract elementary operations acting on fixed chunks of memory. -// These are low level building blocks that are meant to be assembled to compose -// higher order abstractions. Each function is defined twice: once with -// fixed-size operations, and once with runtime-size operations. - -// Fixed-size copies from 'src' to 'dst'. -template -void Copy(char *__restrict dst, const char *__restrict src) { - Element::Copy(dst, src); -} -// Runtime-size copies from 'src' to 'dst'. -template -void Copy(char *__restrict dst, const char *__restrict src, size_t size) { - Element::Copy(dst, src, size); -} - -// Fixed-size equality between 'lhs' and 'rhs'. -template bool Equals(const char *lhs, const char *rhs) { - return Element::Equals(lhs, rhs); -} -// Runtime-size equality between 'lhs' and 'rhs'. -template -bool Equals(const char *lhs, const char *rhs, size_t size) { - return Element::Equals(lhs, rhs, size); -} - -// Fixed-size three-way comparison between 'lhs' and 'rhs'. -template -int ThreeWayCompare(const char *lhs, const char *rhs) { - return Element::ThreeWayCompare(lhs, rhs); -} -// Runtime-size three-way comparison between 'lhs' and 'rhs'. -template -int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { - return Element::ThreeWayCompare(lhs, rhs, size); -} - -// Fixed-size initialization. -template -void SplatSet(char *dst, const unsigned char value) { - Element::SplatSet(dst, value); -} -// Runtime-size initialization. -template -void SplatSet(char *dst, const unsigned char value, size_t size) { - Element::SplatSet(dst, value, size); -} - -// Fixed-size Higher-Order Operations -// ---------------------------------- -// - Repeated: Repeat the operation several times in a row. -// - Chained: Chain the operation of several types. - -// Repeat the operation several times in a row. -template struct Repeated { - static constexpr size_t kSize = ElementCount * Element::kSize; - - static void Copy(char *__restrict dst, const char *__restrict src) { - for (size_t i = 0; i < ElementCount; ++i) { - const size_t offset = i * Element::kSize; - Element::Copy(dst + offset, src + offset); - } - } - - static bool Equals(const char *lhs, const char *rhs) { - for (size_t i = 0; i < ElementCount; ++i) { - const size_t offset = i * Element::kSize; - if (!Element::Equals(lhs + offset, rhs + offset)) - return false; - } - return true; - } - - static int ThreeWayCompare(const char *lhs, const char *rhs) { - for (size_t i = 0; i < ElementCount; ++i) { - const size_t offset = i * Element::kSize; - // We make the assumption that 'Equals' si cheaper than 'ThreeWayCompare'. - if (Element::Equals(lhs + offset, rhs + offset)) - continue; - return Element::ThreeWayCompare(lhs + offset, rhs + offset); - } - return 0; - } - - static void SplatSet(char *dst, const unsigned char value) { - for (size_t i = 0; i < ElementCount; ++i) { - const size_t offset = i * Element::kSize; - Element::SplatSet(dst + offset, value); - } - } -}; - -// Chain the operation of several types. -// For instance, to handle a 3 bytes operation, one can use: -// Chained::Operation(); -template struct Chained; - -template struct Chained { - static constexpr size_t kSize = Head::kSize + Chained::kSize; - - static void Copy(char *__restrict dst, const char *__restrict src) { - Chained::Copy(dst + Head::kSize, src + Head::kSize); - __llvm_libc::Copy(dst, src); - } - - static bool Equals(const char *lhs, const char *rhs) { - if (!__llvm_libc::Equals(lhs, rhs)) - return false; - return Chained::Equals(lhs + Head::kSize, rhs + Head::kSize); - } - - static int ThreeWayCompare(const char *lhs, const char *rhs) { - if (__llvm_libc::Equals(lhs, rhs)) - return Chained::ThreeWayCompare(lhs + Head::kSize, - rhs + Head::kSize); - return __llvm_libc::ThreeWayCompare(lhs, rhs); - } - - static void SplatSet(char *dst, const unsigned char value) { - Chained::SplatSet(dst + Head::kSize, value); - __llvm_libc::SplatSet(dst, value); - } -}; - -template <> struct Chained<> { - static constexpr size_t kSize = 0; - static void Copy(char *__restrict dst, const char *__restrict src) {} - static bool Equals(const char *lhs, const char *rhs) { return true; } - static int ThreeWayCompare(const char *lhs, const char *rhs) { return 0; } - static void SplatSet(char *dst, const unsigned char value) {} -}; - -// Runtime-size Higher-Order Operations -// ------------------------------------ -// - Tail: Perform the operation on the last 'T::kSize' bytes of the buffer. -// - HeadTail: Perform the operation on the first and last 'T::kSize' bytes -// of the buffer. -// - Loop: Perform a loop of fixed-sized operations. - -// Perform the operation on the last 'T::kSize' bytes of the buffer. -// -// e.g. with -// [1234567812345678123] -// [__XXXXXXXXXXXXXX___] -// [________XXXXXXXX___] -// -// Precondition: `size >= T::kSize`. -template struct Tail { - static void Copy(char *__restrict dst, const char *__restrict src, - size_t size) { - return T::Copy(dst + offset(size), src + offset(size)); - } - - static bool Equals(const char *lhs, const char *rhs, size_t size) { - return T::Equals(lhs + offset(size), rhs + offset(size)); - } - - static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { - return T::ThreeWayCompare(lhs + offset(size), rhs + offset(size)); - } - - static void SplatSet(char *dst, const unsigned char value, size_t size) { - return T::SplatSet(dst + offset(size), value); - } - - static size_t offset(size_t size) { return size - T::kSize; } -}; - -// Perform the operation on the first and last 'T::kSize' bytes of the buffer. -// This is useful for overlapping operations. -// -// e.g. with -// [1234567812345678123] -// [__XXXXXXXXXXXXXX___] -// [__XXXXXXXX_________] -// [________XXXXXXXX___] -// -// Precondition: `size >= T::kSize && size <= 2 x T::kSize`. -template struct HeadTail { - static void Copy(char *__restrict dst, const char *__restrict src, - size_t size) { - T::Copy(dst, src); - Tail::Copy(dst, src, size); - } - - static bool Equals(const char *lhs, const char *rhs, size_t size) { - if (!T::Equals(lhs, rhs)) - return false; - return Tail::Equals(lhs, rhs, size); - } - - static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { - if (const int result = T::ThreeWayCompare(lhs, rhs)) - return result; - return Tail::ThreeWayCompare(lhs, rhs, size); - } - - static void SplatSet(char *dst, const unsigned char value, size_t size) { - T::SplatSet(dst, value); - Tail::SplatSet(dst, value, size); - } -}; - -// Simple loop ending with a Tail operation. -// -// e.g. with -// [12345678123456781234567812345678] -// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___] -// [__XXXXXXXX_______________________] -// [__________XXXXXXXX_______________] -// [__________________XXXXXXXX_______] -// [______________________XXXXXXXX___] -// -// Precondition: -// - size >= T::kSize -template struct Loop { - static void Copy(char *__restrict dst, const char *__restrict src, - size_t size) { - for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize) - T::Copy(dst + offset, src + offset); - Tail::Copy(dst, src, size); - } - - static bool Equals(const char *lhs, const char *rhs, size_t size) { - for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize) - if (!T::Equals(lhs + offset, rhs + offset)) - return false; - return Tail::Equals(lhs, rhs, size); - } - - static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { - for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize) - if (const int result = T::ThreeWayCompare(lhs + offset, rhs + offset)) - return result; - return Tail::ThreeWayCompare(lhs, rhs, size); - } - - static void SplatSet(char *dst, const unsigned char value, size_t size) { - for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize) - T::SplatSet(dst + offset, value); - Tail::SplatSet(dst, value, size); - } -}; - -enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 }; - -namespace internal { - -// Provides a specialized Bump function that adjusts pointers and size so first -// argument (resp. second argument) gets aligned to Alignment. -// We make sure the compiler knows about the adjusted pointer alignment. -template struct AlignHelper {}; - -template struct AlignHelper { - template - static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) { - const intptr_t offset = offset_to_next_aligned(p1ref); - p1ref += offset; - p2ref += offset; - size -= offset; - p1ref = assume_aligned(p1ref); - } -}; - -template struct AlignHelper { - template - static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) { - const intptr_t offset = offset_to_next_aligned(p2ref); - p1ref += offset; - p2ref += offset; - size -= offset; - p2ref = assume_aligned(p2ref); - } -}; - -} // namespace internal - -// An alignment operation that: -// - executes the 'AlignmentT' operation -// - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected -// pointer gets aligned, size is decreased accordingly. -// - calls the 'NextT' operation. -// -// e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as: -// Copy::Then>>(dst, src, count); -template struct Align { -private: - static constexpr size_t Alignment = AlignmentT::kSize; - static_assert(Alignment > 1, "Alignment must be more than 1"); - static_assert(is_power2(Alignment), "Alignment must be a power of 2"); - -public: - template struct Then { - static void Copy(char *__restrict dst, const char *__restrict src, - size_t size) { - AlignmentT::Copy(dst, src); - internal::AlignHelper::Bump(dst, src, size); - NextT::Copy(dst, src, size); - } - - static bool Equals(const char *lhs, const char *rhs, size_t size) { - if (!AlignmentT::Equals(lhs, rhs)) - return false; - internal::AlignHelper::Bump(lhs, rhs, size); - return NextT::Equals(lhs, rhs, size); - } - - static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { - if (const int result = AlignmentT::ThreeWayCompare(lhs, rhs)) - return result; - internal::AlignHelper::Bump(lhs, rhs, size); - return NextT::ThreeWayCompare(lhs, rhs, size); - } - - static void SplatSet(char *dst, const unsigned char value, size_t size) { - AlignmentT::SplatSet(dst, value); - char *dummy = nullptr; - internal::AlignHelper::Bump(dst, dummy, size); - NextT::SplatSet(dst, value, size); - } - }; -}; - -// Fixed-size Builtin Operations -// ----------------------------- -// Note: Do not use 'builtin' right now as it requires the implementation of the -// `_inline` versions of all the builtins. Theoretically, Clang can still turn -// them into calls to the C library leading to reentrancy problems. -namespace builtin { -template struct Builtin { - static constexpr size_t kSize = Size; - - static void Copy(char *__restrict dst, const char *__restrict src) { - __builtin_memcpy_inline(dst, src, kSize); - } - - static bool Equals(const char *lhs, const char *rhs) { - return __builtin_memcmp(lhs, rhs, kSize) == 0; - } - - static int ThreeWayCompare(const char *lhs, const char *rhs) { - return __builtin_memcmp(lhs, rhs, kSize); - } - - static void SplatSet(char *dst, const unsigned char value) { - __builtin_memset(dst, value, kSize); - } -}; - -using _1 = Builtin<1>; -using _2 = Builtin<2>; -using _3 = Builtin<3>; -using _4 = Builtin<4>; -using _8 = Builtin<8>; -using _16 = Builtin<16>; -using _32 = Builtin<32>; -using _64 = Builtin<64>; -using _128 = Builtin<128>; - -} // namespace builtin - -// Fixed-size Scalar Operations -// ---------------------------- -namespace scalar { - -// The Scalar type makes use of simple sized integers. -template struct Scalar { - static constexpr size_t kSize = sizeof(T); - - static void Copy(char *__restrict dst, const char *__restrict src) { - Store(dst, Load(src)); - } - - static bool Equals(const char *lhs, const char *rhs) { - return Load(lhs) == Load(rhs); - } - - static int ThreeWayCompare(const char *lhs, const char *rhs) { - return ScalarThreeWayCompare(Load(lhs), Load(rhs)); - } - - static void SplatSet(char *dst, const unsigned char value) { - Store(dst, GetSplattedValue(value)); - } - -private: - static T Load(const char *ptr) { - T value; - __builtin_memcpy_inline(&value, ptr, kSize); - return value; - } - static void Store(char *ptr, T value) { - __builtin_memcpy_inline(ptr, &value, kSize); - } - static T GetSplattedValue(const unsigned char value) { - return T(~0) / T(0xFF) * T(value); - } - static int ScalarThreeWayCompare(T a, T b); -}; - -template <> -inline int Scalar::ScalarThreeWayCompare(uint8_t a, uint8_t b) { - const int16_t la = Endian::ToBigEndian(a); - const int16_t lb = Endian::ToBigEndian(b); - return la - lb; -} -template <> -inline int Scalar::ScalarThreeWayCompare(uint16_t a, uint16_t b) { - const int32_t la = Endian::ToBigEndian(a); - const int32_t lb = Endian::ToBigEndian(b); - return la - lb; -} -template <> -inline int Scalar::ScalarThreeWayCompare(uint32_t a, uint32_t b) { - const int64_t la = Endian::ToBigEndian(a); - const int64_t lb = Endian::ToBigEndian(b); - if (la < lb) - return -1; - if (la > lb) - return 1; - return 0; -} -template <> -inline int Scalar::ScalarThreeWayCompare(uint64_t a, uint64_t b) { - const __int128_t la = Endian::ToBigEndian(a); - const __int128_t lb = Endian::ToBigEndian(b); - if (la < lb) - return -1; - if (la > lb) - return 1; - return 0; -} - -using UINT8 = Scalar; // 1 Byte -using UINT16 = Scalar; // 2 Bytes -using UINT32 = Scalar; // 4 Bytes -using UINT64 = Scalar; // 8 Bytes - -using _1 = UINT8; -using _2 = UINT16; -using _3 = Chained; -using _4 = UINT32; -using _8 = UINT64; -using _16 = Repeated<_8, 2>; -using _32 = Repeated<_8, 4>; -using _64 = Repeated<_8, 8>; -using _128 = Repeated<_8, 16>; - -} // namespace scalar -} // namespace __llvm_libc - -#include - -#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H diff --git a/libc/src/string/memory_utils/elements_x86.h b/libc/src/string/memory_utils/elements_x86.h deleted file mode 100644 index 07c178d..0000000 --- a/libc/src/string/memory_utils/elements_x86.h +++ /dev/null @@ -1,151 +0,0 @@ -//===-- Elementary operations for x86 -------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H -#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H - -#include // size_t -#include // uint8_t, uint16_t, uint32_t, uint64_t - -#ifdef __SSE2__ -#include -#endif // __SSE2__ - -#include "src/string/memory_utils/elements.h" // __llvm_libc::scalar - -// Fixed-size Vector Operations -// ---------------------------- - -namespace __llvm_libc { -namespace x86 { - -#ifdef __SSE2__ -template struct Vector : public Base { - static void Copy(char *dst, const char *src) { - Base::Store(dst, Base::Load(src)); - } - - static bool Equals(const char *a, const char *b) { - return Base::NotEqualMask(Base::Load(a), Base::Load(b)) == 0; - } - - static int ThreeWayCompare(const char *a, const char *b) { - const auto mask = Base::NotEqualMask(Base::Load(a), Base::Load(b)); - if (!mask) - return 0; - return CharDiff(a, b, mask); - } - - static void SplatSet(char *dst, const unsigned char value) { - Base::Store(dst, Base::GetSplattedValue(value)); - } - - static int CharDiff(const char *a, const char *b, uint64_t mask) { - const size_t diff_index = __builtin_ctzl(mask); - const int ca = (unsigned char)a[diff_index]; - const int cb = (unsigned char)b[diff_index]; - return ca - cb; - } -}; - -struct M128 { - static constexpr size_t kSize = 16; - using T = char __attribute__((__vector_size__(kSize))); - static uint16_t mask(T value) { return _mm_movemask_epi8(value); } - static uint16_t NotEqualMask(T a, T b) { return mask(a != b); } - static T Load(const char *ptr) { - return _mm_loadu_si128(reinterpret_cast<__m128i_u const *>(ptr)); - } - static void Store(char *ptr, T value) { - return _mm_storeu_si128(reinterpret_cast<__m128i_u *>(ptr), value); - } - static T GetSplattedValue(const char v) { - const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; - return splatted; - } -}; - -using Vector128 = Vector; // 16 Bytes - -#ifdef __AVX2__ -struct M256 { - static constexpr size_t kSize = 32; - using T = char __attribute__((__vector_size__(kSize))); - static uint32_t mask(T value) { return _mm256_movemask_epi8(value); } - static uint32_t NotEqualMask(T a, T b) { return mask(a != b); } - static T Load(const char *ptr) { - return _mm256_loadu_si256(reinterpret_cast<__m256i const *>(ptr)); - } - static void Store(char *ptr, T value) { - return _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), value); - } - static T GetSplattedValue(const char v) { - const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, - v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; - return splatted; - } -}; - -using Vector256 = Vector; // 32 Bytes - -#if defined(__AVX512F__) and defined(__AVX512BW__) -struct M512 { - static constexpr size_t kSize = 64; - using T = char __attribute__((__vector_size__(kSize))); - static uint64_t NotEqualMask(T a, T b) { - return _mm512_cmpneq_epi8_mask(a, b); - } - static T Load(const char *ptr) { return _mm512_loadu_epi8(ptr); } - static void Store(char *ptr, T value) { - return _mm512_storeu_epi8(ptr, value); - } - static T GetSplattedValue(const char v) { - const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, - v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, - v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, - v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; - return splatted; - } -}; -using Vector512 = Vector; - -#endif // defined(__AVX512F__) and defined(__AVX512BW__) -#endif // __AVX2__ -#endif // __SSE2__ - -using _1 = __llvm_libc::scalar::_1; -using _2 = __llvm_libc::scalar::_2; -using _3 = __llvm_libc::scalar::_3; -using _4 = __llvm_libc::scalar::_4; -using _8 = __llvm_libc::scalar::_8; -#if defined(__AVX512F__) && defined(__AVX512BW__) -using _16 = __llvm_libc::x86::Vector128; -using _32 = __llvm_libc::x86::Vector256; -using _64 = __llvm_libc::x86::Vector512; -using _128 = __llvm_libc::Repeated<_64, 2>; -#elif defined(__AVX2__) -using _16 = __llvm_libc::x86::Vector128; -using _32 = __llvm_libc::x86::Vector256; -using _64 = __llvm_libc::Repeated<_32, 2>; -using _128 = __llvm_libc::Repeated<_32, 4>; -#elif defined(__SSE2__) -using _16 = __llvm_libc::x86::Vector128; -using _32 = __llvm_libc::Repeated<_16, 2>; -using _64 = __llvm_libc::Repeated<_16, 4>; -using _128 = __llvm_libc::Repeated<_16, 8>; -#else -using _16 = __llvm_libc::Repeated<_8, 2>; -using _32 = __llvm_libc::Repeated<_8, 4>; -using _64 = __llvm_libc::Repeated<_8, 8>; -using _128 = __llvm_libc::Repeated<_8, 16>; -#endif - -} // namespace x86 -} // namespace __llvm_libc - -#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H diff --git a/libc/src/string/memory_utils/memcpy_utils.h b/libc/src/string/memory_utils/memcpy_utils.h new file mode 100644 index 0000000..23836bb --- /dev/null +++ b/libc/src/string/memory_utils/memcpy_utils.h @@ -0,0 +1,140 @@ +//===-- Memcpy utils --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_UTILS_H +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_UTILS_H + +#include "src/__support/sanitizer.h" +#include "src/string/memory_utils/utils.h" +#include // size_t + +// __builtin_memcpy_inline guarantees to never call external functions. +// Unfortunately it is not widely available. +#ifdef __clang__ +#if __has_builtin(__builtin_memcpy_inline) +#define USE_BUILTIN_MEMCPY_INLINE +#endif +#elif defined(__GNUC__) +#define USE_BUILTIN_MEMCPY +#endif + +namespace __llvm_libc { + +// This is useful for testing. +#if defined(LLVM_LIBC_MEMCPY_MONITOR) +extern "C" void LLVM_LIBC_MEMCPY_MONITOR(char *__restrict, + const char *__restrict, size_t); +#endif + +// Copies `kBlockSize` bytes from `src` to `dst` using a for loop. +// This code requires the use of `-fno-buitin-memcpy` to prevent the compiler +// from turning the for-loop back into `__builtin_memcpy`. +template +static void ForLoopCopy(char *__restrict dst, const char *__restrict src) { + for (size_t i = 0; i < kBlockSize; ++i) + dst[i] = src[i]; +} + +// Copies `kBlockSize` bytes from `src` to `dst`. +template +static void CopyBlock(char *__restrict dst, const char *__restrict src) { +#if defined(LLVM_LIBC_MEMCPY_MONITOR) + LLVM_LIBC_MEMCPY_MONITOR(dst, src, kBlockSize); +#elif LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER + ForLoopCopy(dst, src); +#elif defined(USE_BUILTIN_MEMCPY_INLINE) + __builtin_memcpy_inline(dst, src, kBlockSize); +#elif defined(USE_BUILTIN_MEMCPY) + __builtin_memcpy(dst, src, kBlockSize); +#else + ForLoopCopy(dst, src); +#endif +} + +// Copies `kBlockSize` bytes from `src + count - kBlockSize` to +// `dst + count - kBlockSize`. +// Precondition: `count >= kBlockSize`. +template +static void CopyLastBlock(char *__restrict dst, const char *__restrict src, + size_t count) { + const size_t offset = count - kBlockSize; + CopyBlock(dst + offset, src + offset); +} + +// Copies `kBlockSize` bytes twice with an overlap between the two. +// +// [1234567812345678123] +// [__XXXXXXXXXXXXXX___] +// [__XXXXXXXX_________] +// [________XXXXXXXX___] +// +// Precondition: `count >= kBlockSize && count <= kBlockSize`. +template +static void CopyBlockOverlap(char *__restrict dst, const char *__restrict src, + size_t count) { + CopyBlock(dst, src); + CopyLastBlock(dst, src, count); +} + +// Copies `count` bytes by blocks of `kBlockSize` bytes. +// Copies at the start and end of the buffer are unaligned. +// Copies in the middle of the buffer are aligned to `kAlignment`. +// +// e.g. with +// [12345678123456781234567812345678] +// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___] +// [__XXXX___________________________] +// [_____XXXXXXXX____________________] +// [_____________XXXXXXXX____________] +// [_____________________XXXXXXXX____] +// [______________________XXXXXXXX___] +// +// Precondition: `kAlignment <= kBlockSize` +// `count > 2 * kBlockSize` for efficiency. +// `count >= kAlignment` for correctness. +template +static void CopySrcAlignedBlocks(char *__restrict dst, + const char *__restrict src, size_t count) { + static_assert(is_power2(kAlignment), "kAlignment must be a power of two"); + static_assert(is_power2(kBlockSize), "kBlockSize must be a power of two"); + static_assert(kAlignment <= kBlockSize, + "kAlignment must be less or equal to block size"); + CopyBlock(dst, src); // Copy first block + + // Copy aligned blocks + const size_t ofla = offset_from_last_aligned(src); + const size_t limit = count + ofla - kBlockSize; + for (size_t offset = kAlignment; offset < limit; offset += kBlockSize) + CopyBlock(dst - ofla + offset, + assume_aligned(src - ofla + offset)); + + CopyLastBlock(dst, src, count); // Copy last block +} + +template +static void CopyDstAlignedBlocks(char *__restrict dst, + const char *__restrict src, size_t count) { + static_assert(is_power2(kAlignment), "kAlignment must be a power of two"); + static_assert(is_power2(kBlockSize), "kBlockSize must be a power of two"); + static_assert(kAlignment <= kBlockSize, + "kAlignment must be less or equal to block size"); + CopyBlock(dst, src); // Copy first block + + // Copy aligned blocks + const size_t ofla = offset_from_last_aligned(dst); + const size_t limit = count + ofla - kBlockSize; + for (size_t offset = kAlignment; offset < limit; offset += kBlockSize) + CopyBlock(assume_aligned(dst - ofla + offset), + src - ofla + offset); + + CopyLastBlock(dst, src, count); // Copy last block +} + +} // namespace __llvm_libc + +#endif // LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_UTILS_H diff --git a/libc/src/string/memory_utils/memset_utils.h b/libc/src/string/memory_utils/memset_utils.h index 4826670..7024a6c7 100644 --- a/libc/src/string/memory_utils/memset_utils.h +++ b/libc/src/string/memory_utils/memset_utils.h @@ -6,16 +6,70 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H -#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H +#ifndef LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H -#include "src/string/memory_utils/elements.h" #include "src/string/memory_utils/utils.h" #include // size_t namespace __llvm_libc { +// Sets `kBlockSize` bytes starting from `src` to `value`. +template static void SetBlock(char *dst, unsigned value) { + // Theoretically the compiler is allowed to call memset here and end up with a + // recursive call, practically it doesn't happen, however this should be + // replaced with a __builtin_memset_inline once it's available in clang. + __builtin_memset(dst, value, kBlockSize); +} + +// Sets `kBlockSize` bytes from `src + count - kBlockSize` to `value`. +// Precondition: `count >= kBlockSize`. +template +static void SetLastBlock(char *dst, unsigned value, size_t count) { + SetBlock(dst + count - kBlockSize, value); +} + +// Sets `kBlockSize` bytes twice with an overlap between the two. +// +// [1234567812345678123] +// [__XXXXXXXXXXXXXX___] +// [__XXXXXXXX_________] +// [________XXXXXXXX___] +// +// Precondition: `count >= kBlockSize && count <= kBlockSize`. +template +static void SetBlockOverlap(char *dst, unsigned value, size_t count) { + SetBlock(dst, value); + SetLastBlock(dst, value, count); +} + +// Sets `count` bytes by blocks of `kBlockSize` bytes. +// Sets at the start and end of the buffer are unaligned. +// Sets in the middle of the buffer are aligned to `kBlockSize`. +// +// e.g. with +// [12345678123456781234567812345678] +// [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___] +// [__XXXXXXXX______________________] +// [________XXXXXXXX________________] +// [________________XXXXXXXX________] +// [_____________________XXXXXXXX___] +// +// Precondition: `count > 2 * kBlockSize` for efficiency. +// `count >= kBlockSize` for correctness. +template +static void SetAlignedBlocks(char *dst, unsigned value, size_t count) { + SetBlock(dst, value); // Set first block + + // Set aligned blocks + size_t offset = kBlockSize - offset_from_last_aligned(dst); + for (; offset + kBlockSize < count; offset += kBlockSize) + SetBlock(dst + offset, value); + + SetLastBlock(dst, value, count); // Set last block +} + // A general purpose implementation assuming cheap unaligned writes for sizes: // 1, 2, 4, 8, 16, 32 and 64 Bytes. Note that some architecture can't store 32 // or 64 Bytes at a time, the compiler will expand them as needed. @@ -52,27 +106,26 @@ inline static void GeneralPurposeMemset(char *dst, unsigned char value, if (count == 0) return; if (count == 1) - return SplatSet(dst, value); + return SetBlock<1>(dst, value); if (count == 2) - return SplatSet(dst, value); + return SetBlock<2>(dst, value); if (count == 3) - return SplatSet(dst, value); + return SetBlock<3>(dst, value); if (count == 4) - return SplatSet(dst, value); + return SetBlock<4>(dst, value); if (count <= 8) - return SplatSet>(dst, value, count); + return SetBlockOverlap<4>(dst, value, count); if (count <= 16) - return SplatSet>(dst, value, count); + return SetBlockOverlap<8>(dst, value, count); if (count <= 32) - return SplatSet>(dst, value, count); + return SetBlockOverlap<16>(dst, value, count); if (count <= 64) - return SplatSet>(dst, value, count); + return SetBlockOverlap<32>(dst, value, count); if (count <= 128) - return SplatSet>(dst, value, count); - return SplatSet::Then>>( - dst, value, count); + return SetBlockOverlap<64>(dst, value, count); + return SetAlignedBlocks<32>(dst, value, count); } } // namespace __llvm_libc -#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H +#endif // LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H diff --git a/libc/src/string/x86_64/memcpy.cpp b/libc/src/string/x86_64/memcpy.cpp index 7f6e5b6..bbd8fe9 100644 --- a/libc/src/string/x86_64/memcpy.cpp +++ b/libc/src/string/x86_64/memcpy.cpp @@ -8,7 +8,7 @@ #include "src/string/memcpy.h" #include "src/__support/common.h" -#include "src/string/memory_utils/elements.h" +#include "src/string/memory_utils/memcpy_utils.h" namespace __llvm_libc { @@ -29,11 +29,8 @@ constexpr size_t kRepMovsBSize = // Whether target supports AVX instructions. constexpr bool kHasAvx = LLVM_LIBC_IS_DEFINED(__AVX__); -#ifdef __AVX__ -using LoopBlockSize = __llvm_libc::x86::_64; -#else -using LoopBlockSize = __llvm_libc::x86::_32; -#endif +// The chunk size used for the loop copy strategy. +constexpr size_t kLoopCopyBlockSize = kHasAvx ? 64 : 32; static void CopyRepMovsb(char *__restrict dst, const char *__restrict src, size_t count) { @@ -64,37 +61,33 @@ static void CopyRepMovsb(char *__restrict dst, const char *__restrict src, // with little change on the code side. static void memcpy_x86(char *__restrict dst, const char *__restrict src, size_t count) { - // Use x86 strategies (_1, _2, _3 ...) - using namespace __llvm_libc::x86; - if (kUseOnlyRepMovsb) return CopyRepMovsb(dst, src, count); if (count == 0) return; if (count == 1) - return Copy<_1>(dst, src); + return CopyBlock<1>(dst, src); if (count == 2) - return Copy<_2>(dst, src); + return CopyBlock<2>(dst, src); if (count == 3) - return Copy<_3>(dst, src); + return CopyBlock<3>(dst, src); if (count == 4) - return Copy<_4>(dst, src); + return CopyBlock<4>(dst, src); if (count < 8) - return Copy>(dst, src, count); + return CopyBlockOverlap<4>(dst, src, count); if (count < 16) - return Copy>(dst, src, count); + return CopyBlockOverlap<8>(dst, src, count); if (count < 32) - return Copy>(dst, src, count); + return CopyBlockOverlap<16>(dst, src, count); if (count < 64) - return Copy>(dst, src, count); + return CopyBlockOverlap<32>(dst, src, count); if (count < 128) - return Copy>(dst, src, count); + return CopyBlockOverlap<64>(dst, src, count); if (kHasAvx && count < 256) - return Copy>(dst, src, count); + return CopyBlockOverlap<128>(dst, src, count); if (count <= kRepMovsBSize) - return Copy::Then>>(dst, src, - count); + return CopyDstAlignedBlocks(dst, src, count); return CopyRepMovsb(dst, src, count); } diff --git a/libc/test/src/string/memory_utils/CMakeLists.txt b/libc/test/src/string/memory_utils/CMakeLists.txt index e270972..068a9ec 100644 --- a/libc/test/src/string/memory_utils/CMakeLists.txt +++ b/libc/test/src/string/memory_utils/CMakeLists.txt @@ -3,14 +3,11 @@ add_libc_unittest( SUITE libc_string_unittests SRCS - elements_test.cpp - memory_access_test.cpp utils_test.cpp + memcpy_utils_test.cpp DEPENDS libc.src.string.memory_utils.memory_utils libc.utils.CPP.standalone_cpp - COMPILE_OPTIONS - ${LIBC_COMPILE_OPTIONS_NATIVE} ) target_compile_definitions( diff --git a/libc/test/src/string/memory_utils/elements_test.cpp b/libc/test/src/string/memory_utils/elements_test.cpp deleted file mode 100644 index 120cd6f..0000000 --- a/libc/test/src/string/memory_utils/elements_test.cpp +++ /dev/null @@ -1,103 +0,0 @@ -//===-- Unittests for memory_utils ----------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/string/memory_utils/elements.h" -#include "utils/CPP/Array.h" -#include "utils/UnitTest/Test.h" - -namespace __llvm_libc { - -// Registering Types -using FixedSizeTypes = testing::TypeList< -#ifdef __SSE2__ - x86::Vector128, // -#endif // __SSE2__ -#ifdef __AVX2__ - x86::Vector256, // -#endif // __AVX2__ -#if defined(__AVX512F__) and defined(__AVX512BW__) - x86::Vector512, // -#endif // defined(__AVX512F__) and defined(__AVX512BW__) - scalar::UINT8, // - scalar::UINT16, // - scalar::UINT32, // - scalar::UINT64, // - Repeated, // - Repeated, // - Repeated, // - Repeated, // - Repeated, // - Chained, // - Chained, // - builtin::_1, // - builtin::_2, // - builtin::_3, // - builtin::_4, // - builtin::_8 // - >; - -char GetRandomChar() { - static constexpr const uint64_t a = 1103515245; - static constexpr const uint64_t c = 12345; - static constexpr const uint64_t m = 1ULL << 31; - static uint64_t seed = 123456789; - seed = (a * seed + c) % m; - return seed; -} - -template using Buffer = cpp::Array; -template Buffer GetRandomBuffer() { - Buffer buffer; - for (auto ¤t : buffer) - current = GetRandomChar(); - return buffer; -} - -TYPED_TEST(LlvmLibcMemoryElements, Copy, FixedSizeTypes) { - Buffer Dst; - const auto buffer = GetRandomBuffer(); - Copy(Dst.data(), buffer.data()); - for (size_t i = 0; i < ParamType::kSize; ++i) - EXPECT_EQ(Dst[i], buffer[i]); -} - -TYPED_TEST(LlvmLibcMemoryElements, Equals, FixedSizeTypes) { - const auto buffer = GetRandomBuffer(); - EXPECT_TRUE(Equals(buffer.data(), buffer.data())); -} - -TYPED_TEST(LlvmLibcMemoryElements, ThreeWayCompare, FixedSizeTypes) { - Buffer initial; - for (auto &c : initial) - c = 5; - - // Testing equality - EXPECT_EQ(ThreeWayCompare(initial.data(), initial.data()), 0); - - // Testing all mismatching positions - for (size_t i = 0; i < ParamType::kSize; ++i) { - auto copy = initial; - ++copy[i]; // Copy is now lexicographycally greated than initial - const auto *less = initial.data(); - const auto *greater = copy.data(); - EXPECT_LT(ThreeWayCompare(less, greater), 0); - EXPECT_GT(ThreeWayCompare(greater, less), 0); - } -} - -TYPED_TEST(LlvmLibcMemoryElements, Splat, FixedSizeTypes) { - Buffer Dst; - const cpp::Array values = {char(0x00), char(0x7F), char(0xFF)}; - for (char value : values) { - SplatSet(Dst.data(), value); - for (size_t i = 0; i < ParamType::kSize; ++i) - EXPECT_EQ(Dst[i], value); - } -} - -} // namespace __llvm_libc diff --git a/libc/test/src/string/memory_utils/memcpy_utils_test.cpp b/libc/test/src/string/memory_utils/memcpy_utils_test.cpp new file mode 100644 index 0000000..37529ae --- /dev/null +++ b/libc/test/src/string/memory_utils/memcpy_utils_test.cpp @@ -0,0 +1,336 @@ +//===-- Unittests for memory_utils ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memory_utils/memcpy_utils.h" +#include "utils/CPP/Array.h" +#include "utils/UnitTest/Test.h" + +#include +#include // uintptr_t + +#ifndef LLVM_LIBC_MEMCPY_MONITOR +#error LLVM_LIBC_MEMCPY_MONITOR must be defined for this test. +#endif + +namespace __llvm_libc { + +struct Buffer { + static constexpr size_t kMaxBuffer = 1024; + char buffer[kMaxBuffer + 1]; + size_t last = 0; + + void Clear() { + last = 0; + for (size_t i = 0; i < kMaxBuffer; ++i) + buffer[i] = '0'; + buffer[kMaxBuffer] = '\0'; + } + + void Increment(const void *ptr) { + const auto offset = reinterpret_cast(ptr); + assert(offset < kMaxBuffer); + ++buffer[offset]; + if (offset > last) + last = offset; + } + + char *Finish() { + assert(last < kMaxBuffer); + buffer[last + 1] = '\0'; + return buffer; + } +}; + +struct Trace { + Buffer read; + Buffer write; + + void Add(char *__restrict dst, const char *__restrict src, size_t count) { + for (size_t i = 0; i < count; ++i) + read.Increment(src + i); + for (size_t i = 0; i < count; ++i) + write.Increment(dst + i); + } + + void Clear() { + read.Clear(); + write.Clear(); + } + + char *Read() { return read.Finish(); } + char *Write() { return write.Finish(); } +}; + +static Trace &GetTrace() { + static thread_local Trace events; + return events; +} + +extern "C" void LLVM_LIBC_MEMCPY_MONITOR(char *__restrict dst, + const char *__restrict src, + size_t count) { + GetTrace().Add(dst, src, count); +} + +char *I(uintptr_t offset) { return reinterpret_cast(offset); } + +TEST(LlvmLibcMemcpyUtilsTest, CopyTrivial) { + auto &trace = GetTrace(); + + trace.Clear(); + CopyBlock<1>(I(0), I(0)); + EXPECT_STREQ(trace.Write(), "1"); + EXPECT_STREQ(trace.Read(), "1"); + + trace.Clear(); + CopyBlock<2>(I(0), I(0)); + EXPECT_STREQ(trace.Write(), "11"); + EXPECT_STREQ(trace.Read(), "11"); + + trace.Clear(); + CopyBlock<4>(I(0), I(0)); + EXPECT_STREQ(trace.Write(), "1111"); + EXPECT_STREQ(trace.Read(), "1111"); + + trace.Clear(); + CopyBlock<8>(I(0), I(0)); + EXPECT_STREQ(trace.Write(), "11111111"); + EXPECT_STREQ(trace.Read(), "11111111"); + + trace.Clear(); + CopyBlock<16>(I(0), I(0)); + EXPECT_STREQ(trace.Write(), "1111111111111111"); + EXPECT_STREQ(trace.Read(), "1111111111111111"); + + trace.Clear(); + CopyBlock<32>(I(0), I(0)); + EXPECT_STREQ(trace.Write(), "11111111111111111111111111111111"); + EXPECT_STREQ(trace.Read(), "11111111111111111111111111111111"); + + trace.Clear(); + CopyBlock<64>(I(0), I(0)); + EXPECT_STREQ( + trace.Write(), + "1111111111111111111111111111111111111111111111111111111111111111"); + EXPECT_STREQ( + trace.Read(), + "1111111111111111111111111111111111111111111111111111111111111111"); +} + +TEST(LlvmLibcMemcpyUtilsTest, CopyOffset) { + auto &trace = GetTrace(); + + trace.Clear(); + CopyBlock<1>(I(3), I(1)); + EXPECT_STREQ(trace.Write(), "0001"); + EXPECT_STREQ(trace.Read(), "01"); + + trace.Clear(); + CopyBlock<1>(I(2), I(1)); + EXPECT_STREQ(trace.Write(), "001"); + EXPECT_STREQ(trace.Read(), "01"); +} + +TEST(LlvmLibcMemcpyUtilsTest, CopyBlockOverlap) { + auto &trace = GetTrace(); + + trace.Clear(); + CopyBlockOverlap<2>(I(0), I(0), 2); + EXPECT_STREQ(trace.Write(), "22"); + EXPECT_STREQ(trace.Read(), "22"); + + trace.Clear(); + CopyBlockOverlap<2>(I(0), I(0), 3); + EXPECT_STREQ(trace.Write(), "121"); + EXPECT_STREQ(trace.Read(), "121"); + + trace.Clear(); + CopyBlockOverlap<2>(I(0), I(0), 4); + EXPECT_STREQ(trace.Write(), "1111"); + EXPECT_STREQ(trace.Read(), "1111"); + + trace.Clear(); + CopyBlockOverlap<4>(I(2), I(1), 7); + EXPECT_STREQ(trace.Write(), "001112111"); + EXPECT_STREQ(trace.Read(), "01112111"); +} + +TEST(LlvmLibcMemcpyUtilsTest, CopySrcAlignedBlocks) { + auto &trace = GetTrace(); + // Source is aligned and multiple of alignment. + // "1111" + trace.Clear(); + CopySrcAlignedBlocks<4>(I(0), I(0), 4); + EXPECT_STREQ(trace.Write(), "2222"); + EXPECT_STREQ(trace.Read(), "2222"); + + // Source is aligned and multiple of alignment. + // "11110000" + // + "00001111" + // = "11111111" + trace.Clear(); + CopySrcAlignedBlocks<4>(I(0), I(0), 8); + EXPECT_STREQ(trace.Write(), "11111111"); + EXPECT_STREQ(trace.Read(), "11111111"); + + // Source is aligned already overlap at end. + // "1111000000000" + // + "0000111100000" + // + "0000000011110" + // + "0000000001111" + // = "1111111112221" + trace.Clear(); + CopySrcAlignedBlocks<4>(I(0), I(0), 13); + EXPECT_STREQ(trace.Write(), "1111111112221"); + EXPECT_STREQ(trace.Read(), "1111111112221"); + + // Misaligned source. + // "01111000000000" + // + "00001111000000" + // + "00000000111100" + // + "00000000001111" + // = "01112111112211" + trace.Clear(); + CopySrcAlignedBlocks<4>(I(0), I(1), 13); + EXPECT_STREQ(trace.Write(), "1112111112211"); + EXPECT_STREQ(trace.Read(), "01112111112211"); + + // Misaligned source aligned at end. + // "011110000000" + // + "000011110000" + // + "000000001111" + // = "011121111111" + trace.Clear(); + CopySrcAlignedBlocks<4>(I(0), I(1), 11); + EXPECT_STREQ(trace.Write(), "11121111111"); + EXPECT_STREQ(trace.Read(), "011121111111"); +} + +TEST(LlvmLibcMemcpyUtilsTest, CopyDstAlignedBlocks) { + auto &trace = GetTrace(); + // Destination is aligned and multiple of alignment. + // "1111" + trace.Clear(); + CopyDstAlignedBlocks<4>(I(0), I(0), 4); + EXPECT_STREQ(trace.Write(), "2222"); + EXPECT_STREQ(trace.Read(), "2222"); + + // Destination is aligned and multiple of alignment. + // "11110000" + // + "00001111" + // = "11111111" + trace.Clear(); + CopyDstAlignedBlocks<4>(I(0), I(0), 8); + EXPECT_STREQ(trace.Write(), "11111111"); + EXPECT_STREQ(trace.Read(), "11111111"); + + // Destination is aligned already overlap at end. + // "1111000000000" + // + "0000111100000" + // + "0000000011110" + // + "0000000001111" + // = "1111111112221" + trace.Clear(); + CopyDstAlignedBlocks<4>(I(0), I(0), 13); + EXPECT_STREQ(trace.Write(), "1111111112221"); + EXPECT_STREQ(trace.Read(), "1111111112221"); + + // Misaligned destination. + // "01111000000000" + // + "00001111000000" + // + "00000000111100" + // + "00000000001111" + // = "01112111112211" + trace.Clear(); + CopyDstAlignedBlocks<4>(I(1), I(0), 13); + EXPECT_STREQ(trace.Write(), "01112111112211"); + EXPECT_STREQ(trace.Read(), "1112111112211"); + + // Misaligned destination aligned at end. + // "011110000000" + // + "000011110000" + // + "000000001111" + // = "011121111111" + trace.Clear(); + CopyDstAlignedBlocks<4>(I(1), I(0), 11); + EXPECT_STREQ(trace.Write(), "011121111111"); + EXPECT_STREQ(trace.Read(), "11121111111"); +} + +TEST(LlvmLibcMemcpyUtilsTest, CopyAlignedBlocksWithAlignment) { + auto &trace = GetTrace(); + // Source is aligned and multiple of alignment. + // "11111111" + trace.Clear(); + CopySrcAlignedBlocks<8, 4>(I(0), I(0), 8); + EXPECT_STREQ(trace.Write(), "22221111"); + EXPECT_STREQ(trace.Read(), "22221111"); + + // Destination is aligned and multiple of alignment. + // "11111111" + trace.Clear(); + CopyDstAlignedBlocks<8, 4>(I(0), I(0), 8); + EXPECT_STREQ(trace.Write(), "22221111"); + EXPECT_STREQ(trace.Read(), "22221111"); + + // Source is aligned and multiple of alignment. + // "111111111" + trace.Clear(); + CopySrcAlignedBlocks<8, 4>(I(0), I(0), 9); + EXPECT_STREQ(trace.Write(), "122211111"); + EXPECT_STREQ(trace.Read(), "122211111"); + + // Destination is aligned and multiple of alignment. + // "111111111" + trace.Clear(); + CopyDstAlignedBlocks<8, 4>(I(0), I(0), 9); + EXPECT_STREQ(trace.Write(), "122211111"); + EXPECT_STREQ(trace.Read(), "122211111"); +} + +TEST(LlvmLibcMemcpyUtilsTest, CopyAlignedBlocksMaxReloads) { + auto &trace = GetTrace(); + for (size_t alignment = 0; alignment < 32; ++alignment) { + for (size_t count = 64; count < 768; ++count) { + trace.Clear(); + // We should never reload more than twice when copying from count = 2x32. + CopySrcAlignedBlocks<32>(I(alignment), I(0), count); + const char *const written = trace.Write(); + // First bytes are untouched. + for (size_t i = 0; i < alignment; ++i) + EXPECT_EQ(written[i], '0'); + // Next bytes are loaded once or twice but no more. + for (size_t i = alignment; i < count; ++i) { + EXPECT_GE(written[i], '1'); + EXPECT_LE(written[i], '2'); + } + } + } +} + +TEST(LlvmLibcMemcpyUtilsTest, CopyAlignedBlocksWithAlignmentMaxReloads) { + auto &trace = GetTrace(); + for (size_t alignment = 0; alignment < 32; ++alignment) { + for (size_t count = 64; count < 768; ++count) { + trace.Clear(); + // We should never reload more than twice when copying from count = 2x32. + CopySrcAlignedBlocks<32, 16>(I(alignment), I(0), count); + const char *const written = trace.Write(); + // First bytes are untouched. + for (size_t i = 0; i < alignment; ++i) + EXPECT_EQ(written[i], '0'); + // Next bytes are loaded once or twice but no more. + for (size_t i = alignment; i < count; ++i) { + EXPECT_GE(written[i], '1'); + EXPECT_LE(written[i], '2'); + } + } + } +} + +} // namespace __llvm_libc diff --git a/libc/test/src/string/memory_utils/memory_access_test.cpp b/libc/test/src/string/memory_utils/memory_access_test.cpp deleted file mode 100644 index 690a0a5..0000000 --- a/libc/test/src/string/memory_utils/memory_access_test.cpp +++ /dev/null @@ -1,231 +0,0 @@ -//===-- Unittests for memory_utils ----------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#define LLVM_LIBC_UNITTEST_OBSERVE 1 - -#include "src/string/memory_utils/elements.h" -#include "utils/CPP/Array.h" -#include "utils/CPP/ArrayRef.h" -#include "utils/UnitTest/Test.h" - -#include -#include - -namespace __llvm_libc { - -static constexpr const size_t kMaxBuffer = 32; - -struct BufferAccess : cpp::Array { - BufferAccess() { Reset(); } - void Reset() { - for (auto &value : *this) - value = '0'; - this->operator[](kMaxBuffer) = '\0'; - } - void Touch(ptrdiff_t offset, size_t size) { - if (offset < 0) - return; - for (size_t i = 0; i < size; ++i) - ++(*this)[offset + i]; - } - operator const char *() const { return this->data(); } -}; - -struct Buffer { - ptrdiff_t Offset(const char *ptr) const { - const bool contained = ptr >= data.begin() && ptr < data.end(); - return contained ? ptr - data.begin() : -1; - } - void Reset() { - reads.Reset(); - writes.Reset(); - } - cpp::Array data; - BufferAccess __attribute__((aligned(64))) reads; - BufferAccess __attribute__((aligned(64))) writes; -}; - -struct MemoryAccessObserver { - void ObserveRead(const char *ptr, size_t size) { - Buffer1.reads.Touch(Buffer1.Offset(ptr), size); - Buffer2.reads.Touch(Buffer2.Offset(ptr), size); - } - - void ObserveWrite(const char *ptr, size_t size) { - Buffer1.writes.Touch(Buffer1.Offset(ptr), size); - Buffer2.writes.Touch(Buffer2.Offset(ptr), size); - } - - void Reset() { - Buffer1.Reset(); - Buffer2.Reset(); - } - - Buffer Buffer1; - Buffer Buffer2; -}; - -MemoryAccessObserver Observer; - -template struct TestingElement { - static constexpr size_t kSize = Size; - - static void Copy(char *__restrict dst, const char *__restrict src) { - Observer.ObserveRead(src, kSize); - Observer.ObserveWrite(dst, kSize); - } - - static bool Equals(const char *lhs, const char *rhs) { - Observer.ObserveRead(lhs, kSize); - Observer.ObserveRead(rhs, kSize); - return true; - } - - static int ThreeWayCompare(const char *lhs, const char *rhs) { - Observer.ObserveRead(lhs, kSize); - Observer.ObserveRead(rhs, kSize); - return 0; - } - - static void SplatSet(char *dst, const unsigned char value) { - Observer.ObserveWrite(dst, kSize); - } -}; - -using Types = testing::TypeList< - TestingElement<1>, // 1 Byte - TestingElement<2>, // 2 Bytes - TestingElement<4>, // 4 Bytes - Repeated, 3>, // 6 Bytes - Chained, TestingElement<2>, TestingElement<1>> // 7 Bytes - >; - -struct LlvmLibcTestAccessBase : public testing::Test { - - template - void checkOperations(const BufferAccess &expected) { - static const BufferAccess untouched; - - Observer.Reset(); - HigherOrder::Copy(dst_ptr() + Offset, src_ptr() + Offset, Size); - ASSERT_STREQ(src().writes, untouched); - ASSERT_STREQ(dst().reads, untouched); - ASSERT_STREQ(src().reads, expected); - ASSERT_STREQ(dst().writes, expected); - Observer.Reset(); - HigherOrder::Equals(lhs_ptr() + Offset, rhs_ptr() + Offset, Size); - ASSERT_STREQ(lhs().writes, untouched); - ASSERT_STREQ(rhs().writes, untouched); - ASSERT_STREQ(lhs().reads, expected); - ASSERT_STREQ(rhs().reads, expected); - Observer.Reset(); - HigherOrder::ThreeWayCompare(lhs_ptr() + Offset, rhs_ptr() + Offset, Size); - ASSERT_STREQ(lhs().writes, untouched); - ASSERT_STREQ(rhs().writes, untouched); - ASSERT_STREQ(lhs().reads, expected); - ASSERT_STREQ(rhs().reads, expected); - Observer.Reset(); - HigherOrder::SplatSet(dst_ptr() + Offset, 5, Size); - ASSERT_STREQ(src().reads, untouched); - ASSERT_STREQ(src().writes, untouched); - ASSERT_STREQ(dst().reads, untouched); - ASSERT_STREQ(dst().writes, expected); - } - - void checkMaxAccess(const BufferAccess &expected, int max) { - for (size_t i = 0; i < kMaxBuffer; ++i) { - int value = (int)expected[i] - '0'; - if (value < 0 || value > max) { - printf("expected no more than %d access, was '%s'\n", max, - (const char *)expected); - ASSERT_LE(value, max); - } - } - } - -private: - const Buffer &lhs() const { return Observer.Buffer1; } - const Buffer &rhs() const { return Observer.Buffer2; } - const Buffer &src() const { return Observer.Buffer2; } - const Buffer &dst() const { return Observer.Buffer1; } - Buffer &dst() { return Observer.Buffer1; } - - char *dst_ptr() { return dst().data.begin(); } - const char *src_ptr() { return src().data.begin(); } - const char *lhs_ptr() { return lhs().data.begin(); } - const char *rhs_ptr() { return rhs().data.begin(); } -}; - -template -struct LlvmLibcTestAccessTail : public LlvmLibcTestAccessBase { - - void TearDown() override { - static constexpr size_t Size = 10; - - BufferAccess expected; - expected.Touch(Size - ParamType::kSize, ParamType::kSize); - - checkMaxAccess(expected, 1); - checkOperations, Size>(expected); - } -}; -TYPED_TEST_F(LlvmLibcTestAccessTail, Operations, Types) {} - -template -struct LlvmLibcTestAccessHeadTail : public LlvmLibcTestAccessBase { - void TearDown() override { - static constexpr size_t Size = 10; - - BufferAccess expected; - expected.Touch(0, ParamType::kSize); - expected.Touch(Size - ParamType::kSize, ParamType::kSize); - - checkMaxAccess(expected, 2); - checkOperations, Size>(expected); - } -}; -TYPED_TEST_F(LlvmLibcTestAccessHeadTail, Operations, Types) {} - -template -struct LlvmLibcTestAccessLoop : public LlvmLibcTestAccessBase { - void TearDown() override { - static constexpr size_t Size = 20; - - BufferAccess expected; - for (size_t i = 0; i < Size - ParamType::kSize; i += ParamType::kSize) - expected.Touch(i, ParamType::kSize); - expected.Touch(Size - ParamType::kSize, ParamType::kSize); - - checkMaxAccess(expected, 2); - checkOperations, Size>(expected); - } -}; -TYPED_TEST_F(LlvmLibcTestAccessLoop, Operations, Types) {} - -template -struct LlvmLibcTestAccessAlignedAccess : public LlvmLibcTestAccessBase { - void TearDown() override { - static constexpr size_t Size = 10; - static constexpr size_t Offset = 2; - using AlignmentT = TestingElement<4>; - - BufferAccess expected; - expected.Touch(Offset, AlignmentT::kSize); - expected.Touch(AlignmentT::kSize, ParamType::kSize); - expected.Touch(Offset + Size - ParamType::kSize, ParamType::kSize); - - checkMaxAccess(expected, 3); - checkOperations::Then>, Size, - Offset>(expected); - checkOperations::Then>, Size, - Offset>(expected); - } -}; -TYPED_TEST_F(LlvmLibcTestAccessAlignedAccess, Operations, Types) {} - -} // namespace __llvm_libc -- 2.7.4