From 7f5d7bc827a5fda1c803342af536f81986e2635a Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet
Date: Wed, 22 Jun 2022 11:34:51 +0000
Subject: [PATCH] [libc][mem*] Introduce Algorithms for new mem framework

This patch is a subpart of D125768 intended to make the review easier.
This patch introduces the same algorithms as in
`libc/src/string/memory_utils/elements.h` but using the new API.

Differential Revision: https://reviews.llvm.org/D128335
---
 libc/src/string/memory_utils/algorithm.h           | 463 ++++++++++++++++
 libc/src/string/memory_utils/sized_op.h            |   3 +
 libc/test/src/string/memory_utils/CMakeLists.txt   |   1 +
 .../src/string/memory_utils/algorithm_test.cpp     | 565 +++++++++++++++++++
 4 files changed, 1032 insertions(+)
 create mode 100644 libc/src/string/memory_utils/algorithm.h
 create mode 100644 libc/test/src/string/memory_utils/algorithm_test.cpp

diff --git a/libc/src/string/memory_utils/algorithm.h b/libc/src/string/memory_utils/algorithm.h
new file mode 100644
index 0000000..82c09fc7
--- /dev/null
+++ b/libc/src/string/memory_utils/algorithm.h
@@ -0,0 +1,463 @@
+//===-- Algorithms to compose sized memory operations ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Higher order primitives that build upon the SizedOpT facility.
+// They constitute the basic blocks for composing memory functions.
+// This file defines the following operations:
+// - Skip
+// - Tail
+// - HeadTail
+// - Loop
+// - Align
+//
+// See each class for documentation.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
+
+#include "src/string/memory_utils/address.h" // Address
+#include "src/string/memory_utils/utils.h"   // offset_to_next_aligned
+
+#include <stddef.h> // ptrdiff_t
+
+namespace __llvm_libc {
+
+// We are not yet allowed to use asserts in low level memory operations as
+// assert itself could depend on them.
+// We define this empty macro so we can enable asserts as soon as possible and
+// keep track of invariants.
+#define LIBC_ASSERT(COND)
+
+// An operation that skips the specified number of bytes before invoking the
+// next operation.
+template <ptrdiff_t Bytes> struct Skip {
+  template <typename NextT> struct Then {
+    template <typename DstAddrT>
+    static inline void set(DstAddrT dst, ubyte value) {
+      static_assert(NextT::IS_FIXED_SIZE);
+      NextT::set(offsetAddr<Bytes>(dst), value);
+    }
+
+    template <typename SrcAddrT1, typename SrcAddrT2>
+    static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
+      static_assert(NextT::IS_FIXED_SIZE);
+      return NextT::isDifferent(offsetAddr<Bytes>(src1),
+                                offsetAddr<Bytes>(src2));
+    }
+
+    template <typename SrcAddrT1, typename SrcAddrT2>
+    static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
+      static_assert(NextT::IS_FIXED_SIZE);
+      return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
+                                offsetAddr<Bytes>(src2));
+    }
+
+    template <typename SrcAddrT1, typename SrcAddrT2>
+    static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
+                                      size_t runtime_size) {
+      static_assert(NextT::IS_RUNTIME_SIZE);
+      return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
+                                offsetAddr<Bytes>(src2), runtime_size - Bytes);
+    }
+  };
+};
+
+// Compute the address of a tail operation.
+// Because of the runtime size, we lose the alignment information.
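// For illustration: with Size = 8 and runtime_size = 20, tailAddr<8>(addr, 20)
// returns addr + 12, so a following 8-byte operation covers bytes [12, 20),
// i.e. the last 8 bytes of the buffer, regardless of how the head was handled.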
+template +static auto tailAddr(AddrT addr, size_t runtime_size) { + static_assert(IsAddressType::Value); + return offsetAddrAssumeAligned<1>(addr, runtime_size - Size); +} + +// Perform the operation on the last 'Size' bytes of the buffer. +// +// e.g. with +// [1234567812345678123] +// [__XXXXXXXXXXXXXX___] +// [________XXXXXXXX___] +// +// Precondition: `runtime_size >= Size`. +template struct Tail { + static_assert(SizedOpT::IS_FIXED_SIZE); + static constexpr bool IS_RUNTIME_SIZE = true; + static constexpr size_t SIZE = SizedOpT::SIZE; + + template + static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) { + SizedOpT::copy(tailAddr(dst, runtime_size), + tailAddr(src, runtime_size)); + } + + template + static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) { + SizedOpT::move(tailAddr(dst, runtime_size), + tailAddr(src, runtime_size)); + } + + template + static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) { + SizedOpT::set(tailAddr(dst, runtime_size), value); + } + + template + static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2, + size_t runtime_size) { + return SizedOpT::isDifferent(tailAddr(src1, runtime_size), + tailAddr(src2, runtime_size)); + } + + template + static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2, + size_t runtime_size) { + return SizedOpT::threeWayCmp(tailAddr(src1, runtime_size), + tailAddr(src2, runtime_size)); + } +}; + +// Perform the operation on the first and the last `SizedOpT::Size` bytes of the +// buffer. This is useful for overlapping operations. +// +// e.g. with +// [1234567812345678123] +// [__XXXXXXXXXXXXXX___] +// [__XXXXXXXX_________] +// [________XXXXXXXX___] +// +// Precondition: `runtime_size >= Size && runtime_size <= 2 x Size`. +template struct HeadTail { + static_assert(SizedOpT::IS_FIXED_SIZE); + static constexpr bool IS_RUNTIME_SIZE = true; + + template + static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) { + LIBC_ASSERT(runtime_size >= SizedOpT::SIZE); + SizedOpT::copy(dst, src); + Tail::copy(dst, src, runtime_size); + } + + template + static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) { + LIBC_ASSERT(runtime_size >= SizedOpT::SIZE); + static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE; + // The load and store operations can be performed in any order as long as + // they are not interleaved. More investigations are needed to determine the + // best order. + auto head = SizedOpT::load(src); + auto tail = SizedOpT::load(tailAddr(src, runtime_size)); + SizedOpT::store(tailAddr(dst, runtime_size), tail); + SizedOpT::store(dst, head); + } + + template + static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) { + LIBC_ASSERT(runtime_size >= SizedOpT::SIZE); + SizedOpT::set(dst, value); + Tail::set(dst, value, runtime_size); + } + + template + static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2, + size_t runtime_size) { + LIBC_ASSERT(runtime_size >= SizedOpT::SIZE); + // Two strategies can be applied here: + // 1. Compute head and tail and compose them with a bitwise or operation. + // 2. Stop early if head is different. + // We chose the later because HeadTail operations are typically performed + // with sizes ranging from 4 to 256 bytes. The cost of the loads is then + // significantly larger than the cost of the branch. 
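    // As a sketch, strategy 1 would compute both halves unconditionally:
    //   return SizedOpT::isDifferent(src1, src2) |
    //          Tail<SizedOpT>::isDifferent(src1, src2, runtime_size);
    // whereas the early return below implements strategy 2.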
+ if (const uint64_t res = SizedOpT::isDifferent(src1, src2)) + return res; + return Tail::isDifferent(src1, src2, runtime_size); + } + + template + static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2, + size_t runtime_size) { + LIBC_ASSERT(runtime_size >= SizedOpT::SIZE && + runtime_size <= 2 * SizedOpT::SIZE); + if (const int32_t res = SizedOpT::threeWayCmp(src1, src2)) + return res; + return Tail::threeWayCmp(src1, src2, runtime_size); + } +}; + +// Simple loop ending with a Tail operation. +// +// e.g. with +// [12345678123456781234567812345678] +// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___] +// [__XXXXXXXX_______________________] +// [__________XXXXXXXX_______________] +// [__________________XXXXXXXX_______] +// [______________________XXXXXXXX___] +// +// Precondition: +// - runtime_size >= Size +template struct Loop { + static_assert(SizedOpT::IS_FIXED_SIZE); + static constexpr bool IS_RUNTIME_SIZE = true; + static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE; + + template + static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) { + size_t offset = 0; + do { + SizedOpT::copy(offsetAddrMultiplesOf(dst, offset), + offsetAddrMultiplesOf(src, offset)); + offset += BLOCK_SIZE; + } while (offset < runtime_size - BLOCK_SIZE); + Tail::copy(dst, src, runtime_size); + } + + // Move forward suitable when dst < src. We load the tail bytes before + // handling the loop. + // + // e.g. Moving two bytes + // [ | | | | |] + // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] + // [_________________________LLLLLLLL___] + // [___LLLLLLLL_________________________] + // [_SSSSSSSS___________________________] + // [___________LLLLLLLL_________________] + // [_________SSSSSSSS___________________] + // [___________________LLLLLLLL_________] + // [_________________SSSSSSSS___________] + // [_______________________SSSSSSSS_____] + template + static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) { + const auto tail_value = + SizedOpT::load(tailAddr(src, runtime_size)); + size_t offset = 0; + do { + SizedOpT::move(offsetAddrMultiplesOf(dst, offset), + offsetAddrMultiplesOf(src, offset)); + offset += BLOCK_SIZE; + } while (offset < runtime_size - BLOCK_SIZE); + SizedOpT::store(tailAddr(dst, runtime_size), tail_value); + } + + // Move backward suitable when dst > src. We load the head bytes before + // handling the loop. + // + // e.g. 
Moving two bytes + // [ | | | | |] + // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] + // [___LLLLLLLL_________________________] + // [_________________________LLLLLLLL___] + // [___________________________SSSSSSSS_] + // [_________________LLLLLLLL___________] + // [___________________SSSSSSSS_________] + // [_________LLLLLLLL___________________] + // [___________SSSSSSSS_________________] + // [_____SSSSSSSS_______________________] + template + static inline void move_backward(DstAddrT dst, SrcAddrT src, + size_t runtime_size) { + const auto head_value = SizedOpT::load(src); + ptrdiff_t offset = runtime_size - BLOCK_SIZE; + do { + SizedOpT::move(offsetAddrMultiplesOf(dst, offset), + offsetAddrMultiplesOf(src, offset)); + offset -= BLOCK_SIZE; + } while (offset >= 0); + SizedOpT::store(dst, head_value); + } + + template + static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) { + size_t offset = 0; + do { + SizedOpT::set(offsetAddrMultiplesOf(dst, offset), value); + offset += BLOCK_SIZE; + } while (offset < runtime_size - BLOCK_SIZE); + Tail::set(dst, value, runtime_size); + } + + template + static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2, + size_t runtime_size) { + size_t offset = 0; + do { + if (uint64_t res = SizedOpT::isDifferent( + offsetAddrMultiplesOf(src1, offset), + offsetAddrMultiplesOf(src2, offset))) + return res; + offset += BLOCK_SIZE; + } while (offset < runtime_size - BLOCK_SIZE); + return Tail::isDifferent(src1, src2, runtime_size); + } + + template + static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2, + size_t runtime_size) { + size_t offset = 0; + do { + if (int32_t res = SizedOpT::threeWayCmp( + offsetAddrMultiplesOf(src1, offset), + offsetAddrMultiplesOf(src2, offset))) + return res; + offset += BLOCK_SIZE; + } while (offset < runtime_size - BLOCK_SIZE); + return Tail::threeWayCmp(src1, src2, runtime_size); + } +}; + +// Aligns using a statically-sized operation, then calls the subsequent NextT +// operation. +// +// e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as: +// Align<_16, Arg::Dst>::Then>::copy(dst, src, runtime_size); +enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 }; +template struct Align { + static_assert(SizedOpT::IS_FIXED_SIZE); + + template struct Then { + static_assert(NextT::IS_RUNTIME_SIZE); + + template + static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) { + SizedOpT::copy(dst, src); + auto aligned = align(dst, src, runtime_size); + NextT::copy(aligned.arg1, aligned.arg2, aligned.size); + } + + // Move forward suitable when dst < src. The alignment is performed with + // an HeadTail operation of size ∈ [Alignment, 2 x Alignment]. + // + // e.g. Moving two bytes and making sure src is then aligned. + // [ | | | | ] + // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] + // [____LLLLLLLL_____________________] + // [___________LLLLLLLL______________] + // [_SSSSSSSS________________________] + // [________SSSSSSSS_________________] + // + // e.g. Moving two bytes and making sure dst is then aligned. + // [ | | | | ] + // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] + // [____LLLLLLLL_____________________] + // [______LLLLLLLL___________________] + // [_SSSSSSSS________________________] + // [___SSSSSSSS______________________] + template + static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) { + auto aligned_after_begin = align(dst, src, runtime_size); + // We move pointers forward by Size so we can perform HeadTail. 
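    // Worked example (assuming AlignOn == Arg::Dst and ALIGN_OP_SIZE == 16):
    // with runtime_size == 100 and dst % 16 == 10, align() skips 6 bytes and
    // yields size 94; stepForward() drops another 16, leaving 78. The HeadTail
    // below therefore moves the first 100 - 78 = 22 bytes (always within
    // [ALIGN_OP_SIZE, 2 * ALIGN_OP_SIZE]), and NextT::move then handles the
    // remaining 78 bytes starting from a 16-byte aligned destination.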
+ auto aligned = aligned_after_begin.stepForward(); + HeadTail::move(dst, src, runtime_size - aligned.size); + NextT::move(aligned.arg1, aligned.arg2, aligned.size); + } + + // Move backward suitable when dst > src. The alignment is performed with + // an HeadTail operation of size ∈ [Alignment, 2 x Alignment]. + // + // e.g. Moving two bytes backward and making sure src is then aligned. + // [ | | | | ] + // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] + // [ _________________LLLLLLLL_______] + // [ ___________________LLLLLLLL_____] + // [____________________SSSSSSSS_____] + // [______________________SSSSSSSS___] + // + // e.g. Moving two bytes and making sure dst is then aligned. + // [ | | | | ] + // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] + // [ _______________LLLLLLLL_________] + // [ ___________________LLLLLLLL_____] + // [__________________SSSSSSSS_______] + // [______________________SSSSSSSS___] + template + static inline void move_backward(DstAddrT dst, SrcAddrT src, + size_t runtime_size) { + const auto dst_end = offsetAddrAssumeAligned<1>(dst, runtime_size); + const auto src_end = offsetAddrAssumeAligned<1>(src, runtime_size); + auto aligned_after_end = align(dst_end, src_end, 0); + // We move pointers back by 2 x Size so we can perform HeadTail. + auto aligned = aligned_after_end.stepBack().stepBack(); + HeadTail::move(aligned.arg1, aligned.arg2, aligned.size); + NextT::move_backward(dst, src, runtime_size - aligned.size); + } + + template + static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) { + SizedOpT::set(dst, value); + DstAddrT _(nullptr); + auto aligned = align(dst, _, runtime_size); + NextT::set(aligned.arg1, value, aligned.size); + } + + template + static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2, + size_t runtime_size) { + if (const uint64_t res = SizedOpT::isDifferent(src1, src2)) + return res; + auto aligned = align(src1, src2, runtime_size); + return NextT::isDifferent(aligned.arg1, aligned.arg2, aligned.size); + } + + template + static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2, + size_t runtime_size) { + if (const int32_t res = SizedOpT::threeWayCmp(src1, src2)) + return res; + auto aligned = align(src1, src2, runtime_size); + return NextT::threeWayCmp(aligned.arg1, aligned.arg2, aligned.size); + } + }; + +private: + static constexpr size_t ALIGN_OP_SIZE = SizedOpT::SIZE; + static_assert(ALIGN_OP_SIZE > 1); + + template struct Aligned { + Arg1AddrT arg1; + Arg2AddrT arg2; + size_t size; + + Aligned stepForward() const { + return Aligned{offsetAddrMultiplesOf(arg1, ALIGN_OP_SIZE), + offsetAddrMultiplesOf(arg2, ALIGN_OP_SIZE), + size - ALIGN_OP_SIZE}; + } + + Aligned stepBack() const { + return Aligned{offsetAddrMultiplesOf(arg1, -ALIGN_OP_SIZE), + offsetAddrMultiplesOf(arg2, -ALIGN_OP_SIZE), + size + ALIGN_OP_SIZE}; + } + }; + + template + static auto makeAligned(Arg1AddrT arg1, Arg2AddrT arg2, size_t size) { + return Aligned{arg1, arg2, size}; + } + + template + static auto align(Arg1AddrT arg1, Arg2AddrT arg2, size_t runtime_size) { + static_assert(IsAddressType::Value); + static_assert(IsAddressType::Value); + if constexpr (AlignOn == Arg::_1) { + auto offset = offset_to_next_aligned(arg1.ptr_); + return makeAligned(offsetAddrAssumeAligned(arg1, offset), + offsetAddrAssumeAligned<1>(arg2, offset), + runtime_size - offset); + } else if constexpr (AlignOn == Arg::_2) { + auto offset = offset_to_next_aligned(arg2.ptr_); + return makeAligned(offsetAddrAssumeAligned<1>(arg1, offset), + offsetAddrAssumeAligned(arg2, offset), + 
runtime_size - offset); + } else { + DeferredStaticAssert("AlignOn must be either Arg::_1 or Arg::_2"); + } + } +}; + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H diff --git a/libc/src/string/memory_utils/sized_op.h b/libc/src/string/memory_utils/sized_op.h index 12ace7c..d58d393 100644 --- a/libc/src/string/memory_utils/sized_op.h +++ b/libc/src/string/memory_utils/sized_op.h @@ -33,6 +33,9 @@ namespace __llvm_libc { template struct SizedOp { static constexpr size_t SIZE = Size; + // Define instantiations of SizedOp as a fixed size operation. + // i.e. an operation that is composable by types in algorithm.h + static constexpr bool IS_FIXED_SIZE = true; private: static_assert(Backend::IS_BACKEND_TYPE); diff --git a/libc/test/src/string/memory_utils/CMakeLists.txt b/libc/test/src/string/memory_utils/CMakeLists.txt index 83926e4..9b777a7 100644 --- a/libc/test/src/string/memory_utils/CMakeLists.txt +++ b/libc/test/src/string/memory_utils/CMakeLists.txt @@ -4,6 +4,7 @@ add_libc_unittest( libc_string_unittests SRCS address_test.cpp + algorithm_test.cpp backend_test.cpp elements_test.cpp memory_access_test.cpp diff --git a/libc/test/src/string/memory_utils/algorithm_test.cpp b/libc/test/src/string/memory_utils/algorithm_test.cpp new file mode 100644 index 0000000..e4fd3e3 --- /dev/null +++ b/libc/test/src/string/memory_utils/algorithm_test.cpp @@ -0,0 +1,565 @@ + +#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE 0 + +#include "utils/UnitTest/Test.h" +#include +#include +#include + +#include + +namespace __llvm_libc { + +struct alignas(64) Buffer : cpp::Array { + bool contains(const char *ptr) const { + return ptr >= data() && ptr < (data() + size()); + } + size_t getOffset(const char *ptr) const { return ptr - data(); } + void fill(char c) { + for (auto itr = begin(); itr != end(); ++itr) + *itr = c; + } +}; + +static Buffer buffer1; +static Buffer buffer2; +static std::ostringstream LOG; + +struct TestBackend { + static constexpr bool IS_BACKEND_TYPE = true; + + template static void log(const char *Action, const char *ptr) { + LOG << Action << "<" << sizeof(T) << "> "; + if (buffer1.contains(ptr)) + LOG << "a[" << buffer1.getOffset(ptr) << "]"; + else if (buffer2.contains(ptr)) + LOG << "b[" << buffer2.getOffset(ptr) << "]"; + LOG << "\n"; + } + + template + static T load(const T *src) { + log((AS == Aligned::YES ? "LdA" : "LdU"), + reinterpret_cast(src)); + return Scalar64BitBackend::load(src); + } + + template + static void store(T *dst, T value) { + log((AS == Aligned::YES ? 
"StA" : "StU"), + reinterpret_cast(dst)); + Scalar64BitBackend::store(dst, value); + } + + template static inline T splat(ubyte value) { + LOG << "Splat<" << sizeof(T) << "> " << (unsigned)value << '\n'; + return Scalar64BitBackend::splat(value); + } + + template static inline uint64_t notEquals(T v1, T v2) { + LOG << "Neq<" << sizeof(T) << ">\n"; + return Scalar64BitBackend::notEquals(v1, v2); + } + + template static inline int32_t threeWayCmp(T v1, T v2) { + LOG << "Diff<" << sizeof(T) << ">\n"; + return Scalar64BitBackend::threeWayCmp(v1, v2); + } + + template + using getNextType = Scalar64BitBackend::getNextType; +}; + +struct LlvmLibcAlgorithm : public testing::Test { + void SetUp() override { + LOG = std::ostringstream(); + LOG << '\n'; + } + + void fillEqual() { + buffer1.fill('a'); + buffer2.fill('a'); + } + + void fillDifferent() { + buffer1.fill('a'); + buffer2.fill('b'); + } + + const char *getTrace() { + trace_ = LOG.str(); + return trace_.c_str(); + } + + const char *stripComments(const char *expected) { + expected_.clear(); + std::stringstream ss(expected); + std::string line; + while (std::getline(ss, line, '\n')) { + const auto pos = line.find('#'); + if (pos == std::string::npos) { + expected_ += line; + } else { + auto log = line.substr(0, pos); + while (!log.empty() && std::isspace(log.back())) + log.pop_back(); + expected_ += log; + } + expected_ += '\n'; + } + return expected_.c_str(); + } + + template SrcAddr buf1(size_t offset = 0) const { + return buffer1.data() + offset; + } + template SrcAddr buf2(size_t offset = 0) const { + return buffer2.data() + offset; + } + template DstAddr dst(size_t offset = 0) const { + return buffer1.data() + offset; + } + template SrcAddr src(size_t offset = 0) const { + return buffer2.data() + offset; + } + +private: + std::string trace_; + std::string expected_; +}; + +using _8 = SizedOp; + +/////////////////////////////////////////////////////////////////////////////// +//// Testing fixed fized forward operations +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Copy + +TEST_F(LlvmLibcAlgorithm, copy_1) { + SizedOp::copy(dst(), src()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<1> b[0] +StU<1> a[0] +)")); +} + +TEST_F(LlvmLibcAlgorithm, copy_15) { + SizedOp::copy(dst(), src()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StU<8> a[0] +LdU<4> b[8] +StU<4> a[8] +LdU<2> b[12] +StU<2> a[12] +LdU<1> b[14] +StU<1> a[14] +)")); +} + +TEST_F(LlvmLibcAlgorithm, copy_16) { + SizedOp::copy(dst(), src()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StU<8> a[0] +LdU<8> b[8] +StU<8> a[8] +)")); +} +/////////////////////////////////////////////////////////////////////////////// +// Move + +TEST_F(LlvmLibcAlgorithm, move_1) { + SizedOp::move(dst(), src()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<1> b[0] +StU<1> a[0] +)")); +} + +TEST_F(LlvmLibcAlgorithm, move_15) { + SizedOp::move(dst(), src()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +LdU<4> b[8] +LdU<2> b[12] +LdU<1> b[14] +StU<1> a[14] +StU<2> a[12] +StU<4> a[8] +StU<8> a[0] +)")); +} + +TEST_F(LlvmLibcAlgorithm, move_16) { + SizedOp::move(dst(), src()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +LdU<8> b[8] +StU<8> a[8] +StU<8> a[0] +)")); +} + +/////////////////////////////////////////////////////////////////////////////// +// set + +TEST_F(LlvmLibcAlgorithm, set_1) { + SizedOp::set(dst(), 
ubyte{42}); + EXPECT_STREQ(getTrace(), stripComments(R"( +Splat<1> 42 +StU<1> a[0] +)")); +} + +TEST_F(LlvmLibcAlgorithm, set_15) { + SizedOp::set(dst(), ubyte{42}); + EXPECT_STREQ(getTrace(), stripComments(R"( +Splat<8> 42 +StU<8> a[0] +Splat<4> 42 +StU<4> a[8] +Splat<2> 42 +StU<2> a[12] +Splat<1> 42 +StU<1> a[14] +)")); +} + +TEST_F(LlvmLibcAlgorithm, set_16) { + SizedOp::set(dst(), ubyte{42}); + EXPECT_STREQ(getTrace(), stripComments(R"( +Splat<8> 42 +StU<8> a[0] +Splat<8> 42 +StU<8> a[8] +)")); +} + +/////////////////////////////////////////////////////////////////////////////// +// different + +TEST_F(LlvmLibcAlgorithm, different_1) { + fillEqual(); + SizedOp::isDifferent(buf1(), buf2()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<1> a[0] +LdU<1> b[0] +Neq<1> +)")); +} + +TEST_F(LlvmLibcAlgorithm, different_15) { + fillEqual(); + SizedOp::isDifferent(buf1(), buf2()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[0] +LdU<8> b[0] +Neq<8> +LdU<4> a[8] +LdU<4> b[8] +Neq<4> +LdU<2> a[12] +LdU<2> b[12] +Neq<2> +LdU<1> a[14] +LdU<1> b[14] +Neq<1> +)")); +} + +TEST_F(LlvmLibcAlgorithm, different_15_no_shortcircuit) { + fillDifferent(); + SizedOp::isDifferent(buf1(), buf2()); + // If buffer compare isDifferent we continue to aggregate. + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[0] +LdU<8> b[0] +Neq<8> +LdU<4> a[8] +LdU<4> b[8] +Neq<4> +LdU<2> a[12] +LdU<2> b[12] +Neq<2> +LdU<1> a[14] +LdU<1> b[14] +Neq<1> +)")); +} + +TEST_F(LlvmLibcAlgorithm, different_16) { + fillEqual(); + SizedOp::isDifferent(buf1(), buf2()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[0] +LdU<8> b[0] +Neq<8> +LdU<8> a[8] +LdU<8> b[8] +Neq<8> +)")); +} + +/////////////////////////////////////////////////////////////////////////////// +// three_way_cmp + +TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_1) { + fillEqual(); + SizedOp::threeWayCmp(buf1(), buf2()); + // Buffer compare equal, returning 0 and no call to Diff. + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<1> a[0] +LdU<1> b[0] +Diff<1> +)")); +} + +TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_15) { + fillEqual(); + SizedOp::threeWayCmp(buf1(), buf2()); + // Buffer compare equal, returning 0 and no call to Diff. + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[0] +LdU<8> b[0] +Diff<8> +LdU<4> a[8] +LdU<4> b[8] +Diff<4> +LdU<2> a[12] +LdU<2> b[12] +Diff<2> +LdU<1> a[14] +LdU<1> b[14] +Diff<1> +)")); +} + +TEST_F(LlvmLibcAlgorithm, three_way_cmp_neq_15_shortcircuit) { + fillDifferent(); + SizedOp::threeWayCmp(buf1(), buf2()); + // If buffer compare isDifferent we stop early. + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[0] +LdU<8> b[0] +Diff<8> +)")); +} + +TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_16) { + fillEqual(); + SizedOp::threeWayCmp(buf1(), buf2()); + // Buffer compare equal, returning 0 and no call to Diff. 
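  // How to read these traces (illustrative note): LdU<N>/LdA<N> are
  // unaligned/aligned N-byte loads and StU<N>/StA<N> the matching stores,
  // Splat<N> is a set() value broadcast, Neq<N> an isDifferent() comparison,
  // Diff<N> a threeWayCmp() comparison, and a[i]/b[i] are byte offsets into
  // buffer1/buffer2.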
+ EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[0] +LdU<8> b[0] +Diff<8> +LdU<8> a[8] +LdU<8> b[8] +Diff<8> +)")); +} + +/////////////////////////////////////////////////////////////////////////////// +//// Testing skip operations +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, skip_and_set) { + Skip<11>::Then>::set(dst(), ubyte{42}); + EXPECT_STREQ(getTrace(), stripComments(R"( +Splat<1> 42 +StU<1> a[11] +)")); +} + +TEST_F(LlvmLibcAlgorithm, skip_and_different_1) { + Skip<11>::Then>::isDifferent(buf1(), buf2()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<1> a[11] +LdU<1> b[11] +Neq<1> +)")); +} + +TEST_F(LlvmLibcAlgorithm, skip_and_three_way_cmp_8) { + Skip<11>::Then>::threeWayCmp(buf1(), buf2()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<1> a[11] +LdU<1> b[11] +Diff<1> +)")); +} + +/////////////////////////////////////////////////////////////////////////////// +//// Testing tail operations +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, tail_copy_8) { + Tail<_8>::copy(dst(), src(), 16); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[8] +StU<8> a[8] +)")); +} + +TEST_F(LlvmLibcAlgorithm, tail_move_8) { + Tail<_8>::move(dst(), src(), 16); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[8] +StU<8> a[8] +)")); +} + +TEST_F(LlvmLibcAlgorithm, tail_set_8) { + Tail<_8>::set(dst(), ubyte{42}, 16); + EXPECT_STREQ(getTrace(), stripComments(R"( +Splat<8> 42 +StU<8> a[8] +)")); +} + +TEST_F(LlvmLibcAlgorithm, tail_different_8) { + fillEqual(); + Tail<_8>::isDifferent(buf1(), buf2(), 16); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[8] +LdU<8> b[8] +Neq<8> +)")); +} + +TEST_F(LlvmLibcAlgorithm, tail_three_way_cmp_8) { + fillEqual(); + Tail<_8>::threeWayCmp(buf1(), buf2(), 16); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[8] +LdU<8> b[8] +Diff<8> +)")); +} + +/////////////////////////////////////////////////////////////////////////////// +//// Testing HeadTail operations +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, head_tail_copy_8) { + HeadTail<_8>::copy(dst(), src(), 16); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StU<8> a[0] +LdU<8> b[8] +StU<8> a[8] +)")); +} + +/////////////////////////////////////////////////////////////////////////////// +//// Testing Loop operations +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, loop_copy_one_iteration_and_tail) { + Loop<_8>::copy(dst(), src(), 10); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StU<8> a[0] # covers 0-7 +LdU<8> b[2] +StU<8> a[2] # covers 2-9 +)")); +} + +TEST_F(LlvmLibcAlgorithm, loop_copy_two_iteration_and_tail) { + Loop<_8>::copy(dst(), src(), 17); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StU<8> a[0] # covers 0-7 +LdU<8> b[8] +StU<8> a[8] # covers 8-15 +LdU<8> b[9] +StU<8> a[9] # covers 9-16 +)")); +} + +TEST_F(LlvmLibcAlgorithm, loop_with_one_turn_is_inefficient_but_ok) { + Loop<_8>::copy(dst(), src(), 8); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StU<8> a[0] # first iteration covers 0-7 +LdU<8> b[0] # tail also covers 0-7 but since Loop is supposed to be used +StU<8> a[0] # with a sufficient number of iterations the tail cost is amortised +)")); +} + +TEST_F(LlvmLibcAlgorithm, loop_with_round_number_of_turn) { + Loop<_8>::copy(dst(), src(), 24); + EXPECT_STREQ(getTrace(), 
stripComments(R"( +LdU<8> b[0] +StU<8> a[0] # first iteration covers 0-7 +LdU<8> b[8] +StU<8> a[8] # second iteration covers 8-15 +LdU<8> b[16] +StU<8> a[16] +)")); +} + +TEST_F(LlvmLibcAlgorithm, dst_aligned_loop) { + Loop<_8>::copy(dst<16>(), src(), 23); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StA<8> a[0] # store is aligned on 16B +LdU<8> b[8] +StA<8> a[8] # subsequent stores are aligned +LdU<8> b[15] +StU<8> a[15] # Tail is always unaligned +)")); +} + +TEST_F(LlvmLibcAlgorithm, aligned_loop) { + Loop<_8>::copy(dst<16>(), src<8>(), 23); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdA<8> b[0] # load is aligned on 8B +StA<8> a[0] # store is aligned on 16B +LdA<8> b[8] # subsequent loads are aligned +StA<8> a[8] # subsequent stores are aligned +LdU<8> b[15] # Tail is always unaligned +StU<8> a[15] # Tail is always unaligned +)")); +} + +/////////////////////////////////////////////////////////////////////////////// +//// Testing Align operations +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, align_dst_copy_8) { + Align<_8, Arg::Dst>::Then>::copy(dst(2), src(3), 31); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[3] +StU<8> a[2] # First store covers unaligned bytes +LdU<8> b[9] +StA<8> a[8] # First aligned store +LdU<8> b[17] +StA<8> a[16] # Subsequent stores are aligned +LdU<8> b[25] +StA<8> a[24] # Subsequent stores are aligned +LdU<8> b[26] +StU<8> a[25] # Last store covers remaining bytes +)")); +} + +TEST_F(LlvmLibcAlgorithm, align_src_copy_8) { + Align<_8, Arg::Src>::Then>::copy(dst(2), src(3), 31); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[3] # First load covers unaligned bytes +StU<8> a[2] +LdA<8> b[8] # First aligned load +StU<8> a[7] +LdA<8> b[16] # Subsequent loads are aligned +StU<8> a[15] +LdA<8> b[24] # Subsequent loads are aligned +StU<8> a[23] +LdU<8> b[26] # Last load covers remaining bytes +StU<8> a[25] +)")); +} + +} // namespace __llvm_libc -- 2.7.4
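For context, these building blocks are meant to be composed into size-dispatching
memory functions, as done for the old API in `elements.h` and in the parent patch
D125768. The sketch below, which is not part of this patch, shows one way a
dynamically sized copy could be assembled from the operations above; the
`Backend`, the address parameter types and the exact size thresholds are
illustrative assumptions only.

// Illustrative only: composes SizedOp/HeadTail/Align/Loop from this patch.
// `Backend`, `DstAddrT`, `SrcAddrT` and the thresholds are assumptions.
template <typename Backend, typename DstAddrT, typename SrcAddrT>
static inline void example_copy(DstAddrT dst, SrcAddrT src, size_t size) {
  using _1 = SizedOp<Backend, 1>;
  using _2 = SizedOp<Backend, 2>;
  using _4 = SizedOp<Backend, 4>;
  using _8 = SizedOp<Backend, 8>;
  using _16 = SizedOp<Backend, 16>;
  using _32 = SizedOp<Backend, 32>;
  if (size == 0)
    return;
  if (size == 1)
    return _1::copy(dst, src);
  if (size <= 4)
    return HeadTail<_2>::copy(dst, src, size); // 2 <= size <= 4
  if (size <= 8)
    return HeadTail<_4>::copy(dst, src, size); // 5 <= size <= 8
  if (size <= 16)
    return HeadTail<_8>::copy(dst, src, size); // 9 <= size <= 16
  if (size <= 32)
    return HeadTail<_16>::copy(dst, src, size); // 17 <= size <= 32
  if (size <= 64)
    return HeadTail<_32>::copy(dst, src, size); // 33 <= size <= 64
  // Align the destination, then loop 32 bytes at a time; the Tail inside Loop
  // covers the last (possibly overlapping) block.
  return Align<_32, Arg::Dst>::Then<Loop<_32>>::copy(dst, src, size);
}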