From: Andre Vieira Date: Thu, 23 Sep 2021 08:19:47 +0000 (+0100) Subject: [libc] Add optimized memset for AArch64 X-Git-Tag: upstream/15.0.7~30757 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8b87c3d5736730cda1d8856098621029b759f3d1;p=platform%2Fupstream%2Fllvm.git [libc] Add optimized memset for AArch64 Differential Revision: https://reviews.llvm.org/D107848 --- diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index b652bef..aa22fa0 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -341,7 +341,7 @@ endif() function(add_memset memset_name) add_implementation(memset ${memset_name} - SRCS ${LIBC_SOURCE_DIR}/src/string/memset.cpp + SRCS ${MEMSET_SRC} HDRS ${LIBC_SOURCE_DIR}/src/string/memset.h DEPENDS .memory_utils.memory_utils @@ -353,13 +353,20 @@ function(add_memset memset_name) endfunction() if(${LIBC_TARGET_ARCHITECTURE_IS_X86}) + set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/memset.cpp) add_memset(memset_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2) add_memset(memset_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2) add_memset(memset_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2) add_memset(memset_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F) add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_memset(memset) +elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) + set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/aarch64/memset.cpp) + add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE} + COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0") + add_memset(memset COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0") else() + set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/memset.cpp) add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_memset(memset) endif() diff --git a/libc/src/string/aarch64/memset.cpp b/libc/src/string/aarch64/memset.cpp new file mode 100644 
index 0000000..fa66ffe --- /dev/null +++ b/libc/src/string/aarch64/memset.cpp @@ -0,0 +1,49 @@ +//===-- Implementation of memset ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memset.h" +#include "src/__support/common.h" +#include "src/string/memory_utils/memset_utils.h" + +namespace __llvm_libc { + +using namespace __llvm_libc::aarch64_memset; + +inline static void AArch64Memset(char *dst, int value, size_t count) { + if (count == 0) + return; + if (count <= 3) { + SplatSet<_1>(dst, value); + if (count > 1) + SplatSet<Tail<_2>>(dst, value, count); + return; + } + if (count <= 8) + return SplatSet<HeadTail<_4>>(dst, value, count); + if (count <= 16) + return SplatSet<HeadTail<_8>>(dst, value, count); + if (count <= 32) + return SplatSet<HeadTail<_16>>(dst, value, count); + if (count <= 96) { + SplatSet<_32>(dst, value); + if (count <= 64) + return SplatSet<Tail<_32>>(dst, value, count); + SplatSet<Skip<32>::Then<_32>>(dst, value); + SplatSet<Tail<_32>>(dst, value, count); + return; + } + if (count < 448 || value != 0 || !AArch64ZVA(dst, count)) + return SplatSet<Align<_16, Arg::_1>::Then<Loop<_64>>>(dst, value, count); +} + +LLVM_LIBC_FUNCTION(void *, memset, (void *dst, int value, size_t count)) { + AArch64Memset((char *)dst, value, count); + return dst; +} + +} // namespace __llvm_libc diff --git a/libc/src/string/memory_utils/elements_aarch64.h b/libc/src/string/memory_utils/elements_aarch64.h index 7f722af..366efc1 100644 --- a/libc/src/string/memory_utils/elements_aarch64.h +++ b/libc/src/string/memory_utils/elements_aarch64.h @@ -18,6 +18,54 @@ #endif namespace __llvm_libc { +namespace aarch64_memset { +#ifdef __ARM_NEON +struct Splat8 { + static constexpr size_t kSize = 8; + static void SplatSet(char *dst, const unsigned char value) { + 
vst1_u8((uint8_t *)dst, vdup_n_u8(value)); + } +}; + +struct Splat16 { + static constexpr size_t kSize = 16; + static void SplatSet(char *dst, const unsigned char value) { + vst1q_u8((uint8_t *)dst, vdupq_n_u8(value)); + } +}; + +using _8 = Splat8; +using _16 = Splat16; +#else +using _8 = __llvm_libc::scalar::_8; +using _16 = Repeated<_8, 2>; +#endif // __ARM_NEON + +using _1 = __llvm_libc::scalar::_1; +using _2 = __llvm_libc::scalar::_2; +using _3 = __llvm_libc::scalar::_3; +using _4 = __llvm_libc::scalar::_4; +using _32 = Chained<_16, _16>; +using _64 = Chained<_32, _32>; + +struct ZVA { + static constexpr size_t kSize = 64; + static void SplatSet(char *dst, const unsigned char value) { + asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory"); + } +}; + +inline static bool AArch64ZVA(char *dst, size_t count) { + uint64_t zva_val; + asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val)); + if ((zva_val & 31) != 4) + return false; + SplatSet<Align<_64, Arg::_1>::Then<Loop<ZVA, _64>>>(dst, 0, count); + return true; +} + +} // namespace aarch64_memset + namespace aarch64 { using _1 = __llvm_libc::scalar::_1;