From 09b53121c323f260ab97cecd067d4e7b3be1bf7c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Micha=C5=82=20G=C3=B3rny?= Date: Thu, 31 Mar 2022 09:55:25 +0200 Subject: [PATCH] [compiler-rt] [scudo] Use -mcrc32 on x86 when available Update the hardware CRC32 logic in scudo to support using `-mcrc32` instead of `-msse4.2`. The CRC32 intrinsics use the former flag in the newer compiler versions, e.g. in clang since 12fa608af44a. With these compilers, passing `-msse4.2` is insufficient to enable the instructions and causes build failures when `-march` does not enable CRC32: /var/tmp/portage/sys-libs/compiler-rt-sanitizers-14.0.0/work/compiler-rt/lib/scudo/scudo_crc32.cpp:20:10: error: always_inline function '_mm_crc32_u32' requires target feature 'crc32', but would be inlined into function 'computeHardwareCRC32' that is compiled without support for 'crc32' return CRC32_INTRINSIC(Crc, Data); ^ /var/tmp/portage/sys-libs/compiler-rt-sanitizers-14.0.0/work/compiler-rt/lib/scudo/scudo_crc32.h:27:27: note: expanded from macro 'CRC32_INTRINSIC' # define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64) ^ /var/tmp/portage/sys-libs/compiler-rt-sanitizers-14.0.0/work/compiler-rt/lib/scudo/../sanitizer_common/sanitizer_platform.h:132:36: note: expanded from macro 'FIRST_32_SECOND_64' # define FIRST_32_SECOND_64(a, b) (a) ^ 1 error generated. For backwards compatibility, use `-mcrc32` when available and fall back to `-msse4.2`. The `` header remains in use as it still works and is compatible with GCC, while clang's `` is not. Originally reported in https://bugs.gentoo.org/835870. Differential Revision: https://reviews.llvm.org/D122789 --- compiler-rt/cmake/config-ix.cmake | 1 + compiler-rt/lib/scudo/CMakeLists.txt | 7 +++++-- compiler-rt/lib/scudo/scudo_allocator.cpp | 4 ++-- compiler-rt/lib/scudo/scudo_crc32.cpp | 4 ++-- compiler-rt/lib/scudo/scudo_crc32.h | 9 +++++---- compiler-rt/lib/scudo/standalone/CMakeLists.txt | 7 +++++-- compiler-rt/lib/scudo/standalone/checksum.h | 5 +++-- compiler-rt/lib/scudo/standalone/chunk.h | 4 ++-- compiler-rt/lib/scudo/standalone/crc32_hw.cpp | 4 ++-- 9 files changed, 27 insertions(+), 18 deletions(-) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 3ad2d6a..33e6e1c 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -78,6 +78,7 @@ check_cxx_compiler_flag(-fno-profile-generate COMPILER_RT_HAS_FNO_PROFILE_GENERA check_cxx_compiler_flag(-fno-profile-instr-generate COMPILER_RT_HAS_FNO_PROFILE_INSTR_GENERATE_FLAG) check_cxx_compiler_flag(-fno-profile-instr-use COMPILER_RT_HAS_FNO_PROFILE_INSTR_USE_FLAG) check_cxx_compiler_flag(-fno-coverage-mapping COMPILER_RT_HAS_FNO_COVERAGE_MAPPING_FLAG) +check_cxx_compiler_flag("-Werror -mcrc32" COMPILER_RT_HAS_MCRC32_FLAG) check_cxx_compiler_flag("-Werror -msse3" COMPILER_RT_HAS_MSSE3_FLAG) check_cxx_compiler_flag("-Werror -msse4.2" COMPILER_RT_HAS_MSSE4_2_FLAG) check_cxx_compiler_flag(--sysroot=. COMPILER_RT_HAS_SYSROOT_FLAG) diff --git a/compiler-rt/lib/scudo/CMakeLists.txt b/compiler-rt/lib/scudo/CMakeLists.txt index 995e853..31a6976 100644 --- a/compiler-rt/lib/scudo/CMakeLists.txt +++ b/compiler-rt/lib/scudo/CMakeLists.txt @@ -86,8 +86,11 @@ set(SCUDO_HEADERS scudo_tsd_shared.inc scudo_utils.h) -# Enable the SSE 4.2 instruction set for scudo_crc32.cpp, if available. -if (COMPILER_RT_HAS_MSSE4_2_FLAG) +# Enable the necessary instruction set for scudo_crc32.cpp, if available. +# Newer compiler versions use -mcrc32 rather than -msse4.2. +if (COMPILER_RT_HAS_MCRC32_FLAG) + set_source_files_properties(scudo_crc32.cpp PROPERTIES COMPILE_FLAGS -mcrc32) +elseif (COMPILER_RT_HAS_MSSE4_2_FLAG) set_source_files_properties(scudo_crc32.cpp PROPERTIES COMPILE_FLAGS -msse4.2) endif() diff --git a/compiler-rt/lib/scudo/scudo_allocator.cpp b/compiler-rt/lib/scudo/scudo_allocator.cpp index 5b6ac8b..6a6b577 100644 --- a/compiler-rt/lib/scudo/scudo_allocator.cpp +++ b/compiler-rt/lib/scudo/scudo_allocator.cpp @@ -49,7 +49,7 @@ inline u32 computeCRC32(u32 Crc, uptr Value, uptr *Array, uptr ArraySize) { // as opposed to only for scudo_crc32.cpp. This means that other hardware // specific instructions were likely emitted at other places, and as a // result there is no reason to not use it here. -#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) Crc = CRC32_INTRINSIC(Crc, Value); for (uptr i = 0; i < ArraySize; i++) Crc = CRC32_INTRINSIC(Crc, Array[i]); @@ -65,7 +65,7 @@ inline u32 computeCRC32(u32 Crc, uptr Value, uptr *Array, uptr ArraySize) { for (uptr i = 0; i < ArraySize; i++) Crc = computeSoftwareCRC32(Crc, Array[i]); return Crc; -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) } static BackendT &getBackend(); diff --git a/compiler-rt/lib/scudo/scudo_crc32.cpp b/compiler-rt/lib/scudo/scudo_crc32.cpp index 8747350..137c44c 100644 --- a/compiler-rt/lib/scudo/scudo_crc32.cpp +++ b/compiler-rt/lib/scudo/scudo_crc32.cpp @@ -15,10 +15,10 @@ namespace __scudo { -#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) u32 computeHardwareCRC32(u32 Crc, uptr Data) { return CRC32_INTRINSIC(Crc, Data); } -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) } // namespace __scudo diff --git a/compiler-rt/lib/scudo/scudo_crc32.h b/compiler-rt/lib/scudo/scudo_crc32.h index ef40595..8cacdee 100644 --- a/compiler-rt/lib/scudo/scudo_crc32.h +++ b/compiler-rt/lib/scudo/scudo_crc32.h @@ -16,13 +16,14 @@ #include "sanitizer_common/sanitizer_internal_defs.h" // Hardware CRC32 is supported at compilation via the following: -// - for i386 & x86_64: -msse4.2 +// - for i386 & x86_64: -mcrc32 (earlier: -msse4.2) // - for ARM & AArch64: -march=armv8-a+crc or -mcrc // An additional check must be performed at runtime as well to make sure the // emitted instructions are valid on the target host. -#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) -# ifdef __SSE4_2__ +#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +# if defined(__CRC32__) || defined(__SSE4_2__) +// NB: clang has but GCC does not # include # define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64) # endif @@ -30,7 +31,7 @@ # include # define CRC32_INTRINSIC FIRST_32_SECOND_64(__crc32cw, __crc32cd) # endif -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) namespace __scudo { diff --git a/compiler-rt/lib/scudo/standalone/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/CMakeLists.txt index 739f131..ae5c3547 100644 --- a/compiler-rt/lib/scudo/standalone/CMakeLists.txt +++ b/compiler-rt/lib/scudo/standalone/CMakeLists.txt @@ -97,8 +97,11 @@ set(SCUDO_SOURCES string_utils.cpp ) -# Enable the SSE 4.2 instruction set for crc32_hw.cpp, if available. -if (COMPILER_RT_HAS_MSSE4_2_FLAG) +# Enable the necessary instruction set for scudo_crc32.cpp, if available. +# Newer compiler versions use -mcrc32 rather than -msse4.2. +if (COMPILER_RT_HAS_MCRC32_FLAG) + set_source_files_properties(crc32_hw.cpp PROPERTIES COMPILE_FLAGS -mcrc32) +elseif (COMPILER_RT_HAS_MSSE4_2_FLAG) set_source_files_properties(crc32_hw.cpp PROPERTIES COMPILE_FLAGS -msse4.2) endif() diff --git a/compiler-rt/lib/scudo/standalone/checksum.h b/compiler-rt/lib/scudo/standalone/checksum.h index a63b1b4..df32995 100644 --- a/compiler-rt/lib/scudo/standalone/checksum.h +++ b/compiler-rt/lib/scudo/standalone/checksum.h @@ -12,12 +12,13 @@ #include "internal_defs.h" // Hardware CRC32 is supported at compilation via the following: -// - for i386 & x86_64: -msse4.2 +// - for i386 & x86_64: -mcrc32 (earlier: -msse4.2) // - for ARM & AArch64: -march=armv8-a+crc or -mcrc // An additional check must be performed at runtime as well to make sure the // emitted instructions are valid on the target host. -#ifdef __SSE4_2__ +#if defined(__CRC32__) || defined(__SSE4_2__) +// NB: clang has but GCC does not #include #define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64) #endif diff --git a/compiler-rt/lib/scudo/standalone/chunk.h b/compiler-rt/lib/scudo/standalone/chunk.h index 69b8e1b..0581420 100644 --- a/compiler-rt/lib/scudo/standalone/chunk.h +++ b/compiler-rt/lib/scudo/standalone/chunk.h @@ -25,7 +25,7 @@ inline u16 computeChecksum(u32 Seed, uptr Value, uptr *Array, uptr ArraySize) { // as opposed to only for crc32_hw.cpp. This means that other hardware // specific instructions were likely emitted at other places, and as a result // there is no reason to not use it here. -#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) u32 Crc = static_cast(CRC32_INTRINSIC(Seed, Value)); for (uptr I = 0; I < ArraySize; I++) Crc = static_cast(CRC32_INTRINSIC(Crc, Array[I])); @@ -42,7 +42,7 @@ inline u16 computeChecksum(u32 Seed, uptr Value, uptr *Array, uptr ArraySize) { Checksum = computeBSDChecksum(Checksum, Array[I]); return Checksum; } -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) } namespace Chunk { diff --git a/compiler-rt/lib/scudo/standalone/crc32_hw.cpp b/compiler-rt/lib/scudo/standalone/crc32_hw.cpp index 62841ba..d13c615 100644 --- a/compiler-rt/lib/scudo/standalone/crc32_hw.cpp +++ b/compiler-rt/lib/scudo/standalone/crc32_hw.cpp @@ -10,10 +10,10 @@ namespace scudo { -#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) u32 computeHardwareCRC32(u32 Crc, uptr Data) { return static_cast(CRC32_INTRINSIC(Crc, Data)); } -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) } // namespace scudo -- 2.7.4