x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation
author Aurelien Jarno <aurelien@aurel32.net>
Mon, 3 Oct 2022 21:46:11 +0000 (23:46 +0200)
committer Aurelien Jarno <aurelien@aurel32.net>
Mon, 3 Oct 2022 21:46:11 +0000 (23:46 +0200)
The AVX2 memrchr implementation uses the 'shlxl' instruction, which
belongs to the BMI2 CPU feature, and the 'lzcnt' instruction, which
belongs to the LZCNT CPU feature.

Fixes: af5306a735eb ("x86: Optimize memrchr-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
sysdeps/x86/isa-level.h
sysdeps/x86_64/multiarch/ifunc-avx2.h
sysdeps/x86_64/multiarch/ifunc-impl-list.c

index 3c4480a..bbb90f5 100644 (file)
@@ -80,6 +80,7 @@
 #define AVX_X86_ISA_LEVEL 3
 #define AVX2_X86_ISA_LEVEL 3
 #define BMI2_X86_ISA_LEVEL 3
+#define LZCNT_X86_ISA_LEVEL 3
 #define MOVBE_X86_ISA_LEVEL 3
 
 /* ISA level >= 2 guaranteed includes.  */
index a57a995..f174108 100644 (file)
@@ -37,6 +37,7 @@ IFUNC_SELECTOR (void)
 
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
                                      AVX_Fast_Unaligned_Load, ))
     {
index 7c84963..ec1c5b5 100644 (file)
@@ -209,13 +209,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, memrchr,
              X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr,
                                     (CPU_FEATURE_USABLE (AVX512VL)
-                                     && CPU_FEATURE_USABLE (AVX512BW)),
+                                     && CPU_FEATURE_USABLE (AVX512BW)
+                                     && CPU_FEATURE_USABLE (BMI2)
+                                     && CPU_FEATURE_USABLE (LZCNT)),
                                     __memrchr_evex)
              X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
-                                    CPU_FEATURE_USABLE (AVX2),
+                                    (CPU_FEATURE_USABLE (AVX2)
+                                     && CPU_FEATURE_USABLE (BMI2)
+                                     && CPU_FEATURE_USABLE (LZCNT)),
                                     __memrchr_avx2)
              X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
                                     (CPU_FEATURE_USABLE (AVX2)
+                                     && CPU_FEATURE_USABLE (BMI2)
+                                     && CPU_FEATURE_USABLE (LZCNT)
                                      && CPU_FEATURE_USABLE (RTM)),
                                     __memrchr_avx2_rtm)
              /* ISA V2 wrapper for SSE2 implementation because the SSE2