From 08cf777f9e7f6d826658a99c7d77a359f73a45bf Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Thu, 26 Jan 2012 09:45:54 -0500 Subject: [PATCH] Really fix AVX tests There is no problem with strcmp, it doesn't use the YMM registers. The math routines might since gcc perhaps generates such code. Introduce bit_YMM_USBALE and use it in the math routines. --- ChangeLog | 11 ++++++++++- sysdeps/x86_64/fpu/multiarch/e_atan2.c | 2 +- sysdeps/x86_64/fpu/multiarch/e_exp.c | 2 +- sysdeps/x86_64/fpu/multiarch/e_log.c | 2 +- sysdeps/x86_64/fpu/multiarch/s_atan.c | 3 ++- sysdeps/x86_64/fpu/multiarch/s_sin.c | 6 ++++-- sysdeps/x86_64/fpu/multiarch/s_tan.c | 3 ++- sysdeps/x86_64/multiarch/init-arch.c | 14 +++++++------- sysdeps/x86_64/multiarch/init-arch.h | 26 +++++++++++++------------- 9 files changed, 41 insertions(+), 28 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1e2284f..b413d27 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,8 +2,17 @@ [BZ #13583] * sysdeps/x86_64/multiarch/init-arch.h: Define bit_OSXSAVE. + Clean up HAS_* macros. * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): If - bit_AVX is set also check OSXAVE/XCR0 and reset bit_AVX if necessary. + bit_AVX is set also check OSXAVE/XCR0 and set bit_YMM_Usable if + possible. + * sysdeps/x86_64/fpu/multiarch/e_atan2.c: Use HAS_YMM_USABLE, not + HAS_AVX. + * sysdeps/x86_64/fpu/multiarch/e_exp.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/e_log.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_atan.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_sin.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_tan.c: Likewise. 2012-01-25 Joseph Myers diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c index 6867c6e..3a615fc 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c @@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double); libm_ifunc (__ieee754_atan2, HAS_FMA4 ? __ieee754_atan2_fma4 - : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); + : (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); strong_alias (__ieee754_atan2, __atan2_finite) # define __ieee754_atan2 __ieee754_atan2_sse2 diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c index 3c65028..7b2320a 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c @@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double); libm_ifunc (__ieee754_exp, HAS_FMA4 ? __ieee754_exp_fma4 - : (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2)); + : (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2)); strong_alias (__ieee754_exp, __exp_finite) # define __ieee754_exp __ieee754_exp_sse2 diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c index 3b468d0..ab277d6 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_log.c +++ b/sysdeps/x86_64/fpu/multiarch/e_log.c @@ -14,7 +14,7 @@ extern double __ieee754_log_fma4 (double); libm_ifunc (__ieee754_log, HAS_FMA4 ? __ieee754_log_fma4 - : (HAS_AVX ? __ieee754_log_avx + : (HAS_YMM_USABLE ? __ieee754_log_avx : __ieee754_log_sse2)); strong_alias (__ieee754_log, __log_finite) diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c index 3160201..78c7e09 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_atan.c +++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c @@ -12,7 +12,8 @@ extern double __atan_fma4 (double); # define __atan_fma4 ((void *) 0) # endif -libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2); +libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 : + HAS_YMM_USABLE ? __atan_avx : __atan_sse2)); # define atan __atan_sse2 #endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c index 1ba9dbc..417acd0 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_sin.c +++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c @@ -17,10 +17,12 @@ extern double __sin_fma4 (double); # define __sin_fma4 ((void *) 0) # endif -libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2); +libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 : + HAS_YMM_USABLE ? __cos_avx : __cos_sse2)); weak_alias (__cos, cos) -libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2); +libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 : + HAS_YMM_USABLE ? __sin_avx : __sin_sse2)); weak_alias (__sin, sin) # define __cos __cos_sse2 diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c index 8f6601e..3047155 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_tan.c +++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c @@ -12,7 +12,8 @@ extern double __tan_fma4 (double); # define __tan_fma4 ((void *) 0) # endif -libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2); +libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 : + HAS_YMM_USABLE ? __tan_avx : __tan_sse2)); # define tan __tan_sse2 #endif diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c index 4fabbee..76d146c 100644 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -147,13 +147,13 @@ __init_cpu_features (void) if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX) { /* Reset the AVX bit in case OSXSAVE is disabled. */ - if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0 - || ({ unsigned int xcrlow; - unsigned int xcrhigh; - asm ("xgetbv" - : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); - (xcrlow & 6) != 6; })) - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX; + if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0 + && ({ unsigned int xcrlow; + unsigned int xcrhigh; + asm ("xgetbv" + : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); + (xcrlow & 6) == 6; })) + __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable; } __cpu_features.family = family; diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h index 408e5ae..2dc75ab 100644 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -22,6 +22,7 @@ #define bit_Prefer_SSE_for_memop (1 << 3) #define bit_Fast_Unaligned_Load (1 << 4) #define bit_Prefer_PMINUB_for_stringop (1 << 5) +#define bit_YMM_Usable (1 << 6) #define bit_SSE2 (1 << 26) #define bit_SSSE3 (1 << 9) @@ -49,6 +50,7 @@ # define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE # define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE +# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE #else /* __ASSEMBLER__ */ @@ -93,7 +95,7 @@ extern struct cpu_features extern void __init_cpu_features (void) attribute_hidden; -#define INIT_ARCH()\ +# define INIT_ARCH() \ do \ if (__cpu_features.kind == arch_kind_unknown) \ __init_cpu_features (); \ @@ -126,23 +128,21 @@ extern const struct cpu_features *__get_cpu_features (void) # define index_Slow_BSF FEATURE_INDEX_1 # define index_Prefer_SSE_for_memop FEATURE_INDEX_1 # define index_Fast_Unaligned_Load FEATURE_INDEX_1 +# define index_YMM_Usable FEATURE_INDEX_1 -#define HAS_ARCH_FEATURE(idx, bit) \ - ((__get_cpu_features ()->feature[idx] & (bit)) != 0) +# define HAS_ARCH_FEATURE(name) \ + ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) -#define HAS_FAST_REP_STRING \ - HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String) +# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String) -#define HAS_FAST_COPY_BACKWARD \ - HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward) +# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward) -#define HAS_SLOW_BSF \ - HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF) +# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF) -#define HAS_PREFER_SSE_FOR_MEMOP \ - HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop) +# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop) -#define HAS_FAST_UNALIGNED_LOAD \ - HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load) +# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load) + +# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable) #endif /* __ASSEMBLER__ */ -- 2.7.4