From ed72b6545f6d20f2d29ed71d65394d4a75ad358e Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Thu, 20 Oct 2011 22:43:15 -0400 Subject: [PATCH] Check for FMA4 support and generate appropriate fma functions --- ChangeLog | 9 +++++ config.h.in | 3 ++ sysdeps/i386/configure | 63 ++++++++++++++++++++++++----------- sysdeps/i386/configure.in | 11 ++++++ sysdeps/x86_64/fpu/multiarch/s_fma.c | 22 ++++++++++-- sysdeps/x86_64/fpu/multiarch/s_fmaf.c | 22 ++++++++++-- sysdeps/x86_64/multiarch/init-arch.c | 10 +++++- sysdeps/x86_64/multiarch/init-arch.h | 2 ++ 8 files changed, 115 insertions(+), 27 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6cf9906..b2d629b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,14 @@ 2011-10-20 Ulrich Drepper + * sysdeps/i386/configure.in: Test for -mfma4 option. + * config.h.in: Add HAVE_FMA4_SUPPORT entry. + * sysdeps/x86_64/multiarch/init-arch.h: Define HAS_FMA4 and + COMMON_CPUID_INDEX_80000001. + * sysdeps/x86_64/multiarch/init-arch.c: Read 80000001 leaf for AMD. + * sysdeps/x86_64/fpu/multiarch/s_fma.c: Test for FMA4 support and + use it if FMA3 is not supported. + * sysdeps/x86_64/fpu/multiarch/s_fmaf.c: Likewise. + * sysdeps/x86_64/multiarch/s_fma.c: Moved to ../fpu/multiarch. * sysdeps/x86_64/multiarch/s_fmaf.c: Likewise. diff --git a/config.h.in b/config.h.in index 7b50956..7db6630 100644 --- a/config.h.in +++ b/config.h.in @@ -118,6 +118,9 @@ /* Define if gcc supports AVX. */ #undef HAVE_AVX_SUPPORT +/* Define if gcc supports FMA4. */ +#undef HAVE_FMA4_SUPPORT + /* Define if the compiler's exception support is based on libunwind. 
*/ #undef HAVE_CC_WITH_LIBUNWIND diff --git a/sysdeps/i386/configure b/sysdeps/i386/configure index adffe3f..ae494e2 100644 --- a/sysdeps/i386/configure +++ b/sysdeps/i386/configure @@ -167,7 +167,7 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi - eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_compile @@ -193,7 +193,7 @@ $as_echo "$ac_try_echo"; } >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } >/dev/null && { + test $ac_status = 0; } > conftest.i && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then : @@ -204,7 +204,7 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi - eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_cpp @@ -217,10 +217,10 @@ fi ac_fn_c_check_header_mongrel () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if eval "test \"\${$3+set}\"" = set; then : + if eval \${$3+:} false; then : { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... " >&6; } -if eval "test \"\${$3+set}\"" = set; then : +if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 fi eval ac_res=\$$3 @@ -256,7 +256,7 @@ if ac_fn_c_try_cpp "$LINENO"; then : else ac_header_preproc=no fi -rm -f conftest.err conftest.$ac_ext +rm -f conftest.err conftest.i conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 $as_echo "$ac_header_preproc" >&6; } @@ -283,7 +283,7 @@ $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... 
" >&6; } -if eval "test \"\${$3+set}\"" = set; then : +if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 else eval "$3=\$ac_header_compiler" @@ -292,7 +292,7 @@ eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } fi - eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_header_mongrel @@ -333,7 +333,7 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=$ac_status fi rm -rf conftest.dSYM conftest_ipa8_conftest.oo - eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_run @@ -347,7 +347,7 @@ ac_fn_c_check_header_compile () as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... " >&6; } -if eval "test \"\${$3+set}\"" = set; then : +if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -365,7 +365,7 @@ fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } - eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_header_compile # This file is generated from configure.in by Autoconf. DO NOT EDIT! @@ -375,7 +375,7 @@ $as_echo "$ac_res" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 $as_echo_n "checking for grep that handles long lines and -e... 
" >&6; } -if test "${ac_cv_path_GREP+set}" = set; then : +if ${ac_cv_path_GREP+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$GREP"; then @@ -438,7 +438,7 @@ $as_echo "$ac_cv_path_GREP" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 $as_echo_n "checking for egrep... " >&6; } -if test "${ac_cv_path_EGREP+set}" = set; then : +if ${ac_cv_path_EGREP+:} false; then : $as_echo_n "(cached) " >&6 else if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 @@ -505,7 +505,7 @@ $as_echo "$ac_cv_path_EGREP" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 $as_echo_n "checking for ANSI C header files... " >&6; } -if test "${ac_cv_header_stdc+set}" = set; then : +if ${ac_cv_header_stdc+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -633,7 +633,7 @@ done ac_fn_c_check_header_mongrel "$LINENO" "cpuid.h" "ac_cv_header_cpuid_h" "$ac_includes_default" -if test "x$ac_cv_header_cpuid_h" = x""yes; then : +if test "x$ac_cv_header_cpuid_h" = xyes; then : else as_fn_error $? "gcc must provide the header" "$LINENO" 5 @@ -643,7 +643,7 @@ fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking if -g produces usable source locations for assembler-with-cpp" >&5 $as_echo_n "checking if -g produces usable source locations for assembler-with-cpp... " >&6; } -if test "${libc_cv_cpp_asm_debuginfo+set}" = set; then : +if ${libc_cv_cpp_asm_debuginfo+:} false; then : $as_echo_n "(cached) " >&6 else cat > conftest.S <&5 $as_echo_n "checking for SSE4 support... " >&6; } -if test "${libc_cv_cc_sse4+set}" = set; then : +if ${libc_cv_cc_sse4+:} false; then : $as_echo_n "(cached) " >&6 else if { ac_try='${CC-cc} -msse4 -xc /dev/null -S -o /dev/null' @@ -716,7 +716,7 @@ fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler -mtune=i686 support" >&5 $as_echo_n "checking for assembler -mtune=i686 support... 
" >&6; } -if test "${libc_cv_as_i686+set}" = set; then : +if ${libc_cv_as_i686+:} false; then : $as_echo_n "(cached) " >&6 else if { ac_try='${CC-cc} -Wa,-mtune=i686 -xc /dev/null -S -o /dev/null' @@ -735,7 +735,7 @@ $as_echo "$libc_cv_as_i686" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX support" >&5 $as_echo_n "checking for AVX support... " >&6; } -if test "${libc_cv_cc_avx+set}" = set; then : +if ${libc_cv_cc_avx+:} false; then : $as_echo_n "(cached) " >&6 else if { ac_try='${CC-cc} -mavx -xc /dev/null -S -o /dev/null' @@ -756,9 +756,32 @@ if test $libc_cv_cc_avx = yes; then fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5 +$as_echo_n "checking for FMA4 support... " >&6; } +if ${libc_cv_cc_fma4+:} false; then : + $as_echo_n "(cached) " >&6 +else + if { ac_try='${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + libc_cv_cc_fma4=yes +else + libc_cv_cc_fma4=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_fma4" >&5 +$as_echo "$libc_cv_cc_fma4" >&6; } +if test $libc_cv_cc_fma4 = yes; then + $as_echo "#define HAVE_FMA4_SUPPORT 1" >>confdefs.h + +fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -mno-vzeroupper support" >&5 $as_echo_n "checking for -mno-vzeroupper support... " >&6; } -if test "${libc_cv_cc_novzeroupper+set}" = set; then : +if ${libc_cv_cc_novzeroupper+:} false; then : $as_echo_n "(cached) " >&6 else if { ac_try='${CC-cc} -mno-vzeroupper -xc /dev/null -S -o /dev/null' diff --git a/sysdeps/i386/configure.in b/sysdeps/i386/configure.in index 67fd1d7..5a9840e 100644 --- a/sysdeps/i386/configure.in +++ b/sysdeps/i386/configure.in @@ -67,6 +67,17 @@ if test $libc_cv_cc_avx = yes; then AC_DEFINE(HAVE_AVX_SUPPORT) fi +dnl Check if -mfma4 works. 
+AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl +if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then + libc_cv_cc_fma4=yes +else + libc_cv_cc_fma4=no +fi]) +if test $libc_cv_cc_fma4 = yes; then + AC_DEFINE(HAVE_FMA4_SUPPORT) +fi + dnl Check if -mno-vzeroupper works. AC_CACHE_CHECK(for -mno-vzeroupper support, libc_cv_cc_novzeroupper, [dnl if AC_TRY_COMMAND([${CC-cc} -mno-vzeroupper -xc /dev/null -S -o /dev/null]); then diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c index 9a680c6..06f2d00 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fma.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c @@ -1,5 +1,5 @@ /* FMA version of fma. - Copyright (C) 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -28,13 +28,29 @@ extern double __fma_sse2 (double x, double y, double z) attribute_hidden; static double -__fma_fma (double x, double y, double z) +__fma_fma3 (double x, double y, double z) { asm ("vfmadd213sd %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z)); return x; } -libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_sse2); + +# ifdef HAVE_FMA4_SUPPORT +static double +__fma_fma4 (double x, double y, double z) +{ + asm ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "xm" (z)); /* Computes x * y + z with the four-operand FMA4 form. FMA4 encodes at most one memory source operand, so Y must stay in a register and only Z may be "xm". */ + return x; +} +# else +# undef HAS_FMA4 +# define HAS_FMA4 0 +# define __fma_fma4 NULL +# endif + + +libm_ifunc (__fma, HAS_FMA + ? __fma_fma3 : (HAS_FMA4 ? __fma_fma4 : __fma_sse2)); weak_alias (__fma, fma) # define __fma __fma_sse2 diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c index 85ef65a..53c08de 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c @@ -1,5 +1,5 @@ /* FMA version of fmaf. - Copyright (C) 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. 
This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -27,13 +27,29 @@ extern float __fmaf_sse2 (float x, float y, float z) attribute_hidden; static float -__fmaf_fma (float x, float y, float z) +__fmaf_fma3 (float x, float y, float z) { asm ("vfmadd213ss %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z)); return x; } -libm_ifunc (__fmaf, HAS_FMA ? __fmaf_fma : __fmaf_sse2); + +# ifdef HAVE_FMA4_SUPPORT +static float +__fmaf_fma4 (float x, float y, float z) +{ + asm ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "xm" (z)); /* Computes x * y + z with the four-operand FMA4 form. FMA4 encodes at most one memory source operand, so Y must stay in a register and only Z may be "xm". */ + return x; +} +# else +# undef HAS_FMA4 +# define HAS_FMA4 0 +# define __fmaf_fma4 NULL +# endif + + +libm_ifunc (__fmaf, HAS_FMA + ? __fmaf_fma3 : (HAS_FMA4 ? __fmaf_fma4 : __fmaf_sse2)); weak_alias (__fmaf, fmaf) # define __fmaf __fmaf_sse2 diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c index 0a145ca..3fde5d9 100644 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -86,7 +86,7 @@ __init_cpu_features (void) default: /* Unknown family 0x06 processors. Assuming this is one - of Core i3/i5/i7 processors if AVX is available. */ + of Core i3/i5/i7 processors if AVX is available. 
*/ if ((ecx & bit_AVX) == 0) break; @@ -131,6 +131,14 @@ __init_cpu_features (void) if ((ecx & 0x200)) __cpu_features.feature[index_Prefer_SSE_for_memop] |= bit_Prefer_SSE_for_memop; + + __cpuid (0x80000000, eax, ebx, ecx, edx); + if (eax >= 0x80000001) + __cpuid (0x80000001, + __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax, + __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx, + __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx, + __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx); } else kind = arch_kind_other; diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h index e8d48c2..2fb6f75 100644 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -53,6 +53,7 @@ enum { COMMON_CPUID_INDEX_1 = 0, + COMMON_CPUID_INDEX_80000001, /* for AMD */ /* Keep the following line at the end. */ COMMON_CPUID_INDEX_MAX }; @@ -113,6 +114,7 @@ extern const struct cpu_features *__get_cpu_features (void) # define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 19) # define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20) # define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12) +# define HAS_FMA4 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, 16) # define index_Fast_Rep_String FEATURE_INDEX_1 # define index_Fast_Copy_Backward FEATURE_INDEX_1 -- 2.7.4