* config.h.in (HAVE_AVX2_SUPPORT): New #undef.
* sysdeps/i386/configure.ac: Set HAVE_AVX2_SUPPORT and
config-cflags-avx2.
* sysdeps/x86_64/configure.ac: Likewise.
* sysdeps/i386/configure: Regenerated.
* sysdeps/x86_64/configure: Likewise.
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
memset-avx2 only if config-cflags-avx2 is yes.
* sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list):
List memset_chk and memset implementations only if
HAVE_AVX2_SUPPORT is defined.
* sysdeps/x86_64/multiarch/memset.S: Define multiple versions
only if HAVE_AVX2_SUPPORT is defined.
* sysdeps/x86_64/multiarch/memset_chk.S: Likewise.
+2014-07-14 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config.h.in (HAVE_AVX2_SUPPORT): New #undef.
+ * sysdeps/i386/configure.ac: Set HAVE_AVX2_SUPPORT and
+ config-cflags-avx2.
+ * sysdeps/x86_64/configure.ac: Likewise.
+ * sysdeps/i386/configure: Regenerated.
+ * sysdeps/x86_64/configure: Likewise.
+ * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
+ memset-avx2 only if config-cflags-avx2 is yes.
+ * sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list):
+ List memset_chk and memset implementations only if
+ HAVE_AVX2_SUPPORT is defined.
+ * sysdeps/x86_64/multiarch/memset.S: Define multiple versions
+ only if HAVE_AVX2_SUPPORT is defined.
+ * sysdeps/x86_64/multiarch/memset_chk.S: Likewise.
+
2014-07-14 Alan Modra <amodra@gmail.com>
[BZ #17153]
/* Define if gcc supports FMA4. */
#undef HAVE_FMA4_SUPPORT
+/* Define if gcc supports AVX2. */
+#undef HAVE_AVX2_SUPPORT
+
/* Define if the compiler's exception support is based on libunwind. */
#undef HAVE_CC_WITH_LIBUNWIND
config_vars="$config_vars
config-cflags-novzeroupper = $libc_cv_cc_novzeroupper"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5
+$as_echo_n "checking for AVX2 support... " >&6; }
+if ${libc_cv_cc_avx2+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if { ac_try='${CC-cc} -mavx2 -xc /dev/null -S -o /dev/null'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ libc_cv_cc_avx2=yes
+else
+ libc_cv_cc_avx2=no
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx2" >&5
+$as_echo "$libc_cv_cc_avx2" >&6; }
+if test $libc_cv_cc_avx2 = yes; then
+ $as_echo "#define HAVE_AVX2_SUPPORT 1" >>confdefs.h
+
+fi
+config_vars="$config_vars
+config-cflags-avx2 = $libc_cv_cc_avx2"
+
$as_echo "#define USE_REGPARMS 1" >>confdefs.h
])
LIBC_CONFIG_VAR([config-cflags-novzeroupper], [$libc_cv_cc_novzeroupper])
+dnl Check if -mavx2 works.
+AC_CACHE_CHECK(for AVX2 support, libc_cv_cc_avx2, [dnl
+LIBC_TRY_CC_OPTION([-mavx2], [libc_cv_cc_avx2=yes], [libc_cv_cc_avx2=no])
+])
+if test $libc_cv_cc_avx2 = yes; then
+ AC_DEFINE(HAVE_AVX2_SUPPORT)
+fi
+LIBC_CONFIG_VAR([config-cflags-avx2], [$libc_cv_cc_avx2])
+
AC_DEFINE(USE_REGPARMS)
dnl It is always possible to access static and hidden symbols in an
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5
+$as_echo_n "checking for AVX2 support... " >&6; }
+if ${libc_cv_cc_avx2+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if { ac_try='${CC-cc} -mavx2 -xc /dev/null -S -o /dev/null'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ libc_cv_cc_avx2=yes
+else
+ libc_cv_cc_avx2=no
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx2" >&5
+$as_echo "$libc_cv_cc_avx2" >&6; }
+if test $libc_cv_cc_avx2 = yes; then
+ $as_echo "#define HAVE_AVX2_SUPPORT 1" >>confdefs.h
+
+fi
+config_vars="$config_vars
+config-cflags-avx2 = $libc_cv_cc_avx2"
+
$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
# work around problem with autoconf and empty lines at the end of files
AC_DEFINE(HAVE_MPX_SUPPORT)
fi
+dnl Check if -mavx2 works.
+AC_CACHE_CHECK(for AVX2 support, libc_cv_cc_avx2, [dnl
+LIBC_TRY_CC_OPTION([-mavx2], [libc_cv_cc_avx2=yes], [libc_cv_cc_avx2=no])
+])
+if test $libc_cv_cc_avx2 = yes; then
+ AC_DEFINE(HAVE_AVX2_SUPPORT)
+fi
+LIBC_CONFIG_VAR([config-cflags-avx2], [$libc_cv_cc_avx2])
+
dnl It is always possible to access static and hidden symbols in an
dnl position independent way.
AC_DEFINE(PI_STATIC_AND_HIDDEN)
strcpy-sse2-unaligned strncpy-sse2-unaligned \
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
strcat-sse2-unaligned strncat-sse2-unaligned \
- strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \
- memset-avx2
+ strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c varshift
CFLAGS-strpbrk-c.c += -msse4
CFLAGS-strspn-c.c += -msse4
endif
+
+ifeq (yes,$(config-cflags-avx2))
+sysdep_routines += memset-avx2
+endif
endif
ifeq ($(subdir),wcsmbs)
__memmove_ssse3)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
+#ifdef HAVE_AVX2_SUPPORT
/* Support sysdeps/x86_64/multiarch/memset_chk.S. */
IFUNC_IMPL (i, name, __memset_chk,
IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_sse2)
IFUNC_IMPL (i, name, memset,
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2)
IFUNC_IMPL_ADD (array, i, memset, HAS_AVX2, __memset_avx2))
+#endif
/* Support sysdeps/x86_64/multiarch/stpncpy.S. */
IFUNC_IMPL (i, name, stpncpy,
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#ifdef HAVE_AVX2_SUPPORT
#include <sysdep.h>
#include <shlib-compat.h>
#include <init-arch.h>
/* Define multiple versions only for the definition in lib. */
-#ifndef NOT_IN_libc
+# ifndef NOT_IN_libc
ENTRY(memset)
.type memset, @gnu_indirect_function
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
leaq __memset_avx2(%rip), %rax
2: ret
END(memset)
-#endif
+# endif
-#if !defined NOT_IN_libc
-# undef memset
-# define memset __memset_sse2
+# if !defined NOT_IN_libc
+# undef memset
+# define memset __memset_sse2
-# undef __memset_chk
-# define __memset_chk __memset_chk_sse2
+# undef __memset_chk
+# define __memset_chk __memset_chk_sse2
-# ifdef SHARED
+# ifdef SHARED
# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal memset calls through a PLT.
The speedup we get from using GPR instruction is likely eaten away
by the indirect call in the PLT. */
# define libc_hidden_builtin_def(name) \
.globl __GI_memset; __GI_memset = __memset_sse2
-# endif
+# endif
-# undef strong_alias
-# define strong_alias(original, alias)
+# undef strong_alias
+# define strong_alias(original, alias)
+# endif
#endif
#include "../memset.S"
/* Define multiple versions only for the definition in lib. */
#ifndef NOT_IN_libc
-# ifdef SHARED
+# if defined SHARED && defined HAVE_AVX2_SUPPORT
ENTRY(__memset_chk)
.type __memset_chk, @gnu_indirect_function
cmpl $0, __cpu_features+KIND_OFFSET(%rip)