@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix \
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _8_2_.lo,$(SIZEOBJS))
@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS))
+# x86_64 now builds a second set of 16-byte ifunc objects (_16_2_) next to
+# _16_1_ -- one object per alternative, matching IFUNC_NCOND(16) == 2 in the
+# host-config change below.
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix _16_2_.lo,$(SIZEOBJS))
+
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -mcx16
+# NOTE(review): "-mcx16 -mcx16" is not an accidental duplication -- presumably
+# word N of IFUNC_OPTIONS supplies the compile options for ifunc alternative N
+# (_16_1_ and _16_2_), and both 16-byte alternatives need CMPXCHG16B; confirm
+# against the per-alternative word selection in the build rules.
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -mcx16 -mcx16
libatomic_convenience_la_SOURCES = $(libatomic_la_SOURCES)
libatomic_convenience_la_LIBADD = $(libatomic_la_LIBADD)
MULTISRCTOP =
}
#ifdef __x86_64__
-# define IFUNC_COND_1 (load_feat1 () & bit_CMPXCHG16B)
+/* Alternative 1: CPU has both AVX and CMPXCHG16B; the IFUNC_ALT == 1 code
+   below then implements 16-byte load/store with vmovdqa.  */
+# define IFUNC_COND_1 ((load_feat1 () & (bit_AVX | bit_CMPXCHG16B)) \
+ == (bit_AVX | bit_CMPXCHG16B))
+/* Alternative 2: CMPXCHG16B only; 16-byte load/store stay CAS-based.  */
+# define IFUNC_COND_2 (load_feat1 () & bit_CMPXCHG16B)
#else
# define IFUNC_COND_1 (load_feat1 () & bit_CMPXCHG8B)
#endif
#ifdef __x86_64__
-# define IFUNC_NCOND(N) (N == 16)
+/* The 16-byte entry points now have two ifunc alternatives instead of one.  */
+# define IFUNC_NCOND(N) (2 * (N == 16))
#else
# define IFUNC_NCOND(N) (N == 8)
#endif
#ifdef __x86_64__
# undef MAYBE_HAVE_ATOMIC_CAS_16
-# define MAYBE_HAVE_ATOMIC_CAS_16 IFUNC_COND_1
+/* MAYBE_HAVE_* gate on the weaker condition (CMPXCHG16B alone): both
+   alternatives provide these operations, via CAS at minimum.  */
+# define MAYBE_HAVE_ATOMIC_CAS_16 IFUNC_COND_2
# undef MAYBE_HAVE_ATOMIC_EXCHANGE_16
-# define MAYBE_HAVE_ATOMIC_EXCHANGE_16 IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_EXCHANGE_16 IFUNC_COND_2
# undef MAYBE_HAVE_ATOMIC_LDST_16
-# define MAYBE_HAVE_ATOMIC_LDST_16 IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_LDST_16 IFUNC_COND_2
/* Since load and store are implemented with CAS, they are not fast. */
# undef FAST_ATOMIC_LDST_16
# define FAST_ATOMIC_LDST_16 0
-# if IFUNC_ALT == 1
+/* cmpxchg16b is available in both non-default alternatives.  */
+# if IFUNC_ALT != 0
# undef HAVE_ATOMIC_CAS_16
# define HAVE_ATOMIC_CAS_16 1
# endif
+/* Only alternative 1 (AVX) gets direct atomic 16-byte load/store -- see the
+   vmovdqa helpers added under IFUNC_ALT == 1 below.  */
+# if IFUNC_ALT == 1
+# undef HAVE_ATOMIC_LDST_16
+# define HAVE_ATOMIC_LDST_16 1
+# endif
#else
# undef MAYBE_HAVE_ATOMIC_CAS_8
# define MAYBE_HAVE_ATOMIC_CAS_8 IFUNC_COND_1
# endif
#endif
-#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+/* The cmpxchg16b-based CAS is shared by both 16-byte alternatives now, not
+   just alternative 1.  */
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT != 0
static inline bool
atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
bool weak_p UNUSED, int sm UNUSED, int fm UNUSED)
# define atomic_compare_exchange_n atomic_compare_exchange_n
#endif /* Have CAS 16 */
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+/* For the AVX alternative, route 16-byte __atomic_load_n/__atomic_store_n
+   through the vmovdqa helpers below; any other size falls back to the real
+   builtin (the parenthesized name suppresses this function-like macro).  */
+#define __atomic_load_n(ptr, model) \
+ (sizeof (*ptr) == 16 ? atomic_load_n (ptr, model) \
+ : (__atomic_load_n) (ptr, model))
+#define __atomic_store_n(ptr, val, model) \
+ (sizeof (*ptr) == 16 ? atomic_store_n (ptr, val, model) \
+ : (__atomic_store_n) (ptr, val, model))
+
+/* 16-byte load via vmovdqa, atomic on the CPUs this alternative is selected
+   for (AVX + CMPXCHG16B on Intel -- see the feat1 init check that clears
+   bit_AVX on non-Intel vendors).  The memory-model argument is ignored.
+   NOTE(review): a plain aligned load being sufficient for every model is
+   presumed from `model' being UNUSED; confirm against libatomic's barrier
+   conventions.  */
+static inline UTYPE
+atomic_load_n (UTYPE *ptr, int model UNUSED)
+{
+ UTYPE ret;
+ __asm__ ("vmovdqa\t{%1, %0|%0, %1}" : "=x" (ret) : "m" (*ptr));
+ return ret;
+}
+
+/* 16-byte store via vmovdqa, unconditionally followed by mfence: the
+   memory-model argument is ignored, so the strongest ordering is emitted
+   regardless of the requested model.  */
+static inline void
+atomic_store_n (UTYPE *ptr, UTYPE val, int model UNUSED)
+{
+ __asm__ ("vmovdqa\t{%1, %0|%0, %1}\n\tmfence" : "=m" (*ptr) : "x" (val));
+}
+#endif
+
#endif /* HAVE_IFUNC */
#include_next <host-config.h>
unsigned int eax, ebx, ecx, edx;
FEAT1_REGISTER = 0;
__get_cpuid (1, &eax, &ebx, &ecx, &edx);
+#ifdef __x86_64__
+ if ((FEAT1_REGISTER & (bit_AVX | bit_CMPXCHG16B))
+ == (bit_AVX | bit_CMPXCHG16B))
+ {
+ /* Intel SDM guarantees that 16-byte VMOVDQA on 16-byte aligned address
+ is atomic, but so far we don't have this guarantee from AMD. */
+ unsigned int ecx2 = 0;
+ __get_cpuid (0, &eax, &ebx, &ecx2, &edx);
+ /* Leaf 0 returns the vendor identification string; presumably ecx
+ matches signature_INTEL_ecx ("ntel") only on genuine Intel parts, so
+ clearing bit_AVX keeps other vendors on the CAS-based alternative --
+ verify against <cpuid.h>'s signature macros.  */
+ if (ecx2 != signature_INTEL_ecx)
+ FEAT1_REGISTER &= ~bit_AVX;
+ }
+#endif
/* See the load in load_feat1. */
__atomic_store_n (&__libat_feat1, FEAT1_REGISTER, __ATOMIC_RELAXED);
return FEAT1_REGISTER;