libatomic: Improve 16-byte atomics on Intel AVX [PR104688]

author Jakub Jelinek <jakub@redhat.com>
Thu, 17 Mar 2022 17:49:00 +0000 (18:49 +0100)
committer Jakub Jelinek <jakub@redhat.com>
Thu, 17 Mar 2022 17:49:00 +0000 (18:49 +0100)
diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am

index 389f3dd..d88515e 100644 (file)
--- a/libatomic/Makefile.am
+++ b/libatomic/Makefile.am
@@ -138,8 +138,9 @@ IFUNC_OPTIONS            = -march=i586
  libatomic_la_LIBADD += $(addsuffix _8_1_.lo,$(SIZEOBJS))
  endif
  if ARCH_X86_64
-IFUNC_OPTIONS       = -mcx16
-libatomic_la_LIBADD += $(addsuffix _16_1_.lo,$(SIZEOBJS))
+IFUNC_OPTIONS       = -mcx16 -mcx16
+libatomic_la_LIBADD += $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+                      $(addsuffix _16_2_.lo,$(SIZEOBJS))
  endif
  endif
  
diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in

index 0a51bd5..80d2565 100644 (file)
--- a/libatomic/Makefile.in
+++ b/libatomic/Makefile.in
@@ -96,7 +96,9 @@ target_triplet = @target@
  @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix \
  @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _8_2_.lo,$(SIZEOBJS))
  @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS))
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@                   $(addsuffix _16_2_.lo,$(SIZEOBJS))
+
  subdir = .
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
@@ -435,7 +437,7 @@ libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \
  @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse
  @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
  @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -mcx16
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -mcx16 -mcx16
  libatomic_convenience_la_SOURCES = $(libatomic_la_SOURCES)
  libatomic_convenience_la_LIBADD = $(libatomic_la_LIBADD)
  MULTISRCTOP = 
diff --git a/libatomic/config/x86/host-config.h b/libatomic/config/x86/host-config.h

index f20ce09..007b7e1 100644 (file)
--- a/libatomic/config/x86/host-config.h
+++ b/libatomic/config/x86/host-config.h
@@ -55,31 +55,37 @@ load_feat1 (void)
  }
  
  #ifdef __x86_64__
-# define IFUNC_COND_1  (load_feat1 () & bit_CMPXCHG16B)
+# define IFUNC_COND_1  ((load_feat1 () & (bit_AVX | bit_CMPXCHG16B)) \
+                        == (bit_AVX | bit_CMPXCHG16B))
+# define IFUNC_COND_2  (load_feat1 () & bit_CMPXCHG16B)
  #else
  # define IFUNC_COND_1  (load_feat1 () & bit_CMPXCHG8B)
  #endif
  
  #ifdef __x86_64__
-# define IFUNC_NCOND(N) (N == 16)
+# define IFUNC_NCOND(N) (2 * (N == 16))
  #else
  # define IFUNC_NCOND(N) (N == 8)
  #endif
  
  #ifdef __x86_64__
  # undef MAYBE_HAVE_ATOMIC_CAS_16
-# define MAYBE_HAVE_ATOMIC_CAS_16      IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_CAS_16      IFUNC_COND_2
  # undef MAYBE_HAVE_ATOMIC_EXCHANGE_16
-# define MAYBE_HAVE_ATOMIC_EXCHANGE_16 IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_EXCHANGE_16 IFUNC_COND_2
  # undef MAYBE_HAVE_ATOMIC_LDST_16
-# define MAYBE_HAVE_ATOMIC_LDST_16     IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_LDST_16     IFUNC_COND_2
  /* Since load and store are implemented with CAS, they are not fast.  */
  # undef FAST_ATOMIC_LDST_16
  # define FAST_ATOMIC_LDST_16           0
-# if IFUNC_ALT == 1
+# if IFUNC_ALT != 0
  #  undef HAVE_ATOMIC_CAS_16
  #  define HAVE_ATOMIC_CAS_16 1
  # endif
+# if IFUNC_ALT == 1
+#  undef HAVE_ATOMIC_LDST_16
+#  define HAVE_ATOMIC_LDST_16 1
+# endif
  #else
  # undef MAYBE_HAVE_ATOMIC_CAS_8
  # define MAYBE_HAVE_ATOMIC_CAS_8       IFUNC_COND_1
@@ -93,7 +99,7 @@ load_feat1 (void)
  # endif
  #endif
  
-#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT != 0
  static inline bool
  atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
                             bool weak_p UNUSED, int sm UNUSED, int fm UNUSED)
@@ -108,6 +114,29 @@ atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
  # define atomic_compare_exchange_n atomic_compare_exchange_n
  #endif /* Have CAS 16 */
  
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+#define __atomic_load_n(ptr, model) \
+  (sizeof (*ptr) == 16 ? atomic_load_n (ptr, model) \
+                      : (__atomic_load_n) (ptr, model))
+#define __atomic_store_n(ptr, val, model) \
+  (sizeof (*ptr) == 16 ? atomic_store_n (ptr, val, model) \
+                      : (__atomic_store_n) (ptr, val, model))
+
+static inline UTYPE
+atomic_load_n (UTYPE *ptr, int model UNUSED)
+{
+  UTYPE ret;
+  __asm__ ("vmovdqa\t{%1, %0|%0, %1}" : "=x" (ret) : "m" (*ptr));
+  return ret;
+}
+
+static inline void
+atomic_store_n (UTYPE *ptr, UTYPE val, int model UNUSED)
+{
+  __asm__ ("vmovdqa\t{%1, %0|%0, %1}\n\tmfence" : "=m" (*ptr) : "x" (val));
+}
+#endif
+
  #endif /* HAVE_IFUNC */
  
  #include_next <host-config.h>
diff --git a/libatomic/config/x86/init.c b/libatomic/config/x86/init.c

index 7bdec72..6f6499c 100644 (file)
--- a/libatomic/config/x86/init.c
+++ b/libatomic/config/x86/init.c
@@ -34,6 +34,18 @@ __libat_feat1_init (void)
    unsigned int eax, ebx, ecx, edx;
    FEAT1_REGISTER = 0;
    __get_cpuid (1, &eax, &ebx, &ecx, &edx);
+#ifdef __x86_64__
+  if ((FEAT1_REGISTER & (bit_AVX | bit_CMPXCHG16B))
+      == (bit_AVX | bit_CMPXCHG16B))
+    {
+      /* Intel SDM guarantees that 16-byte VMOVDQA on 16-byte aligned address
+        is atomic, but so far we don't have this guarantee from AMD.  */
+      unsigned int ecx2 = 0;
+      __get_cpuid (0, &eax, &ebx, &ecx2, &edx);
+      if (ecx2 != signature_INTEL_ecx)
+       FEAT1_REGISTER &= ~bit_AVX;
+    }
+#endif
    /* See the load in load_feat1.  */
    __atomic_store_n (&__libat_feat1, FEAT1_REGISTER, __ATOMIC_RELAXED);
    return FEAT1_REGISTER;
author	Jakub Jelinek <jakub@redhat.com>
	Thu, 17 Mar 2022 17:49:00 +0000 (18:49 +0100)
committer	Jakub Jelinek <jakub@redhat.com>
	Thu, 17 Mar 2022 17:49:00 +0000 (18:49 +0100)
libatomic/Makefile.am		patch | blob | history
libatomic/Makefile.in		patch | blob | history
libatomic/config/x86/host-config.h		patch | blob | history
libatomic/config/x86/init.c		patch | blob | history