libatomic: Add support for LSE and LSE2
authorWilco Dijkstra <wilco.dijkstra@arm.com>
Tue, 15 Nov 2022 14:38:55 +0000 (14:38 +0000)
committerWilco Dijkstra <wilco.dijkstra@arm.com>
Tue, 15 Nov 2022 15:08:33 +0000 (15:08 +0000)
Add support for AArch64 LSE and LSE2 to libatomic.  Disable outline atomics,
and use LSE ifuncs for 1-8 byte atomics and LSE2 ifuncs for 16-byte atomics.
On Neoverse V1, 16-byte atomics are ~4x faster due to avoiding locks.

Note this is safe since we swap all 16-byte atomics using the same ifunc,
so they either use locks or LSE2 atomics, but never a mix. This also improves
ABI compatibility with LLVM: its inlined 16-byte atomics are compatible with
the new libatomic if LSE2 is supported.

libatomic/
* Makefile.in: Regenerated with automake 1.15.1.
* Makefile.am: Add atomic_16.S for AArch64.
* configure.tgt: Disable outline atomics in AArch64 build.
* config/linux/aarch64/atomic_16.S: New file - implementation of
ifuncs for 16-byte atomics.
* config/linux/aarch64/host-config.h: Enable ifuncs, use LSE
(HWCAP_ATOMICS) for 1-8-byte atomics and LSE2 (HWCAP_USCAT) for
16-byte atomics.

libatomic/Makefile.am
libatomic/Makefile.in
libatomic/config/linux/aarch64/atomic_16.S [new file with mode: 0644]
libatomic/config/linux/aarch64/host-config.h
libatomic/configure.tgt

index d88515e..41e5da2 100644 (file)
@@ -127,6 +127,8 @@ if HAVE_IFUNC
 if ARCH_AARCH64_LINUX
 IFUNC_OPTIONS       = -march=armv8-a+lse
 libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
+libatomic_la_SOURCES += atomic_16.S
+
 endif
 if ARCH_ARM_LINUX
 IFUNC_OPTIONS       = -march=armv7-a+fp -DHAVE_KERNEL64
index 80d2565..89e29fc 100644 (file)
@@ -90,13 +90,14 @@ build_triplet = @build@
 host_triplet = @host@
 target_triplet = @target@
 @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_1 = $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
-@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = $(foreach \
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = atomic_16.S
+@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(foreach \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ s,$(SIZES),$(addsuffix \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _$(s)_1_.lo,$(SIZEOBJS))) \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _8_2_.lo,$(SIZEOBJS))
-@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_5 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
 @ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@                   $(addsuffix _16_2_.lo,$(SIZEOBJS))
 
 subdir = .
@@ -154,8 +155,11 @@ am__uninstall_files_from_dir = { \
   }
 am__installdirs = "$(DESTDIR)$(toolexeclibdir)"
 LTLIBRARIES = $(noinst_LTLIBRARIES) $(toolexeclib_LTLIBRARIES)
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__objects_1 =  \
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@     atomic_16.lo
 am_libatomic_la_OBJECTS = gload.lo gstore.lo gcas.lo gexch.lo \
-       glfree.lo lock.lo init.lo fenv.lo fence.lo flag.lo
+       glfree.lo lock.lo init.lo fenv.lo fence.lo flag.lo \
+       $(am__objects_1)
 libatomic_la_OBJECTS = $(am_libatomic_la_OBJECTS)
 AM_V_lt = $(am__v_lt_@AM_V@)
 am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
@@ -165,9 +169,9 @@ libatomic_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
        $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
        $(libatomic_la_LDFLAGS) $(LDFLAGS) -o $@
 libatomic_convenience_la_DEPENDENCIES = $(libatomic_la_LIBADD)
-am__objects_1 = gload.lo gstore.lo gcas.lo gexch.lo glfree.lo lock.lo \
-       init.lo fenv.lo fence.lo flag.lo
-am_libatomic_convenience_la_OBJECTS = $(am__objects_1)
+am__objects_2 = gload.lo gstore.lo gcas.lo gexch.lo glfree.lo lock.lo \
+       init.lo fenv.lo fence.lo flag.lo $(am__objects_1)
+am_libatomic_convenience_la_OBJECTS = $(am__objects_2)
 libatomic_convenience_la_OBJECTS =  \
        $(am_libatomic_convenience_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
@@ -185,6 +189,16 @@ am__v_at_1 =
 depcomp = $(SHELL) $(top_srcdir)/../depcomp
 am__depfiles_maybe = depfiles
 am__mv = mv -f
+CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS)
+LTCPPASCOMPILE = $(LIBTOOL) $(AM_V_lt) $(AM_LIBTOOLFLAGS) \
+       $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) \
+       $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+       $(AM_CCASFLAGS) $(CCASFLAGS)
+AM_V_CPPAS = $(am__v_CPPAS_@AM_V@)
+am__v_CPPAS_ = $(am__v_CPPAS_@AM_DEFAULT_V@)
+am__v_CPPAS_0 = @echo "  CPPAS   " $@;
+am__v_CPPAS_1 = 
 COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
        $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
 LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
@@ -369,6 +383,7 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
+runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
@@ -404,9 +419,8 @@ noinst_LTLIBRARIES = libatomic_convenience.la
 @LIBAT_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBAT_BUILD_VERSIONED_SHLIB_TRUE@libatomic_version_dep = libatomic.map-sun
 libatomic_version_info = -version-info $(libtool_VERSION)
 libatomic_la_LDFLAGS = $(libatomic_version_info) $(libatomic_version_script) $(lt_host_flags)
-libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c init.c \
-       fenv.c fence.c flag.c
-
+libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c \
+       init.c fenv.c fence.c flag.c $(am__append_2)
 SIZEOBJS = load store cas exch fadd fsub fand fior fxor fnand tas
 EXTRA_libatomic_la_SOURCES = $(addsuffix _n.c,$(SIZEOBJS))
 libatomic_la_DEPENDENCIES = $(libatomic_la_LIBADD) $(libatomic_version_dep)
@@ -432,8 +446,8 @@ all_c_files := $(foreach dir,$(search_path),$(wildcard $(dir)/*.c))
 # Then sort through them to find the one we want, and select the first.
 M_SRC = $(firstword $(filter %/$(M_FILE), $(all_c_files)))
 libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \
-       _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_2) \
-       $(am__append_3) $(am__append_4)
+       _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_3) \
+       $(am__append_4) $(am__append_5)
 @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
 @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586
@@ -450,7 +464,7 @@ all: auto-config.h
        $(MAKE) $(AM_MAKEFLAGS) all-recursive
 
 .SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
+.SUFFIXES: .S .c .lo .o .obj
 am--refresh: Makefile
        @:
 $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/../multilib.am $(am__configure_deps)
@@ -559,6 +573,7 @@ mostlyclean-compile:
 distclean-compile:
        -rm -f *.tab.c
 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic_16.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fence.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fenv.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/flag.Plo@am__quote@
@@ -570,6 +585,27 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/init.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
 
+.S.o:
+@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@    $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@    DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@        $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ $<
+
+.S.obj:
+@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@    $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@    DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@        $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.S.lo:
+@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(LTCPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@    $(AM_V_CPPAS)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@    DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@        $(AM_V_CPPAS@am__nodep@)$(LTCPPASCOMPILE) -c -o $@ $<
+
 .c.o:
 @am__fastdepCC_TRUE@   $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
 @am__fastdepCC_TRUE@   $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
new file mode 100644 (file)
index 0000000..bced729
--- /dev/null
@@ -0,0 +1,462 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU Atomic Library (libatomic).
+
+   Libatomic is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+       .arch   armv8-a+lse
+
+#define ENTRY(name)            \
+       .global name;           \
+       .hidden name;           \
+       .type name,%function;   \
+       .p2align 4;             \
+name:                          \
+       .cfi_startproc;         \
+       hint    34      // bti c
+
+#define END(name)              \
+       .cfi_endproc;           \
+       .size name, .-name;
+
+#define res0 x0
+#define res1 x1
+#define in0  x2
+#define in1  x3
+#define tmp0 x6
+#define tmp1 x7
+#define exp0 x8
+#define exp1 x9
+
+#ifdef __AARCH64EB__
+# define reslo x1
+# define reshi x0
+# define inlo  x3
+# define inhi  x2
+# define tmplo x7
+# define tmphi x6
+#else
+# define reslo x0
+# define reshi x1
+# define inlo  x2
+# define inhi  x3
+# define tmplo x6
+# define tmphi x7
+#endif
+
+#define RELAXED 0
+#define CONSUME 1
+#define ACQUIRE 2
+#define RELEASE 3
+#define ACQ_REL 4
+#define SEQ_CST 5
+
+
+ENTRY (libat_load_16_i1)
+       cbnz    w1, 1f
+       ldp     res0, res1, [x0]
+       ret
+1:
+       cmp     w1, ACQUIRE
+       b.hi    2f
+       ldp     res0, res1, [x0]
+       dmb     ishld
+       ret
+2:
+       ldp     res0, res1, [x0]
+       dmb     ish
+       ret
+END (libat_load_16_i1)
+
+
+ENTRY (libat_store_16_i1)
+       cbnz    w4, 1f
+       stp     in0, in1, [x0]
+       ret
+1:
+       dmb     ish
+       stp     in0, in1, [x0]
+       cmp     w4, SEQ_CST
+       beq     2f
+       ret
+2:
+       dmb     ish
+       ret
+END (libat_store_16_i1)
+
+
+ENTRY (libat_exchange_16_i1)
+       mov     x5, x0
+       cbnz    w4, 2f
+1:
+       ldxp    res0, res1, [x5]
+       stxp    w4, in0, in1, [x5]
+       cbnz    w4, 1b
+       ret
+2:
+       cmp     w4, ACQUIRE
+       b.hi    4f
+3:
+       ldaxp   res0, res1, [x5]
+       stxp    w4, in0, in1, [x5]
+       cbnz    w4, 3b
+       ret
+4:
+       cmp     w4, RELEASE
+       b.ne    6f
+5:
+       ldxp    res0, res1, [x5]
+       stlxp   w4, in0, in1, [x5]
+       cbnz    w4, 5b
+       ret
+6:
+       ldaxp   res0, res1, [x5]
+       stlxp   w4, in0, in1, [x5]
+       cbnz    w4, 6b
+       ret
+END (libat_exchange_16_i1)
+
+
+ENTRY (libat_compare_exchange_16_i1)
+       ldp     exp0, exp1, [x1]
+       mov     tmp0, exp0
+       mov     tmp1, exp1
+       cbz     w4, 2f
+       cmp     w4, RELEASE
+       b.hs    3f
+       caspa   exp0, exp1, in0, in1, [x0]
+0:
+       cmp     exp0, tmp0
+       ccmp    exp1, tmp1, 0, eq
+       bne     1f
+       mov     x0, 1
+       ret
+1:
+       stp     exp0, exp1, [x1]
+       mov     x0, 0
+       ret
+2:
+       casp    exp0, exp1, in0, in1, [x0]
+       b       0b
+3:
+       b.hi    4f
+       caspl   exp0, exp1, in0, in1, [x0]
+       b       0b
+4:
+       caspal  exp0, exp1, in0, in1, [x0]
+       b       0b
+END (libat_compare_exchange_16_i1)
+
+
+ENTRY (libat_fetch_add_16_i1)
+       mov     x5, x0
+       cbnz    w4, 2f
+1:
+       ldxp    res0, res1, [x5]
+       adds    tmplo, reslo, inlo
+       adc     tmphi, reshi, inhi
+       stxp    w4, tmp0, tmp1, [x5]
+       cbnz    w4, 1b
+       ret
+2:
+       ldaxp   res0, res1, [x5]
+       adds    tmplo, reslo, inlo
+       adc     tmphi, reshi, inhi
+       stlxp   w4, tmp0, tmp1, [x5]
+       cbnz    w4, 2b
+       ret
+END (libat_fetch_add_16_i1)
+
+
+ENTRY (libat_add_fetch_16_i1)
+       mov     x5, x0
+       cbnz    w4, 2f
+1:
+       ldxp    res0, res1, [x5]
+       adds    reslo, reslo, inlo
+       adc     reshi, reshi, inhi
+       stxp    w4, res0, res1, [x5]
+       cbnz    w4, 1b
+       ret
+2:
+       ldaxp   res0, res1, [x5]
+       adds    reslo, reslo, inlo
+       adc     reshi, reshi, inhi
+       stlxp   w4, res0, res1, [x5]
+       cbnz    w4, 2b
+       ret
+END (libat_add_fetch_16_i1)
+
+
+ENTRY (libat_fetch_sub_16_i1)
+       mov     x5, x0
+       cbnz    w4, 2f
+1:
+       ldxp    res0, res1, [x5]
+       subs    tmplo, reslo, inlo
+       sbc     tmphi, reshi, inhi
+       stxp    w4, tmp0, tmp1, [x5]
+       cbnz    w4, 1b
+       ret
+2:
+       ldaxp   res0, res1, [x5]
+       subs    tmplo, reslo, inlo
+       sbc     tmphi, reshi, inhi
+       stlxp   w4, tmp0, tmp1, [x5]
+       cbnz    w4, 2b
+       ret
+END (libat_fetch_sub_16_i1)
+
+
+ENTRY (libat_sub_fetch_16_i1)
+       mov     x5, x0
+       cbnz    w4, 2f
+1:
+       ldxp    res0, res1, [x5]
+       subs    reslo, reslo, inlo
+       sbc     reshi, reshi, inhi
+       stxp    w4, res0, res1, [x5]
+       cbnz    w4, 1b
+       ret
+2:
+       ldaxp   res0, res1, [x5]
+       subs    reslo, reslo, inlo
+       sbc     reshi, reshi, inhi
+       stlxp   w4, res0, res1, [x5]
+       cbnz    w4, 2b
+       ret
+END (libat_sub_fetch_16_i1)
+
+
+ENTRY (libat_fetch_or_16_i1)
+       mov     x5, x0
+       cbnz    w4, 2f
+1:
+       ldxp    res0, res1, [x5]
+       orr     tmp0, res0, in0
+       orr     tmp1, res1, in1
+       stxp    w4, tmp0, tmp1, [x5]
+       cbnz    w4, 1b
+       ret
+2:
+       ldaxp   res0, res1, [x5]
+       orr     tmp0, res0, in0
+       orr     tmp1, res1, in1
+       stlxp   w4, tmp0, tmp1, [x5]
+       cbnz    w4, 2b
+       ret
+END (libat_fetch_or_16_i1)
+
+
+ENTRY (libat_or_fetch_16_i1)
+       mov     x5, x0
+       cbnz    w4, 2f
+1:
+       ldxp    res0, res1, [x5]
+       orr     res0, res0, in0
+       orr     res1, res1, in1
+       stxp    w4, res0, res1, [x5]
+       cbnz    w4, 1b
+       ret
+2:
+       ldaxp   res0, res1, [x5]
+       orr     res0, res0, in0
+       orr     res1, res1, in1
+       stlxp   w4, res0, res1, [x5]
+       cbnz    w4, 2b
+       ret
+END (libat_or_fetch_16_i1)
+
+
+ENTRY (libat_fetch_and_16_i1)
+       mov     x5, x0
+       cbnz    w4, 2f
+1:
+       ldxp    res0, res1, [x5]
+       and     tmp0, res0, in0
+       and     tmp1, res1, in1
+       stxp    w4, tmp0, tmp1, [x5]
+       cbnz    w4, 1b
+       ret
+2:
+       ldaxp   res0, res1, [x5]
+       and     tmp0, res0, in0
+       and     tmp1, res1, in1
+       stlxp   w4, tmp0, tmp1, [x5]
+       cbnz    w4, 2b
+       ret
+END (libat_fetch_and_16_i1)
+
+
+ENTRY (libat_and_fetch_16_i1)
+       mov     x5, x0
+       cbnz    w4, 2f
+1:
+       ldxp    res0, res1, [x5]
+       and     res0, res0, in0
+       and     res1, res1, in1
+       stxp    w4, res0, res1, [x5]
+       cbnz    w4, 1b
+       ret
+2:
+       ldaxp   res0, res1, [x5]
+       and     res0, res0, in0
+       and     res1, res1, in1
+       stlxp   w4, res0, res1, [x5]
+       cbnz    w4, 2b
+       ret
+END (libat_and_fetch_16_i1)
+
+
+ENTRY (libat_fetch_xor_16_i1)
+       mov     x5, x0
+       cbnz    w4, 2f
+1:
+       ldxp    res0, res1, [x5]
+       eor     tmp0, res0, in0
+       eor     tmp1, res1, in1
+       stxp    w4, tmp0, tmp1, [x5]
+       cbnz    w4, 1b
+       ret
+2:
+       ldaxp   res0, res1, [x5]
+       eor     tmp0, res0, in0
+       eor     tmp1, res1, in1
+       stlxp   w4, tmp0, tmp1, [x5]
+       cbnz    w4, 2b
+       ret
+END (libat_fetch_xor_16_i1)
+
+
+ENTRY (libat_xor_fetch_16_i1)
+       mov     x5, x0
+       cbnz    w4, 2f
+1:
+       ldxp    res0, res1, [x5]
+       eor     res0, res0, in0
+       eor     res1, res1, in1
+       stxp    w4, res0, res1, [x5]
+       cbnz    w4, 1b
+       ret
+2:
+       ldaxp   res0, res1, [x5]
+       eor     res0, res0, in0
+       eor     res1, res1, in1
+       stlxp   w4, res0, res1, [x5]
+       cbnz    w4, 2b
+       ret
+END (libat_xor_fetch_16_i1)
+
+
+ENTRY (libat_fetch_nand_16_i1)
+       mov     x5, x0
+       mvn     in0, in0
+       mvn     in1, in1
+       cbnz    w4, 2f
+1:
+       ldxp    res0, res1, [x5]
+       orn     tmp0, in0, res0
+       orn     tmp1, in1, res1
+       stxp    w4, tmp0, tmp1, [x5]
+       cbnz    w4, 1b
+       ret
+2:
+       ldaxp   res0, res1, [x5]
+       orn     tmp0, in0, res0
+       orn     tmp1, in1, res1
+       stlxp   w4, tmp0, tmp1, [x5]
+       cbnz    w4, 2b
+       ret
+END (libat_fetch_nand_16_i1)
+
+
+ENTRY (libat_nand_fetch_16_i1)
+       mov     x5, x0
+       mvn     in0, in0
+       mvn     in1, in1
+       cbnz    w4, 2f
+1:
+       ldxp    res0, res1, [x5]
+       orn     res0, in0, res0
+       orn     res1, in1, res1
+       stxp    w4, res0, res1, [x5]
+       cbnz    w4, 1b
+       ret
+2:
+       ldaxp   res0, res1, [x5]
+       orn     res0, in0, res0
+       orn     res1, in1, res1
+       stlxp   w4, res0, res1, [x5]
+       cbnz    w4, 2b
+       ret
+END (libat_nand_fetch_16_i1)
+
+
+ENTRY (libat_test_and_set_16_i1)
+       mov     w2, 1
+       cbnz    w1, 2f
+       swpb    w0, w2, [x0]
+       ret
+
+2:     swpalb  w0, w2, [x0]
+       ret
+END (libat_test_and_set_16_i1)
+
+
+/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code.  */
+#define FEATURE_1_AND 0xc0000000
+#define FEATURE_1_BTI 1
+#define FEATURE_1_PAC 2
+
+/* Supported features based on the code generation options.  */
+#if defined(__ARM_FEATURE_BTI_DEFAULT)
+# define BTI_FLAG FEATURE_1_BTI
+#else
+# define BTI_FLAG 0
+#endif
+
+#if __ARM_FEATURE_PAC_DEFAULT & 3
+# define PAC_FLAG FEATURE_1_PAC
+#else
+# define PAC_FLAG 0
+#endif
+
+/* Add a NT_GNU_PROPERTY_TYPE_0 note.  */
+#define GNU_PROPERTY(type, value)      \
+  .section .note.gnu.property, "a";     \
+  .p2align 3;                          \
+  .word 4;                             \
+  .word 16;                            \
+  .word 5;                             \
+  .asciz "GNU";                                \
+  .word type;                          \
+  .word 4;                             \
+  .word value;                         \
+  .word 0;
+
+#if defined(__linux__) || defined(__FreeBSD__)
+.section .note.GNU-stack, "", %progbits
+
+/* Add GNU property note if built with branch protection.  */
+# if (BTI_FLAG|PAC_FLAG) != 0
+GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
+# endif
+#endif
index 769ba6e..d9b5ab3 100644 (file)
    <http://www.gnu.org/licenses/>.  */
 
 #if HAVE_IFUNC
-#include <stdlib.h>
+#include <sys/auxv.h>
 
-# ifdef HWCAP_ATOMICS
-#  define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS)
+#ifdef HWCAP_USCAT
+# if N == 16
+#  define IFUNC_COND_1 (hwcap & HWCAP_USCAT)
 # else
-#  define IFUNC_COND_1 (false)
+#  define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS)
 # endif
-# define IFUNC_NCOND(N)        (1)
+#else
+#  define IFUNC_COND_1 (false)
+#endif
+#define IFUNC_NCOND(N) (1)
+
+#if N == 16 && IFUNC_ALT != 0
+# define DONE 1
+#endif
 
 #endif /* HAVE_IFUNC */
 
index 86a5947..57f0936 100644 (file)
@@ -49,6 +49,7 @@ case "${target_cpu}" in
                fi
                ;;
        esac
+       XCFLAGS="${XCFLAGS} -mno-outline-atomics"
        ;;
   arm*)
        ARCH=arm