AArch64: Update A64FX memset not to degrade at 16KB

author Naohiro Tamura via Libc-alpha <libc-alpha@sourceware.org>

Fri, 27 Aug 2021 05:03:04 +0000 (05:03 +0000)

committer Szabolcs Nagy <szabolcs.nagy@arm.com>

Fri, 3 Sep 2021 14:59:46 +0000 (15:59 +0100)
author Naohiro Tamura via Libc-alpha <libc-alpha@sourceware.org>
Fri, 27 Aug 2021 05:03:04 +0000 (05:03 +0000)
committer Szabolcs Nagy <szabolcs.nagy@arm.com>
Fri, 3 Sep 2021 14:59:46 +0000 (15:59 +0100)
diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S

index 7bf759b6a7530e76e2ec8f2d2f173eec0adead61..f7dfdaace7cfe74139d6d2261e88219035cbd6a1 100644 (file)
--- a/sysdeps/aarch64/multiarch/memset_a64fx.S
+++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
@@ -96,7 +96,14 @@ L(vl_agnostic): // VL Agnostic
  L(unroll8):
         sub     count, count, tmp1
         .p2align 4
-1:     st1b_unroll 0, 7
+       // The two instructions at the top of the following loop, cmp
+       // and b.ne, are a workaround that keeps performance from
+       // degrading at 16KB, the size at which performance peaks.
+       // The workaround was found heuristically; since cmp xzr, xzr
+       // always compares equal, the b.ne branch is never taken.
+1:     cmp     xzr, xzr
+       b.ne    1b
+       st1b_unroll 0, 7
         add     dst, dst, tmp1
         subs    count, count, tmp1
         b.hi    1b
author	Naohiro Tamura via Libc-alpha <libc-alpha@sourceware.org>
	Fri, 27 Aug 2021 05:03:04 +0000 (05:03 +0000)
committer	Szabolcs Nagy <szabolcs.nagy@arm.com>
	Fri, 3 Sep 2021 14:59:46 +0000 (15:59 +0100)