s390: select ARCH_SUPPORTS_INT128
author Heiko Carstens <hca@linux.ibm.com>
Tue, 25 Apr 2023 11:42:11 +0000 (13:42 +0200)
committer Alexander Gordeev <agordeev@linux.ibm.com>
Mon, 15 May 2023 12:12:14 +0000 (14:12 +0200)
s390 has instructions to support 128-bit arithmetic, e.g. a 64-bit
multiply instruction with a 128-bit result. Also, 128-bit integer
arithmetic is already used in s390-specific architecture code (see
e.g. read_persistent_clock64()).

Therefore select ARCH_SUPPORTS_INT128.

However limit this to clang for now, since gcc generates inefficient code,
which may lead to stack overflows, when compiling
lib/crypto/curve25519-hacl64.c which depends on ARCH_SUPPORTS_INT128. The
gcc generated functions have 6kb stack frames, compared to only 1kb of the
code generated with clang.

If the kernel is compiled with -Os, library calls for __ashlti3(),
__ashrti3(), and __lshrti3() may be generated. Similar to arm64
and riscv, provide assembler implementations for these functions.

Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
arch/s390/Kconfig
arch/s390/include/asm/asm-prototypes.h
arch/s390/lib/Makefile
arch/s390/lib/tishift.S [new file with mode: 0644]

index db20c15..0c1bfac 100644 (file)
@@ -117,6 +117,7 @@ config S390
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_SUPPORTS_DEBUG_PAGEALLOC
        select ARCH_SUPPORTS_HUGETLBFS
+       select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG
        select ARCH_SUPPORTS_NUMA_BALANCING
        select ARCH_SUPPORTS_PER_VMA_LOCK
        select ARCH_USE_BUILTIN_BSWAP
index c37eb92..a873e87 100644 (file)
@@ -6,4 +6,8 @@
 #include <asm/fpu/api.h>
 #include <asm-generic/asm-prototypes.h>
 
+__int128_t __ashlti3(__int128_t a, int b);
+__int128_t __ashrti3(__int128_t a, int b);
+__int128_t __lshrti3(__int128_t a, int b);
+
 #endif /* _ASM_S390_PROTOTYPES_H */
index 580d2e3..7c50eca 100644 (file)
@@ -3,7 +3,7 @@
 # Makefile for s390-specific library files..
 #
 
-lib-y += delay.o string.o uaccess.o find.o spinlock.o
+lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o
 obj-y += mem.o xor.o
 lib-$(CONFIG_KPROBES) += probes.o
 lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/tishift.S b/arch/s390/lib/tishift.S
new file mode 100644 (file)
index 0000000..de33cf0
--- /dev/null
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
+#include <asm/export.h>
+
+       .section .noinstr.text, "ax"
+
+       GEN_BR_THUNK %r14
+
+SYM_FUNC_START(__ashlti3)              /* 128-bit left shift; as used below: %r3 -> input, %r4 = count b, %r2 -> output */
+       lmg     %r0,%r1,0(%r3)          /* %r0 = upper, %r1 = lower 64 bits of the value at 0(%r3) */
+       cije    %r4,0,1f                /* b == 0: store the value unchanged (also avoids a 64 - b = 64 count below) */
+       lhi     %r3,64
+       sr      %r3,%r4                 /* %r3 = 64 - b; the condition code is consumed by jnh */
+       jnh     0f                      /* taken unless 64 - b > 0, i.e. for b >= 64 */
+       srlg    %r3,%r1,0(%r3)          /* bits of the lower word that carry into the upper word */
+       sllg    %r0,%r0,0(%r4)
+       sllg    %r1,%r1,0(%r4)
+       ogr     %r0,%r3                 /* upper = (upper << b) | (lower >> (64 - b)) */
+       j       1f
+0:     sllg    %r0,%r1,-64(%r4)        /* b >= 64: upper = lower << (b - 64) */
+       lghi    %r1,0                   /* lower word is shifted out completely */
+1:     stmg    %r0,%r1,0(%r2)          /* write upper and lower word to the buffer at 0(%r2) */
+       BR_EX   %r14
+SYM_FUNC_END(__ashlti3)
+EXPORT_SYMBOL(__ashlti3)
+
+SYM_FUNC_START(__ashrti3)              /* 128-bit arithmetic right shift; as used below: %r3 -> input, %r4 = count b, %r2 -> output */
+       lmg     %r0,%r1,0(%r3)          /* %r0 = upper, %r1 = lower 64 bits of the value at 0(%r3) */
+       cije    %r4,0,1f                /* b == 0: store the value unchanged (also avoids a 64 - b = 64 count below) */
+       lhi     %r3,64
+       sr      %r3,%r4                 /* %r3 = 64 - b; the condition code is consumed by jnh */
+       jnh     0f                      /* taken unless 64 - b > 0, i.e. for b >= 64 */
+       sllg    %r3,%r0,0(%r3)          /* bits of the upper word that drop into the lower word */
+       srlg    %r1,%r1,0(%r4)          /* lower word is shifted logically; its top bits come from %r3 */
+       srag    %r0,%r0,0(%r4)          /* upper word is shifted arithmetically to keep the sign */
+       ogr     %r1,%r3                 /* lower = (lower >> b) | (upper << (64 - b)) */
+       j       1f
+0:     srag    %r1,%r0,-64(%r4)        /* b >= 64: lower = upper >> (b - 64), sign-propagating */
+       srag    %r0,%r0,63              /* upper = all copies of the sign bit */
+1:     stmg    %r0,%r1,0(%r2)          /* write upper and lower word to the buffer at 0(%r2) */
+       BR_EX   %r14
+SYM_FUNC_END(__ashrti3)
+EXPORT_SYMBOL(__ashrti3)
+
+SYM_FUNC_START(__lshrti3)              /* 128-bit logical right shift; as used below: %r3 -> input, %r4 = count b, %r2 -> output */
+       lmg     %r0,%r1,0(%r3)          /* %r0 = upper, %r1 = lower 64 bits of the value at 0(%r3) */
+       cije    %r4,0,1f                /* b == 0: store the value unchanged (also avoids a 64 - b = 64 count below) */
+       lhi     %r3,64
+       sr      %r3,%r4                 /* %r3 = 64 - b; the condition code is consumed by jnh */
+       jnh     0f                      /* taken unless 64 - b > 0, i.e. for b >= 64 */
+       sllg    %r3,%r0,0(%r3)          /* bits of the upper word that drop into the lower word */
+       srlg    %r1,%r1,0(%r4)
+       srlg    %r0,%r0,0(%r4)
+       ogr     %r1,%r3                 /* lower = (lower >> b) | (upper << (64 - b)) */
+       j       1f
+0:     srlg    %r1,%r0,-64(%r4)        /* b >= 64: lower = upper >> (b - 64), zero-filled */
+       lghi    %r0,0                   /* upper word is shifted out completely */
+1:     stmg    %r0,%r1,0(%r2)          /* write upper and lower word to the buffer at 0(%r2) */
+       BR_EX   %r14
+SYM_FUNC_END(__lshrti3)
+EXPORT_SYMBOL(__lshrti3)