s390/vdso: Get rid of permutation constants
author Heiko Carstens <hca@linux.ibm.com>
Thu, 19 Sep 2024 12:40:04 +0000 (14:40 +0200)
committer Vasily Gorbik <gor@linux.ibm.com>
Mon, 23 Sep 2024 15:57:04 +0000 (17:57 +0200)
The three byte-selection masks for VECTOR PERMUTE are not needed, since
the VECTOR SHIFT LEFT DOUBLE BY BYTE instruction can be used to
implement the required "rotate left" directly.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
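For illustration, a minimal C model of the equivalence this change relies
on (a sketch; vsldb() is a made-up helper, not kernel code): VECTOR SHIFT
LEFT DOUBLE BY BYTE selects 16 consecutive bytes, starting at the
immediate byte offset, out of the 32-byte concatenation of its two source
operands. With both sources naming the same register it reduces to a
byte-wise rotate left, which is exactly what the removed VPERM masks
encoded.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Model of VSLDB V1,V2,V3,I4: take 16 consecutive bytes starting at
 * byte offset i4 from the 32-byte concatenation of a and b. */
static void vsldb(uint8_t out[16], const uint8_t a[16],
		  const uint8_t b[16], unsigned int i4)
{
	uint8_t cat[32];

	memcpy(cat, a, 16);
	memcpy(cat + 16, b, 16);
	memcpy(out, cat + i4, 16);
}

int main(void)
{
	uint8_t x[16], rot[16];
	int i;

	for (i = 0; i < 16; i++)
		x[i] = i;
	/* Both sources equal: a rotate left by 4 bytes, i.e. word
	 * elements [0,1,2,3] -> [1,2,3,0], matching the removed
	 * "rotl 4 bytes" VPERM mask. */
	vsldb(rot, x, x, 4);
	for (i = 0; i < 16; i++)
		printf("%d ", rot[i]);	/* 4 5 6 ... 15 0 1 2 3 */
	printf("\n");
	return 0;
}

Offsets 4, 8 and 12 produce the word rotations [1,2,3,0], [2,3,0,1] and
[3,0,1,2] used in the hunks below.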
arch/s390/kernel/vdso64/vgetrandom-chacha.S

index d802b0a96f4141a98fe45a11f21e699505b514cc..475711e0c6a36a1bdda9d4a8564c3d20ba6d374f 100644
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
+#include <linux/stringify.h>
 #include <linux/linkage.h>
 #include <asm/alternative.h>
 #include <asm/fpu-insn.h>
@@ -12,9 +13,6 @@
 #define COPY1  %v5
 #define COPY2  %v6
 #define COPY3  %v7
-#define PERM4  %v16
-#define PERM8  %v17
-#define PERM12 %v18
 #define BEPERM %v19
 #define TMP0   %v20
 #define TMP1   %v21
@@ -23,12 +21,9 @@
 
        .section .rodata
 
-       .balign 128
+       .balign 32
 .Lconstants:
        .long   0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
-       .long   0x04050607,0x08090a0b,0x0c0d0e0f,0x00010203 # rotl  4 bytes
-       .long   0x08090a0b,0x0c0d0e0f,0x00010203,0x04050607 # rotl  8 bytes
-       .long   0x0c0d0e0f,0x00010203,0x04050607,0x08090a0b # rotl 12 bytes
        .long   0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
 
        .text
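The constants that survive are the ChaCha "expand 32-byte k" words and
the byte-swap mask, 32 bytes in total, which is why the alignment drops
from 128 to 32. A rough C model of how that mask works with VECTOR
PERMUTE (a sketch of the instruction's byte-selection behavior, not
kernel code): each result byte is picked out of the 32-byte
concatenation of the two sources, so with both sources equal the mask
below swaps the bytes within every 32-bit word. It is only needed where
the ALT_FACILITY(148) alternative in the next hunk leaves the BEPERM
load in place.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Rough model of VPERM V1,V2,V3,V4: every result byte is selected,
 * by its low five bits (i.e. modulo 32), from the concatenation of
 * the two source vectors. */
static void vperm(uint8_t out[16], const uint8_t a[16],
		  const uint8_t b[16], const uint8_t mask[16])
{
	uint8_t cat[32];
	int i;

	memcpy(cat, a, 16);
	memcpy(cat + 16, b, 16);
	for (i = 0; i < 16; i++)
		out[i] = cat[mask[i] % 32];
}

int main(void)
{
	/* The byte-swap constant as stored big-endian: word 0 is
	 * 0x03020100, so result bytes 0..3 come from source bytes
	 * 3,2,1,0 - a byte swap within each 32-bit element. */
	static const uint8_t beperm[16] = {
		3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
	};
	uint8_t x[16], swapped[16];
	int i;

	for (i = 0; i < 16; i++)
		x[i] = i;
	vperm(swapped, x, x, beperm);
	for (i = 0; i < 16; i++)
		printf("%d ", swapped[i]);	/* 3 2 1 0 7 6 5 4 ... */
	printf("\n");
	return 0;
}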
@@ -48,8 +43,8 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
        /* COPY0 = "expand 32-byte k" */
        VL      COPY0,0,,%r1
 
-       /* PERM4-PERM12,BEPERM = byte selectors for VPERM */
-       VLM     PERM4,BEPERM,16,%r1
+       /* BEPERM = byte selectors for VPERM */
+       ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)
 
        /* COPY1,COPY2 = key */
        VLM     COPY1,COPY2,0,%r3
@@ -89,11 +84,11 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
        VERLLF  STATE1,STATE1,7
 
        /* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
-       VPERM   STATE1,STATE1,STATE1,PERM4
+       VSLDB   STATE1,STATE1,STATE1,4
        /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
-       VPERM   STATE2,STATE2,STATE2,PERM8
+       VSLDB   STATE2,STATE2,STATE2,8
        /* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
-       VPERM   STATE3,STATE3,STATE3,PERM12
+       VSLDB   STATE3,STATE3,STATE3,12
 
        /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
        VAF     STATE0,STATE0,STATE1
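The three rotations above are the usual ChaCha diagonalization: rotating
row 1 by one word, row 2 by two and row 3 by three lines the state's
diagonals up into columns, so the very same column quarter-round
sequence then processes the diagonals; the mirrored VSLDB group in the
next hunk rotates everything back. A small C sketch of that reindexing
(toy values, not kernel code):

#include <stdio.h>

/* Rotate a row of four 32-bit state words left by n elements; this is
 * what VSLDB x,x,x,4*n does to one 128-bit vector register. */
static void rotl_words(unsigned int row[4], int n)
{
	unsigned int tmp[4];
	int i;

	for (i = 0; i < 4; i++)
		tmp[i] = row[(i + n) & 3];
	for (i = 0; i < 4; i++)
		row[i] = tmp[i];
}

int main(void)
{
	/* Toy state: word j of row i holds i*4+j. */
	unsigned int s[4][4];
	int i, j;

	for (i = 0; i < 4; i++)
		for (j = 0; j < 4; j++)
			s[i][j] = i * 4 + j;

	/* Diagonalize: rows 1..3 rotate left by 1..3 words, so a
	 * column now holds a former diagonal of the state. */
	rotl_words(s[1], 1);
	rotl_words(s[2], 2);
	rotl_words(s[3], 3);
	printf("column 0 after diagonalization: %u %u %u %u\n",
	       s[0][0], s[1][0], s[2][0], s[3][0]);	/* 0 5 10 15 */

	/* Undo before the next column round (the VSLDB 12/8/4 group). */
	rotl_words(s[1], 3);
	rotl_words(s[2], 2);
	rotl_words(s[3], 1);
	return 0;
}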
@@ -116,11 +111,11 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
        VERLLF  STATE1,STATE1,7
 
        /* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
-       VPERM   STATE1,STATE1,STATE1,PERM12
+       VSLDB   STATE1,STATE1,STATE1,12
        /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
-       VPERM   STATE2,STATE2,STATE2,PERM8
+       VSLDB   STATE2,STATE2,STATE2,8
        /* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
-       VPERM   STATE3,STATE3,STATE3,PERM4
+       VSLDB   STATE3,STATE3,STATE3,4
        brctg   %r0,.Ldoubleround
 
        /* OUTPUT0 = STATE0 + COPY0 */
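The trailing context frames the block function's overall shape: brctg
counts the round register down (ten double rounds for ChaCha20), after
which the input words preserved in COPY0-COPY3 are added back into the
state before the output is stored. As a scalar outline in C (a sketch;
the function name is made up and the real code stays entirely in vector
registers):

#include <stdint.h>

/* Hypothetical scalar outline of one ChaCha20 block; the vdso code
 * holds state[] and copy[] in four vector registers each. */
static void chacha20_block_outline(uint32_t state[16],
				   const uint32_t copy[16])
{
	int i;

	for (i = 10; i > 0; i--) {	/* brctg %r0,.Ldoubleround */
		/* column quarter-rounds, diagonalize (VSLDB 4/8/12),
		 * diagonal quarter-rounds, undiagonalize (VSLDB 12/8/4) */
	}
	for (i = 0; i < 16; i++)	/* OUTPUTn = STATEn + COPYn */
		state[i] += copy[i];
}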