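crypto: x86/aria - Use RIP-relative addressing

Replace absolute references to local constant labels (e.g. `.Lshufb_16x16b`) with RIP-relative references (`.Lshufb_16x16b(%rip)`) in the AES-NI/AVX, AES-NI/AVX2 and GFNI/AVX-512 implementations of ARIA. Only the addressing mode of the memory operands changes.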

author Ard Biesheuvel <ardb@kernel.org>

Wed, 12 Apr 2023 11:00:25 +0000 (13:00 +0200)

committer Herbert Xu <herbert@gondor.apana.org.au>

Thu, 20 Apr 2023 10:20:04 +0000 (18:20 +0800)
author Ard Biesheuvel <ardb@kernel.org>
Wed, 12 Apr 2023 11:00:25 +0000 (13:00 +0200)
committer Herbert Xu <herbert@gondor.apana.org.au>
Thu, 20 Apr 2023 10:20:04 +0000 (18:20 +0800)
diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S

index 9243f62..7c1abc5 100644 (file)
--- a/arch/x86/crypto/aria-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S
@@ -80,7 +80,7 @@
         transpose_4x4(c0, c1, c2, c3, a0, a1);          \
         transpose_4x4(d0, d1, d2, d3, a0, a1);          \
                                                         \
-       vmovdqu .Lshufb_16x16b, a0;                     \
+       vmovdqu .Lshufb_16x16b(%rip), a0;               \
         vmovdqu st1, a1;                                \
         vpshufb a0, a2, a2;                             \
         vpshufb a0, a3, a3;                             \
@@ -132,7 +132,7 @@
         transpose_4x4(c0, c1, c2, c3, a0, a1);          \
         transpose_4x4(d0, d1, d2, d3, a0, a1);          \
                                                         \
-       vmovdqu .Lshufb_16x16b, a0;                     \
+       vmovdqu .Lshufb_16x16b(%rip), a0;               \
         vmovdqu st1, a1;                                \
         vpshufb a0, a2, a2;                             \
         vpshufb a0, a3, a3;                             \
@@ -300,11 +300,11 @@
                             x4, x5, x6, x7,             \
                             t0, t1, t2, t3,             \
                             t4, t5, t6, t7)             \
-       vmovdqa .Ltf_s2_bitmatrix, t0;                  \
-       vmovdqa .Ltf_inv_bitmatrix, t1;                 \
-       vmovdqa .Ltf_id_bitmatrix, t2;                  \
-       vmovdqa .Ltf_aff_bitmatrix, t3;                 \
-       vmovdqa .Ltf_x2_bitmatrix, t4;                  \
+       vmovdqa .Ltf_s2_bitmatrix(%rip), t0;            \
+       vmovdqa .Ltf_inv_bitmatrix(%rip), t1;           \
+       vmovdqa .Ltf_id_bitmatrix(%rip), t2;            \
+       vmovdqa .Ltf_aff_bitmatrix(%rip), t3;           \
+       vmovdqa .Ltf_x2_bitmatrix(%rip), t4;            \
         vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1;   \
         vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5;   \
         vgf2p8affineqb $(tf_inv_const), t1, x2, x2;     \
@@ -324,13 +324,13 @@
                        x4, x5, x6, x7,                  \
                        t0, t1, t2, t3,                  \
                        t4, t5, t6, t7)                  \
-       vmovdqa .Linv_shift_row, t0;                    \
-       vmovdqa .Lshift_row, t1;                        \
-       vbroadcastss .L0f0f0f0f, t6;                    \
-       vmovdqa .Ltf_lo__inv_aff__and__s2, t2;          \
-       vmovdqa .Ltf_hi__inv_aff__and__s2, t3;          \
-       vmovdqa .Ltf_lo__x2__and__fwd_aff, t4;          \
-       vmovdqa .Ltf_hi__x2__and__fwd_aff, t5;          \
+       vmovdqa .Linv_shift_row(%rip), t0;              \
+       vmovdqa .Lshift_row(%rip), t1;                  \
+       vbroadcastss .L0f0f0f0f(%rip), t6;              \
+       vmovdqa .Ltf_lo__inv_aff__and__s2(%rip), t2;    \
+       vmovdqa .Ltf_hi__inv_aff__and__s2(%rip), t3;    \
+       vmovdqa .Ltf_lo__x2__and__fwd_aff(%rip), t4;    \
+       vmovdqa .Ltf_hi__x2__and__fwd_aff(%rip), t5;    \
                                                         \
         vaesenclast t7, x0, x0;                         \
         vaesenclast t7, x4, x4;                         \
diff --git a/arch/x86/crypto/aria-aesni-avx2-asm_64.S b/arch/x86/crypto/aria-aesni-avx2-asm_64.S

index 82a14b4..c60fa29 100644 (file)
--- a/arch/x86/crypto/aria-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/aria-aesni-avx2-asm_64.S
@@ -96,7 +96,7 @@
         transpose_4x4(c0, c1, c2, c3, a0, a1);          \
         transpose_4x4(d0, d1, d2, d3, a0, a1);          \
                                                         \
-       vbroadcasti128 .Lshufb_16x16b, a0;              \
+       vbroadcasti128 .Lshufb_16x16b(%rip), a0;        \
         vmovdqu st1, a1;                                \
         vpshufb a0, a2, a2;                             \
         vpshufb a0, a3, a3;                             \
@@ -148,7 +148,7 @@
         transpose_4x4(c0, c1, c2, c3, a0, a1);          \
         transpose_4x4(d0, d1, d2, d3, a0, a1);          \
                                                         \
-       vbroadcasti128 .Lshufb_16x16b, a0;              \
+       vbroadcasti128 .Lshufb_16x16b(%rip), a0;        \
         vmovdqu st1, a1;                                \
         vpshufb a0, a2, a2;                             \
         vpshufb a0, a3, a3;                             \
@@ -307,11 +307,11 @@
                             x4, x5, x6, x7,             \
                             t0, t1, t2, t3,             \
                             t4, t5, t6, t7)             \
-       vpbroadcastq .Ltf_s2_bitmatrix, t0;             \
-       vpbroadcastq .Ltf_inv_bitmatrix, t1;            \
-       vpbroadcastq .Ltf_id_bitmatrix, t2;             \
-       vpbroadcastq .Ltf_aff_bitmatrix, t3;            \
-       vpbroadcastq .Ltf_x2_bitmatrix, t4;             \
+       vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0;       \
+       vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1;      \
+       vpbroadcastq .Ltf_id_bitmatrix(%rip), t2;       \
+       vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3;      \
+       vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4;       \
         vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1;   \
         vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5;   \
         vgf2p8affineqb $(tf_inv_const), t1, x2, x2;     \
@@ -332,12 +332,12 @@
                        t4, t5, t6, t7)                  \
         vpxor t7, t7, t7;                               \
         vpxor t6, t6, t6;                               \
-       vbroadcasti128 .Linv_shift_row, t0;             \
-       vbroadcasti128 .Lshift_row, t1;                 \
-       vbroadcasti128 .Ltf_lo__inv_aff__and__s2, t2;   \
-       vbroadcasti128 .Ltf_hi__inv_aff__and__s2, t3;   \
-       vbroadcasti128 .Ltf_lo__x2__and__fwd_aff, t4;   \
-       vbroadcasti128 .Ltf_hi__x2__and__fwd_aff, t5;   \
+       vbroadcasti128 .Linv_shift_row(%rip), t0;       \
+       vbroadcasti128 .Lshift_row(%rip), t1;           \
+       vbroadcasti128 .Ltf_lo__inv_aff__and__s2(%rip), t2; \
+       vbroadcasti128 .Ltf_hi__inv_aff__and__s2(%rip), t3; \
+       vbroadcasti128 .Ltf_lo__x2__and__fwd_aff(%rip), t4; \
+       vbroadcasti128 .Ltf_hi__x2__and__fwd_aff(%rip), t5; \
                                                         \
         vextracti128 $1, x0, t6##_x;                    \
         vaesenclast t7##_x, x0##_x, x0##_x;             \
@@ -369,7 +369,7 @@
         vaesdeclast t7##_x, t6##_x, t6##_x;             \
         vinserti128 $1, t6##_x, x6, x6;                 \
                                                         \
-       vpbroadcastd .L0f0f0f0f, t6;                    \
+       vpbroadcastd .L0f0f0f0f(%rip), t6;              \
                                                         \
         /* AES inverse shift rows */                    \
         vpshufb t0, x0, x0;                             \
diff --git a/arch/x86/crypto/aria-gfni-avx512-asm_64.S b/arch/x86/crypto/aria-gfni-avx512-asm_64.S

index 3193f07..860887e 100644 (file)
--- a/arch/x86/crypto/aria-gfni-avx512-asm_64.S
+++ b/arch/x86/crypto/aria-gfni-avx512-asm_64.S
@@ -80,7 +80,7 @@
         transpose_4x4(c0, c1, c2, c3, a0, a1);          \
         transpose_4x4(d0, d1, d2, d3, a0, a1);          \
                                                         \
-       vbroadcasti64x2 .Lshufb_16x16b, a0;             \
+       vbroadcasti64x2 .Lshufb_16x16b(%rip), a0;       \
         vmovdqu64 st1, a1;                              \
         vpshufb a0, a2, a2;                             \
         vpshufb a0, a3, a3;                             \
@@ -132,7 +132,7 @@
         transpose_4x4(c0, c1, c2, c3, a0, a1);          \
         transpose_4x4(d0, d1, d2, d3, a0, a1);          \
                                                         \
-       vbroadcasti64x2 .Lshufb_16x16b, a0;             \
+       vbroadcasti64x2 .Lshufb_16x16b(%rip), a0;       \
         vmovdqu64 st1, a1;                              \
         vpshufb a0, a2, a2;                             \
         vpshufb a0, a3, a3;                             \
@@ -308,11 +308,11 @@
                             x4, x5, x6, x7,             \
                             t0, t1, t2, t3,             \
                             t4, t5, t6, t7)             \
-       vpbroadcastq .Ltf_s2_bitmatrix, t0;             \
-       vpbroadcastq .Ltf_inv_bitmatrix, t1;            \
-       vpbroadcastq .Ltf_id_bitmatrix, t2;             \
-       vpbroadcastq .Ltf_aff_bitmatrix, t3;            \
-       vpbroadcastq .Ltf_x2_bitmatrix, t4;             \
+       vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0;       \
+       vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1;      \
+       vpbroadcastq .Ltf_id_bitmatrix(%rip), t2;       \
+       vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3;      \
+       vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4;       \
         vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1;   \
         vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5;   \
         vgf2p8affineqb $(tf_inv_const), t1, x2, x2;     \
@@ -332,11 +332,11 @@
                              y4, y5, y6, y7,            \
                              t0, t1, t2, t3,            \
                              t4, t5, t6, t7)            \
-       vpbroadcastq .Ltf_s2_bitmatrix, t0;             \
-       vpbroadcastq .Ltf_inv_bitmatrix, t1;            \
-       vpbroadcastq .Ltf_id_bitmatrix, t2;             \
-       vpbroadcastq .Ltf_aff_bitmatrix, t3;            \
-       vpbroadcastq .Ltf_x2_bitmatrix, t4;             \
+       vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0;       \
+       vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1;      \
+       vpbroadcastq .Ltf_id_bitmatrix(%rip), t2;       \
+       vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3;      \
+       vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4;       \
         vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1;   \
         vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5;   \
         vgf2p8affineqb $(tf_inv_const), t1, x2, x2;     \
author	Ard Biesheuvel <ardb@kernel.org>
	Wed, 12 Apr 2023 11:00:25 +0000 (13:00 +0200)
committer	Herbert Xu <herbert@gondor.apana.org.au>
	Thu, 20 Apr 2023 10:20:04 +0000 (18:20 +0800)
arch/x86/crypto/aria-aesni-avx-asm_64.S		patch | blob | history
arch/x86/crypto/aria-aesni-avx2-asm_64.S		patch | blob | history
arch/x86/crypto/aria-gfni-avx512-asm_64.S		patch | blob | history