crypto: x86/aria - Use RIP-relative addressing
author Ard Biesheuvel <ardb@kernel.org>
Wed, 12 Apr 2023 11:00:25 +0000 (13:00 +0200)
committer Herbert Xu <herbert@gondor.apana.org.au>
Thu, 20 Apr 2023 10:20:04 +0000 (18:20 +0800)
Prefer RIP-relative addressing where possible, which removes the need
for boot-time relocation fixups.
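
For illustration only (not part of the patch), a minimal sketch of the
two addressing forms, using a hypothetical constant .Lexample_mask as a
stand-in for the lookup tables touched below:

	.section .rodata.cst16, "aM", @progbits, 16
	.align	16
.Lexample_mask:		/* hypothetical 16-byte constant */
	.octa	0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f

	.text
	/* Absolute form: encodes a 32-bit absolute address that must be
	 * patched at boot when the kernel image is relocated (e.g. KASLR). */
	vmovdqa	.Lexample_mask, %xmm0

	/* RIP-relative form: encodes a displacement from the end of the
	 * instruction, so no boot-time relocation fixup is needed. */
	vmovdqa	.Lexample_mask(%rip), %xmm0

The bare-symbol form leaves an absolute relocation in the image that
has to be adjusted whenever the kernel is moved; the %rip-relative form
is position independent, so no such fixup entry is emitted.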

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/x86/crypto/aria-aesni-avx-asm_64.S
arch/x86/crypto/aria-aesni-avx2-asm_64.S
arch/x86/crypto/aria-gfni-avx512-asm_64.S

diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S
index 9243f62..7c1abc5 100644
@@ -80,7 +80,7 @@
        transpose_4x4(c0, c1, c2, c3, a0, a1);          \
        transpose_4x4(d0, d1, d2, d3, a0, a1);          \
                                                        \
-       vmovdqu .Lshufb_16x16b, a0;                     \
+       vmovdqu .Lshufb_16x16b(%rip), a0;               \
        vmovdqu st1, a1;                                \
        vpshufb a0, a2, a2;                             \
        vpshufb a0, a3, a3;                             \
        transpose_4x4(c0, c1, c2, c3, a0, a1);          \
        transpose_4x4(d0, d1, d2, d3, a0, a1);          \
                                                        \
-       vmovdqu .Lshufb_16x16b, a0;                     \
+       vmovdqu .Lshufb_16x16b(%rip), a0;               \
        vmovdqu st1, a1;                                \
        vpshufb a0, a2, a2;                             \
        vpshufb a0, a3, a3;                             \
                            x4, x5, x6, x7,             \
                            t0, t1, t2, t3,             \
                            t4, t5, t6, t7)             \
-       vmovdqa .Ltf_s2_bitmatrix, t0;                  \
-       vmovdqa .Ltf_inv_bitmatrix, t1;                 \
-       vmovdqa .Ltf_id_bitmatrix, t2;                  \
-       vmovdqa .Ltf_aff_bitmatrix, t3;                 \
-       vmovdqa .Ltf_x2_bitmatrix, t4;                  \
+       vmovdqa .Ltf_s2_bitmatrix(%rip), t0;            \
+       vmovdqa .Ltf_inv_bitmatrix(%rip), t1;           \
+       vmovdqa .Ltf_id_bitmatrix(%rip), t2;            \
+       vmovdqa .Ltf_aff_bitmatrix(%rip), t3;           \
+       vmovdqa .Ltf_x2_bitmatrix(%rip), t4;            \
        vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1;   \
        vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5;   \
        vgf2p8affineqb $(tf_inv_const), t1, x2, x2;     \
                       x4, x5, x6, x7,                  \
                       t0, t1, t2, t3,                  \
                       t4, t5, t6, t7)                  \
-       vmovdqa .Linv_shift_row, t0;                    \
-       vmovdqa .Lshift_row, t1;                        \
-       vbroadcastss .L0f0f0f0f, t6;                    \
-       vmovdqa .Ltf_lo__inv_aff__and__s2, t2;          \
-       vmovdqa .Ltf_hi__inv_aff__and__s2, t3;          \
-       vmovdqa .Ltf_lo__x2__and__fwd_aff, t4;          \
-       vmovdqa .Ltf_hi__x2__and__fwd_aff, t5;          \
+       vmovdqa .Linv_shift_row(%rip), t0;              \
+       vmovdqa .Lshift_row(%rip), t1;                  \
+       vbroadcastss .L0f0f0f0f(%rip), t6;              \
+       vmovdqa .Ltf_lo__inv_aff__and__s2(%rip), t2;    \
+       vmovdqa .Ltf_hi__inv_aff__and__s2(%rip), t3;    \
+       vmovdqa .Ltf_lo__x2__and__fwd_aff(%rip), t4;    \
+       vmovdqa .Ltf_hi__x2__and__fwd_aff(%rip), t5;    \
                                                        \
        vaesenclast t7, x0, x0;                         \
        vaesenclast t7, x4, x4;                         \
diff --git a/arch/x86/crypto/aria-aesni-avx2-asm_64.S b/arch/x86/crypto/aria-aesni-avx2-asm_64.S
index 82a14b4..c60fa29 100644
@@ -96,7 +96,7 @@
        transpose_4x4(c0, c1, c2, c3, a0, a1);          \
        transpose_4x4(d0, d1, d2, d3, a0, a1);          \
                                                        \
-       vbroadcasti128 .Lshufb_16x16b, a0;              \
+       vbroadcasti128 .Lshufb_16x16b(%rip), a0;        \
        vmovdqu st1, a1;                                \
        vpshufb a0, a2, a2;                             \
        vpshufb a0, a3, a3;                             \
        transpose_4x4(c0, c1, c2, c3, a0, a1);          \
        transpose_4x4(d0, d1, d2, d3, a0, a1);          \
                                                        \
-       vbroadcasti128 .Lshufb_16x16b, a0;              \
+       vbroadcasti128 .Lshufb_16x16b(%rip), a0;        \
        vmovdqu st1, a1;                                \
        vpshufb a0, a2, a2;                             \
        vpshufb a0, a3, a3;                             \
                            x4, x5, x6, x7,             \
                            t0, t1, t2, t3,             \
                            t4, t5, t6, t7)             \
-       vpbroadcastq .Ltf_s2_bitmatrix, t0;             \
-       vpbroadcastq .Ltf_inv_bitmatrix, t1;            \
-       vpbroadcastq .Ltf_id_bitmatrix, t2;             \
-       vpbroadcastq .Ltf_aff_bitmatrix, t3;            \
-       vpbroadcastq .Ltf_x2_bitmatrix, t4;             \
+       vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0;       \
+       vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1;      \
+       vpbroadcastq .Ltf_id_bitmatrix(%rip), t2;       \
+       vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3;      \
+       vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4;       \
        vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1;   \
        vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5;   \
        vgf2p8affineqb $(tf_inv_const), t1, x2, x2;     \
                       t4, t5, t6, t7)                  \
        vpxor t7, t7, t7;                               \
        vpxor t6, t6, t6;                               \
-       vbroadcasti128 .Linv_shift_row, t0;             \
-       vbroadcasti128 .Lshift_row, t1;                 \
-       vbroadcasti128 .Ltf_lo__inv_aff__and__s2, t2;   \
-       vbroadcasti128 .Ltf_hi__inv_aff__and__s2, t3;   \
-       vbroadcasti128 .Ltf_lo__x2__and__fwd_aff, t4;   \
-       vbroadcasti128 .Ltf_hi__x2__and__fwd_aff, t5;   \
+       vbroadcasti128 .Linv_shift_row(%rip), t0;       \
+       vbroadcasti128 .Lshift_row(%rip), t1;           \
+       vbroadcasti128 .Ltf_lo__inv_aff__and__s2(%rip), t2; \
+       vbroadcasti128 .Ltf_hi__inv_aff__and__s2(%rip), t3; \
+       vbroadcasti128 .Ltf_lo__x2__and__fwd_aff(%rip), t4; \
+       vbroadcasti128 .Ltf_hi__x2__and__fwd_aff(%rip), t5; \
                                                        \
        vextracti128 $1, x0, t6##_x;                    \
        vaesenclast t7##_x, x0##_x, x0##_x;             \
        vaesdeclast t7##_x, t6##_x, t6##_x;             \
        vinserti128 $1, t6##_x, x6, x6;                 \
                                                        \
-       vpbroadcastd .L0f0f0f0f, t6;                    \
+       vpbroadcastd .L0f0f0f0f(%rip), t6;              \
                                                        \
        /* AES inverse shift rows */                    \
        vpshufb t0, x0, x0;                             \
diff --git a/arch/x86/crypto/aria-gfni-avx512-asm_64.S b/arch/x86/crypto/aria-gfni-avx512-asm_64.S
index 3193f07..860887e 100644
@@ -80,7 +80,7 @@
        transpose_4x4(c0, c1, c2, c3, a0, a1);          \
        transpose_4x4(d0, d1, d2, d3, a0, a1);          \
                                                        \
-       vbroadcasti64x2 .Lshufb_16x16b, a0;             \
+       vbroadcasti64x2 .Lshufb_16x16b(%rip), a0;       \
        vmovdqu64 st1, a1;                              \
        vpshufb a0, a2, a2;                             \
        vpshufb a0, a3, a3;                             \
        transpose_4x4(c0, c1, c2, c3, a0, a1);          \
        transpose_4x4(d0, d1, d2, d3, a0, a1);          \
                                                        \
-       vbroadcasti64x2 .Lshufb_16x16b, a0;             \
+       vbroadcasti64x2 .Lshufb_16x16b(%rip), a0;       \
        vmovdqu64 st1, a1;                              \
        vpshufb a0, a2, a2;                             \
        vpshufb a0, a3, a3;                             \
                            x4, x5, x6, x7,             \
                            t0, t1, t2, t3,             \
                            t4, t5, t6, t7)             \
-       vpbroadcastq .Ltf_s2_bitmatrix, t0;             \
-       vpbroadcastq .Ltf_inv_bitmatrix, t1;            \
-       vpbroadcastq .Ltf_id_bitmatrix, t2;             \
-       vpbroadcastq .Ltf_aff_bitmatrix, t3;            \
-       vpbroadcastq .Ltf_x2_bitmatrix, t4;             \
+       vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0;       \
+       vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1;      \
+       vpbroadcastq .Ltf_id_bitmatrix(%rip), t2;       \
+       vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3;      \
+       vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4;       \
        vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1;   \
        vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5;   \
        vgf2p8affineqb $(tf_inv_const), t1, x2, x2;     \
                             y4, y5, y6, y7,            \
                             t0, t1, t2, t3,            \
                             t4, t5, t6, t7)            \
-       vpbroadcastq .Ltf_s2_bitmatrix, t0;             \
-       vpbroadcastq .Ltf_inv_bitmatrix, t1;            \
-       vpbroadcastq .Ltf_id_bitmatrix, t2;             \
-       vpbroadcastq .Ltf_aff_bitmatrix, t3;            \
-       vpbroadcastq .Ltf_x2_bitmatrix, t4;             \
+       vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0;       \
+       vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1;      \
+       vpbroadcastq .Ltf_id_bitmatrix(%rip), t2;       \
+       vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3;      \
+       vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4;       \
        vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1;   \
        vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5;   \
        vgf2p8affineqb $(tf_inv_const), t1, x2, x2;     \